{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1000694927032661, "eval_steps": 500, "global_step": 2375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000463284688441047, "grad_norm": 1.1484375, "learning_rate": 0.0, "loss": 1.3776911497116089, "step": 1 }, { "epoch": 0.000926569376882094, "grad_norm": 1.90625, "learning_rate": 6.31578947368421e-07, "loss": 1.3288359642028809, "step": 2 }, { "epoch": 0.001389854065323141, "grad_norm": 1.4765625, "learning_rate": 1.263157894736842e-06, "loss": 1.3400298357009888, "step": 3 }, { "epoch": 0.001853138753764188, "grad_norm": 1.1796875, "learning_rate": 1.8947368421052632e-06, "loss": 1.2718102931976318, "step": 4 }, { "epoch": 0.0023164234422052353, "grad_norm": 1.53125, "learning_rate": 2.526315789473684e-06, "loss": 1.3158093690872192, "step": 5 }, { "epoch": 0.002779708130646282, "grad_norm": 1.53125, "learning_rate": 3.157894736842105e-06, "loss": 1.3021347522735596, "step": 6 }, { "epoch": 0.0032429928190873293, "grad_norm": 1.4921875, "learning_rate": 3.7894736842105264e-06, "loss": 1.3042571544647217, "step": 7 }, { "epoch": 0.003706277507528376, "grad_norm": 1.515625, "learning_rate": 4.4210526315789476e-06, "loss": 1.5282930135726929, "step": 8 }, { "epoch": 0.004169562195969423, "grad_norm": 1.4921875, "learning_rate": 5.052631578947368e-06, "loss": 1.1924694776535034, "step": 9 }, { "epoch": 0.0046328468844104706, "grad_norm": 1.5625, "learning_rate": 5.684210526315789e-06, "loss": 1.2525532245635986, "step": 10 }, { "epoch": 0.005096131572851517, "grad_norm": 1.2890625, "learning_rate": 6.31578947368421e-06, "loss": 1.354798436164856, "step": 11 }, { "epoch": 0.005559416261292564, "grad_norm": 1.359375, "learning_rate": 6.947368421052632e-06, "loss": 1.2614648342132568, "step": 12 }, { "epoch": 0.006022700949733611, "grad_norm": 1.59375, "learning_rate": 7.578947368421053e-06, "loss": 1.441209077835083, "step": 13 }, { "epoch": 0.006485985638174659, "grad_norm": 1.0546875, "learning_rate": 8.210526315789475e-06, "loss": 1.2802681922912598, "step": 14 }, { "epoch": 0.006949270326615705, "grad_norm": 1.265625, "learning_rate": 8.842105263157895e-06, "loss": 1.3814109563827515, "step": 15 }, { "epoch": 0.007412555015056752, "grad_norm": 1.2421875, "learning_rate": 9.473684210526315e-06, "loss": 1.0409232378005981, "step": 16 }, { "epoch": 0.007875839703497799, "grad_norm": 1.015625, "learning_rate": 1.0105263157894736e-05, "loss": 1.2293620109558105, "step": 17 }, { "epoch": 0.008339124391938846, "grad_norm": 1.0390625, "learning_rate": 1.0736842105263158e-05, "loss": 1.2468554973602295, "step": 18 }, { "epoch": 0.008802409080379893, "grad_norm": 0.91796875, "learning_rate": 1.1368421052631578e-05, "loss": 1.2527501583099365, "step": 19 }, { "epoch": 0.009265693768820941, "grad_norm": 1.140625, "learning_rate": 1.2e-05, "loss": 1.2057502269744873, "step": 20 }, { "epoch": 0.009728978457261988, "grad_norm": 1.078125, "learning_rate": 1.263157894736842e-05, "loss": 1.190750241279602, "step": 21 }, { "epoch": 0.010192263145703035, "grad_norm": 1.0, "learning_rate": 1.3263157894736844e-05, "loss": 1.2279465198516846, "step": 22 }, { "epoch": 0.010655547834144082, "grad_norm": 0.8828125, "learning_rate": 1.3894736842105265e-05, "loss": 1.2643662691116333, "step": 23 }, { "epoch": 0.011118832522585128, "grad_norm": 1.0390625, "learning_rate": 1.4526315789473685e-05, "loss": 1.1729130744934082, "step": 24 }, { "epoch": 0.011582117211026175, "grad_norm": 1.0, "learning_rate": 1.5157894736842105e-05, "loss": 1.249855875968933, "step": 25 }, { "epoch": 0.012045401899467222, "grad_norm": 0.828125, "learning_rate": 1.578947368421053e-05, "loss": 1.2158374786376953, "step": 26 }, { "epoch": 0.01250868658790827, "grad_norm": 0.89453125, "learning_rate": 1.642105263157895e-05, "loss": 1.1104485988616943, "step": 27 }, { "epoch": 0.012971971276349317, "grad_norm": 0.828125, "learning_rate": 1.705263157894737e-05, "loss": 1.1513713598251343, "step": 28 }, { "epoch": 0.013435255964790364, "grad_norm": 0.89453125, "learning_rate": 1.768421052631579e-05, "loss": 1.1879022121429443, "step": 29 }, { "epoch": 0.01389854065323141, "grad_norm": 0.83984375, "learning_rate": 1.831578947368421e-05, "loss": 1.250070333480835, "step": 30 }, { "epoch": 0.014361825341672458, "grad_norm": 0.859375, "learning_rate": 1.894736842105263e-05, "loss": 1.1055482625961304, "step": 31 }, { "epoch": 0.014825110030113504, "grad_norm": 0.7890625, "learning_rate": 1.957894736842105e-05, "loss": 1.1359180212020874, "step": 32 }, { "epoch": 0.015288394718554551, "grad_norm": 0.83203125, "learning_rate": 2.0210526315789472e-05, "loss": 1.426615595817566, "step": 33 }, { "epoch": 0.015751679406995598, "grad_norm": 0.828125, "learning_rate": 2.0842105263157895e-05, "loss": 1.1893213987350464, "step": 34 }, { "epoch": 0.016214964095436647, "grad_norm": 0.84765625, "learning_rate": 2.1473684210526316e-05, "loss": 1.2659728527069092, "step": 35 }, { "epoch": 0.01667824878387769, "grad_norm": 0.7734375, "learning_rate": 2.2105263157894736e-05, "loss": 1.2023910284042358, "step": 36 }, { "epoch": 0.01714153347231874, "grad_norm": 0.8203125, "learning_rate": 2.2736842105263157e-05, "loss": 1.4574058055877686, "step": 37 }, { "epoch": 0.017604818160759785, "grad_norm": 0.90625, "learning_rate": 2.336842105263158e-05, "loss": 0.9998283386230469, "step": 38 }, { "epoch": 0.018068102849200834, "grad_norm": 0.81640625, "learning_rate": 2.4e-05, "loss": 1.094596266746521, "step": 39 }, { "epoch": 0.018531387537641882, "grad_norm": 0.80859375, "learning_rate": 2.4631578947368424e-05, "loss": 1.2685940265655518, "step": 40 }, { "epoch": 0.018994672226082927, "grad_norm": 0.86328125, "learning_rate": 2.526315789473684e-05, "loss": 1.370650053024292, "step": 41 }, { "epoch": 0.019457956914523976, "grad_norm": 0.9140625, "learning_rate": 2.5894736842105265e-05, "loss": 1.0709939002990723, "step": 42 }, { "epoch": 0.01992124160296502, "grad_norm": 0.73046875, "learning_rate": 2.652631578947369e-05, "loss": 1.2332732677459717, "step": 43 }, { "epoch": 0.02038452629140607, "grad_norm": 0.81640625, "learning_rate": 2.7157894736842106e-05, "loss": 1.1283347606658936, "step": 44 }, { "epoch": 0.020847810979847115, "grad_norm": 0.87109375, "learning_rate": 2.778947368421053e-05, "loss": 1.050850749015808, "step": 45 }, { "epoch": 0.021311095668288163, "grad_norm": 0.74609375, "learning_rate": 2.8421052631578946e-05, "loss": 1.1958825588226318, "step": 46 }, { "epoch": 0.02177438035672921, "grad_norm": 0.7265625, "learning_rate": 2.905263157894737e-05, "loss": 1.439139485359192, "step": 47 }, { "epoch": 0.022237665045170257, "grad_norm": 0.7109375, "learning_rate": 2.968421052631579e-05, "loss": 1.2316217422485352, "step": 48 }, { "epoch": 0.022700949733611305, "grad_norm": 0.79296875, "learning_rate": 3.031578947368421e-05, "loss": 1.177676796913147, "step": 49 }, { "epoch": 0.02316423442205235, "grad_norm": 0.74609375, "learning_rate": 3.094736842105263e-05, "loss": 1.1291377544403076, "step": 50 }, { "epoch": 0.0236275191104934, "grad_norm": 0.83984375, "learning_rate": 3.157894736842106e-05, "loss": 1.2162253856658936, "step": 51 }, { "epoch": 0.024090803798934444, "grad_norm": 0.875, "learning_rate": 3.221052631578947e-05, "loss": 1.4145865440368652, "step": 52 }, { "epoch": 0.024554088487375492, "grad_norm": 0.859375, "learning_rate": 3.28421052631579e-05, "loss": 1.2053899765014648, "step": 53 }, { "epoch": 0.02501737317581654, "grad_norm": 0.8671875, "learning_rate": 3.347368421052631e-05, "loss": 1.406412959098816, "step": 54 }, { "epoch": 0.025480657864257586, "grad_norm": 0.828125, "learning_rate": 3.410526315789474e-05, "loss": 1.3238595724105835, "step": 55 }, { "epoch": 0.025943942552698634, "grad_norm": 0.8203125, "learning_rate": 3.473684210526316e-05, "loss": 1.089475154876709, "step": 56 }, { "epoch": 0.02640722724113968, "grad_norm": 0.85546875, "learning_rate": 3.536842105263158e-05, "loss": 0.9788758754730225, "step": 57 }, { "epoch": 0.026870511929580728, "grad_norm": 0.93359375, "learning_rate": 3.6e-05, "loss": 1.3209675550460815, "step": 58 }, { "epoch": 0.027333796618021773, "grad_norm": 0.73828125, "learning_rate": 3.663157894736842e-05, "loss": 1.0189337730407715, "step": 59 }, { "epoch": 0.02779708130646282, "grad_norm": 0.92578125, "learning_rate": 3.726315789473684e-05, "loss": 1.2801414728164673, "step": 60 }, { "epoch": 0.028260365994903867, "grad_norm": 0.765625, "learning_rate": 3.789473684210526e-05, "loss": 1.1465449333190918, "step": 61 }, { "epoch": 0.028723650683344915, "grad_norm": 0.87890625, "learning_rate": 3.852631578947369e-05, "loss": 1.259995937347412, "step": 62 }, { "epoch": 0.029186935371785964, "grad_norm": 0.8125, "learning_rate": 3.91578947368421e-05, "loss": 1.0937800407409668, "step": 63 }, { "epoch": 0.02965022006022701, "grad_norm": 0.8046875, "learning_rate": 3.978947368421053e-05, "loss": 1.218725562095642, "step": 64 }, { "epoch": 0.030113504748668057, "grad_norm": 0.7265625, "learning_rate": 4.0421052631578943e-05, "loss": 1.2543126344680786, "step": 65 }, { "epoch": 0.030576789437109102, "grad_norm": 0.75, "learning_rate": 4.105263157894737e-05, "loss": 1.2053662538528442, "step": 66 }, { "epoch": 0.03104007412555015, "grad_norm": 0.78515625, "learning_rate": 4.168421052631579e-05, "loss": 1.1383905410766602, "step": 67 }, { "epoch": 0.031503358813991196, "grad_norm": 0.76953125, "learning_rate": 4.231578947368421e-05, "loss": 1.1001615524291992, "step": 68 }, { "epoch": 0.031966643502432245, "grad_norm": 0.86328125, "learning_rate": 4.294736842105263e-05, "loss": 1.2164722681045532, "step": 69 }, { "epoch": 0.03242992819087329, "grad_norm": 0.78125, "learning_rate": 4.357894736842106e-05, "loss": 1.1911834478378296, "step": 70 }, { "epoch": 0.03289321287931434, "grad_norm": 0.9375, "learning_rate": 4.421052631578947e-05, "loss": 1.1898846626281738, "step": 71 }, { "epoch": 0.03335649756775538, "grad_norm": 0.79296875, "learning_rate": 4.48421052631579e-05, "loss": 1.3878438472747803, "step": 72 }, { "epoch": 0.03381978225619643, "grad_norm": 0.8515625, "learning_rate": 4.547368421052631e-05, "loss": 1.1823328733444214, "step": 73 }, { "epoch": 0.03428306694463748, "grad_norm": 0.7265625, "learning_rate": 4.610526315789474e-05, "loss": 1.1236375570297241, "step": 74 }, { "epoch": 0.03474635163307853, "grad_norm": 0.73046875, "learning_rate": 4.673684210526316e-05, "loss": 1.2263376712799072, "step": 75 }, { "epoch": 0.03520963632151957, "grad_norm": 0.7734375, "learning_rate": 4.736842105263158e-05, "loss": 1.0898189544677734, "step": 76 }, { "epoch": 0.03567292100996062, "grad_norm": 0.72265625, "learning_rate": 4.8e-05, "loss": 1.1455436944961548, "step": 77 }, { "epoch": 0.03613620569840167, "grad_norm": 0.765625, "learning_rate": 4.799997759200591e-05, "loss": 1.3289111852645874, "step": 78 }, { "epoch": 0.036599490386842716, "grad_norm": 0.7890625, "learning_rate": 4.799991036806548e-05, "loss": 1.103787899017334, "step": 79 }, { "epoch": 0.037062775075283765, "grad_norm": 0.7890625, "learning_rate": 4.799979832830423e-05, "loss": 1.1579338312149048, "step": 80 }, { "epoch": 0.037526059763724806, "grad_norm": 0.7734375, "learning_rate": 4.799964147293139e-05, "loss": 1.3283060789108276, "step": 81 }, { "epoch": 0.037989344452165855, "grad_norm": 0.86328125, "learning_rate": 4.799943980223985e-05, "loss": 0.9260512590408325, "step": 82 }, { "epoch": 0.0384526291406069, "grad_norm": 0.76171875, "learning_rate": 4.7999193316606205e-05, "loss": 1.1989682912826538, "step": 83 }, { "epoch": 0.03891591382904795, "grad_norm": 0.8046875, "learning_rate": 4.799890201649072e-05, "loss": 1.2116103172302246, "step": 84 }, { "epoch": 0.039379198517489, "grad_norm": 0.86328125, "learning_rate": 4.7998565902437354e-05, "loss": 1.2562410831451416, "step": 85 }, { "epoch": 0.03984248320593004, "grad_norm": 0.80078125, "learning_rate": 4.799818497507374e-05, "loss": 1.1803618669509888, "step": 86 }, { "epoch": 0.04030576789437109, "grad_norm": 0.78125, "learning_rate": 4.79977592351112e-05, "loss": 1.193110466003418, "step": 87 }, { "epoch": 0.04076905258281214, "grad_norm": 0.74609375, "learning_rate": 4.799728868334472e-05, "loss": 1.1182022094726562, "step": 88 }, { "epoch": 0.04123233727125319, "grad_norm": 0.78515625, "learning_rate": 4.799677332065299e-05, "loss": 1.052176833152771, "step": 89 }, { "epoch": 0.04169562195969423, "grad_norm": 0.80078125, "learning_rate": 4.799621314799836e-05, "loss": 1.3748915195465088, "step": 90 }, { "epoch": 0.04215890664813528, "grad_norm": 0.75, "learning_rate": 4.799560816642687e-05, "loss": 1.1480522155761719, "step": 91 }, { "epoch": 0.042622191336576326, "grad_norm": 0.73046875, "learning_rate": 4.79949583770682e-05, "loss": 1.0132099390029907, "step": 92 }, { "epoch": 0.043085476025017375, "grad_norm": 0.83203125, "learning_rate": 4.799426378113573e-05, "loss": 1.1310032606124878, "step": 93 }, { "epoch": 0.04354876071345842, "grad_norm": 0.9296875, "learning_rate": 4.799352437992651e-05, "loss": 1.1506468057632446, "step": 94 }, { "epoch": 0.044012045401899465, "grad_norm": 0.76953125, "learning_rate": 4.7992740174821246e-05, "loss": 0.833928108215332, "step": 95 }, { "epoch": 0.04447533009034051, "grad_norm": 0.734375, "learning_rate": 4.79919111672843e-05, "loss": 1.064217209815979, "step": 96 }, { "epoch": 0.04493861477878156, "grad_norm": 0.80859375, "learning_rate": 4.799103735886371e-05, "loss": 1.0427517890930176, "step": 97 }, { "epoch": 0.04540189946722261, "grad_norm": 0.90625, "learning_rate": 4.7990118751191185e-05, "loss": 1.0797550678253174, "step": 98 }, { "epoch": 0.04586518415566365, "grad_norm": 1.0234375, "learning_rate": 4.798915534598205e-05, "loss": 1.0864336490631104, "step": 99 }, { "epoch": 0.0463284688441047, "grad_norm": 0.9609375, "learning_rate": 4.79881471450353e-05, "loss": 1.1602739095687866, "step": 100 }, { "epoch": 0.04679175353254575, "grad_norm": 0.82421875, "learning_rate": 4.79870941502336e-05, "loss": 1.045765995979309, "step": 101 }, { "epoch": 0.0472550382209868, "grad_norm": 0.77734375, "learning_rate": 4.798599636354323e-05, "loss": 1.2531412839889526, "step": 102 }, { "epoch": 0.047718322909427846, "grad_norm": 0.8125, "learning_rate": 4.7984853787014124e-05, "loss": 1.208916425704956, "step": 103 }, { "epoch": 0.04818160759786889, "grad_norm": 0.81640625, "learning_rate": 4.798366642277986e-05, "loss": 1.238208532333374, "step": 104 }, { "epoch": 0.048644892286309936, "grad_norm": 0.8828125, "learning_rate": 4.7982434273057635e-05, "loss": 0.9851164817810059, "step": 105 }, { "epoch": 0.049108176974750985, "grad_norm": 0.875, "learning_rate": 4.798115734014828e-05, "loss": 1.0408838987350464, "step": 106 }, { "epoch": 0.04957146166319203, "grad_norm": 0.83984375, "learning_rate": 4.7979835626436254e-05, "loss": 1.191272497177124, "step": 107 }, { "epoch": 0.05003474635163308, "grad_norm": 0.828125, "learning_rate": 4.797846913438965e-05, "loss": 0.9605915546417236, "step": 108 }, { "epoch": 0.05049803104007412, "grad_norm": 0.73828125, "learning_rate": 4.797705786656015e-05, "loss": 1.1408090591430664, "step": 109 }, { "epoch": 0.05096131572851517, "grad_norm": 0.796875, "learning_rate": 4.797560182558307e-05, "loss": 1.277418613433838, "step": 110 }, { "epoch": 0.05142460041695622, "grad_norm": 0.859375, "learning_rate": 4.797410101417731e-05, "loss": 1.1940449476242065, "step": 111 }, { "epoch": 0.05188788510539727, "grad_norm": 0.703125, "learning_rate": 4.7972555435145395e-05, "loss": 1.1584206819534302, "step": 112 }, { "epoch": 0.05235116979383831, "grad_norm": 0.8125, "learning_rate": 4.7970965091373425e-05, "loss": 1.314054250717163, "step": 113 }, { "epoch": 0.05281445448227936, "grad_norm": 0.765625, "learning_rate": 4.796932998583113e-05, "loss": 1.155271053314209, "step": 114 }, { "epoch": 0.05327773917072041, "grad_norm": 0.87890625, "learning_rate": 4.7967650121571754e-05, "loss": 1.0596888065338135, "step": 115 }, { "epoch": 0.053741023859161456, "grad_norm": 0.8203125, "learning_rate": 4.796592550173219e-05, "loss": 1.0339300632476807, "step": 116 }, { "epoch": 0.054204308547602505, "grad_norm": 0.81640625, "learning_rate": 4.7964156129532876e-05, "loss": 1.0572959184646606, "step": 117 }, { "epoch": 0.054667593236043546, "grad_norm": 1.15625, "learning_rate": 4.796234200827781e-05, "loss": 1.2064818143844604, "step": 118 }, { "epoch": 0.055130877924484595, "grad_norm": 0.7890625, "learning_rate": 4.796048314135457e-05, "loss": 1.0486756563186646, "step": 119 }, { "epoch": 0.05559416261292564, "grad_norm": 0.78125, "learning_rate": 4.7958579532234265e-05, "loss": 1.2740678787231445, "step": 120 }, { "epoch": 0.05605744730136669, "grad_norm": 0.76171875, "learning_rate": 4.795663118447158e-05, "loss": 1.2558701038360596, "step": 121 }, { "epoch": 0.05652073198980773, "grad_norm": 0.76953125, "learning_rate": 4.7954638101704724e-05, "loss": 1.0604870319366455, "step": 122 }, { "epoch": 0.05698401667824878, "grad_norm": 0.77734375, "learning_rate": 4.7952600287655444e-05, "loss": 1.0435048341751099, "step": 123 }, { "epoch": 0.05744730136668983, "grad_norm": 0.703125, "learning_rate": 4.795051774612902e-05, "loss": 1.0488735437393188, "step": 124 }, { "epoch": 0.05791058605513088, "grad_norm": 0.8359375, "learning_rate": 4.7948390481014245e-05, "loss": 1.0075401067733765, "step": 125 }, { "epoch": 0.05837387074357193, "grad_norm": 0.7890625, "learning_rate": 4.7946218496283435e-05, "loss": 1.1233757734298706, "step": 126 }, { "epoch": 0.05883715543201297, "grad_norm": 0.83203125, "learning_rate": 4.794400179599242e-05, "loss": 1.1474615335464478, "step": 127 }, { "epoch": 0.05930044012045402, "grad_norm": 0.76171875, "learning_rate": 4.794174038428049e-05, "loss": 1.0086901187896729, "step": 128 }, { "epoch": 0.059763724808895066, "grad_norm": 0.76171875, "learning_rate": 4.793943426537048e-05, "loss": 1.119909644126892, "step": 129 }, { "epoch": 0.060227009497336115, "grad_norm": 0.765625, "learning_rate": 4.793708344356867e-05, "loss": 1.0933876037597656, "step": 130 }, { "epoch": 0.06069029418577716, "grad_norm": 0.73828125, "learning_rate": 4.793468792326482e-05, "loss": 1.424509882926941, "step": 131 }, { "epoch": 0.061153578874218205, "grad_norm": 0.8125, "learning_rate": 4.7932247708932184e-05, "loss": 1.086850643157959, "step": 132 }, { "epoch": 0.06161686356265925, "grad_norm": 0.76171875, "learning_rate": 4.7929762805127435e-05, "loss": 1.4302482604980469, "step": 133 }, { "epoch": 0.0620801482511003, "grad_norm": 0.8125, "learning_rate": 4.7927233216490726e-05, "loss": 0.9927620887756348, "step": 134 }, { "epoch": 0.06254343293954134, "grad_norm": 0.8359375, "learning_rate": 4.792465894774563e-05, "loss": 1.1411983966827393, "step": 135 }, { "epoch": 0.06300671762798239, "grad_norm": 0.76171875, "learning_rate": 4.792204000369917e-05, "loss": 1.3219720125198364, "step": 136 }, { "epoch": 0.06347000231642344, "grad_norm": 0.75390625, "learning_rate": 4.791937638924179e-05, "loss": 1.2182328701019287, "step": 137 }, { "epoch": 0.06393328700486449, "grad_norm": 0.85546875, "learning_rate": 4.7916668109347346e-05, "loss": 1.2830442190170288, "step": 138 }, { "epoch": 0.06439657169330554, "grad_norm": 0.85546875, "learning_rate": 4.791391516907309e-05, "loss": 1.1010041236877441, "step": 139 }, { "epoch": 0.06485985638174659, "grad_norm": 0.8359375, "learning_rate": 4.7911117573559676e-05, "loss": 1.0438331365585327, "step": 140 }, { "epoch": 0.06532314107018763, "grad_norm": 0.74609375, "learning_rate": 4.7908275328031156e-05, "loss": 1.039322853088379, "step": 141 }, { "epoch": 0.06578642575862868, "grad_norm": 0.82421875, "learning_rate": 4.7905388437794946e-05, "loss": 1.1718674898147583, "step": 142 }, { "epoch": 0.06624971044706972, "grad_norm": 0.75, "learning_rate": 4.7902456908241836e-05, "loss": 1.0182360410690308, "step": 143 }, { "epoch": 0.06671299513551077, "grad_norm": 0.76171875, "learning_rate": 4.789948074484594e-05, "loss": 0.9300652742385864, "step": 144 }, { "epoch": 0.06717627982395181, "grad_norm": 0.796875, "learning_rate": 4.7896459953164785e-05, "loss": 1.1588186025619507, "step": 145 }, { "epoch": 0.06763956451239286, "grad_norm": 0.8203125, "learning_rate": 4.7893394538839164e-05, "loss": 1.1683034896850586, "step": 146 }, { "epoch": 0.06810284920083391, "grad_norm": 0.73828125, "learning_rate": 4.7890284507593236e-05, "loss": 1.2006157636642456, "step": 147 }, { "epoch": 0.06856613388927496, "grad_norm": 0.83203125, "learning_rate": 4.788712986523447e-05, "loss": 1.1696548461914062, "step": 148 }, { "epoch": 0.06902941857771601, "grad_norm": 0.80078125, "learning_rate": 4.788393061765363e-05, "loss": 1.0099486112594604, "step": 149 }, { "epoch": 0.06949270326615706, "grad_norm": 0.76171875, "learning_rate": 4.7880686770824775e-05, "loss": 1.000266671180725, "step": 150 }, { "epoch": 0.0699559879545981, "grad_norm": 0.92578125, "learning_rate": 4.7877398330805246e-05, "loss": 1.1522239446640015, "step": 151 }, { "epoch": 0.07041927264303914, "grad_norm": 0.83203125, "learning_rate": 4.7874065303735655e-05, "loss": 1.11775803565979, "step": 152 }, { "epoch": 0.07088255733148019, "grad_norm": 0.76171875, "learning_rate": 4.787068769583987e-05, "loss": 1.0676116943359375, "step": 153 }, { "epoch": 0.07134584201992124, "grad_norm": 0.7890625, "learning_rate": 4.786726551342502e-05, "loss": 0.9372677206993103, "step": 154 }, { "epoch": 0.07180912670836229, "grad_norm": 0.78125, "learning_rate": 4.7863798762881446e-05, "loss": 1.0489038228988647, "step": 155 }, { "epoch": 0.07227241139680333, "grad_norm": 0.7890625, "learning_rate": 4.7860287450682735e-05, "loss": 1.2821038961410522, "step": 156 }, { "epoch": 0.07273569608524438, "grad_norm": 0.7265625, "learning_rate": 4.7856731583385665e-05, "loss": 1.3503544330596924, "step": 157 }, { "epoch": 0.07319898077368543, "grad_norm": 0.74609375, "learning_rate": 4.7853131167630235e-05, "loss": 1.1046172380447388, "step": 158 }, { "epoch": 0.07366226546212648, "grad_norm": 0.73828125, "learning_rate": 4.7849486210139616e-05, "loss": 1.5007928609848022, "step": 159 }, { "epoch": 0.07412555015056753, "grad_norm": 0.7734375, "learning_rate": 4.784579671772015e-05, "loss": 1.1768280267715454, "step": 160 }, { "epoch": 0.07458883483900858, "grad_norm": 0.703125, "learning_rate": 4.784206269726136e-05, "loss": 1.2257065773010254, "step": 161 }, { "epoch": 0.07505211952744961, "grad_norm": 0.76953125, "learning_rate": 4.7838284155735886e-05, "loss": 1.2349004745483398, "step": 162 }, { "epoch": 0.07551540421589066, "grad_norm": 0.84375, "learning_rate": 4.783446110019954e-05, "loss": 1.1083492040634155, "step": 163 }, { "epoch": 0.07597868890433171, "grad_norm": 0.83984375, "learning_rate": 4.7830593537791244e-05, "loss": 1.0492440462112427, "step": 164 }, { "epoch": 0.07644197359277276, "grad_norm": 0.8359375, "learning_rate": 4.7826681475733e-05, "loss": 1.0901589393615723, "step": 165 }, { "epoch": 0.0769052582812138, "grad_norm": 0.8359375, "learning_rate": 4.7822724921329945e-05, "loss": 1.196974515914917, "step": 166 }, { "epoch": 0.07736854296965485, "grad_norm": 0.84375, "learning_rate": 4.781872388197029e-05, "loss": 1.2492700815200806, "step": 167 }, { "epoch": 0.0778318276580959, "grad_norm": 0.890625, "learning_rate": 4.781467836512529e-05, "loss": 0.9922595620155334, "step": 168 }, { "epoch": 0.07829511234653695, "grad_norm": 0.73828125, "learning_rate": 4.781058837834929e-05, "loss": 1.2686748504638672, "step": 169 }, { "epoch": 0.078758397034978, "grad_norm": 0.77734375, "learning_rate": 4.780645392927964e-05, "loss": 0.9617519378662109, "step": 170 }, { "epoch": 0.07922168172341904, "grad_norm": 0.76953125, "learning_rate": 4.780227502563674e-05, "loss": 1.0572490692138672, "step": 171 }, { "epoch": 0.07968496641186008, "grad_norm": 0.7734375, "learning_rate": 4.7798051675223994e-05, "loss": 1.1447961330413818, "step": 172 }, { "epoch": 0.08014825110030113, "grad_norm": 0.765625, "learning_rate": 4.77937838859278e-05, "loss": 0.9723523855209351, "step": 173 }, { "epoch": 0.08061153578874218, "grad_norm": 0.83984375, "learning_rate": 4.778947166571755e-05, "loss": 1.1690819263458252, "step": 174 }, { "epoch": 0.08107482047718323, "grad_norm": 0.859375, "learning_rate": 4.778511502264559e-05, "loss": 1.043947458267212, "step": 175 }, { "epoch": 0.08153810516562428, "grad_norm": 0.8515625, "learning_rate": 4.778071396484721e-05, "loss": 1.0934100151062012, "step": 176 }, { "epoch": 0.08200138985406533, "grad_norm": 0.79296875, "learning_rate": 4.777626850054067e-05, "loss": 1.1645115613937378, "step": 177 }, { "epoch": 0.08246467454250637, "grad_norm": 0.72265625, "learning_rate": 4.7771778638027116e-05, "loss": 1.0093110799789429, "step": 178 }, { "epoch": 0.08292795923094742, "grad_norm": 0.7734375, "learning_rate": 4.7767244385690624e-05, "loss": 1.2085744142532349, "step": 179 }, { "epoch": 0.08339124391938846, "grad_norm": 0.69140625, "learning_rate": 4.776266575199815e-05, "loss": 1.048790693283081, "step": 180 }, { "epoch": 0.0838545286078295, "grad_norm": 0.734375, "learning_rate": 4.775804274549953e-05, "loss": 1.0067102909088135, "step": 181 }, { "epoch": 0.08431781329627056, "grad_norm": 0.73828125, "learning_rate": 4.775337537482744e-05, "loss": 1.0322071313858032, "step": 182 }, { "epoch": 0.0847810979847116, "grad_norm": 0.70703125, "learning_rate": 4.7748663648697436e-05, "loss": 0.8763373494148254, "step": 183 }, { "epoch": 0.08524438267315265, "grad_norm": 0.890625, "learning_rate": 4.774390757590787e-05, "loss": 1.1351971626281738, "step": 184 }, { "epoch": 0.0857076673615937, "grad_norm": 0.7734375, "learning_rate": 4.773910716533992e-05, "loss": 1.21125066280365, "step": 185 }, { "epoch": 0.08617095205003475, "grad_norm": 0.8046875, "learning_rate": 4.773426242595754e-05, "loss": 1.0625823736190796, "step": 186 }, { "epoch": 0.0866342367384758, "grad_norm": 0.80859375, "learning_rate": 4.772937336680748e-05, "loss": 1.2072420120239258, "step": 187 }, { "epoch": 0.08709752142691685, "grad_norm": 0.72265625, "learning_rate": 4.772443999701922e-05, "loss": 1.1252281665802002, "step": 188 }, { "epoch": 0.08756080611535788, "grad_norm": 0.81640625, "learning_rate": 4.771946232580503e-05, "loss": 1.1829332113265991, "step": 189 }, { "epoch": 0.08802409080379893, "grad_norm": 0.83984375, "learning_rate": 4.771444036245987e-05, "loss": 1.177690029144287, "step": 190 }, { "epoch": 0.08848737549223998, "grad_norm": 0.70703125, "learning_rate": 4.7709374116361405e-05, "loss": 1.025864839553833, "step": 191 }, { "epoch": 0.08895066018068103, "grad_norm": 1.046875, "learning_rate": 4.770426359697001e-05, "loss": 1.2552249431610107, "step": 192 }, { "epoch": 0.08941394486912208, "grad_norm": 0.91796875, "learning_rate": 4.7699108813828735e-05, "loss": 1.1388694047927856, "step": 193 }, { "epoch": 0.08987722955756312, "grad_norm": 0.7578125, "learning_rate": 4.769390977656328e-05, "loss": 1.1736036539077759, "step": 194 }, { "epoch": 0.09034051424600417, "grad_norm": 0.84765625, "learning_rate": 4.768866649488196e-05, "loss": 1.1389501094818115, "step": 195 }, { "epoch": 0.09080379893444522, "grad_norm": 0.81640625, "learning_rate": 4.768337897857572e-05, "loss": 1.0693917274475098, "step": 196 }, { "epoch": 0.09126708362288627, "grad_norm": 1.1640625, "learning_rate": 4.767804723751814e-05, "loss": 1.139711856842041, "step": 197 }, { "epoch": 0.0917303683113273, "grad_norm": 0.8125, "learning_rate": 4.767267128166534e-05, "loss": 1.021757960319519, "step": 198 }, { "epoch": 0.09219365299976835, "grad_norm": 0.8828125, "learning_rate": 4.766725112105602e-05, "loss": 1.2109063863754272, "step": 199 }, { "epoch": 0.0926569376882094, "grad_norm": 0.78515625, "learning_rate": 4.7661786765811425e-05, "loss": 1.1550836563110352, "step": 200 }, { "epoch": 0.09312022237665045, "grad_norm": 0.88671875, "learning_rate": 4.765627822613532e-05, "loss": 1.1337045431137085, "step": 201 }, { "epoch": 0.0935835070650915, "grad_norm": 0.734375, "learning_rate": 4.7650725512313996e-05, "loss": 1.1244243383407593, "step": 202 }, { "epoch": 0.09404679175353255, "grad_norm": 0.76171875, "learning_rate": 4.76451286347162e-05, "loss": 1.1743513345718384, "step": 203 }, { "epoch": 0.0945100764419736, "grad_norm": 0.74609375, "learning_rate": 4.763948760379319e-05, "loss": 1.1148113012313843, "step": 204 }, { "epoch": 0.09497336113041464, "grad_norm": 0.75, "learning_rate": 4.763380243007862e-05, "loss": 0.9122455716133118, "step": 205 }, { "epoch": 0.09543664581885569, "grad_norm": 0.76171875, "learning_rate": 4.7628073124188615e-05, "loss": 1.0933022499084473, "step": 206 }, { "epoch": 0.09589993050729674, "grad_norm": 0.8359375, "learning_rate": 4.7622299696821693e-05, "loss": 1.0184919834136963, "step": 207 }, { "epoch": 0.09636321519573778, "grad_norm": 0.76953125, "learning_rate": 4.7616482158758773e-05, "loss": 1.1238012313842773, "step": 208 }, { "epoch": 0.09682649988417882, "grad_norm": 0.71875, "learning_rate": 4.761062052086313e-05, "loss": 1.22682523727417, "step": 209 }, { "epoch": 0.09728978457261987, "grad_norm": 0.875, "learning_rate": 4.760471479408038e-05, "loss": 1.1153074502944946, "step": 210 }, { "epoch": 0.09775306926106092, "grad_norm": 0.7890625, "learning_rate": 4.7598764989438495e-05, "loss": 1.0884509086608887, "step": 211 }, { "epoch": 0.09821635394950197, "grad_norm": 0.7109375, "learning_rate": 4.7592771118047746e-05, "loss": 0.9598002433776855, "step": 212 }, { "epoch": 0.09867963863794302, "grad_norm": 0.75, "learning_rate": 4.758673319110067e-05, "loss": 1.1340510845184326, "step": 213 }, { "epoch": 0.09914292332638407, "grad_norm": 0.7890625, "learning_rate": 4.75806512198721e-05, "loss": 0.9966357946395874, "step": 214 }, { "epoch": 0.09960620801482511, "grad_norm": 0.765625, "learning_rate": 4.757452521571909e-05, "loss": 1.0271143913269043, "step": 215 }, { "epoch": 0.10006949270326616, "grad_norm": 0.78125, "learning_rate": 4.7568355190080936e-05, "loss": 0.938353419303894, "step": 216 }, { "epoch": 0.1005327773917072, "grad_norm": 0.890625, "learning_rate": 4.756214115447912e-05, "loss": 1.047834873199463, "step": 217 }, { "epoch": 0.10099606208014825, "grad_norm": 0.85546875, "learning_rate": 4.7555883120517335e-05, "loss": 1.1204979419708252, "step": 218 }, { "epoch": 0.1014593467685893, "grad_norm": 0.76953125, "learning_rate": 4.7549581099881384e-05, "loss": 1.2204188108444214, "step": 219 }, { "epoch": 0.10192263145703034, "grad_norm": 0.73828125, "learning_rate": 4.7543235104339265e-05, "loss": 1.1481391191482544, "step": 220 }, { "epoch": 0.10238591614547139, "grad_norm": 0.71484375, "learning_rate": 4.753684514574105e-05, "loss": 1.201314091682434, "step": 221 }, { "epoch": 0.10284920083391244, "grad_norm": 0.7109375, "learning_rate": 4.753041123601891e-05, "loss": 1.159132480621338, "step": 222 }, { "epoch": 0.10331248552235349, "grad_norm": 0.79296875, "learning_rate": 4.752393338718712e-05, "loss": 1.1852577924728394, "step": 223 }, { "epoch": 0.10377577021079454, "grad_norm": 0.8203125, "learning_rate": 4.7517411611341954e-05, "loss": 1.0710164308547974, "step": 224 }, { "epoch": 0.10423905489923559, "grad_norm": 0.8828125, "learning_rate": 4.7510845920661756e-05, "loss": 1.097131371498108, "step": 225 }, { "epoch": 0.10470233958767662, "grad_norm": 0.76171875, "learning_rate": 4.7504236327406854e-05, "loss": 0.9716182351112366, "step": 226 }, { "epoch": 0.10516562427611767, "grad_norm": 0.8828125, "learning_rate": 4.749758284391955e-05, "loss": 1.2137223482131958, "step": 227 }, { "epoch": 0.10562890896455872, "grad_norm": 0.7421875, "learning_rate": 4.7490885482624115e-05, "loss": 0.9825916886329651, "step": 228 }, { "epoch": 0.10609219365299977, "grad_norm": 0.73046875, "learning_rate": 4.748414425602676e-05, "loss": 1.0940011739730835, "step": 229 }, { "epoch": 0.10655547834144082, "grad_norm": 0.859375, "learning_rate": 4.7477359176715584e-05, "loss": 0.9418008923530579, "step": 230 }, { "epoch": 0.10701876302988186, "grad_norm": 0.7109375, "learning_rate": 4.747053025736061e-05, "loss": 0.9472661018371582, "step": 231 }, { "epoch": 0.10748204771832291, "grad_norm": 0.7890625, "learning_rate": 4.746365751071368e-05, "loss": 1.2847800254821777, "step": 232 }, { "epoch": 0.10794533240676396, "grad_norm": 0.76171875, "learning_rate": 4.745674094960851e-05, "loss": 1.0107430219650269, "step": 233 }, { "epoch": 0.10840861709520501, "grad_norm": 0.79296875, "learning_rate": 4.744978058696062e-05, "loss": 1.1128199100494385, "step": 234 }, { "epoch": 0.10887190178364604, "grad_norm": 1.1171875, "learning_rate": 4.744277643576733e-05, "loss": 0.9219973087310791, "step": 235 }, { "epoch": 0.10933518647208709, "grad_norm": 0.86328125, "learning_rate": 4.743572850910772e-05, "loss": 1.2181633710861206, "step": 236 }, { "epoch": 0.10979847116052814, "grad_norm": 0.765625, "learning_rate": 4.74286368201426e-05, "loss": 1.2480499744415283, "step": 237 }, { "epoch": 0.11026175584896919, "grad_norm": 0.78125, "learning_rate": 4.7421501382114536e-05, "loss": 1.171923279762268, "step": 238 }, { "epoch": 0.11072504053741024, "grad_norm": 0.78515625, "learning_rate": 4.741432220834775e-05, "loss": 1.0495820045471191, "step": 239 }, { "epoch": 0.11118832522585129, "grad_norm": 0.84765625, "learning_rate": 4.740709931224815e-05, "loss": 1.1960190534591675, "step": 240 }, { "epoch": 0.11165160991429234, "grad_norm": 0.75390625, "learning_rate": 4.73998327073033e-05, "loss": 1.1030462980270386, "step": 241 }, { "epoch": 0.11211489460273338, "grad_norm": 0.796875, "learning_rate": 4.739252240708236e-05, "loss": 1.1221550703048706, "step": 242 }, { "epoch": 0.11257817929117443, "grad_norm": 1.109375, "learning_rate": 4.738516842523609e-05, "loss": 1.2760341167449951, "step": 243 }, { "epoch": 0.11304146397961547, "grad_norm": 0.76953125, "learning_rate": 4.737777077549683e-05, "loss": 1.1421096324920654, "step": 244 }, { "epoch": 0.11350474866805652, "grad_norm": 0.734375, "learning_rate": 4.737032947167845e-05, "loss": 1.06126070022583, "step": 245 }, { "epoch": 0.11396803335649756, "grad_norm": 0.7734375, "learning_rate": 4.7362844527676346e-05, "loss": 1.1655036211013794, "step": 246 }, { "epoch": 0.11443131804493861, "grad_norm": 0.796875, "learning_rate": 4.735531595746739e-05, "loss": 1.069222092628479, "step": 247 }, { "epoch": 0.11489460273337966, "grad_norm": 0.7265625, "learning_rate": 4.7347743775109935e-05, "loss": 1.2415424585342407, "step": 248 }, { "epoch": 0.11535788742182071, "grad_norm": 0.79296875, "learning_rate": 4.734012799474377e-05, "loss": 0.9880377650260925, "step": 249 }, { "epoch": 0.11582117211026176, "grad_norm": 0.84375, "learning_rate": 4.733246863059008e-05, "loss": 1.1792749166488647, "step": 250 }, { "epoch": 0.1162844567987028, "grad_norm": 0.75, "learning_rate": 4.732476569695146e-05, "loss": 1.2084414958953857, "step": 251 }, { "epoch": 0.11674774148714386, "grad_norm": 0.71484375, "learning_rate": 4.731701920821184e-05, "loss": 1.2508437633514404, "step": 252 }, { "epoch": 0.1172110261755849, "grad_norm": 0.78125, "learning_rate": 4.73092291788365e-05, "loss": 0.9441017508506775, "step": 253 }, { "epoch": 0.11767431086402594, "grad_norm": 0.80078125, "learning_rate": 4.7301395623372014e-05, "loss": 1.1250604391098022, "step": 254 }, { "epoch": 0.11813759555246699, "grad_norm": 0.75390625, "learning_rate": 4.729351855644624e-05, "loss": 1.2054286003112793, "step": 255 }, { "epoch": 0.11860088024090804, "grad_norm": 0.79296875, "learning_rate": 4.7285597992768285e-05, "loss": 1.2127487659454346, "step": 256 }, { "epoch": 0.11906416492934908, "grad_norm": 0.70703125, "learning_rate": 4.727763394712847e-05, "loss": 1.1345865726470947, "step": 257 }, { "epoch": 0.11952744961779013, "grad_norm": 0.77734375, "learning_rate": 4.726962643439833e-05, "loss": 1.1558208465576172, "step": 258 }, { "epoch": 0.11999073430623118, "grad_norm": 0.80078125, "learning_rate": 4.726157546953055e-05, "loss": 1.0446807146072388, "step": 259 }, { "epoch": 0.12045401899467223, "grad_norm": 0.83203125, "learning_rate": 4.7253481067558954e-05, "loss": 1.1157430410385132, "step": 260 }, { "epoch": 0.12091730368311328, "grad_norm": 0.828125, "learning_rate": 4.72453432435985e-05, "loss": 1.026274561882019, "step": 261 }, { "epoch": 0.12138058837155433, "grad_norm": 0.69140625, "learning_rate": 4.7237162012845206e-05, "loss": 0.9912748336791992, "step": 262 }, { "epoch": 0.12184387305999536, "grad_norm": 0.7421875, "learning_rate": 4.7228937390576154e-05, "loss": 1.0849391222000122, "step": 263 }, { "epoch": 0.12230715774843641, "grad_norm": 0.78125, "learning_rate": 4.722066939214945e-05, "loss": 1.133726716041565, "step": 264 }, { "epoch": 0.12277044243687746, "grad_norm": 0.765625, "learning_rate": 4.72123580330042e-05, "loss": 1.0994298458099365, "step": 265 }, { "epoch": 0.1232337271253185, "grad_norm": 0.73828125, "learning_rate": 4.720400332866047e-05, "loss": 1.2458348274230957, "step": 266 }, { "epoch": 0.12369701181375956, "grad_norm": 0.7890625, "learning_rate": 4.7195605294719286e-05, "loss": 0.8411968946456909, "step": 267 }, { "epoch": 0.1241602965022006, "grad_norm": 0.8671875, "learning_rate": 4.718716394686257e-05, "loss": 1.0096313953399658, "step": 268 }, { "epoch": 0.12462358119064165, "grad_norm": 0.70703125, "learning_rate": 4.7178679300853125e-05, "loss": 0.9048255085945129, "step": 269 }, { "epoch": 0.1250868658790827, "grad_norm": 0.703125, "learning_rate": 4.7170151372534615e-05, "loss": 1.0907902717590332, "step": 270 }, { "epoch": 0.12555015056752375, "grad_norm": 0.83203125, "learning_rate": 4.71615801778315e-05, "loss": 1.1986042261123657, "step": 271 }, { "epoch": 0.12601343525596478, "grad_norm": 0.7890625, "learning_rate": 4.7152965732749085e-05, "loss": 0.93548184633255, "step": 272 }, { "epoch": 0.12647671994440585, "grad_norm": 0.78515625, "learning_rate": 4.714430805337338e-05, "loss": 0.8795110583305359, "step": 273 }, { "epoch": 0.12694000463284688, "grad_norm": 0.78515625, "learning_rate": 4.713560715587117e-05, "loss": 1.1542648077011108, "step": 274 }, { "epoch": 0.12740328932128794, "grad_norm": 0.85546875, "learning_rate": 4.7126863056489925e-05, "loss": 1.1123528480529785, "step": 275 }, { "epoch": 0.12786657400972898, "grad_norm": 0.76953125, "learning_rate": 4.7118075771557775e-05, "loss": 0.946189820766449, "step": 276 }, { "epoch": 0.12832985869817, "grad_norm": 0.78515625, "learning_rate": 4.710924531748352e-05, "loss": 1.0332181453704834, "step": 277 }, { "epoch": 0.12879314338661108, "grad_norm": 0.828125, "learning_rate": 4.7100371710756555e-05, "loss": 1.1407872438430786, "step": 278 }, { "epoch": 0.1292564280750521, "grad_norm": 0.79296875, "learning_rate": 4.709145496794685e-05, "loss": 1.0078046321868896, "step": 279 }, { "epoch": 0.12971971276349317, "grad_norm": 0.828125, "learning_rate": 4.7082495105704936e-05, "loss": 1.1784926652908325, "step": 280 }, { "epoch": 0.1301829974519342, "grad_norm": 0.81640625, "learning_rate": 4.707349214076186e-05, "loss": 1.055182695388794, "step": 281 }, { "epoch": 0.13064628214037527, "grad_norm": 0.7578125, "learning_rate": 4.706444608992915e-05, "loss": 1.2529042959213257, "step": 282 }, { "epoch": 0.1311095668288163, "grad_norm": 0.81640625, "learning_rate": 4.70553569700988e-05, "loss": 1.1405866146087646, "step": 283 }, { "epoch": 0.13157285151725737, "grad_norm": 0.7890625, "learning_rate": 4.7046224798243215e-05, "loss": 1.025738000869751, "step": 284 }, { "epoch": 0.1320361362056984, "grad_norm": 1.0078125, "learning_rate": 4.7037049591415213e-05, "loss": 1.2285195589065552, "step": 285 }, { "epoch": 0.13249942089413944, "grad_norm": 0.8984375, "learning_rate": 4.702783136674794e-05, "loss": 1.0521762371063232, "step": 286 }, { "epoch": 0.1329627055825805, "grad_norm": 0.69140625, "learning_rate": 4.70185701414549e-05, "loss": 1.0171458721160889, "step": 287 }, { "epoch": 0.13342599027102153, "grad_norm": 4.5625, "learning_rate": 4.700926593282988e-05, "loss": 1.017797589302063, "step": 288 }, { "epoch": 0.1338892749594626, "grad_norm": 0.7578125, "learning_rate": 4.699991875824693e-05, "loss": 1.098080039024353, "step": 289 }, { "epoch": 0.13435255964790363, "grad_norm": 0.87109375, "learning_rate": 4.6990528635160354e-05, "loss": 1.069311261177063, "step": 290 }, { "epoch": 0.1348158443363447, "grad_norm": 0.7265625, "learning_rate": 4.6981095581104625e-05, "loss": 1.1987462043762207, "step": 291 }, { "epoch": 0.13527912902478573, "grad_norm": 0.78515625, "learning_rate": 4.697161961369438e-05, "loss": 0.9862013459205627, "step": 292 }, { "epoch": 0.1357424137132268, "grad_norm": 0.83984375, "learning_rate": 4.696210075062443e-05, "loss": 1.101189136505127, "step": 293 }, { "epoch": 0.13620569840166782, "grad_norm": 0.796875, "learning_rate": 4.695253900966965e-05, "loss": 1.0801221132278442, "step": 294 }, { "epoch": 0.13666898309010886, "grad_norm": 0.80859375, "learning_rate": 4.694293440868499e-05, "loss": 1.0982296466827393, "step": 295 }, { "epoch": 0.13713226777854992, "grad_norm": 0.8359375, "learning_rate": 4.693328696560544e-05, "loss": 1.1208291053771973, "step": 296 }, { "epoch": 0.13759555246699096, "grad_norm": 0.78515625, "learning_rate": 4.692359669844599e-05, "loss": 1.137648344039917, "step": 297 }, { "epoch": 0.13805883715543202, "grad_norm": 0.76171875, "learning_rate": 4.691386362530158e-05, "loss": 1.225368618965149, "step": 298 }, { "epoch": 0.13852212184387305, "grad_norm": 0.8046875, "learning_rate": 4.690408776434712e-05, "loss": 1.021425485610962, "step": 299 }, { "epoch": 0.13898540653231412, "grad_norm": 0.78515625, "learning_rate": 4.6894269133837377e-05, "loss": 1.244565486907959, "step": 300 }, { "epoch": 0.13944869122075515, "grad_norm": 0.7890625, "learning_rate": 4.688440775210701e-05, "loss": 1.11764657497406, "step": 301 }, { "epoch": 0.1399119759091962, "grad_norm": 0.82421875, "learning_rate": 4.6874503637570496e-05, "loss": 1.2240279912948608, "step": 302 }, { "epoch": 0.14037526059763725, "grad_norm": 0.76171875, "learning_rate": 4.6864556808722126e-05, "loss": 1.058721899986267, "step": 303 }, { "epoch": 0.14083854528607828, "grad_norm": 0.7734375, "learning_rate": 4.685456728413593e-05, "loss": 0.9557834267616272, "step": 304 }, { "epoch": 0.14130182997451934, "grad_norm": 0.7734375, "learning_rate": 4.684453508246567e-05, "loss": 1.1231224536895752, "step": 305 }, { "epoch": 0.14176511466296038, "grad_norm": 0.7109375, "learning_rate": 4.683446022244482e-05, "loss": 1.1800084114074707, "step": 306 }, { "epoch": 0.14222839935140144, "grad_norm": 0.76953125, "learning_rate": 4.682434272288649e-05, "loss": 0.9980816841125488, "step": 307 }, { "epoch": 0.14269168403984248, "grad_norm": 0.77734375, "learning_rate": 4.681418260268341e-05, "loss": 1.141348958015442, "step": 308 }, { "epoch": 0.14315496872828354, "grad_norm": 0.69140625, "learning_rate": 4.680397988080792e-05, "loss": 1.0376156568527222, "step": 309 }, { "epoch": 0.14361825341672457, "grad_norm": 0.83203125, "learning_rate": 4.67937345763119e-05, "loss": 0.9319735765457153, "step": 310 }, { "epoch": 0.14408153810516564, "grad_norm": 0.80078125, "learning_rate": 4.678344670832673e-05, "loss": 1.093515157699585, "step": 311 }, { "epoch": 0.14454482279360667, "grad_norm": 0.8125, "learning_rate": 4.677311629606328e-05, "loss": 0.9726182222366333, "step": 312 }, { "epoch": 0.1450081074820477, "grad_norm": 0.84765625, "learning_rate": 4.6762743358811894e-05, "loss": 1.2113114595413208, "step": 313 }, { "epoch": 0.14547139217048877, "grad_norm": 0.72265625, "learning_rate": 4.675232791594227e-05, "loss": 1.0208406448364258, "step": 314 }, { "epoch": 0.1459346768589298, "grad_norm": 0.72265625, "learning_rate": 4.674186998690353e-05, "loss": 0.9950704574584961, "step": 315 }, { "epoch": 0.14639796154737086, "grad_norm": 0.7890625, "learning_rate": 4.673136959122409e-05, "loss": 1.0458511114120483, "step": 316 }, { "epoch": 0.1468612462358119, "grad_norm": 0.7265625, "learning_rate": 4.672082674851169e-05, "loss": 1.0969946384429932, "step": 317 }, { "epoch": 0.14732453092425296, "grad_norm": 0.81640625, "learning_rate": 4.6710241478453334e-05, "loss": 1.00065016746521, "step": 318 }, { "epoch": 0.147787815612694, "grad_norm": 0.71875, "learning_rate": 4.669961380081523e-05, "loss": 0.9182780981063843, "step": 319 }, { "epoch": 0.14825110030113506, "grad_norm": 0.83203125, "learning_rate": 4.6688943735442805e-05, "loss": 1.0130627155303955, "step": 320 }, { "epoch": 0.1487143849895761, "grad_norm": 0.78515625, "learning_rate": 4.667823130226061e-05, "loss": 1.091713547706604, "step": 321 }, { "epoch": 0.14917766967801716, "grad_norm": 0.80078125, "learning_rate": 4.666747652127233e-05, "loss": 1.0551024675369263, "step": 322 }, { "epoch": 0.1496409543664582, "grad_norm": 0.74609375, "learning_rate": 4.665667941256072e-05, "loss": 1.0696836709976196, "step": 323 }, { "epoch": 0.15010423905489922, "grad_norm": 0.74609375, "learning_rate": 4.664583999628757e-05, "loss": 1.0591177940368652, "step": 324 }, { "epoch": 0.1505675237433403, "grad_norm": 0.8359375, "learning_rate": 4.663495829269368e-05, "loss": 1.1695055961608887, "step": 325 }, { "epoch": 0.15103080843178132, "grad_norm": 0.8203125, "learning_rate": 4.662403432209882e-05, "loss": 1.1184823513031006, "step": 326 }, { "epoch": 0.15149409312022238, "grad_norm": 0.671875, "learning_rate": 4.661306810490168e-05, "loss": 1.0364640951156616, "step": 327 }, { "epoch": 0.15195737780866342, "grad_norm": 0.78125, "learning_rate": 4.660205966157982e-05, "loss": 1.2171732187271118, "step": 328 }, { "epoch": 0.15242066249710448, "grad_norm": 0.8515625, "learning_rate": 4.6591009012689685e-05, "loss": 1.1131620407104492, "step": 329 }, { "epoch": 0.15288394718554552, "grad_norm": 0.75, "learning_rate": 4.6579916178866506e-05, "loss": 0.9144288301467896, "step": 330 }, { "epoch": 0.15334723187398658, "grad_norm": 0.8125, "learning_rate": 4.6568781180824304e-05, "loss": 1.180692434310913, "step": 331 }, { "epoch": 0.1538105165624276, "grad_norm": 0.8125, "learning_rate": 4.655760403935581e-05, "loss": 1.1063326597213745, "step": 332 }, { "epoch": 0.15427380125086865, "grad_norm": 0.77734375, "learning_rate": 4.654638477533249e-05, "loss": 1.1317967176437378, "step": 333 }, { "epoch": 0.1547370859393097, "grad_norm": 0.8671875, "learning_rate": 4.653512340970443e-05, "loss": 1.0568040609359741, "step": 334 }, { "epoch": 0.15520037062775074, "grad_norm": 0.73828125, "learning_rate": 4.6523819963500345e-05, "loss": 1.0148340463638306, "step": 335 }, { "epoch": 0.1556636553161918, "grad_norm": 0.8046875, "learning_rate": 4.651247445782754e-05, "loss": 0.9750385284423828, "step": 336 }, { "epoch": 0.15612694000463284, "grad_norm": 0.92578125, "learning_rate": 4.650108691387185e-05, "loss": 1.0633890628814697, "step": 337 }, { "epoch": 0.1565902246930739, "grad_norm": 0.81640625, "learning_rate": 4.648965735289761e-05, "loss": 1.223706603050232, "step": 338 }, { "epoch": 0.15705350938151494, "grad_norm": 0.78125, "learning_rate": 4.647818579624761e-05, "loss": 1.2269283533096313, "step": 339 }, { "epoch": 0.157516794069956, "grad_norm": 0.7109375, "learning_rate": 4.6466672265343056e-05, "loss": 0.990770697593689, "step": 340 }, { "epoch": 0.15798007875839704, "grad_norm": 0.765625, "learning_rate": 4.645511678168356e-05, "loss": 1.1373369693756104, "step": 341 }, { "epoch": 0.15844336344683807, "grad_norm": 0.90625, "learning_rate": 4.644351936684705e-05, "loss": 1.106075406074524, "step": 342 }, { "epoch": 0.15890664813527913, "grad_norm": 0.74609375, "learning_rate": 4.643188004248975e-05, "loss": 0.9842250943183899, "step": 343 }, { "epoch": 0.15936993282372017, "grad_norm": 0.81640625, "learning_rate": 4.642019883034617e-05, "loss": 1.1008222103118896, "step": 344 }, { "epoch": 0.15983321751216123, "grad_norm": 0.75, "learning_rate": 4.640847575222901e-05, "loss": 1.127953052520752, "step": 345 }, { "epoch": 0.16029650220060226, "grad_norm": 0.95703125, "learning_rate": 4.6396710830029164e-05, "loss": 1.2000129222869873, "step": 346 }, { "epoch": 0.16075978688904333, "grad_norm": 0.96484375, "learning_rate": 4.638490408571564e-05, "loss": 1.0962949991226196, "step": 347 }, { "epoch": 0.16122307157748436, "grad_norm": 0.77734375, "learning_rate": 4.637305554133559e-05, "loss": 1.0607415437698364, "step": 348 }, { "epoch": 0.16168635626592542, "grad_norm": 0.71875, "learning_rate": 4.636116521901417e-05, "loss": 1.0603266954421997, "step": 349 }, { "epoch": 0.16214964095436646, "grad_norm": 0.80078125, "learning_rate": 4.6349233140954573e-05, "loss": 1.084631085395813, "step": 350 }, { "epoch": 0.1626129256428075, "grad_norm": 0.7421875, "learning_rate": 4.633725932943795e-05, "loss": 1.1179983615875244, "step": 351 }, { "epoch": 0.16307621033124856, "grad_norm": 0.7578125, "learning_rate": 4.632524380682341e-05, "loss": 0.9485760927200317, "step": 352 }, { "epoch": 0.1635394950196896, "grad_norm": 0.83984375, "learning_rate": 4.631318659554793e-05, "loss": 0.8660714626312256, "step": 353 }, { "epoch": 0.16400277970813065, "grad_norm": 0.90234375, "learning_rate": 4.6301087718126324e-05, "loss": 1.2922559976577759, "step": 354 }, { "epoch": 0.1644660643965717, "grad_norm": 0.8984375, "learning_rate": 4.628894719715124e-05, "loss": 1.1782947778701782, "step": 355 }, { "epoch": 0.16492934908501275, "grad_norm": 0.7890625, "learning_rate": 4.627676505529306e-05, "loss": 1.32277512550354, "step": 356 }, { "epoch": 0.16539263377345378, "grad_norm": 0.8359375, "learning_rate": 4.62645413152999e-05, "loss": 1.2258048057556152, "step": 357 }, { "epoch": 0.16585591846189485, "grad_norm": 0.84375, "learning_rate": 4.6252275999997546e-05, "loss": 1.1965945959091187, "step": 358 }, { "epoch": 0.16631920315033588, "grad_norm": 0.75390625, "learning_rate": 4.6239969132289436e-05, "loss": 1.1148847341537476, "step": 359 }, { "epoch": 0.16678248783877692, "grad_norm": 0.87109375, "learning_rate": 4.622762073515658e-05, "loss": 1.1224826574325562, "step": 360 }, { "epoch": 0.16724577252721798, "grad_norm": 0.796875, "learning_rate": 4.621523083165755e-05, "loss": 1.1334441900253296, "step": 361 }, { "epoch": 0.167709057215659, "grad_norm": 0.78515625, "learning_rate": 4.620279944492841e-05, "loss": 1.0212844610214233, "step": 362 }, { "epoch": 0.16817234190410008, "grad_norm": 0.78125, "learning_rate": 4.619032659818271e-05, "loss": 1.0923480987548828, "step": 363 }, { "epoch": 0.1686356265925411, "grad_norm": 0.828125, "learning_rate": 4.61778123147114e-05, "loss": 1.0250271558761597, "step": 364 }, { "epoch": 0.16909891128098217, "grad_norm": 0.765625, "learning_rate": 4.616525661788281e-05, "loss": 1.1997897624969482, "step": 365 }, { "epoch": 0.1695621959694232, "grad_norm": 0.7578125, "learning_rate": 4.6152659531142605e-05, "loss": 1.0714635848999023, "step": 366 }, { "epoch": 0.17002548065786427, "grad_norm": 0.875, "learning_rate": 4.614002107801375e-05, "loss": 1.1388036012649536, "step": 367 }, { "epoch": 0.1704887653463053, "grad_norm": 0.7421875, "learning_rate": 4.612734128209643e-05, "loss": 1.0413213968276978, "step": 368 }, { "epoch": 0.17095205003474634, "grad_norm": 0.72265625, "learning_rate": 4.6114620167068055e-05, "loss": 1.0464006662368774, "step": 369 }, { "epoch": 0.1714153347231874, "grad_norm": 0.8125, "learning_rate": 4.610185775668317e-05, "loss": 0.9965865015983582, "step": 370 }, { "epoch": 0.17187861941162844, "grad_norm": 0.7890625, "learning_rate": 4.6089054074773446e-05, "loss": 1.0935486555099487, "step": 371 }, { "epoch": 0.1723419041000695, "grad_norm": 0.8046875, "learning_rate": 4.6076209145247627e-05, "loss": 1.158833384513855, "step": 372 }, { "epoch": 0.17280518878851053, "grad_norm": 0.7421875, "learning_rate": 4.606332299209146e-05, "loss": 0.897361695766449, "step": 373 }, { "epoch": 0.1732684734769516, "grad_norm": 0.83984375, "learning_rate": 4.60503956393677e-05, "loss": 1.0273572206497192, "step": 374 }, { "epoch": 0.17373175816539263, "grad_norm": 0.82421875, "learning_rate": 4.603742711121599e-05, "loss": 1.222002387046814, "step": 375 }, { "epoch": 0.1741950428538337, "grad_norm": 0.89453125, "learning_rate": 4.602441743185291e-05, "loss": 1.1995201110839844, "step": 376 }, { "epoch": 0.17465832754227473, "grad_norm": 0.7421875, "learning_rate": 4.601136662557185e-05, "loss": 1.0347135066986084, "step": 377 }, { "epoch": 0.17512161223071576, "grad_norm": 0.859375, "learning_rate": 4.599827471674302e-05, "loss": 1.1969027519226074, "step": 378 }, { "epoch": 0.17558489691915682, "grad_norm": 0.7265625, "learning_rate": 4.5985141729813366e-05, "loss": 1.1000288724899292, "step": 379 }, { "epoch": 0.17604818160759786, "grad_norm": 0.80859375, "learning_rate": 4.5971967689306545e-05, "loss": 1.0385537147521973, "step": 380 }, { "epoch": 0.17651146629603892, "grad_norm": 0.7421875, "learning_rate": 4.595875261982288e-05, "loss": 0.8807584643363953, "step": 381 }, { "epoch": 0.17697475098447996, "grad_norm": 0.77734375, "learning_rate": 4.5945496546039286e-05, "loss": 0.9811716675758362, "step": 382 }, { "epoch": 0.17743803567292102, "grad_norm": 0.875, "learning_rate": 4.59321994927093e-05, "loss": 1.0565159320831299, "step": 383 }, { "epoch": 0.17790132036136205, "grad_norm": 0.765625, "learning_rate": 4.5918861484662906e-05, "loss": 1.0541253089904785, "step": 384 }, { "epoch": 0.17836460504980312, "grad_norm": 0.78515625, "learning_rate": 4.5905482546806626e-05, "loss": 1.1991245746612549, "step": 385 }, { "epoch": 0.17882788973824415, "grad_norm": 0.75390625, "learning_rate": 4.58920627041234e-05, "loss": 0.9940633177757263, "step": 386 }, { "epoch": 0.17929117442668518, "grad_norm": 0.8046875, "learning_rate": 4.587860198167252e-05, "loss": 1.0880647897720337, "step": 387 }, { "epoch": 0.17975445911512625, "grad_norm": 0.77734375, "learning_rate": 4.586510040458965e-05, "loss": 0.9566104412078857, "step": 388 }, { "epoch": 0.18021774380356728, "grad_norm": 0.875, "learning_rate": 4.585155799808672e-05, "loss": 1.0409622192382812, "step": 389 }, { "epoch": 0.18068102849200834, "grad_norm": 0.75390625, "learning_rate": 4.583797478745191e-05, "loss": 1.0287699699401855, "step": 390 }, { "epoch": 0.18114431318044938, "grad_norm": 0.76953125, "learning_rate": 4.58243507980496e-05, "loss": 1.1182043552398682, "step": 391 }, { "epoch": 0.18160759786889044, "grad_norm": 0.765625, "learning_rate": 4.581068605532031e-05, "loss": 1.040753722190857, "step": 392 }, { "epoch": 0.18207088255733148, "grad_norm": 0.7265625, "learning_rate": 4.5796980584780665e-05, "loss": 1.105460524559021, "step": 393 }, { "epoch": 0.18253416724577254, "grad_norm": 0.859375, "learning_rate": 4.578323441202334e-05, "loss": 0.9269900918006897, "step": 394 }, { "epoch": 0.18299745193421357, "grad_norm": 0.7890625, "learning_rate": 4.5769447562717005e-05, "loss": 1.0313459634780884, "step": 395 }, { "epoch": 0.1834607366226546, "grad_norm": 0.7265625, "learning_rate": 4.5755620062606313e-05, "loss": 0.9970820546150208, "step": 396 }, { "epoch": 0.18392402131109567, "grad_norm": 0.83203125, "learning_rate": 4.5741751937511796e-05, "loss": 1.0869134664535522, "step": 397 }, { "epoch": 0.1843873059995367, "grad_norm": 0.77734375, "learning_rate": 4.572784321332987e-05, "loss": 1.0493508577346802, "step": 398 }, { "epoch": 0.18485059068797777, "grad_norm": 0.80859375, "learning_rate": 4.571389391603275e-05, "loss": 0.9378384947776794, "step": 399 }, { "epoch": 0.1853138753764188, "grad_norm": 0.76953125, "learning_rate": 4.56999040716684e-05, "loss": 0.9289635419845581, "step": 400 }, { "epoch": 0.18577716006485986, "grad_norm": 0.84375, "learning_rate": 4.568587370636055e-05, "loss": 1.065589427947998, "step": 401 }, { "epoch": 0.1862404447533009, "grad_norm": 0.7734375, "learning_rate": 4.567180284630853e-05, "loss": 0.9970924258232117, "step": 402 }, { "epoch": 0.18670372944174196, "grad_norm": 0.85546875, "learning_rate": 4.565769151778733e-05, "loss": 1.1094486713409424, "step": 403 }, { "epoch": 0.187167014130183, "grad_norm": 0.73046875, "learning_rate": 4.5643539747147506e-05, "loss": 1.0456472635269165, "step": 404 }, { "epoch": 0.18763029881862406, "grad_norm": 0.796875, "learning_rate": 4.562934756081511e-05, "loss": 1.055879831314087, "step": 405 }, { "epoch": 0.1880935835070651, "grad_norm": 4.5, "learning_rate": 4.5615114985291684e-05, "loss": 1.4064699411392212, "step": 406 }, { "epoch": 0.18855686819550613, "grad_norm": 0.890625, "learning_rate": 4.5600842047154176e-05, "loss": 1.0524810552597046, "step": 407 }, { "epoch": 0.1890201528839472, "grad_norm": 0.77734375, "learning_rate": 4.558652877305494e-05, "loss": 1.052716851234436, "step": 408 }, { "epoch": 0.18948343757238822, "grad_norm": 0.8046875, "learning_rate": 4.5572175189721586e-05, "loss": 1.1580179929733276, "step": 409 }, { "epoch": 0.1899467222608293, "grad_norm": 0.80078125, "learning_rate": 4.5557781323957055e-05, "loss": 1.0525214672088623, "step": 410 }, { "epoch": 0.19041000694927032, "grad_norm": 0.80078125, "learning_rate": 4.5543347202639477e-05, "loss": 0.9145269989967346, "step": 411 }, { "epoch": 0.19087329163771138, "grad_norm": 0.80078125, "learning_rate": 4.5528872852722156e-05, "loss": 0.9920161366462708, "step": 412 }, { "epoch": 0.19133657632615242, "grad_norm": 0.77734375, "learning_rate": 4.551435830123353e-05, "loss": 1.0678218603134155, "step": 413 }, { "epoch": 0.19179986101459348, "grad_norm": 0.796875, "learning_rate": 4.54998035752771e-05, "loss": 1.0776031017303467, "step": 414 }, { "epoch": 0.19226314570303452, "grad_norm": 0.76171875, "learning_rate": 4.5485208702031374e-05, "loss": 1.1205060482025146, "step": 415 }, { "epoch": 0.19272643039147555, "grad_norm": 0.84375, "learning_rate": 4.547057370874984e-05, "loss": 1.0913411378860474, "step": 416 }, { "epoch": 0.1931897150799166, "grad_norm": 0.8125, "learning_rate": 4.545589862276091e-05, "loss": 1.0345311164855957, "step": 417 }, { "epoch": 0.19365299976835765, "grad_norm": 0.80859375, "learning_rate": 4.544118347146784e-05, "loss": 1.1177470684051514, "step": 418 }, { "epoch": 0.1941162844567987, "grad_norm": 0.86328125, "learning_rate": 4.542642828234873e-05, "loss": 1.1447192430496216, "step": 419 }, { "epoch": 0.19457956914523974, "grad_norm": 0.77734375, "learning_rate": 4.5411633082956416e-05, "loss": 1.1146210432052612, "step": 420 }, { "epoch": 0.1950428538336808, "grad_norm": 0.8671875, "learning_rate": 4.539679790091847e-05, "loss": 1.0338633060455322, "step": 421 }, { "epoch": 0.19550613852212184, "grad_norm": 0.78515625, "learning_rate": 4.538192276393712e-05, "loss": 1.0040104389190674, "step": 422 }, { "epoch": 0.1959694232105629, "grad_norm": 0.81640625, "learning_rate": 4.536700769978918e-05, "loss": 1.1796895265579224, "step": 423 }, { "epoch": 0.19643270789900394, "grad_norm": 0.8125, "learning_rate": 4.535205273632605e-05, "loss": 1.0307509899139404, "step": 424 }, { "epoch": 0.19689599258744497, "grad_norm": 0.6953125, "learning_rate": 4.533705790147362e-05, "loss": 0.9913015365600586, "step": 425 }, { "epoch": 0.19735927727588604, "grad_norm": 0.80859375, "learning_rate": 4.532202322323224e-05, "loss": 1.1445434093475342, "step": 426 }, { "epoch": 0.19782256196432707, "grad_norm": 0.83984375, "learning_rate": 4.530694872967666e-05, "loss": 1.3544632196426392, "step": 427 }, { "epoch": 0.19828584665276813, "grad_norm": 1.1484375, "learning_rate": 4.5291834448955975e-05, "loss": 0.9704390168190002, "step": 428 }, { "epoch": 0.19874913134120917, "grad_norm": 0.84375, "learning_rate": 4.5276680409293576e-05, "loss": 0.9586291909217834, "step": 429 }, { "epoch": 0.19921241602965023, "grad_norm": 0.7890625, "learning_rate": 4.52614866389871e-05, "loss": 1.1021863222122192, "step": 430 }, { "epoch": 0.19967570071809126, "grad_norm": 0.8125, "learning_rate": 4.5246253166408376e-05, "loss": 1.0716869831085205, "step": 431 }, { "epoch": 0.20013898540653233, "grad_norm": 0.875, "learning_rate": 4.523098002000336e-05, "loss": 0.9597651958465576, "step": 432 }, { "epoch": 0.20060227009497336, "grad_norm": 0.6875, "learning_rate": 4.5215667228292114e-05, "loss": 0.910536527633667, "step": 433 }, { "epoch": 0.2010655547834144, "grad_norm": 0.7734375, "learning_rate": 4.52003148198687e-05, "loss": 1.0383825302124023, "step": 434 }, { "epoch": 0.20152883947185546, "grad_norm": 0.71484375, "learning_rate": 4.51849228234012e-05, "loss": 1.1084293127059937, "step": 435 }, { "epoch": 0.2019921241602965, "grad_norm": 0.796875, "learning_rate": 4.516949126763156e-05, "loss": 1.1191846132278442, "step": 436 }, { "epoch": 0.20245540884873756, "grad_norm": 0.71484375, "learning_rate": 4.515402018137565e-05, "loss": 0.9708357453346252, "step": 437 }, { "epoch": 0.2029186935371786, "grad_norm": 0.84765625, "learning_rate": 4.513850959352314e-05, "loss": 1.1482406854629517, "step": 438 }, { "epoch": 0.20338197822561965, "grad_norm": 0.6953125, "learning_rate": 4.512295953303746e-05, "loss": 0.9390287399291992, "step": 439 }, { "epoch": 0.2038452629140607, "grad_norm": 0.78125, "learning_rate": 4.510737002895574e-05, "loss": 1.125487208366394, "step": 440 }, { "epoch": 0.20430854760250175, "grad_norm": 0.76171875, "learning_rate": 4.5091741110388775e-05, "loss": 1.0969908237457275, "step": 441 }, { "epoch": 0.20477183229094278, "grad_norm": 0.94140625, "learning_rate": 4.507607280652096e-05, "loss": 0.9356522560119629, "step": 442 }, { "epoch": 0.20523511697938382, "grad_norm": 0.76953125, "learning_rate": 4.506036514661024e-05, "loss": 1.131638526916504, "step": 443 }, { "epoch": 0.20569840166782488, "grad_norm": 0.76953125, "learning_rate": 4.504461815998803e-05, "loss": 1.0650880336761475, "step": 444 }, { "epoch": 0.20616168635626592, "grad_norm": 0.7890625, "learning_rate": 4.502883187605921e-05, "loss": 1.0747191905975342, "step": 445 }, { "epoch": 0.20662497104470698, "grad_norm": 0.859375, "learning_rate": 4.5013006324302014e-05, "loss": 1.0188624858856201, "step": 446 }, { "epoch": 0.207088255733148, "grad_norm": 0.73046875, "learning_rate": 4.4997141534268026e-05, "loss": 1.117804765701294, "step": 447 }, { "epoch": 0.20755154042158908, "grad_norm": 0.80859375, "learning_rate": 4.498123753558208e-05, "loss": 0.9615070819854736, "step": 448 }, { "epoch": 0.2080148251100301, "grad_norm": 0.8984375, "learning_rate": 4.496529435794224e-05, "loss": 1.1164673566818237, "step": 449 }, { "epoch": 0.20847810979847117, "grad_norm": 1.0078125, "learning_rate": 4.494931203111972e-05, "loss": 1.2451194524765015, "step": 450 }, { "epoch": 0.2089413944869122, "grad_norm": 0.82421875, "learning_rate": 4.493329058495885e-05, "loss": 1.0730493068695068, "step": 451 }, { "epoch": 0.20940467917535324, "grad_norm": 0.8203125, "learning_rate": 4.491723004937699e-05, "loss": 1.1245779991149902, "step": 452 }, { "epoch": 0.2098679638637943, "grad_norm": 0.73828125, "learning_rate": 4.490113045436454e-05, "loss": 1.1549571752548218, "step": 453 }, { "epoch": 0.21033124855223534, "grad_norm": 0.80859375, "learning_rate": 4.488499182998475e-05, "loss": 1.1194530725479126, "step": 454 }, { "epoch": 0.2107945332406764, "grad_norm": 0.86328125, "learning_rate": 4.486881420637385e-05, "loss": 0.9296231865882874, "step": 455 }, { "epoch": 0.21125781792911744, "grad_norm": 0.75, "learning_rate": 4.4852597613740826e-05, "loss": 0.9777655601501465, "step": 456 }, { "epoch": 0.2117211026175585, "grad_norm": 0.8984375, "learning_rate": 4.4836342082367454e-05, "loss": 1.2194538116455078, "step": 457 }, { "epoch": 0.21218438730599953, "grad_norm": 0.7578125, "learning_rate": 4.482004764260822e-05, "loss": 1.1565169095993042, "step": 458 }, { "epoch": 0.2126476719944406, "grad_norm": 0.6796875, "learning_rate": 4.4803714324890286e-05, "loss": 1.0961380004882812, "step": 459 }, { "epoch": 0.21311095668288163, "grad_norm": 0.83984375, "learning_rate": 4.478734215971337e-05, "loss": 0.9595807790756226, "step": 460 }, { "epoch": 0.21357424137132267, "grad_norm": 0.74609375, "learning_rate": 4.477093117764976e-05, "loss": 1.0418174266815186, "step": 461 }, { "epoch": 0.21403752605976373, "grad_norm": 0.8125, "learning_rate": 4.4754481409344225e-05, "loss": 1.1094303131103516, "step": 462 }, { "epoch": 0.21450081074820476, "grad_norm": 0.765625, "learning_rate": 4.4737992885513955e-05, "loss": 0.8423942923545837, "step": 463 }, { "epoch": 0.21496409543664582, "grad_norm": 0.84765625, "learning_rate": 4.472146563694851e-05, "loss": 1.148449182510376, "step": 464 }, { "epoch": 0.21542738012508686, "grad_norm": 0.78515625, "learning_rate": 4.470489969450977e-05, "loss": 0.9094064831733704, "step": 465 }, { "epoch": 0.21589066481352792, "grad_norm": 0.78515625, "learning_rate": 4.4688295089131864e-05, "loss": 1.0065088272094727, "step": 466 }, { "epoch": 0.21635394950196896, "grad_norm": 0.69921875, "learning_rate": 4.46716518518211e-05, "loss": 1.0104409456253052, "step": 467 }, { "epoch": 0.21681723419041002, "grad_norm": 0.75390625, "learning_rate": 4.465497001365598e-05, "loss": 1.0070152282714844, "step": 468 }, { "epoch": 0.21728051887885105, "grad_norm": 0.76953125, "learning_rate": 4.4638249605787e-05, "loss": 1.160359263420105, "step": 469 }, { "epoch": 0.2177438035672921, "grad_norm": 0.796875, "learning_rate": 4.462149065943676e-05, "loss": 1.0029304027557373, "step": 470 }, { "epoch": 0.21820708825573315, "grad_norm": 0.859375, "learning_rate": 4.4604693205899775e-05, "loss": 1.1525189876556396, "step": 471 }, { "epoch": 0.21867037294417419, "grad_norm": 0.83984375, "learning_rate": 4.458785727654249e-05, "loss": 0.9948219060897827, "step": 472 }, { "epoch": 0.21913365763261525, "grad_norm": 0.859375, "learning_rate": 4.457098290280319e-05, "loss": 1.1646232604980469, "step": 473 }, { "epoch": 0.21959694232105628, "grad_norm": 0.7265625, "learning_rate": 4.455407011619194e-05, "loss": 1.1069519519805908, "step": 474 }, { "epoch": 0.22006022700949734, "grad_norm": 0.828125, "learning_rate": 4.4537118948290546e-05, "loss": 1.155336618423462, "step": 475 }, { "epoch": 0.22052351169793838, "grad_norm": 0.78515625, "learning_rate": 4.4520129430752487e-05, "loss": 0.8776676058769226, "step": 476 }, { "epoch": 0.22098679638637944, "grad_norm": 0.79296875, "learning_rate": 4.4503101595302826e-05, "loss": 1.0383992195129395, "step": 477 }, { "epoch": 0.22145008107482048, "grad_norm": 0.734375, "learning_rate": 4.448603547373822e-05, "loss": 1.0792429447174072, "step": 478 }, { "epoch": 0.2219133657632615, "grad_norm": 0.8671875, "learning_rate": 4.4468931097926796e-05, "loss": 1.2104331254959106, "step": 479 }, { "epoch": 0.22237665045170257, "grad_norm": 0.7578125, "learning_rate": 4.44517884998081e-05, "loss": 1.1502063274383545, "step": 480 }, { "epoch": 0.2228399351401436, "grad_norm": 0.8359375, "learning_rate": 4.443460771139309e-05, "loss": 1.0817224979400635, "step": 481 }, { "epoch": 0.22330321982858467, "grad_norm": 0.859375, "learning_rate": 4.441738876476401e-05, "loss": 0.880224347114563, "step": 482 }, { "epoch": 0.2237665045170257, "grad_norm": 0.765625, "learning_rate": 4.4400131692074355e-05, "loss": 0.9947736263275146, "step": 483 }, { "epoch": 0.22422978920546677, "grad_norm": 1.1328125, "learning_rate": 4.438283652554883e-05, "loss": 1.0493063926696777, "step": 484 }, { "epoch": 0.2246930738939078, "grad_norm": 0.80859375, "learning_rate": 4.436550329748328e-05, "loss": 1.156111240386963, "step": 485 }, { "epoch": 0.22515635858234886, "grad_norm": 0.75, "learning_rate": 4.4348132040244586e-05, "loss": 1.1266316175460815, "step": 486 }, { "epoch": 0.2256196432707899, "grad_norm": 0.75, "learning_rate": 4.4330722786270686e-05, "loss": 0.9672824740409851, "step": 487 }, { "epoch": 0.22608292795923093, "grad_norm": 0.71875, "learning_rate": 4.431327556807044e-05, "loss": 1.073356032371521, "step": 488 }, { "epoch": 0.226546212647672, "grad_norm": 0.69921875, "learning_rate": 4.429579041822362e-05, "loss": 1.0501450300216675, "step": 489 }, { "epoch": 0.22700949733611303, "grad_norm": 1.0546875, "learning_rate": 4.427826736938081e-05, "loss": 1.253738522529602, "step": 490 }, { "epoch": 0.2274727820245541, "grad_norm": 0.76171875, "learning_rate": 4.426070645426339e-05, "loss": 0.9925602078437805, "step": 491 }, { "epoch": 0.22793606671299513, "grad_norm": 1.1875, "learning_rate": 4.424310770566343e-05, "loss": 1.0422255992889404, "step": 492 }, { "epoch": 0.2283993514014362, "grad_norm": 0.77734375, "learning_rate": 4.4225471156443644e-05, "loss": 0.9284833669662476, "step": 493 }, { "epoch": 0.22886263608987722, "grad_norm": 0.71875, "learning_rate": 4.420779683953734e-05, "loss": 0.9178367853164673, "step": 494 }, { "epoch": 0.2293259207783183, "grad_norm": 0.890625, "learning_rate": 4.419008478794835e-05, "loss": 1.1734018325805664, "step": 495 }, { "epoch": 0.22978920546675932, "grad_norm": 0.7734375, "learning_rate": 4.4172335034750976e-05, "loss": 1.1945644617080688, "step": 496 }, { "epoch": 0.23025249015520038, "grad_norm": 0.9921875, "learning_rate": 4.415454761308991e-05, "loss": 1.0181314945220947, "step": 497 }, { "epoch": 0.23071577484364142, "grad_norm": 0.97265625, "learning_rate": 4.413672255618019e-05, "loss": 1.0368403196334839, "step": 498 }, { "epoch": 0.23117905953208245, "grad_norm": 0.78125, "learning_rate": 4.411885989730713e-05, "loss": 1.0601047277450562, "step": 499 }, { "epoch": 0.23164234422052352, "grad_norm": 0.77734375, "learning_rate": 4.410095966982626e-05, "loss": 0.901918888092041, "step": 500 }, { "epoch": 0.23210562890896455, "grad_norm": 0.91796875, "learning_rate": 4.408302190716327e-05, "loss": 1.1393412351608276, "step": 501 }, { "epoch": 0.2325689135974056, "grad_norm": 0.8984375, "learning_rate": 4.406504664281392e-05, "loss": 1.2997722625732422, "step": 502 }, { "epoch": 0.23303219828584665, "grad_norm": 0.7890625, "learning_rate": 4.4047033910344015e-05, "loss": 0.889095664024353, "step": 503 }, { "epoch": 0.2334954829742877, "grad_norm": 0.8359375, "learning_rate": 4.4028983743389327e-05, "loss": 1.096193790435791, "step": 504 }, { "epoch": 0.23395876766272874, "grad_norm": 0.8125, "learning_rate": 4.4010896175655516e-05, "loss": 0.8893133997917175, "step": 505 }, { "epoch": 0.2344220523511698, "grad_norm": 0.8359375, "learning_rate": 4.39927712409181e-05, "loss": 1.0893774032592773, "step": 506 }, { "epoch": 0.23488533703961084, "grad_norm": 0.80859375, "learning_rate": 4.397460897302237e-05, "loss": 1.2915987968444824, "step": 507 }, { "epoch": 0.23534862172805188, "grad_norm": 0.83984375, "learning_rate": 4.395640940588332e-05, "loss": 1.1468744277954102, "step": 508 }, { "epoch": 0.23581190641649294, "grad_norm": 0.875, "learning_rate": 4.3938172573485584e-05, "loss": 1.081978678703308, "step": 509 }, { "epoch": 0.23627519110493397, "grad_norm": 0.921875, "learning_rate": 4.391989850988342e-05, "loss": 1.1137498617172241, "step": 510 }, { "epoch": 0.23673847579337504, "grad_norm": 0.78125, "learning_rate": 4.390158724920058e-05, "loss": 0.9725139141082764, "step": 511 }, { "epoch": 0.23720176048181607, "grad_norm": 0.78125, "learning_rate": 4.388323882563028e-05, "loss": 1.2336326837539673, "step": 512 }, { "epoch": 0.23766504517025713, "grad_norm": 0.734375, "learning_rate": 4.3864853273435136e-05, "loss": 0.9616613984107971, "step": 513 }, { "epoch": 0.23812832985869817, "grad_norm": 0.734375, "learning_rate": 4.384643062694709e-05, "loss": 0.9157605767250061, "step": 514 }, { "epoch": 0.23859161454713923, "grad_norm": 0.91015625, "learning_rate": 4.382797092056735e-05, "loss": 1.1036900281906128, "step": 515 }, { "epoch": 0.23905489923558026, "grad_norm": 0.87109375, "learning_rate": 4.380947418876636e-05, "loss": 0.9066743850708008, "step": 516 }, { "epoch": 0.2395181839240213, "grad_norm": 0.8515625, "learning_rate": 4.379094046608364e-05, "loss": 1.0424668788909912, "step": 517 }, { "epoch": 0.23998146861246236, "grad_norm": 0.8515625, "learning_rate": 4.3772369787127826e-05, "loss": 1.0981203317642212, "step": 518 }, { "epoch": 0.2404447533009034, "grad_norm": 0.8125, "learning_rate": 4.3753762186576575e-05, "loss": 1.098775863647461, "step": 519 }, { "epoch": 0.24090803798934446, "grad_norm": 0.78515625, "learning_rate": 4.3735117699176455e-05, "loss": 0.8571038246154785, "step": 520 }, { "epoch": 0.2413713226777855, "grad_norm": 0.80859375, "learning_rate": 4.3716436359742935e-05, "loss": 0.991769552230835, "step": 521 }, { "epoch": 0.24183460736622656, "grad_norm": 0.80078125, "learning_rate": 4.369771820316029e-05, "loss": 1.2347557544708252, "step": 522 }, { "epoch": 0.2422978920546676, "grad_norm": 0.796875, "learning_rate": 4.367896326438155e-05, "loss": 0.941724956035614, "step": 523 }, { "epoch": 0.24276117674310865, "grad_norm": 0.8046875, "learning_rate": 4.366017157842844e-05, "loss": 0.9241411685943604, "step": 524 }, { "epoch": 0.2432244614315497, "grad_norm": 0.8359375, "learning_rate": 4.3641343180391275e-05, "loss": 1.107820987701416, "step": 525 }, { "epoch": 0.24368774611999072, "grad_norm": 0.83984375, "learning_rate": 4.362247810542894e-05, "loss": 1.052571177482605, "step": 526 }, { "epoch": 0.24415103080843178, "grad_norm": 0.75, "learning_rate": 4.360357638876883e-05, "loss": 0.8987835645675659, "step": 527 }, { "epoch": 0.24461431549687282, "grad_norm": 0.83203125, "learning_rate": 4.3584638065706724e-05, "loss": 1.1791050434112549, "step": 528 }, { "epoch": 0.24507760018531388, "grad_norm": 0.77734375, "learning_rate": 4.356566317160677e-05, "loss": 1.173535704612732, "step": 529 }, { "epoch": 0.24554088487375492, "grad_norm": 0.87890625, "learning_rate": 4.354665174190142e-05, "loss": 0.8905298709869385, "step": 530 }, { "epoch": 0.24600416956219598, "grad_norm": 0.92578125, "learning_rate": 4.352760381209135e-05, "loss": 1.2305561304092407, "step": 531 }, { "epoch": 0.246467454250637, "grad_norm": 0.75390625, "learning_rate": 4.350851941774537e-05, "loss": 1.010733723640442, "step": 532 }, { "epoch": 0.24693073893907808, "grad_norm": 0.76953125, "learning_rate": 4.34893985945004e-05, "loss": 1.0408154726028442, "step": 533 }, { "epoch": 0.2473940236275191, "grad_norm": 0.9375, "learning_rate": 4.347024137806139e-05, "loss": 1.157252550125122, "step": 534 }, { "epoch": 0.24785730831596015, "grad_norm": 0.859375, "learning_rate": 4.345104780420122e-05, "loss": 1.2410048246383667, "step": 535 }, { "epoch": 0.2483205930044012, "grad_norm": 0.8359375, "learning_rate": 4.34318179087607e-05, "loss": 1.0325219631195068, "step": 536 }, { "epoch": 0.24878387769284224, "grad_norm": 0.859375, "learning_rate": 4.3412551727648435e-05, "loss": 1.166888952255249, "step": 537 }, { "epoch": 0.2492471623812833, "grad_norm": 0.8046875, "learning_rate": 4.33932492968408e-05, "loss": 1.1053187847137451, "step": 538 }, { "epoch": 0.24971044706972434, "grad_norm": 0.796875, "learning_rate": 4.337391065238187e-05, "loss": 1.1279836893081665, "step": 539 }, { "epoch": 0.2501737317581654, "grad_norm": 0.72265625, "learning_rate": 4.335453583038331e-05, "loss": 1.048471450805664, "step": 540 }, { "epoch": 0.25063701644660646, "grad_norm": 0.828125, "learning_rate": 4.333512486702438e-05, "loss": 0.949547290802002, "step": 541 }, { "epoch": 0.2511003011350475, "grad_norm": 0.80859375, "learning_rate": 4.331567779855179e-05, "loss": 1.026901364326477, "step": 542 }, { "epoch": 0.25156358582348853, "grad_norm": 0.88671875, "learning_rate": 4.3296194661279704e-05, "loss": 0.979106605052948, "step": 543 }, { "epoch": 0.25202687051192957, "grad_norm": 0.8046875, "learning_rate": 4.327667549158962e-05, "loss": 0.9530601501464844, "step": 544 }, { "epoch": 0.2524901552003706, "grad_norm": 0.9375, "learning_rate": 4.325712032593032e-05, "loss": 1.4264435768127441, "step": 545 }, { "epoch": 0.2529534398888117, "grad_norm": 0.875, "learning_rate": 4.3237529200817824e-05, "loss": 1.0840469598770142, "step": 546 }, { "epoch": 0.2534167245772527, "grad_norm": 0.8359375, "learning_rate": 4.321790215283526e-05, "loss": 0.9668251872062683, "step": 547 }, { "epoch": 0.25388000926569376, "grad_norm": 0.72265625, "learning_rate": 4.3198239218632874e-05, "loss": 1.0909249782562256, "step": 548 }, { "epoch": 0.2543432939541348, "grad_norm": 0.73828125, "learning_rate": 4.317854043492791e-05, "loss": 1.1680148839950562, "step": 549 }, { "epoch": 0.2548065786425759, "grad_norm": 0.82421875, "learning_rate": 4.315880583850454e-05, "loss": 1.0644400119781494, "step": 550 }, { "epoch": 0.2552698633310169, "grad_norm": 0.76171875, "learning_rate": 4.313903546621384e-05, "loss": 1.0424561500549316, "step": 551 }, { "epoch": 0.25573314801945796, "grad_norm": 0.80859375, "learning_rate": 4.3119229354973664e-05, "loss": 0.989732563495636, "step": 552 }, { "epoch": 0.256196432707899, "grad_norm": 0.82421875, "learning_rate": 4.309938754176862e-05, "loss": 1.0276066064834595, "step": 553 }, { "epoch": 0.25665971739634, "grad_norm": 0.8125, "learning_rate": 4.307951006364998e-05, "loss": 1.0524067878723145, "step": 554 }, { "epoch": 0.2571230020847811, "grad_norm": 0.984375, "learning_rate": 4.3059596957735606e-05, "loss": 1.1999335289001465, "step": 555 }, { "epoch": 0.25758628677322215, "grad_norm": 0.74609375, "learning_rate": 4.3039648261209896e-05, "loss": 1.0140695571899414, "step": 556 }, { "epoch": 0.2580495714616632, "grad_norm": 0.82421875, "learning_rate": 4.3019664011323705e-05, "loss": 1.0452879667282104, "step": 557 }, { "epoch": 0.2585128561501042, "grad_norm": 0.79296875, "learning_rate": 4.2999644245394275e-05, "loss": 1.1013998985290527, "step": 558 }, { "epoch": 0.2589761408385453, "grad_norm": 0.87890625, "learning_rate": 4.297958900080519e-05, "loss": 0.9173800945281982, "step": 559 }, { "epoch": 0.25943942552698634, "grad_norm": 0.7734375, "learning_rate": 4.295949831500624e-05, "loss": 1.2523088455200195, "step": 560 }, { "epoch": 0.2599027102154274, "grad_norm": 0.78125, "learning_rate": 4.293937222551345e-05, "loss": 0.9910227060317993, "step": 561 }, { "epoch": 0.2603659949038684, "grad_norm": 0.83203125, "learning_rate": 4.2919210769908905e-05, "loss": 1.015892744064331, "step": 562 }, { "epoch": 0.26082927959230945, "grad_norm": 0.8046875, "learning_rate": 4.289901398584077e-05, "loss": 1.1399426460266113, "step": 563 }, { "epoch": 0.26129256428075054, "grad_norm": 0.796875, "learning_rate": 4.287878191102316e-05, "loss": 0.9163965582847595, "step": 564 }, { "epoch": 0.2617558489691916, "grad_norm": 0.69921875, "learning_rate": 4.285851458323608e-05, "loss": 1.0406631231307983, "step": 565 }, { "epoch": 0.2622191336576326, "grad_norm": 0.87109375, "learning_rate": 4.283821204032539e-05, "loss": 0.952318549156189, "step": 566 }, { "epoch": 0.26268241834607364, "grad_norm": 0.921875, "learning_rate": 4.281787432020269e-05, "loss": 1.0432265996932983, "step": 567 }, { "epoch": 0.26314570303451473, "grad_norm": 0.8359375, "learning_rate": 4.279750146084527e-05, "loss": 1.1223399639129639, "step": 568 }, { "epoch": 0.26360898772295577, "grad_norm": 0.80859375, "learning_rate": 4.2777093500296055e-05, "loss": 1.0468631982803345, "step": 569 }, { "epoch": 0.2640722724113968, "grad_norm": 0.76953125, "learning_rate": 4.2756650476663475e-05, "loss": 1.0525509119033813, "step": 570 }, { "epoch": 0.26453555709983784, "grad_norm": 0.80078125, "learning_rate": 4.273617242812147e-05, "loss": 0.9959677457809448, "step": 571 }, { "epoch": 0.26499884178827887, "grad_norm": 0.8359375, "learning_rate": 4.271565939290939e-05, "loss": 0.8867281675338745, "step": 572 }, { "epoch": 0.26546212647671996, "grad_norm": 0.79296875, "learning_rate": 4.269511140933187e-05, "loss": 1.0435187816619873, "step": 573 }, { "epoch": 0.265925411165161, "grad_norm": 0.74609375, "learning_rate": 4.267452851575886e-05, "loss": 0.9710588455200195, "step": 574 }, { "epoch": 0.26638869585360203, "grad_norm": 0.828125, "learning_rate": 4.2653910750625455e-05, "loss": 1.3287699222564697, "step": 575 }, { "epoch": 0.26685198054204307, "grad_norm": 0.80859375, "learning_rate": 4.2633258152431896e-05, "loss": 1.0961614847183228, "step": 576 }, { "epoch": 0.26731526523048416, "grad_norm": 0.77734375, "learning_rate": 4.261257075974345e-05, "loss": 0.9179559946060181, "step": 577 }, { "epoch": 0.2677785499189252, "grad_norm": 0.98828125, "learning_rate": 4.2591848611190364e-05, "loss": 1.1007611751556396, "step": 578 }, { "epoch": 0.2682418346073662, "grad_norm": 0.75, "learning_rate": 4.257109174546781e-05, "loss": 1.0560678243637085, "step": 579 }, { "epoch": 0.26870511929580726, "grad_norm": 0.74609375, "learning_rate": 4.2550300201335725e-05, "loss": 1.0741382837295532, "step": 580 }, { "epoch": 0.2691684039842483, "grad_norm": 0.76171875, "learning_rate": 4.252947401761887e-05, "loss": 0.9691828489303589, "step": 581 }, { "epoch": 0.2696316886726894, "grad_norm": 0.76953125, "learning_rate": 4.250861323320666e-05, "loss": 1.2666388750076294, "step": 582 }, { "epoch": 0.2700949733611304, "grad_norm": 0.8203125, "learning_rate": 4.248771788705311e-05, "loss": 0.8697996735572815, "step": 583 }, { "epoch": 0.27055825804957145, "grad_norm": 0.80078125, "learning_rate": 4.24667880181768e-05, "loss": 0.9978750348091125, "step": 584 }, { "epoch": 0.2710215427380125, "grad_norm": 0.8203125, "learning_rate": 4.244582366566075e-05, "loss": 1.2406501770019531, "step": 585 }, { "epoch": 0.2714848274264536, "grad_norm": 0.7890625, "learning_rate": 4.24248248686524e-05, "loss": 1.0365958213806152, "step": 586 }, { "epoch": 0.2719481121148946, "grad_norm": 0.8359375, "learning_rate": 4.240379166636347e-05, "loss": 0.9354648590087891, "step": 587 }, { "epoch": 0.27241139680333565, "grad_norm": 0.796875, "learning_rate": 4.238272409806997e-05, "loss": 1.082112431526184, "step": 588 }, { "epoch": 0.2728746814917767, "grad_norm": 0.828125, "learning_rate": 4.2361622203112054e-05, "loss": 1.1324368715286255, "step": 589 }, { "epoch": 0.2733379661802177, "grad_norm": 0.76953125, "learning_rate": 4.234048602089398e-05, "loss": 0.9812889695167542, "step": 590 }, { "epoch": 0.2738012508686588, "grad_norm": 0.7890625, "learning_rate": 4.231931559088404e-05, "loss": 0.9894756078720093, "step": 591 }, { "epoch": 0.27426453555709984, "grad_norm": 0.8125, "learning_rate": 4.2298110952614474e-05, "loss": 1.006495475769043, "step": 592 }, { "epoch": 0.2747278202455409, "grad_norm": 0.87109375, "learning_rate": 4.22768721456814e-05, "loss": 1.0490379333496094, "step": 593 }, { "epoch": 0.2751911049339819, "grad_norm": 0.77734375, "learning_rate": 4.225559920974473e-05, "loss": 1.0940771102905273, "step": 594 }, { "epoch": 0.275654389622423, "grad_norm": 0.84765625, "learning_rate": 4.223429218452812e-05, "loss": 1.1644221544265747, "step": 595 }, { "epoch": 0.27611767431086404, "grad_norm": 0.75390625, "learning_rate": 4.2212951109818895e-05, "loss": 0.9143954515457153, "step": 596 }, { "epoch": 0.27658095899930507, "grad_norm": 0.7734375, "learning_rate": 4.219157602546792e-05, "loss": 0.9301037192344666, "step": 597 }, { "epoch": 0.2770442436877461, "grad_norm": 0.69921875, "learning_rate": 4.217016697138961e-05, "loss": 0.889419436454773, "step": 598 }, { "epoch": 0.27750752837618714, "grad_norm": 0.83203125, "learning_rate": 4.2148723987561786e-05, "loss": 1.0732734203338623, "step": 599 }, { "epoch": 0.27797081306462823, "grad_norm": 0.76953125, "learning_rate": 4.212724711402563e-05, "loss": 1.0122696161270142, "step": 600 }, { "epoch": 0.27843409775306927, "grad_norm": 0.83203125, "learning_rate": 4.2105736390885625e-05, "loss": 0.8314121961593628, "step": 601 }, { "epoch": 0.2788973824415103, "grad_norm": 0.75, "learning_rate": 4.208419185830945e-05, "loss": 1.0255941152572632, "step": 602 }, { "epoch": 0.27936066712995133, "grad_norm": 0.7265625, "learning_rate": 4.206261355652791e-05, "loss": 1.0456650257110596, "step": 603 }, { "epoch": 0.2798239518183924, "grad_norm": 0.74609375, "learning_rate": 4.204100152583488e-05, "loss": 0.9093706607818604, "step": 604 }, { "epoch": 0.28028723650683346, "grad_norm": 2.3125, "learning_rate": 4.201935580658723e-05, "loss": 1.0478147268295288, "step": 605 }, { "epoch": 0.2807505211952745, "grad_norm": 0.8359375, "learning_rate": 4.199767643920469e-05, "loss": 1.1493206024169922, "step": 606 }, { "epoch": 0.28121380588371553, "grad_norm": 0.75, "learning_rate": 4.197596346416988e-05, "loss": 1.017486810684204, "step": 607 }, { "epoch": 0.28167709057215656, "grad_norm": 0.84765625, "learning_rate": 4.195421692202815e-05, "loss": 1.1652302742004395, "step": 608 }, { "epoch": 0.28214037526059765, "grad_norm": 0.84375, "learning_rate": 4.1932436853387514e-05, "loss": 0.9417747259140015, "step": 609 }, { "epoch": 0.2826036599490387, "grad_norm": 0.74609375, "learning_rate": 4.191062329891863e-05, "loss": 0.8607147932052612, "step": 610 }, { "epoch": 0.2830669446374797, "grad_norm": 0.92578125, "learning_rate": 4.1888776299354656e-05, "loss": 1.025602102279663, "step": 611 }, { "epoch": 0.28353022932592076, "grad_norm": 0.8046875, "learning_rate": 4.186689589549121e-05, "loss": 1.0558090209960938, "step": 612 }, { "epoch": 0.28399351401436185, "grad_norm": 0.8828125, "learning_rate": 4.1844982128186294e-05, "loss": 0.9318227171897888, "step": 613 }, { "epoch": 0.2844567987028029, "grad_norm": 0.8828125, "learning_rate": 4.18230350383602e-05, "loss": 0.9264402389526367, "step": 614 }, { "epoch": 0.2849200833912439, "grad_norm": 0.80859375, "learning_rate": 4.1801054666995453e-05, "loss": 1.160361647605896, "step": 615 }, { "epoch": 0.28538336807968495, "grad_norm": 0.87890625, "learning_rate": 4.177904105513673e-05, "loss": 0.902491569519043, "step": 616 }, { "epoch": 0.285846652768126, "grad_norm": 0.81640625, "learning_rate": 4.175699424389075e-05, "loss": 1.1254316568374634, "step": 617 }, { "epoch": 0.2863099374565671, "grad_norm": 0.7421875, "learning_rate": 4.173491427442627e-05, "loss": 0.8220522999763489, "step": 618 }, { "epoch": 0.2867732221450081, "grad_norm": 0.859375, "learning_rate": 4.1712801187973925e-05, "loss": 1.1775267124176025, "step": 619 }, { "epoch": 0.28723650683344915, "grad_norm": 0.8203125, "learning_rate": 4.1690655025826225e-05, "loss": 1.0950840711593628, "step": 620 }, { "epoch": 0.2876997915218902, "grad_norm": 0.7734375, "learning_rate": 4.166847582933741e-05, "loss": 1.2453440427780151, "step": 621 }, { "epoch": 0.28816307621033127, "grad_norm": 0.79296875, "learning_rate": 4.164626363992343e-05, "loss": 0.9733505845069885, "step": 622 }, { "epoch": 0.2886263608987723, "grad_norm": 0.8984375, "learning_rate": 4.162401849906183e-05, "loss": 1.0875972509384155, "step": 623 }, { "epoch": 0.28908964558721334, "grad_norm": 0.8359375, "learning_rate": 4.16017404482917e-05, "loss": 1.1624879837036133, "step": 624 }, { "epoch": 0.2895529302756544, "grad_norm": 1.125, "learning_rate": 4.1579429529213564e-05, "loss": 1.1447054147720337, "step": 625 }, { "epoch": 0.2900162149640954, "grad_norm": 0.70703125, "learning_rate": 4.155708578348935e-05, "loss": 0.92429119348526, "step": 626 }, { "epoch": 0.2904794996525365, "grad_norm": 0.8125, "learning_rate": 4.1534709252842254e-05, "loss": 0.9543266296386719, "step": 627 }, { "epoch": 0.29094278434097753, "grad_norm": 0.89453125, "learning_rate": 4.151229997905672e-05, "loss": 1.0998059511184692, "step": 628 }, { "epoch": 0.29140606902941857, "grad_norm": 0.77734375, "learning_rate": 4.148985800397831e-05, "loss": 0.9753661155700684, "step": 629 }, { "epoch": 0.2918693537178596, "grad_norm": 0.84375, "learning_rate": 4.146738336951367e-05, "loss": 0.9926996231079102, "step": 630 }, { "epoch": 0.2923326384063007, "grad_norm": 0.671875, "learning_rate": 4.144487611763041e-05, "loss": 0.9744971394538879, "step": 631 }, { "epoch": 0.29279592309474173, "grad_norm": 0.84375, "learning_rate": 4.142233629035706e-05, "loss": 1.1101515293121338, "step": 632 }, { "epoch": 0.29325920778318276, "grad_norm": 0.7421875, "learning_rate": 4.1399763929783e-05, "loss": 1.1098037958145142, "step": 633 }, { "epoch": 0.2937224924716238, "grad_norm": 0.83984375, "learning_rate": 4.137715907805832e-05, "loss": 1.0720516443252563, "step": 634 }, { "epoch": 0.2941857771600649, "grad_norm": 0.7734375, "learning_rate": 4.135452177739382e-05, "loss": 1.0267348289489746, "step": 635 }, { "epoch": 0.2946490618485059, "grad_norm": 0.76171875, "learning_rate": 4.133185207006086e-05, "loss": 0.9987479448318481, "step": 636 }, { "epoch": 0.29511234653694696, "grad_norm": 0.74609375, "learning_rate": 4.130914999839133e-05, "loss": 0.9802069664001465, "step": 637 }, { "epoch": 0.295575631225388, "grad_norm": 0.84765625, "learning_rate": 4.128641560477756e-05, "loss": 1.0791590213775635, "step": 638 }, { "epoch": 0.296038915913829, "grad_norm": 0.7109375, "learning_rate": 4.1263648931672234e-05, "loss": 0.8927035927772522, "step": 639 }, { "epoch": 0.2965022006022701, "grad_norm": 0.75, "learning_rate": 4.124085002158829e-05, "loss": 0.9619215726852417, "step": 640 }, { "epoch": 0.29696548529071115, "grad_norm": 0.953125, "learning_rate": 4.12180189170989e-05, "loss": 1.0131444931030273, "step": 641 }, { "epoch": 0.2974287699791522, "grad_norm": 0.8203125, "learning_rate": 4.119515566083733e-05, "loss": 1.0167940855026245, "step": 642 }, { "epoch": 0.2978920546675932, "grad_norm": 0.94140625, "learning_rate": 4.117226029549689e-05, "loss": 1.1122088432312012, "step": 643 }, { "epoch": 0.2983553393560343, "grad_norm": 0.8125, "learning_rate": 4.114933286383084e-05, "loss": 0.8055898547172546, "step": 644 }, { "epoch": 0.29881862404447534, "grad_norm": 0.8515625, "learning_rate": 4.112637340865234e-05, "loss": 1.1266543865203857, "step": 645 }, { "epoch": 0.2992819087329164, "grad_norm": 0.79296875, "learning_rate": 4.110338197283431e-05, "loss": 1.0558011531829834, "step": 646 }, { "epoch": 0.2997451934213574, "grad_norm": 0.9140625, "learning_rate": 4.108035859930944e-05, "loss": 1.0644391775131226, "step": 647 }, { "epoch": 0.30020847810979845, "grad_norm": 0.7578125, "learning_rate": 4.105730333107003e-05, "loss": 1.043839454650879, "step": 648 }, { "epoch": 0.30067176279823954, "grad_norm": 0.9375, "learning_rate": 4.1034216211167914e-05, "loss": 1.147243857383728, "step": 649 }, { "epoch": 0.3011350474866806, "grad_norm": 0.76953125, "learning_rate": 4.1011097282714454e-05, "loss": 1.051954984664917, "step": 650 }, { "epoch": 0.3015983321751216, "grad_norm": 0.82421875, "learning_rate": 4.0987946588880385e-05, "loss": 1.002161979675293, "step": 651 }, { "epoch": 0.30206161686356264, "grad_norm": 0.8359375, "learning_rate": 4.096476417289574e-05, "loss": 0.9687187671661377, "step": 652 }, { "epoch": 0.30252490155200373, "grad_norm": 0.796875, "learning_rate": 4.094155007804981e-05, "loss": 1.1040300130844116, "step": 653 }, { "epoch": 0.30298818624044477, "grad_norm": 0.83203125, "learning_rate": 4.091830434769105e-05, "loss": 1.0147384405136108, "step": 654 }, { "epoch": 0.3034514709288858, "grad_norm": 0.70703125, "learning_rate": 4.089502702522696e-05, "loss": 0.8908687233924866, "step": 655 }, { "epoch": 0.30391475561732684, "grad_norm": 0.9140625, "learning_rate": 4.087171815412406e-05, "loss": 1.1389329433441162, "step": 656 }, { "epoch": 0.30437804030576787, "grad_norm": 0.78515625, "learning_rate": 4.0848377777907765e-05, "loss": 1.061093807220459, "step": 657 }, { "epoch": 0.30484132499420896, "grad_norm": 0.81640625, "learning_rate": 4.0825005940162326e-05, "loss": 1.0024491548538208, "step": 658 }, { "epoch": 0.30530460968265, "grad_norm": 0.81640625, "learning_rate": 4.080160268453075e-05, "loss": 1.2541595697402954, "step": 659 }, { "epoch": 0.30576789437109103, "grad_norm": 0.75390625, "learning_rate": 4.07781680547147e-05, "loss": 0.9933417439460754, "step": 660 }, { "epoch": 0.30623117905953207, "grad_norm": 0.828125, "learning_rate": 4.075470209447442e-05, "loss": 1.053157091140747, "step": 661 }, { "epoch": 0.30669446374797316, "grad_norm": 0.91015625, "learning_rate": 4.073120484762868e-05, "loss": 1.1531765460968018, "step": 662 }, { "epoch": 0.3071577484364142, "grad_norm": 0.81640625, "learning_rate": 4.070767635805466e-05, "loss": 1.125023603439331, "step": 663 }, { "epoch": 0.3076210331248552, "grad_norm": 0.8046875, "learning_rate": 4.068411666968788e-05, "loss": 0.8804372549057007, "step": 664 }, { "epoch": 0.30808431781329626, "grad_norm": 0.91015625, "learning_rate": 4.066052582652213e-05, "loss": 1.0438697338104248, "step": 665 }, { "epoch": 0.3085476025017373, "grad_norm": 0.796875, "learning_rate": 4.0636903872609336e-05, "loss": 0.9848630428314209, "step": 666 }, { "epoch": 0.3090108871901784, "grad_norm": 0.7578125, "learning_rate": 4.061325085205958e-05, "loss": 1.0336278676986694, "step": 667 }, { "epoch": 0.3094741718786194, "grad_norm": 0.80078125, "learning_rate": 4.058956680904091e-05, "loss": 1.0741722583770752, "step": 668 }, { "epoch": 0.30993745656706045, "grad_norm": 1.03125, "learning_rate": 4.0565851787779316e-05, "loss": 1.1203691959381104, "step": 669 }, { "epoch": 0.3104007412555015, "grad_norm": 1.046875, "learning_rate": 4.054210583255864e-05, "loss": 1.099678874015808, "step": 670 }, { "epoch": 0.3108640259439426, "grad_norm": 0.73046875, "learning_rate": 4.051832898772048e-05, "loss": 1.0561059713363647, "step": 671 }, { "epoch": 0.3113273106323836, "grad_norm": 0.79296875, "learning_rate": 4.049452129766413e-05, "loss": 1.186478853225708, "step": 672 }, { "epoch": 0.31179059532082465, "grad_norm": 0.73828125, "learning_rate": 4.047068280684646e-05, "loss": 0.9179085493087769, "step": 673 }, { "epoch": 0.3122538800092657, "grad_norm": 0.83203125, "learning_rate": 4.044681355978187e-05, "loss": 0.9686939716339111, "step": 674 }, { "epoch": 0.3127171646977067, "grad_norm": 0.875, "learning_rate": 4.042291360104219e-05, "loss": 1.121710181236267, "step": 675 }, { "epoch": 0.3131804493861478, "grad_norm": 0.85546875, "learning_rate": 4.03989829752566e-05, "loss": 1.0991014242172241, "step": 676 }, { "epoch": 0.31364373407458884, "grad_norm": 0.75, "learning_rate": 4.0375021727111543e-05, "loss": 1.0645341873168945, "step": 677 }, { "epoch": 0.3141070187630299, "grad_norm": 0.78125, "learning_rate": 4.0351029901350636e-05, "loss": 1.1132837533950806, "step": 678 }, { "epoch": 0.3145703034514709, "grad_norm": 0.7265625, "learning_rate": 4.032700754277461e-05, "loss": 1.0442454814910889, "step": 679 }, { "epoch": 0.315033588139912, "grad_norm": 0.7578125, "learning_rate": 4.0302954696241206e-05, "loss": 0.9802740812301636, "step": 680 }, { "epoch": 0.31549687282835304, "grad_norm": 0.984375, "learning_rate": 4.02788714066651e-05, "loss": 0.9827386140823364, "step": 681 }, { "epoch": 0.31596015751679407, "grad_norm": 0.8515625, "learning_rate": 4.025475771901779e-05, "loss": 1.1004528999328613, "step": 682 }, { "epoch": 0.3164234422052351, "grad_norm": 0.8203125, "learning_rate": 4.023061367832757e-05, "loss": 1.0051753520965576, "step": 683 }, { "epoch": 0.31688672689367614, "grad_norm": 0.83984375, "learning_rate": 4.02064393296794e-05, "loss": 0.9524490833282471, "step": 684 }, { "epoch": 0.31735001158211723, "grad_norm": 0.828125, "learning_rate": 4.018223471821483e-05, "loss": 1.079671859741211, "step": 685 }, { "epoch": 0.31781329627055827, "grad_norm": 0.7578125, "learning_rate": 4.0157999889131936e-05, "loss": 0.912105917930603, "step": 686 }, { "epoch": 0.3182765809589993, "grad_norm": 0.82421875, "learning_rate": 4.01337348876852e-05, "loss": 1.2040618658065796, "step": 687 }, { "epoch": 0.31873986564744033, "grad_norm": 0.7578125, "learning_rate": 4.0109439759185465e-05, "loss": 0.8994999527931213, "step": 688 }, { "epoch": 0.3192031503358814, "grad_norm": 0.75390625, "learning_rate": 4.0085114548999816e-05, "loss": 1.244059681892395, "step": 689 }, { "epoch": 0.31966643502432246, "grad_norm": 0.71875, "learning_rate": 4.006075930255152e-05, "loss": 1.126865029335022, "step": 690 }, { "epoch": 0.3201297197127635, "grad_norm": 0.8203125, "learning_rate": 4.003637406531992e-05, "loss": 1.0775233507156372, "step": 691 }, { "epoch": 0.32059300440120453, "grad_norm": 0.94140625, "learning_rate": 4.001195888284037e-05, "loss": 1.0177921056747437, "step": 692 }, { "epoch": 0.32105628908964556, "grad_norm": 0.87890625, "learning_rate": 3.998751380070416e-05, "loss": 1.057099461555481, "step": 693 }, { "epoch": 0.32151957377808665, "grad_norm": 0.85546875, "learning_rate": 3.9963038864558385e-05, "loss": 0.9934321045875549, "step": 694 }, { "epoch": 0.3219828584665277, "grad_norm": 0.7734375, "learning_rate": 3.993853412010589e-05, "loss": 0.9183391332626343, "step": 695 }, { "epoch": 0.3224461431549687, "grad_norm": 0.75390625, "learning_rate": 3.9913999613105204e-05, "loss": 0.9654147624969482, "step": 696 }, { "epoch": 0.32290942784340976, "grad_norm": 0.76171875, "learning_rate": 3.98894353893704e-05, "loss": 1.0339151620864868, "step": 697 }, { "epoch": 0.32337271253185085, "grad_norm": 0.89453125, "learning_rate": 3.986484149477107e-05, "loss": 0.9817367792129517, "step": 698 }, { "epoch": 0.3238359972202919, "grad_norm": 0.8515625, "learning_rate": 3.98402179752322e-05, "loss": 0.9361385703086853, "step": 699 }, { "epoch": 0.3242992819087329, "grad_norm": 0.92578125, "learning_rate": 3.981556487673409e-05, "loss": 0.9315399527549744, "step": 700 }, { "epoch": 0.32476256659717395, "grad_norm": 0.7734375, "learning_rate": 3.979088224531229e-05, "loss": 1.005590796470642, "step": 701 }, { "epoch": 0.325225851285615, "grad_norm": 0.84765625, "learning_rate": 3.976617012705749e-05, "loss": 0.8338845372200012, "step": 702 }, { "epoch": 0.3256891359740561, "grad_norm": 0.75, "learning_rate": 3.9741428568115435e-05, "loss": 1.0329554080963135, "step": 703 }, { "epoch": 0.3261524206624971, "grad_norm": 0.9140625, "learning_rate": 3.9716657614686844e-05, "loss": 0.8598560094833374, "step": 704 }, { "epoch": 0.32661570535093815, "grad_norm": 0.796875, "learning_rate": 3.9691857313027335e-05, "loss": 0.9257340431213379, "step": 705 }, { "epoch": 0.3270789900393792, "grad_norm": 0.8359375, "learning_rate": 3.966702770944734e-05, "loss": 0.8521995544433594, "step": 706 }, { "epoch": 0.32754227472782027, "grad_norm": 0.94140625, "learning_rate": 3.964216885031197e-05, "loss": 1.1843841075897217, "step": 707 }, { "epoch": 0.3280055594162613, "grad_norm": 0.90234375, "learning_rate": 3.961728078204101e-05, "loss": 1.1602882146835327, "step": 708 }, { "epoch": 0.32846884410470234, "grad_norm": 0.765625, "learning_rate": 3.9592363551108756e-05, "loss": 1.020529866218567, "step": 709 }, { "epoch": 0.3289321287931434, "grad_norm": 0.72265625, "learning_rate": 3.956741720404397e-05, "loss": 0.9926280975341797, "step": 710 }, { "epoch": 0.3293954134815844, "grad_norm": 0.75390625, "learning_rate": 3.9542441787429795e-05, "loss": 0.7993087768554688, "step": 711 }, { "epoch": 0.3298586981700255, "grad_norm": 0.77734375, "learning_rate": 3.9517437347903635e-05, "loss": 0.9188562631607056, "step": 712 }, { "epoch": 0.33032198285846653, "grad_norm": 0.75390625, "learning_rate": 3.949240393215711e-05, "loss": 0.9771900177001953, "step": 713 }, { "epoch": 0.33078526754690757, "grad_norm": 0.8203125, "learning_rate": 3.9467341586935936e-05, "loss": 0.989328145980835, "step": 714 }, { "epoch": 0.3312485522353486, "grad_norm": 0.82421875, "learning_rate": 3.9442250359039855e-05, "loss": 1.002003788948059, "step": 715 }, { "epoch": 0.3317118369237897, "grad_norm": 0.8203125, "learning_rate": 3.941713029532253e-05, "loss": 1.0104445219039917, "step": 716 }, { "epoch": 0.33217512161223073, "grad_norm": 0.71484375, "learning_rate": 3.93919814426915e-05, "loss": 1.0114507675170898, "step": 717 }, { "epoch": 0.33263840630067176, "grad_norm": 0.75390625, "learning_rate": 3.936680384810803e-05, "loss": 0.9771013855934143, "step": 718 }, { "epoch": 0.3331016909891128, "grad_norm": 0.98046875, "learning_rate": 3.934159755858707e-05, "loss": 1.0455206632614136, "step": 719 }, { "epoch": 0.33356497567755383, "grad_norm": 0.953125, "learning_rate": 3.931636262119716e-05, "loss": 0.875360369682312, "step": 720 }, { "epoch": 0.3340282603659949, "grad_norm": 0.671875, "learning_rate": 3.929109908306032e-05, "loss": 0.9550399780273438, "step": 721 }, { "epoch": 0.33449154505443596, "grad_norm": 0.86328125, "learning_rate": 3.9265806991351995e-05, "loss": 1.1120067834854126, "step": 722 }, { "epoch": 0.334954829742877, "grad_norm": 0.765625, "learning_rate": 3.9240486393300924e-05, "loss": 0.9513478875160217, "step": 723 }, { "epoch": 0.335418114431318, "grad_norm": 0.86328125, "learning_rate": 3.9215137336189096e-05, "loss": 0.9390691518783569, "step": 724 }, { "epoch": 0.3358813991197591, "grad_norm": 0.8125, "learning_rate": 3.918975986735164e-05, "loss": 1.0198416709899902, "step": 725 }, { "epoch": 0.33634468380820015, "grad_norm": 0.7734375, "learning_rate": 3.916435403417674e-05, "loss": 0.9613708257675171, "step": 726 }, { "epoch": 0.3368079684966412, "grad_norm": 0.8359375, "learning_rate": 3.913891988410554e-05, "loss": 0.9495355486869812, "step": 727 }, { "epoch": 0.3372712531850822, "grad_norm": 0.76953125, "learning_rate": 3.9113457464632056e-05, "loss": 0.9577147960662842, "step": 728 }, { "epoch": 0.33773453787352326, "grad_norm": 0.9765625, "learning_rate": 3.9087966823303105e-05, "loss": 0.9388977885246277, "step": 729 }, { "epoch": 0.33819782256196435, "grad_norm": 0.75390625, "learning_rate": 3.906244800771821e-05, "loss": 0.9760944247245789, "step": 730 }, { "epoch": 0.3386611072504054, "grad_norm": 0.9609375, "learning_rate": 3.903690106552948e-05, "loss": 1.0838488340377808, "step": 731 }, { "epoch": 0.3391243919388464, "grad_norm": 0.88671875, "learning_rate": 3.9011326044441564e-05, "loss": 0.937881350517273, "step": 732 }, { "epoch": 0.33958767662728745, "grad_norm": 0.8125, "learning_rate": 3.898572299221153e-05, "loss": 1.1330440044403076, "step": 733 }, { "epoch": 0.34005096131572854, "grad_norm": 1.109375, "learning_rate": 3.896009195664882e-05, "loss": 1.1508278846740723, "step": 734 }, { "epoch": 0.3405142460041696, "grad_norm": 0.79296875, "learning_rate": 3.893443298561508e-05, "loss": 0.9706493020057678, "step": 735 }, { "epoch": 0.3409775306926106, "grad_norm": 0.8515625, "learning_rate": 3.890874612702417e-05, "loss": 0.9598948955535889, "step": 736 }, { "epoch": 0.34144081538105164, "grad_norm": 0.87109375, "learning_rate": 3.8883031428842e-05, "loss": 1.0114010572433472, "step": 737 }, { "epoch": 0.3419041000694927, "grad_norm": 0.73046875, "learning_rate": 3.8857288939086474e-05, "loss": 1.0451589822769165, "step": 738 }, { "epoch": 0.34236738475793377, "grad_norm": 0.7421875, "learning_rate": 3.8831518705827376e-05, "loss": 1.100400686264038, "step": 739 }, { "epoch": 0.3428306694463748, "grad_norm": 0.73046875, "learning_rate": 3.8805720777186314e-05, "loss": 0.907010555267334, "step": 740 }, { "epoch": 0.34329395413481584, "grad_norm": 0.72265625, "learning_rate": 3.877989520133662e-05, "loss": 1.0915554761886597, "step": 741 }, { "epoch": 0.34375723882325687, "grad_norm": 0.7890625, "learning_rate": 3.8754042026503224e-05, "loss": 1.011785626411438, "step": 742 }, { "epoch": 0.34422052351169796, "grad_norm": 0.8671875, "learning_rate": 3.872816130096261e-05, "loss": 1.0811213254928589, "step": 743 }, { "epoch": 0.344683808200139, "grad_norm": 0.85546875, "learning_rate": 3.8702253073042716e-05, "loss": 0.834938645362854, "step": 744 }, { "epoch": 0.34514709288858003, "grad_norm": 0.8203125, "learning_rate": 3.8676317391122824e-05, "loss": 0.9235035181045532, "step": 745 }, { "epoch": 0.34561037757702107, "grad_norm": 0.765625, "learning_rate": 3.865035430363348e-05, "loss": 1.0086536407470703, "step": 746 }, { "epoch": 0.3460736622654621, "grad_norm": 0.86328125, "learning_rate": 3.862436385905641e-05, "loss": 0.987399697303772, "step": 747 }, { "epoch": 0.3465369469539032, "grad_norm": 0.73046875, "learning_rate": 3.859834610592443e-05, "loss": 1.1993310451507568, "step": 748 }, { "epoch": 0.3470002316423442, "grad_norm": 0.828125, "learning_rate": 3.857230109282134e-05, "loss": 1.0457353591918945, "step": 749 }, { "epoch": 0.34746351633078526, "grad_norm": 0.91015625, "learning_rate": 3.854622886838185e-05, "loss": 1.139293909072876, "step": 750 }, { "epoch": 0.3479268010192263, "grad_norm": 0.78125, "learning_rate": 3.852012948129148e-05, "loss": 1.0585147142410278, "step": 751 }, { "epoch": 0.3483900857076674, "grad_norm": 0.92578125, "learning_rate": 3.849400298028647e-05, "loss": 0.9727704524993896, "step": 752 }, { "epoch": 0.3488533703961084, "grad_norm": 0.71875, "learning_rate": 3.846784941415371e-05, "loss": 0.9992061257362366, "step": 753 }, { "epoch": 0.34931665508454945, "grad_norm": 0.75390625, "learning_rate": 3.8441668831730586e-05, "loss": 1.1475231647491455, "step": 754 }, { "epoch": 0.3497799397729905, "grad_norm": 0.875, "learning_rate": 3.8415461281904984e-05, "loss": 1.036689281463623, "step": 755 }, { "epoch": 0.3502432244614315, "grad_norm": 0.81640625, "learning_rate": 3.83892268136151e-05, "loss": 0.87852543592453, "step": 756 }, { "epoch": 0.3507065091498726, "grad_norm": 0.91015625, "learning_rate": 3.8362965475849445e-05, "loss": 0.9990617036819458, "step": 757 }, { "epoch": 0.35116979383831365, "grad_norm": 0.96484375, "learning_rate": 3.833667731764665e-05, "loss": 1.1460075378417969, "step": 758 }, { "epoch": 0.3516330785267547, "grad_norm": 0.75, "learning_rate": 3.831036238809548e-05, "loss": 0.9850847721099854, "step": 759 }, { "epoch": 0.3520963632151957, "grad_norm": 0.7734375, "learning_rate": 3.828402073633464e-05, "loss": 0.9239014387130737, "step": 760 }, { "epoch": 0.3525596479036368, "grad_norm": 0.80078125, "learning_rate": 3.825765241155279e-05, "loss": 0.9535107016563416, "step": 761 }, { "epoch": 0.35302293259207784, "grad_norm": 0.74609375, "learning_rate": 3.8231257462988355e-05, "loss": 0.9702818989753723, "step": 762 }, { "epoch": 0.3534862172805189, "grad_norm": 0.83984375, "learning_rate": 3.820483593992948e-05, "loss": 0.9807397127151489, "step": 763 }, { "epoch": 0.3539495019689599, "grad_norm": 0.859375, "learning_rate": 3.817838789171397e-05, "loss": 0.9782893061637878, "step": 764 }, { "epoch": 0.35441278665740095, "grad_norm": 0.87109375, "learning_rate": 3.815191336772911e-05, "loss": 1.050409197807312, "step": 765 }, { "epoch": 0.35487607134584204, "grad_norm": 0.79296875, "learning_rate": 3.812541241741164e-05, "loss": 0.9888057112693787, "step": 766 }, { "epoch": 0.35533935603428307, "grad_norm": 0.89453125, "learning_rate": 3.809888509024769e-05, "loss": 0.9265248775482178, "step": 767 }, { "epoch": 0.3558026407227241, "grad_norm": 0.80078125, "learning_rate": 3.807233143577258e-05, "loss": 0.9314517378807068, "step": 768 }, { "epoch": 0.35626592541116514, "grad_norm": 0.80078125, "learning_rate": 3.804575150357084e-05, "loss": 1.0255682468414307, "step": 769 }, { "epoch": 0.35672921009960623, "grad_norm": 0.79296875, "learning_rate": 3.8019145343276026e-05, "loss": 1.2007423639297485, "step": 770 }, { "epoch": 0.35719249478804727, "grad_norm": 0.7734375, "learning_rate": 3.799251300457071e-05, "loss": 1.0465315580368042, "step": 771 }, { "epoch": 0.3576557794764883, "grad_norm": 0.84375, "learning_rate": 3.796585453718635e-05, "loss": 0.9410252571105957, "step": 772 }, { "epoch": 0.35811906416492933, "grad_norm": 0.76171875, "learning_rate": 3.7939169990903146e-05, "loss": 0.8860654830932617, "step": 773 }, { "epoch": 0.35858234885337037, "grad_norm": 0.8203125, "learning_rate": 3.791245941555004e-05, "loss": 0.9542768001556396, "step": 774 }, { "epoch": 0.35904563354181146, "grad_norm": 0.98046875, "learning_rate": 3.788572286100457e-05, "loss": 1.121732473373413, "step": 775 }, { "epoch": 0.3595089182302525, "grad_norm": 0.796875, "learning_rate": 3.785896037719278e-05, "loss": 1.0113410949707031, "step": 776 }, { "epoch": 0.35997220291869353, "grad_norm": 0.84765625, "learning_rate": 3.7832172014089136e-05, "loss": 0.9060476422309875, "step": 777 }, { "epoch": 0.36043548760713456, "grad_norm": 0.76953125, "learning_rate": 3.780535782171643e-05, "loss": 0.9059662818908691, "step": 778 }, { "epoch": 0.36089877229557565, "grad_norm": 0.765625, "learning_rate": 3.777851785014569e-05, "loss": 1.008833646774292, "step": 779 }, { "epoch": 0.3613620569840167, "grad_norm": 0.75390625, "learning_rate": 3.775165214949605e-05, "loss": 0.9600525498390198, "step": 780 }, { "epoch": 0.3618253416724577, "grad_norm": 0.890625, "learning_rate": 3.772476076993474e-05, "loss": 1.1373387575149536, "step": 781 }, { "epoch": 0.36228862636089876, "grad_norm": 0.8203125, "learning_rate": 3.769784376167691e-05, "loss": 0.9134207367897034, "step": 782 }, { "epoch": 0.3627519110493398, "grad_norm": 0.8046875, "learning_rate": 3.767090117498558e-05, "loss": 0.9469197988510132, "step": 783 }, { "epoch": 0.3632151957377809, "grad_norm": 0.79296875, "learning_rate": 3.764393306017151e-05, "loss": 1.1006484031677246, "step": 784 }, { "epoch": 0.3636784804262219, "grad_norm": 0.79296875, "learning_rate": 3.761693946759315e-05, "loss": 0.9012340307235718, "step": 785 }, { "epoch": 0.36414176511466295, "grad_norm": 0.8359375, "learning_rate": 3.758992044765654e-05, "loss": 1.0260119438171387, "step": 786 }, { "epoch": 0.364605049803104, "grad_norm": 0.82421875, "learning_rate": 3.756287605081517e-05, "loss": 1.1195753812789917, "step": 787 }, { "epoch": 0.3650683344915451, "grad_norm": 0.75, "learning_rate": 3.753580632756993e-05, "loss": 0.9033543467521667, "step": 788 }, { "epoch": 0.3655316191799861, "grad_norm": 0.9375, "learning_rate": 3.7508711328469e-05, "loss": 0.8747038841247559, "step": 789 }, { "epoch": 0.36599490386842715, "grad_norm": 0.83203125, "learning_rate": 3.7481591104107775e-05, "loss": 1.0821847915649414, "step": 790 }, { "epoch": 0.3664581885568682, "grad_norm": 0.83203125, "learning_rate": 3.745444570512872e-05, "loss": 1.023503303527832, "step": 791 }, { "epoch": 0.3669214732453092, "grad_norm": 0.90625, "learning_rate": 3.7427275182221356e-05, "loss": 1.1093895435333252, "step": 792 }, { "epoch": 0.3673847579337503, "grad_norm": 0.78125, "learning_rate": 3.740007958612207e-05, "loss": 1.0260508060455322, "step": 793 }, { "epoch": 0.36784804262219134, "grad_norm": 0.8203125, "learning_rate": 3.73728589676141e-05, "loss": 0.961272120475769, "step": 794 }, { "epoch": 0.3683113273106324, "grad_norm": 0.77734375, "learning_rate": 3.734561337752741e-05, "loss": 1.2031164169311523, "step": 795 }, { "epoch": 0.3687746119990734, "grad_norm": 0.8515625, "learning_rate": 3.7318342866738565e-05, "loss": 0.998257577419281, "step": 796 }, { "epoch": 0.3692378966875145, "grad_norm": 0.9296875, "learning_rate": 3.72910474861707e-05, "loss": 1.0234450101852417, "step": 797 }, { "epoch": 0.36970118137595553, "grad_norm": 0.80859375, "learning_rate": 3.726372728679338e-05, "loss": 1.0095072984695435, "step": 798 }, { "epoch": 0.37016446606439657, "grad_norm": 0.85546875, "learning_rate": 3.7236382319622494e-05, "loss": 1.0547491312026978, "step": 799 }, { "epoch": 0.3706277507528376, "grad_norm": 0.84375, "learning_rate": 3.720901263572021e-05, "loss": 1.1043885946273804, "step": 800 }, { "epoch": 0.37109103544127864, "grad_norm": 0.91015625, "learning_rate": 3.7181618286194834e-05, "loss": 1.0135180950164795, "step": 801 }, { "epoch": 0.37155432012971973, "grad_norm": 0.71875, "learning_rate": 3.715419932220074e-05, "loss": 0.9376970529556274, "step": 802 }, { "epoch": 0.37201760481816076, "grad_norm": 0.953125, "learning_rate": 3.7126755794938255e-05, "loss": 1.0911214351654053, "step": 803 }, { "epoch": 0.3724808895066018, "grad_norm": 0.80859375, "learning_rate": 3.7099287755653566e-05, "loss": 1.1035547256469727, "step": 804 }, { "epoch": 0.37294417419504283, "grad_norm": 0.87890625, "learning_rate": 3.707179525563866e-05, "loss": 1.1932406425476074, "step": 805 }, { "epoch": 0.3734074588834839, "grad_norm": 0.78515625, "learning_rate": 3.704427834623118e-05, "loss": 1.0412805080413818, "step": 806 }, { "epoch": 0.37387074357192496, "grad_norm": 0.90625, "learning_rate": 3.7016737078814365e-05, "loss": 1.1123768091201782, "step": 807 }, { "epoch": 0.374334028260366, "grad_norm": 0.765625, "learning_rate": 3.698917150481692e-05, "loss": 0.9360041618347168, "step": 808 }, { "epoch": 0.374797312948807, "grad_norm": 0.77734375, "learning_rate": 3.696158167571294e-05, "loss": 0.9965537190437317, "step": 809 }, { "epoch": 0.3752605976372481, "grad_norm": 0.8125, "learning_rate": 3.693396764302183e-05, "loss": 0.9210027456283569, "step": 810 }, { "epoch": 0.37572388232568915, "grad_norm": 0.8203125, "learning_rate": 3.690632945830817e-05, "loss": 0.9936932325363159, "step": 811 }, { "epoch": 0.3761871670141302, "grad_norm": 0.95703125, "learning_rate": 3.687866717318166e-05, "loss": 1.0398387908935547, "step": 812 }, { "epoch": 0.3766504517025712, "grad_norm": 0.8984375, "learning_rate": 3.685098083929699e-05, "loss": 0.9467533826828003, "step": 813 }, { "epoch": 0.37711373639101226, "grad_norm": 0.8359375, "learning_rate": 3.682327050835376e-05, "loss": 1.156292200088501, "step": 814 }, { "epoch": 0.37757702107945335, "grad_norm": 0.83203125, "learning_rate": 3.6795536232096374e-05, "loss": 0.986288845539093, "step": 815 }, { "epoch": 0.3780403057678944, "grad_norm": 0.8671875, "learning_rate": 3.676777806231396e-05, "loss": 1.123473882675171, "step": 816 }, { "epoch": 0.3785035904563354, "grad_norm": 0.88671875, "learning_rate": 3.673999605084028e-05, "loss": 1.0756930112838745, "step": 817 }, { "epoch": 0.37896687514477645, "grad_norm": 0.88671875, "learning_rate": 3.671219024955357e-05, "loss": 1.0483829975128174, "step": 818 }, { "epoch": 0.37943015983321754, "grad_norm": 0.90625, "learning_rate": 3.668436071037653e-05, "loss": 0.9522889852523804, "step": 819 }, { "epoch": 0.3798934445216586, "grad_norm": 0.7890625, "learning_rate": 3.665650748527616e-05, "loss": 1.056382179260254, "step": 820 }, { "epoch": 0.3803567292100996, "grad_norm": 0.78125, "learning_rate": 3.662863062626371e-05, "loss": 1.141240119934082, "step": 821 }, { "epoch": 0.38082001389854064, "grad_norm": 0.8828125, "learning_rate": 3.660073018539456e-05, "loss": 0.861331582069397, "step": 822 }, { "epoch": 0.3812832985869817, "grad_norm": 0.85546875, "learning_rate": 3.657280621476811e-05, "loss": 1.1151138544082642, "step": 823 }, { "epoch": 0.38174658327542277, "grad_norm": 0.69140625, "learning_rate": 3.654485876652772e-05, "loss": 0.9755687117576599, "step": 824 }, { "epoch": 0.3822098679638638, "grad_norm": 0.734375, "learning_rate": 3.651688789286056e-05, "loss": 0.9543071985244751, "step": 825 }, { "epoch": 0.38267315265230484, "grad_norm": 0.73046875, "learning_rate": 3.6488893645997575e-05, "loss": 0.9777738451957703, "step": 826 }, { "epoch": 0.3831364373407459, "grad_norm": 0.75390625, "learning_rate": 3.646087607821333e-05, "loss": 1.010209321975708, "step": 827 }, { "epoch": 0.38359972202918696, "grad_norm": 0.81640625, "learning_rate": 3.6432835241825965e-05, "loss": 1.0441359281539917, "step": 828 }, { "epoch": 0.384063006717628, "grad_norm": 0.83203125, "learning_rate": 3.640477118919705e-05, "loss": 0.8406580090522766, "step": 829 }, { "epoch": 0.38452629140606903, "grad_norm": 0.90234375, "learning_rate": 3.637668397273149e-05, "loss": 1.0144675970077515, "step": 830 }, { "epoch": 0.38498957609451007, "grad_norm": 0.8828125, "learning_rate": 3.6348573644877495e-05, "loss": 1.2290412187576294, "step": 831 }, { "epoch": 0.3854528607829511, "grad_norm": 0.76171875, "learning_rate": 3.63204402581264e-05, "loss": 0.8894533514976501, "step": 832 }, { "epoch": 0.3859161454713922, "grad_norm": 0.8046875, "learning_rate": 3.629228386501259e-05, "loss": 1.1188613176345825, "step": 833 }, { "epoch": 0.3863794301598332, "grad_norm": 0.8203125, "learning_rate": 3.626410451811342e-05, "loss": 0.9740458726882935, "step": 834 }, { "epoch": 0.38684271484827426, "grad_norm": 0.8046875, "learning_rate": 3.623590227004913e-05, "loss": 0.7910479307174683, "step": 835 }, { "epoch": 0.3873059995367153, "grad_norm": 0.828125, "learning_rate": 3.620767717348268e-05, "loss": 0.9454694986343384, "step": 836 }, { "epoch": 0.3877692842251564, "grad_norm": 0.875, "learning_rate": 3.617942928111973e-05, "loss": 1.0109909772872925, "step": 837 }, { "epoch": 0.3882325689135974, "grad_norm": 0.84765625, "learning_rate": 3.615115864570851e-05, "loss": 0.9801681041717529, "step": 838 }, { "epoch": 0.38869585360203845, "grad_norm": 0.7890625, "learning_rate": 3.612286532003969e-05, "loss": 1.106335163116455, "step": 839 }, { "epoch": 0.3891591382904795, "grad_norm": 0.83203125, "learning_rate": 3.609454935694634e-05, "loss": 0.9830104112625122, "step": 840 }, { "epoch": 0.3896224229789205, "grad_norm": 0.90234375, "learning_rate": 3.606621080930376e-05, "loss": 1.0451645851135254, "step": 841 }, { "epoch": 0.3900857076673616, "grad_norm": 0.80078125, "learning_rate": 3.603784973002948e-05, "loss": 0.980257511138916, "step": 842 }, { "epoch": 0.39054899235580265, "grad_norm": 0.78515625, "learning_rate": 3.600946617208306e-05, "loss": 0.9005157351493835, "step": 843 }, { "epoch": 0.3910122770442437, "grad_norm": 0.8203125, "learning_rate": 3.5981060188466055e-05, "loss": 0.9599143266677856, "step": 844 }, { "epoch": 0.3914755617326847, "grad_norm": 0.7734375, "learning_rate": 3.5952631832221895e-05, "loss": 0.9783821702003479, "step": 845 }, { "epoch": 0.3919388464211258, "grad_norm": 0.95703125, "learning_rate": 3.592418115643576e-05, "loss": 0.9757992625236511, "step": 846 }, { "epoch": 0.39240213110956684, "grad_norm": 0.7734375, "learning_rate": 3.589570821423457e-05, "loss": 0.9026694297790527, "step": 847 }, { "epoch": 0.3928654157980079, "grad_norm": 0.84765625, "learning_rate": 3.586721305878676e-05, "loss": 0.9864629507064819, "step": 848 }, { "epoch": 0.3933287004864489, "grad_norm": 0.88671875, "learning_rate": 3.583869574330227e-05, "loss": 0.9566922783851624, "step": 849 }, { "epoch": 0.39379198517488995, "grad_norm": 0.77734375, "learning_rate": 3.5810156321032424e-05, "loss": 1.0206118822097778, "step": 850 }, { "epoch": 0.39425526986333104, "grad_norm": 0.76953125, "learning_rate": 3.5781594845269824e-05, "loss": 1.2455644607543945, "step": 851 }, { "epoch": 0.39471855455177207, "grad_norm": 0.8203125, "learning_rate": 3.575301136934825e-05, "loss": 0.9965265393257141, "step": 852 }, { "epoch": 0.3951818392402131, "grad_norm": 0.80078125, "learning_rate": 3.5724405946642565e-05, "loss": 1.058623194694519, "step": 853 }, { "epoch": 0.39564512392865414, "grad_norm": 0.71484375, "learning_rate": 3.569577863056861e-05, "loss": 0.9021344184875488, "step": 854 }, { "epoch": 0.39610840861709523, "grad_norm": 0.8984375, "learning_rate": 3.5667129474583116e-05, "loss": 1.1672606468200684, "step": 855 }, { "epoch": 0.39657169330553627, "grad_norm": 0.7109375, "learning_rate": 3.5638458532183604e-05, "loss": 0.8217394351959229, "step": 856 }, { "epoch": 0.3970349779939773, "grad_norm": 0.75390625, "learning_rate": 3.5609765856908244e-05, "loss": 1.0040171146392822, "step": 857 }, { "epoch": 0.39749826268241834, "grad_norm": 0.81640625, "learning_rate": 3.5581051502335834e-05, "loss": 1.053956389427185, "step": 858 }, { "epoch": 0.39796154737085937, "grad_norm": 0.8359375, "learning_rate": 3.555231552208561e-05, "loss": 1.0706506967544556, "step": 859 }, { "epoch": 0.39842483205930046, "grad_norm": 0.84375, "learning_rate": 3.5523557969817226e-05, "loss": 0.8872452974319458, "step": 860 }, { "epoch": 0.3988881167477415, "grad_norm": 0.8671875, "learning_rate": 3.5494778899230605e-05, "loss": 0.9684238433837891, "step": 861 }, { "epoch": 0.39935140143618253, "grad_norm": 0.75, "learning_rate": 3.5465978364065835e-05, "loss": 1.1116052865982056, "step": 862 }, { "epoch": 0.39981468612462356, "grad_norm": 0.99609375, "learning_rate": 3.543715641810312e-05, "loss": 0.9077733159065247, "step": 863 }, { "epoch": 0.40027797081306465, "grad_norm": 1.0234375, "learning_rate": 3.540831311516261e-05, "loss": 1.0570735931396484, "step": 864 }, { "epoch": 0.4007412555015057, "grad_norm": 0.76171875, "learning_rate": 3.537944850910436e-05, "loss": 1.1230758428573608, "step": 865 }, { "epoch": 0.4012045401899467, "grad_norm": 0.7578125, "learning_rate": 3.5350562653828204e-05, "loss": 0.9723849296569824, "step": 866 }, { "epoch": 0.40166782487838776, "grad_norm": 0.8515625, "learning_rate": 3.532165560327364e-05, "loss": 0.9751421213150024, "step": 867 }, { "epoch": 0.4021311095668288, "grad_norm": 0.796875, "learning_rate": 3.529272741141974e-05, "loss": 0.9020988941192627, "step": 868 }, { "epoch": 0.4025943942552699, "grad_norm": 0.8046875, "learning_rate": 3.5263778132285085e-05, "loss": 0.9929109811782837, "step": 869 }, { "epoch": 0.4030576789437109, "grad_norm": 0.79296875, "learning_rate": 3.5234807819927625e-05, "loss": 1.088818073272705, "step": 870 }, { "epoch": 0.40352096363215195, "grad_norm": 0.86328125, "learning_rate": 3.520581652844454e-05, "loss": 1.1746731996536255, "step": 871 }, { "epoch": 0.403984248320593, "grad_norm": 0.796875, "learning_rate": 3.517680431197226e-05, "loss": 1.0509936809539795, "step": 872 }, { "epoch": 0.4044475330090341, "grad_norm": 0.91015625, "learning_rate": 3.514777122468621e-05, "loss": 1.023998737335205, "step": 873 }, { "epoch": 0.4049108176974751, "grad_norm": 0.87109375, "learning_rate": 3.511871732080087e-05, "loss": 0.9446095824241638, "step": 874 }, { "epoch": 0.40537410238591615, "grad_norm": 0.68359375, "learning_rate": 3.508964265456951e-05, "loss": 0.9351980686187744, "step": 875 }, { "epoch": 0.4058373870743572, "grad_norm": 0.6953125, "learning_rate": 3.506054728028423e-05, "loss": 0.9516130685806274, "step": 876 }, { "epoch": 0.4063006717627982, "grad_norm": 0.8359375, "learning_rate": 3.503143125227577e-05, "loss": 1.006507158279419, "step": 877 }, { "epoch": 0.4067639564512393, "grad_norm": 0.6953125, "learning_rate": 3.500229462491346e-05, "loss": 0.8910001516342163, "step": 878 }, { "epoch": 0.40722724113968034, "grad_norm": 0.76953125, "learning_rate": 3.497313745260507e-05, "loss": 0.8634387850761414, "step": 879 }, { "epoch": 0.4076905258281214, "grad_norm": 0.71484375, "learning_rate": 3.494395978979673e-05, "loss": 1.022470235824585, "step": 880 }, { "epoch": 0.4081538105165624, "grad_norm": 0.74609375, "learning_rate": 3.491476169097288e-05, "loss": 1.0753809213638306, "step": 881 }, { "epoch": 0.4086170952050035, "grad_norm": 0.7578125, "learning_rate": 3.488554321065606e-05, "loss": 0.8573417067527771, "step": 882 }, { "epoch": 0.40908037989344453, "grad_norm": 0.9296875, "learning_rate": 3.485630440340692e-05, "loss": 0.9140716195106506, "step": 883 }, { "epoch": 0.40954366458188557, "grad_norm": 0.7421875, "learning_rate": 3.482704532382404e-05, "loss": 0.8698415756225586, "step": 884 }, { "epoch": 0.4100069492703266, "grad_norm": 0.95703125, "learning_rate": 3.479776602654384e-05, "loss": 0.9196599721908569, "step": 885 }, { "epoch": 0.41047023395876764, "grad_norm": 0.8125, "learning_rate": 3.476846656624054e-05, "loss": 1.182805061340332, "step": 886 }, { "epoch": 0.41093351864720873, "grad_norm": 0.92578125, "learning_rate": 3.4739146997625966e-05, "loss": 1.1990854740142822, "step": 887 }, { "epoch": 0.41139680333564976, "grad_norm": 0.76171875, "learning_rate": 3.4709807375449526e-05, "loss": 0.9600467681884766, "step": 888 }, { "epoch": 0.4118600880240908, "grad_norm": 0.72265625, "learning_rate": 3.468044775449804e-05, "loss": 0.9062017202377319, "step": 889 }, { "epoch": 0.41232337271253183, "grad_norm": 0.83203125, "learning_rate": 3.4651068189595725e-05, "loss": 1.0649828910827637, "step": 890 }, { "epoch": 0.4127866574009729, "grad_norm": 0.79296875, "learning_rate": 3.4621668735603974e-05, "loss": 0.8955351710319519, "step": 891 }, { "epoch": 0.41324994208941396, "grad_norm": 0.82421875, "learning_rate": 3.459224944742137e-05, "loss": 0.8985044360160828, "step": 892 }, { "epoch": 0.413713226777855, "grad_norm": 0.765625, "learning_rate": 3.4562810379983515e-05, "loss": 0.9573203921318054, "step": 893 }, { "epoch": 0.414176511466296, "grad_norm": 0.8515625, "learning_rate": 3.453335158826294e-05, "loss": 0.8726100921630859, "step": 894 }, { "epoch": 0.41463979615473706, "grad_norm": 0.796875, "learning_rate": 3.450387312726902e-05, "loss": 0.9778281450271606, "step": 895 }, { "epoch": 0.41510308084317815, "grad_norm": 0.87109375, "learning_rate": 3.447437505204785e-05, "loss": 0.9495804905891418, "step": 896 }, { "epoch": 0.4155663655316192, "grad_norm": 0.8125, "learning_rate": 3.444485741768216e-05, "loss": 1.0823774337768555, "step": 897 }, { "epoch": 0.4160296502200602, "grad_norm": 0.80078125, "learning_rate": 3.441532027929119e-05, "loss": 1.0764063596725464, "step": 898 }, { "epoch": 0.41649293490850126, "grad_norm": 0.95703125, "learning_rate": 3.438576369203061e-05, "loss": 0.9195699691772461, "step": 899 }, { "epoch": 0.41695621959694235, "grad_norm": 0.95703125, "learning_rate": 3.435618771109241e-05, "loss": 1.0315985679626465, "step": 900 }, { "epoch": 0.4174195042853834, "grad_norm": 0.9609375, "learning_rate": 3.43265923917048e-05, "loss": 0.9629949331283569, "step": 901 }, { "epoch": 0.4178827889738244, "grad_norm": 0.86328125, "learning_rate": 3.4296977789132076e-05, "loss": 0.9754863977432251, "step": 902 }, { "epoch": 0.41834607366226545, "grad_norm": 0.85546875, "learning_rate": 3.4267343958674553e-05, "loss": 1.0928244590759277, "step": 903 }, { "epoch": 0.4188093583507065, "grad_norm": 0.7421875, "learning_rate": 3.423769095566848e-05, "loss": 0.9829870462417603, "step": 904 }, { "epoch": 0.4192726430391476, "grad_norm": 0.78515625, "learning_rate": 3.420801883548586e-05, "loss": 1.0500094890594482, "step": 905 }, { "epoch": 0.4197359277275886, "grad_norm": 0.80859375, "learning_rate": 3.417832765353443e-05, "loss": 1.0949947834014893, "step": 906 }, { "epoch": 0.42019921241602964, "grad_norm": 0.74609375, "learning_rate": 3.4148617465257505e-05, "loss": 0.9589704275131226, "step": 907 }, { "epoch": 0.4206624971044707, "grad_norm": 0.703125, "learning_rate": 3.41188883261339e-05, "loss": 0.9546459913253784, "step": 908 }, { "epoch": 0.42112578179291177, "grad_norm": 0.828125, "learning_rate": 3.40891402916778e-05, "loss": 1.1055099964141846, "step": 909 }, { "epoch": 0.4215890664813528, "grad_norm": 0.84765625, "learning_rate": 3.40593734174387e-05, "loss": 0.9318978786468506, "step": 910 }, { "epoch": 0.42205235116979384, "grad_norm": 0.79296875, "learning_rate": 3.402958775900126e-05, "loss": 0.8830047249794006, "step": 911 }, { "epoch": 0.4225156358582349, "grad_norm": 0.7734375, "learning_rate": 3.399978337198521e-05, "loss": 0.9814854264259338, "step": 912 }, { "epoch": 0.4229789205466759, "grad_norm": 0.7890625, "learning_rate": 3.3969960312045276e-05, "loss": 0.9006556272506714, "step": 913 }, { "epoch": 0.423442205235117, "grad_norm": 0.77734375, "learning_rate": 3.394011863487102e-05, "loss": 1.1782516241073608, "step": 914 }, { "epoch": 0.42390548992355803, "grad_norm": 0.80078125, "learning_rate": 3.39102583961868e-05, "loss": 0.9220030903816223, "step": 915 }, { "epoch": 0.42436877461199907, "grad_norm": 0.78515625, "learning_rate": 3.388037965175161e-05, "loss": 1.0260250568389893, "step": 916 }, { "epoch": 0.4248320593004401, "grad_norm": 0.71875, "learning_rate": 3.385048245735901e-05, "loss": 0.9909316301345825, "step": 917 }, { "epoch": 0.4252953439888812, "grad_norm": 0.7421875, "learning_rate": 3.3820566868837025e-05, "loss": 0.9483840465545654, "step": 918 }, { "epoch": 0.4257586286773222, "grad_norm": 0.7890625, "learning_rate": 3.3790632942048e-05, "loss": 0.9133286476135254, "step": 919 }, { "epoch": 0.42622191336576326, "grad_norm": 0.7265625, "learning_rate": 3.376068073288856e-05, "loss": 0.8733887076377869, "step": 920 }, { "epoch": 0.4266851980542043, "grad_norm": 0.8046875, "learning_rate": 3.373071029728942e-05, "loss": 0.9942792654037476, "step": 921 }, { "epoch": 0.42714848274264533, "grad_norm": 0.75, "learning_rate": 3.370072169121539e-05, "loss": 1.031928539276123, "step": 922 }, { "epoch": 0.4276117674310864, "grad_norm": 0.859375, "learning_rate": 3.367071497066516e-05, "loss": 1.044718861579895, "step": 923 }, { "epoch": 0.42807505211952745, "grad_norm": 0.7421875, "learning_rate": 3.364069019167127e-05, "loss": 1.10916268825531, "step": 924 }, { "epoch": 0.4285383368079685, "grad_norm": 0.9453125, "learning_rate": 3.361064741029997e-05, "loss": 1.1920192241668701, "step": 925 }, { "epoch": 0.4290016214964095, "grad_norm": 0.796875, "learning_rate": 3.3580586682651144e-05, "loss": 0.8233553171157837, "step": 926 }, { "epoch": 0.4294649061848506, "grad_norm": 0.80078125, "learning_rate": 3.3550508064858165e-05, "loss": 0.8930643796920776, "step": 927 }, { "epoch": 0.42992819087329165, "grad_norm": 0.81640625, "learning_rate": 3.352041161308782e-05, "loss": 0.9572421312332153, "step": 928 }, { "epoch": 0.4303914755617327, "grad_norm": 0.76171875, "learning_rate": 3.349029738354023e-05, "loss": 1.018913984298706, "step": 929 }, { "epoch": 0.4308547602501737, "grad_norm": 0.83984375, "learning_rate": 3.346016543244865e-05, "loss": 0.9118576049804688, "step": 930 }, { "epoch": 0.43131804493861475, "grad_norm": 0.76171875, "learning_rate": 3.343001581607949e-05, "loss": 0.8083831071853638, "step": 931 }, { "epoch": 0.43178132962705584, "grad_norm": 0.87109375, "learning_rate": 3.339984859073209e-05, "loss": 0.8767201900482178, "step": 932 }, { "epoch": 0.4322446143154969, "grad_norm": 0.8046875, "learning_rate": 3.3369663812738717e-05, "loss": 0.9277627468109131, "step": 933 }, { "epoch": 0.4327078990039379, "grad_norm": 1.0, "learning_rate": 3.333946153846441e-05, "loss": 1.1319029331207275, "step": 934 }, { "epoch": 0.43317118369237895, "grad_norm": 0.78515625, "learning_rate": 3.330924182430684e-05, "loss": 0.8661171197891235, "step": 935 }, { "epoch": 0.43363446838082004, "grad_norm": 0.7890625, "learning_rate": 3.327900472669629e-05, "loss": 0.9028452634811401, "step": 936 }, { "epoch": 0.43409775306926107, "grad_norm": 1.2421875, "learning_rate": 3.324875030209549e-05, "loss": 1.1232396364212036, "step": 937 }, { "epoch": 0.4345610377577021, "grad_norm": 0.94921875, "learning_rate": 3.32184786069995e-05, "loss": 1.0140894651412964, "step": 938 }, { "epoch": 0.43502432244614314, "grad_norm": 0.94140625, "learning_rate": 3.318818969793567e-05, "loss": 1.0171058177947998, "step": 939 }, { "epoch": 0.4354876071345842, "grad_norm": 0.83984375, "learning_rate": 3.3157883631463465e-05, "loss": 1.07037353515625, "step": 940 }, { "epoch": 0.43595089182302527, "grad_norm": 0.86328125, "learning_rate": 3.312756046417441e-05, "loss": 1.1801575422286987, "step": 941 }, { "epoch": 0.4364141765114663, "grad_norm": 0.85546875, "learning_rate": 3.309722025269193e-05, "loss": 1.1600738763809204, "step": 942 }, { "epoch": 0.43687746119990734, "grad_norm": 0.84765625, "learning_rate": 3.306686305367132e-05, "loss": 0.9927069544792175, "step": 943 }, { "epoch": 0.43734074588834837, "grad_norm": 0.84765625, "learning_rate": 3.303648892379956e-05, "loss": 1.0282708406448364, "step": 944 }, { "epoch": 0.43780403057678946, "grad_norm": 0.86328125, "learning_rate": 3.300609791979526e-05, "loss": 1.0274934768676758, "step": 945 }, { "epoch": 0.4382673152652305, "grad_norm": 0.8671875, "learning_rate": 3.2975690098408555e-05, "loss": 0.9179637432098389, "step": 946 }, { "epoch": 0.43873059995367153, "grad_norm": 0.78125, "learning_rate": 3.2945265516420954e-05, "loss": 0.9191789627075195, "step": 947 }, { "epoch": 0.43919388464211256, "grad_norm": 0.890625, "learning_rate": 3.291482423064528e-05, "loss": 1.0582070350646973, "step": 948 }, { "epoch": 0.4396571693305536, "grad_norm": 0.8671875, "learning_rate": 3.2884366297925543e-05, "loss": 0.9630937576293945, "step": 949 }, { "epoch": 0.4401204540189947, "grad_norm": 0.6875, "learning_rate": 3.2853891775136854e-05, "loss": 1.039337158203125, "step": 950 }, { "epoch": 0.4405837387074357, "grad_norm": 0.92578125, "learning_rate": 3.2823400719185286e-05, "loss": 1.1467114686965942, "step": 951 }, { "epoch": 0.44104702339587676, "grad_norm": 0.73046875, "learning_rate": 3.279289318700778e-05, "loss": 1.0572043657302856, "step": 952 }, { "epoch": 0.4415103080843178, "grad_norm": 0.81640625, "learning_rate": 3.276236923557206e-05, "loss": 1.015242099761963, "step": 953 }, { "epoch": 0.4419735927727589, "grad_norm": 0.83984375, "learning_rate": 3.27318289218765e-05, "loss": 1.0399034023284912, "step": 954 }, { "epoch": 0.4424368774611999, "grad_norm": 0.79296875, "learning_rate": 3.2701272302950036e-05, "loss": 1.0193357467651367, "step": 955 }, { "epoch": 0.44290016214964095, "grad_norm": 0.8203125, "learning_rate": 3.2670699435852034e-05, "loss": 0.9832947850227356, "step": 956 }, { "epoch": 0.443363446838082, "grad_norm": 0.890625, "learning_rate": 3.2640110377672225e-05, "loss": 0.9701854586601257, "step": 957 }, { "epoch": 0.443826731526523, "grad_norm": 0.8203125, "learning_rate": 3.260950518553056e-05, "loss": 1.1149054765701294, "step": 958 }, { "epoch": 0.4442900162149641, "grad_norm": 1.0546875, "learning_rate": 3.257888391657711e-05, "loss": 0.949036180973053, "step": 959 }, { "epoch": 0.44475330090340515, "grad_norm": 0.796875, "learning_rate": 3.254824662799199e-05, "loss": 0.976753294467926, "step": 960 }, { "epoch": 0.4452165855918462, "grad_norm": 0.8125, "learning_rate": 3.2517593376985216e-05, "loss": 0.9391505122184753, "step": 961 }, { "epoch": 0.4456798702802872, "grad_norm": 0.8671875, "learning_rate": 3.248692422079659e-05, "loss": 1.0805474519729614, "step": 962 }, { "epoch": 0.4461431549687283, "grad_norm": 0.88671875, "learning_rate": 3.245623921669565e-05, "loss": 0.8896968364715576, "step": 963 }, { "epoch": 0.44660643965716934, "grad_norm": 0.76171875, "learning_rate": 3.2425538421981515e-05, "loss": 0.9105522036552429, "step": 964 }, { "epoch": 0.4470697243456104, "grad_norm": 0.85546875, "learning_rate": 3.2394821893982765e-05, "loss": 1.0720794200897217, "step": 965 }, { "epoch": 0.4475330090340514, "grad_norm": 0.84765625, "learning_rate": 3.2364089690057414e-05, "loss": 0.9761070013046265, "step": 966 }, { "epoch": 0.44799629372249244, "grad_norm": 0.78515625, "learning_rate": 3.2333341867592697e-05, "loss": 0.9775373339653015, "step": 967 }, { "epoch": 0.44845957841093353, "grad_norm": 0.7578125, "learning_rate": 3.230257848400503e-05, "loss": 1.045255184173584, "step": 968 }, { "epoch": 0.44892286309937457, "grad_norm": 0.90234375, "learning_rate": 3.22717995967399e-05, "loss": 0.9985790848731995, "step": 969 }, { "epoch": 0.4493861477878156, "grad_norm": 0.69140625, "learning_rate": 3.224100526327173e-05, "loss": 0.8842822909355164, "step": 970 }, { "epoch": 0.44984943247625664, "grad_norm": 1.15625, "learning_rate": 3.221019554110378e-05, "loss": 1.080345630645752, "step": 971 }, { "epoch": 0.45031271716469773, "grad_norm": 0.82421875, "learning_rate": 3.2179370487768067e-05, "loss": 1.0478893518447876, "step": 972 }, { "epoch": 0.45077600185313876, "grad_norm": 0.6953125, "learning_rate": 3.214853016082523e-05, "loss": 0.9434666037559509, "step": 973 }, { "epoch": 0.4512392865415798, "grad_norm": 0.91015625, "learning_rate": 3.211767461786441e-05, "loss": 0.9274519681930542, "step": 974 }, { "epoch": 0.45170257123002083, "grad_norm": 0.796875, "learning_rate": 3.208680391650319e-05, "loss": 1.0596266984939575, "step": 975 }, { "epoch": 0.45216585591846187, "grad_norm": 0.8828125, "learning_rate": 3.205591811438744e-05, "loss": 0.9719846844673157, "step": 976 }, { "epoch": 0.45262914060690296, "grad_norm": 0.91796875, "learning_rate": 3.2025017269191223e-05, "loss": 1.1913756132125854, "step": 977 }, { "epoch": 0.453092425295344, "grad_norm": 0.7578125, "learning_rate": 3.199410143861671e-05, "loss": 0.8659987449645996, "step": 978 }, { "epoch": 0.453555709983785, "grad_norm": 0.94140625, "learning_rate": 3.196317068039405e-05, "loss": 1.1136746406555176, "step": 979 }, { "epoch": 0.45401899467222606, "grad_norm": 0.80859375, "learning_rate": 3.193222505228125e-05, "loss": 0.954369068145752, "step": 980 }, { "epoch": 0.45448227936066715, "grad_norm": 0.85546875, "learning_rate": 3.1901264612064124e-05, "loss": 1.025739073753357, "step": 981 }, { "epoch": 0.4549455640491082, "grad_norm": 0.77734375, "learning_rate": 3.1870289417556095e-05, "loss": 1.1050188541412354, "step": 982 }, { "epoch": 0.4554088487375492, "grad_norm": 0.9765625, "learning_rate": 3.1839299526598156e-05, "loss": 0.9074011445045471, "step": 983 }, { "epoch": 0.45587213342599026, "grad_norm": 0.80078125, "learning_rate": 3.180829499705879e-05, "loss": 0.8634052872657776, "step": 984 }, { "epoch": 0.4563354181144313, "grad_norm": 0.8203125, "learning_rate": 3.1777275886833714e-05, "loss": 0.9090867638587952, "step": 985 }, { "epoch": 0.4567987028028724, "grad_norm": 0.71875, "learning_rate": 3.1746242253845975e-05, "loss": 0.90366530418396, "step": 986 }, { "epoch": 0.4572619874913134, "grad_norm": 0.9296875, "learning_rate": 3.1715194156045676e-05, "loss": 0.9696229696273804, "step": 987 }, { "epoch": 0.45772527217975445, "grad_norm": 0.73828125, "learning_rate": 3.168413165140996e-05, "loss": 0.9196410179138184, "step": 988 }, { "epoch": 0.4581885568681955, "grad_norm": 0.765625, "learning_rate": 3.165305479794285e-05, "loss": 1.135054111480713, "step": 989 }, { "epoch": 0.4586518415566366, "grad_norm": 0.87890625, "learning_rate": 3.162196365367518e-05, "loss": 0.9602132439613342, "step": 990 }, { "epoch": 0.4591151262450776, "grad_norm": 0.8671875, "learning_rate": 3.1590858276664475e-05, "loss": 0.875511884689331, "step": 991 }, { "epoch": 0.45957841093351864, "grad_norm": 0.7578125, "learning_rate": 3.155973872499481e-05, "loss": 0.9517480731010437, "step": 992 }, { "epoch": 0.4600416956219597, "grad_norm": 0.9140625, "learning_rate": 3.152860505677676e-05, "loss": 1.126634120941162, "step": 993 }, { "epoch": 0.46050498031040077, "grad_norm": 0.74609375, "learning_rate": 3.149745733014724e-05, "loss": 1.0964951515197754, "step": 994 }, { "epoch": 0.4609682649988418, "grad_norm": 0.8671875, "learning_rate": 3.146629560326942e-05, "loss": 1.0599464178085327, "step": 995 }, { "epoch": 0.46143154968728284, "grad_norm": 0.8828125, "learning_rate": 3.143511993433263e-05, "loss": 0.894392728805542, "step": 996 }, { "epoch": 0.4618948343757239, "grad_norm": 0.7109375, "learning_rate": 3.140393038155219e-05, "loss": 0.9883759021759033, "step": 997 }, { "epoch": 0.4623581190641649, "grad_norm": 0.81640625, "learning_rate": 3.1372727003169414e-05, "loss": 1.1054998636245728, "step": 998 }, { "epoch": 0.462821403752606, "grad_norm": 0.73828125, "learning_rate": 3.1341509857451374e-05, "loss": 0.9076305031776428, "step": 999 }, { "epoch": 0.46328468844104703, "grad_norm": 0.91796875, "learning_rate": 3.131027900269087e-05, "loss": 1.0859215259552002, "step": 1000 }, { "epoch": 0.46374797312948807, "grad_norm": 0.78515625, "learning_rate": 3.12790344972063e-05, "loss": 0.8842138648033142, "step": 1001 }, { "epoch": 0.4642112578179291, "grad_norm": 0.96875, "learning_rate": 3.1247776399341574e-05, "loss": 0.953213632106781, "step": 1002 }, { "epoch": 0.4646745425063702, "grad_norm": 0.77734375, "learning_rate": 3.121650476746595e-05, "loss": 0.9613085389137268, "step": 1003 }, { "epoch": 0.4651378271948112, "grad_norm": 0.7578125, "learning_rate": 3.1185219659973974e-05, "loss": 1.0341384410858154, "step": 1004 }, { "epoch": 0.46560111188325226, "grad_norm": 0.85546875, "learning_rate": 3.115392113528536e-05, "loss": 0.8540540337562561, "step": 1005 }, { "epoch": 0.4660643965716933, "grad_norm": 0.80859375, "learning_rate": 3.112260925184487e-05, "loss": 0.8874945044517517, "step": 1006 }, { "epoch": 0.46652768126013433, "grad_norm": 0.83984375, "learning_rate": 3.1091284068122206e-05, "loss": 0.9373153448104858, "step": 1007 }, { "epoch": 0.4669909659485754, "grad_norm": 0.83984375, "learning_rate": 3.1059945642611913e-05, "loss": 0.9182353615760803, "step": 1008 }, { "epoch": 0.46745425063701646, "grad_norm": 0.84765625, "learning_rate": 3.1028594033833274e-05, "loss": 0.9852114319801331, "step": 1009 }, { "epoch": 0.4679175353254575, "grad_norm": 0.9609375, "learning_rate": 3.099722930033017e-05, "loss": 1.0278407335281372, "step": 1010 }, { "epoch": 0.4683808200138985, "grad_norm": 0.79296875, "learning_rate": 3.0965851500670984e-05, "loss": 0.9236195683479309, "step": 1011 }, { "epoch": 0.4688441047023396, "grad_norm": 0.7421875, "learning_rate": 3.093446069344854e-05, "loss": 0.8953359127044678, "step": 1012 }, { "epoch": 0.46930738939078065, "grad_norm": 0.81640625, "learning_rate": 3.09030569372799e-05, "loss": 0.8945424556732178, "step": 1013 }, { "epoch": 0.4697706740792217, "grad_norm": 0.86328125, "learning_rate": 3.087164029080634e-05, "loss": 0.9093310236930847, "step": 1014 }, { "epoch": 0.4702339587676627, "grad_norm": 0.81640625, "learning_rate": 3.084021081269319e-05, "loss": 1.0238938331604004, "step": 1015 }, { "epoch": 0.47069724345610375, "grad_norm": 0.76171875, "learning_rate": 3.080876856162976e-05, "loss": 0.9495306015014648, "step": 1016 }, { "epoch": 0.47116052814454484, "grad_norm": 0.77734375, "learning_rate": 3.0777313596329175e-05, "loss": 0.8257219791412354, "step": 1017 }, { "epoch": 0.4716238128329859, "grad_norm": 0.7890625, "learning_rate": 3.074584597552834e-05, "loss": 1.2120308876037598, "step": 1018 }, { "epoch": 0.4720870975214269, "grad_norm": 0.85546875, "learning_rate": 3.071436575798779e-05, "loss": 0.8118107318878174, "step": 1019 }, { "epoch": 0.47255038220986795, "grad_norm": 0.75, "learning_rate": 3.068287300249154e-05, "loss": 0.9661107659339905, "step": 1020 }, { "epoch": 0.47301366689830904, "grad_norm": 0.73828125, "learning_rate": 3.065136776784706e-05, "loss": 0.9633262753486633, "step": 1021 }, { "epoch": 0.47347695158675007, "grad_norm": 0.8046875, "learning_rate": 3.061985011288511e-05, "loss": 0.8450291752815247, "step": 1022 }, { "epoch": 0.4739402362751911, "grad_norm": 0.8671875, "learning_rate": 3.0588320096459646e-05, "loss": 0.8301048278808594, "step": 1023 }, { "epoch": 0.47440352096363214, "grad_norm": 0.87890625, "learning_rate": 3.0556777777447695e-05, "loss": 1.1722376346588135, "step": 1024 }, { "epoch": 0.4748668056520732, "grad_norm": 0.80078125, "learning_rate": 3.0525223214749266e-05, "loss": 1.0436758995056152, "step": 1025 }, { "epoch": 0.47533009034051427, "grad_norm": 0.8515625, "learning_rate": 3.0493656467287242e-05, "loss": 1.0242863893508911, "step": 1026 }, { "epoch": 0.4757933750289553, "grad_norm": 0.84375, "learning_rate": 3.046207759400723e-05, "loss": 1.0861283540725708, "step": 1027 }, { "epoch": 0.47625665971739634, "grad_norm": 0.83984375, "learning_rate": 3.043048665387751e-05, "loss": 1.0913084745407104, "step": 1028 }, { "epoch": 0.47671994440583737, "grad_norm": 0.828125, "learning_rate": 3.0398883705888867e-05, "loss": 0.9872074723243713, "step": 1029 }, { "epoch": 0.47718322909427846, "grad_norm": 0.80859375, "learning_rate": 3.0367268809054554e-05, "loss": 0.9497346878051758, "step": 1030 }, { "epoch": 0.4776465137827195, "grad_norm": 0.78515625, "learning_rate": 3.0335642022410072e-05, "loss": 0.9556658864021301, "step": 1031 }, { "epoch": 0.47810979847116053, "grad_norm": 1.34375, "learning_rate": 3.0304003405013176e-05, "loss": 1.1097627878189087, "step": 1032 }, { "epoch": 0.47857308315960156, "grad_norm": 0.89453125, "learning_rate": 3.0272353015943694e-05, "loss": 1.0165081024169922, "step": 1033 }, { "epoch": 0.4790363678480426, "grad_norm": 0.7890625, "learning_rate": 3.024069091430343e-05, "loss": 0.957397997379303, "step": 1034 }, { "epoch": 0.4794996525364837, "grad_norm": 0.84375, "learning_rate": 3.0209017159216076e-05, "loss": 0.8591142892837524, "step": 1035 }, { "epoch": 0.4799629372249247, "grad_norm": 0.75390625, "learning_rate": 3.0177331809827064e-05, "loss": 0.8509551286697388, "step": 1036 }, { "epoch": 0.48042622191336576, "grad_norm": 0.95703125, "learning_rate": 3.0145634925303502e-05, "loss": 0.9679578542709351, "step": 1037 }, { "epoch": 0.4808895066018068, "grad_norm": 0.9296875, "learning_rate": 3.011392656483401e-05, "loss": 1.2282400131225586, "step": 1038 }, { "epoch": 0.4813527912902479, "grad_norm": 0.765625, "learning_rate": 3.0082206787628658e-05, "loss": 0.8720540404319763, "step": 1039 }, { "epoch": 0.4818160759786889, "grad_norm": 0.734375, "learning_rate": 3.005047565291882e-05, "loss": 1.1174383163452148, "step": 1040 }, { "epoch": 0.48227936066712995, "grad_norm": 0.80078125, "learning_rate": 3.0018733219957094e-05, "loss": 0.9691076278686523, "step": 1041 }, { "epoch": 0.482742645355571, "grad_norm": 0.81640625, "learning_rate": 2.998697954801717e-05, "loss": 1.0722172260284424, "step": 1042 }, { "epoch": 0.483205930044012, "grad_norm": 0.9765625, "learning_rate": 2.9955214696393707e-05, "loss": 1.0778554677963257, "step": 1043 }, { "epoch": 0.4836692147324531, "grad_norm": 0.84375, "learning_rate": 2.9923438724402278e-05, "loss": 0.9753159880638123, "step": 1044 }, { "epoch": 0.48413249942089415, "grad_norm": 0.8046875, "learning_rate": 2.989165169137918e-05, "loss": 0.9562061429023743, "step": 1045 }, { "epoch": 0.4845957841093352, "grad_norm": 0.75, "learning_rate": 2.9859853656681395e-05, "loss": 0.8260341882705688, "step": 1046 }, { "epoch": 0.4850590687977762, "grad_norm": 0.87109375, "learning_rate": 2.982804467968643e-05, "loss": 1.0546704530715942, "step": 1047 }, { "epoch": 0.4855223534862173, "grad_norm": 0.83984375, "learning_rate": 2.979622481979224e-05, "loss": 0.9112711548805237, "step": 1048 }, { "epoch": 0.48598563817465834, "grad_norm": 0.79296875, "learning_rate": 2.9764394136417088e-05, "loss": 0.9864487648010254, "step": 1049 }, { "epoch": 0.4864489228630994, "grad_norm": 0.75390625, "learning_rate": 2.973255268899945e-05, "loss": 0.9202637076377869, "step": 1050 }, { "epoch": 0.4869122075515404, "grad_norm": 0.90625, "learning_rate": 2.970070053699792e-05, "loss": 1.0404424667358398, "step": 1051 }, { "epoch": 0.48737549223998144, "grad_norm": 0.89453125, "learning_rate": 2.9668837739891063e-05, "loss": 1.0003234148025513, "step": 1052 }, { "epoch": 0.48783877692842254, "grad_norm": 0.890625, "learning_rate": 2.9636964357177317e-05, "loss": 0.9466184377670288, "step": 1053 }, { "epoch": 0.48830206161686357, "grad_norm": 0.828125, "learning_rate": 2.9605080448374903e-05, "loss": 1.1342371702194214, "step": 1054 }, { "epoch": 0.4887653463053046, "grad_norm": 0.8828125, "learning_rate": 2.9573186073021696e-05, "loss": 1.0444382429122925, "step": 1055 }, { "epoch": 0.48922863099374564, "grad_norm": 0.67578125, "learning_rate": 2.95412812906751e-05, "loss": 0.9123279452323914, "step": 1056 }, { "epoch": 0.48969191568218673, "grad_norm": 0.9453125, "learning_rate": 2.9509366160911977e-05, "loss": 0.9924875497817993, "step": 1057 }, { "epoch": 0.49015520037062776, "grad_norm": 0.703125, "learning_rate": 2.9477440743328484e-05, "loss": 0.9442932605743408, "step": 1058 }, { "epoch": 0.4906184850590688, "grad_norm": 0.8984375, "learning_rate": 2.944550509754e-05, "loss": 0.9229353666305542, "step": 1059 }, { "epoch": 0.49108176974750983, "grad_norm": 0.84765625, "learning_rate": 2.9413559283181028e-05, "loss": 0.8888018727302551, "step": 1060 }, { "epoch": 0.49154505443595087, "grad_norm": 0.7265625, "learning_rate": 2.9381603359905006e-05, "loss": 0.8846039175987244, "step": 1061 }, { "epoch": 0.49200833912439196, "grad_norm": 0.9921875, "learning_rate": 2.9349637387384297e-05, "loss": 1.0494961738586426, "step": 1062 }, { "epoch": 0.492471623812833, "grad_norm": 0.8515625, "learning_rate": 2.9317661425310004e-05, "loss": 0.9032423496246338, "step": 1063 }, { "epoch": 0.492934908501274, "grad_norm": 0.81640625, "learning_rate": 2.928567553339189e-05, "loss": 1.0323213338851929, "step": 1064 }, { "epoch": 0.49339819318971506, "grad_norm": 0.734375, "learning_rate": 2.925367977135828e-05, "loss": 1.0107799768447876, "step": 1065 }, { "epoch": 0.49386147787815615, "grad_norm": 0.77734375, "learning_rate": 2.9221674198955883e-05, "loss": 1.0235852003097534, "step": 1066 }, { "epoch": 0.4943247625665972, "grad_norm": 0.9609375, "learning_rate": 2.9189658875949772e-05, "loss": 1.0340992212295532, "step": 1067 }, { "epoch": 0.4947880472550382, "grad_norm": 0.8515625, "learning_rate": 2.9157633862123216e-05, "loss": 0.9333528280258179, "step": 1068 }, { "epoch": 0.49525133194347926, "grad_norm": 0.98828125, "learning_rate": 2.9125599217277566e-05, "loss": 1.2312861680984497, "step": 1069 }, { "epoch": 0.4957146166319203, "grad_norm": 0.796875, "learning_rate": 2.9093555001232157e-05, "loss": 0.8942990303039551, "step": 1070 }, { "epoch": 0.4961779013203614, "grad_norm": 0.78125, "learning_rate": 2.9061501273824226e-05, "loss": 0.9628137350082397, "step": 1071 }, { "epoch": 0.4966411860088024, "grad_norm": 0.73046875, "learning_rate": 2.902943809490874e-05, "loss": 0.9911953806877136, "step": 1072 }, { "epoch": 0.49710447069724345, "grad_norm": 0.77734375, "learning_rate": 2.8997365524358323e-05, "loss": 0.8831644058227539, "step": 1073 }, { "epoch": 0.4975677553856845, "grad_norm": 0.8359375, "learning_rate": 2.8965283622063147e-05, "loss": 1.0550167560577393, "step": 1074 }, { "epoch": 0.4980310400741256, "grad_norm": 0.890625, "learning_rate": 2.89331924479308e-05, "loss": 0.9322593212127686, "step": 1075 }, { "epoch": 0.4984943247625666, "grad_norm": 0.81640625, "learning_rate": 2.8901092061886184e-05, "loss": 0.9539169073104858, "step": 1076 }, { "epoch": 0.49895760945100764, "grad_norm": 0.8203125, "learning_rate": 2.88689825238714e-05, "loss": 0.9450077414512634, "step": 1077 }, { "epoch": 0.4994208941394487, "grad_norm": 0.734375, "learning_rate": 2.883686389384565e-05, "loss": 0.9067424535751343, "step": 1078 }, { "epoch": 0.4998841788278897, "grad_norm": 0.7421875, "learning_rate": 2.8804736231785098e-05, "loss": 0.8575161695480347, "step": 1079 }, { "epoch": 0.5003474635163307, "grad_norm": 0.88671875, "learning_rate": 2.87725995976828e-05, "loss": 0.9551107287406921, "step": 1080 }, { "epoch": 0.5008107482047718, "grad_norm": 0.984375, "learning_rate": 2.8740454051548526e-05, "loss": 1.1050188541412354, "step": 1081 }, { "epoch": 0.5012740328932129, "grad_norm": 0.86328125, "learning_rate": 2.8708299653408722e-05, "loss": 0.9398777484893799, "step": 1082 }, { "epoch": 0.501737317581654, "grad_norm": 0.75390625, "learning_rate": 2.8676136463306354e-05, "loss": 0.9366370439529419, "step": 1083 }, { "epoch": 0.502200602270095, "grad_norm": 0.79296875, "learning_rate": 2.8643964541300793e-05, "loss": 0.916124701499939, "step": 1084 }, { "epoch": 0.502663886958536, "grad_norm": 0.7890625, "learning_rate": 2.861178394746774e-05, "loss": 0.94605952501297, "step": 1085 }, { "epoch": 0.5031271716469771, "grad_norm": 0.81640625, "learning_rate": 2.8579594741899052e-05, "loss": 0.812589168548584, "step": 1086 }, { "epoch": 0.5035904563354181, "grad_norm": 0.87109375, "learning_rate": 2.8547396984702716e-05, "loss": 1.3426378965377808, "step": 1087 }, { "epoch": 0.5040537410238591, "grad_norm": 0.71875, "learning_rate": 2.851519073600265e-05, "loss": 0.992534875869751, "step": 1088 }, { "epoch": 0.5045170257123002, "grad_norm": 0.75, "learning_rate": 2.8482976055938628e-05, "loss": 1.0410428047180176, "step": 1089 }, { "epoch": 0.5049803104007412, "grad_norm": 0.81640625, "learning_rate": 2.8450753004666203e-05, "loss": 0.9198648929595947, "step": 1090 }, { "epoch": 0.5054435950891824, "grad_norm": 0.8828125, "learning_rate": 2.841852164235652e-05, "loss": 1.0291709899902344, "step": 1091 }, { "epoch": 0.5059068797776234, "grad_norm": 1.21875, "learning_rate": 2.8386282029196264e-05, "loss": 1.1194522380828857, "step": 1092 }, { "epoch": 0.5063701644660644, "grad_norm": 0.73828125, "learning_rate": 2.8354034225387525e-05, "loss": 0.843272864818573, "step": 1093 }, { "epoch": 0.5068334491545055, "grad_norm": 0.8359375, "learning_rate": 2.832177829114769e-05, "loss": 0.8743926882743835, "step": 1094 }, { "epoch": 0.5072967338429465, "grad_norm": 0.96484375, "learning_rate": 2.8289514286709315e-05, "loss": 1.130947470664978, "step": 1095 }, { "epoch": 0.5077600185313875, "grad_norm": 0.859375, "learning_rate": 2.825724227232004e-05, "loss": 0.961344838142395, "step": 1096 }, { "epoch": 0.5082233032198286, "grad_norm": 0.796875, "learning_rate": 2.8224962308242467e-05, "loss": 0.9592534303665161, "step": 1097 }, { "epoch": 0.5086865879082696, "grad_norm": 0.921875, "learning_rate": 2.8192674454754016e-05, "loss": 0.9232071042060852, "step": 1098 }, { "epoch": 0.5091498725967106, "grad_norm": 0.828125, "learning_rate": 2.8160378772146868e-05, "loss": 0.9333939552307129, "step": 1099 }, { "epoch": 0.5096131572851518, "grad_norm": 0.78125, "learning_rate": 2.8128075320727803e-05, "loss": 0.8948829174041748, "step": 1100 }, { "epoch": 0.5100764419735928, "grad_norm": 0.87109375, "learning_rate": 2.8095764160818127e-05, "loss": 1.1355596780776978, "step": 1101 }, { "epoch": 0.5105397266620338, "grad_norm": 0.93359375, "learning_rate": 2.8063445352753518e-05, "loss": 0.9624730944633484, "step": 1102 }, { "epoch": 0.5110030113504749, "grad_norm": 0.8515625, "learning_rate": 2.8031118956883958e-05, "loss": 1.002945899963379, "step": 1103 }, { "epoch": 0.5114662960389159, "grad_norm": 0.875, "learning_rate": 2.7998785033573583e-05, "loss": 0.8556519746780396, "step": 1104 }, { "epoch": 0.511929580727357, "grad_norm": 1.1875, "learning_rate": 2.7966443643200585e-05, "loss": 0.9405410289764404, "step": 1105 }, { "epoch": 0.512392865415798, "grad_norm": 0.875, "learning_rate": 2.7934094846157126e-05, "loss": 1.1397373676300049, "step": 1106 }, { "epoch": 0.512856150104239, "grad_norm": 0.78125, "learning_rate": 2.7901738702849144e-05, "loss": 0.7816023230552673, "step": 1107 }, { "epoch": 0.51331943479268, "grad_norm": 0.84375, "learning_rate": 2.7869375273696355e-05, "loss": 0.8463333249092102, "step": 1108 }, { "epoch": 0.5137827194811212, "grad_norm": 0.7890625, "learning_rate": 2.7837004619132037e-05, "loss": 0.8399050831794739, "step": 1109 }, { "epoch": 0.5142460041695622, "grad_norm": 0.76953125, "learning_rate": 2.7804626799602988e-05, "loss": 1.0282368659973145, "step": 1110 }, { "epoch": 0.5147092888580033, "grad_norm": 0.8515625, "learning_rate": 2.7772241875569362e-05, "loss": 0.944800853729248, "step": 1111 }, { "epoch": 0.5151725735464443, "grad_norm": 0.90234375, "learning_rate": 2.77398499075046e-05, "loss": 0.9259645938873291, "step": 1112 }, { "epoch": 0.5156358582348853, "grad_norm": 0.859375, "learning_rate": 2.7707450955895287e-05, "loss": 1.0247880220413208, "step": 1113 }, { "epoch": 0.5160991429233264, "grad_norm": 0.83984375, "learning_rate": 2.7675045081241056e-05, "loss": 0.8392752408981323, "step": 1114 }, { "epoch": 0.5165624276117674, "grad_norm": 0.7890625, "learning_rate": 2.7642632344054442e-05, "loss": 0.9830076098442078, "step": 1115 }, { "epoch": 0.5170257123002084, "grad_norm": 0.83984375, "learning_rate": 2.761021280486082e-05, "loss": 0.8453760147094727, "step": 1116 }, { "epoch": 0.5174889969886495, "grad_norm": 0.81640625, "learning_rate": 2.7577786524198273e-05, "loss": 0.9899021983146667, "step": 1117 }, { "epoch": 0.5179522816770906, "grad_norm": 0.66796875, "learning_rate": 2.7545353562617444e-05, "loss": 0.8067998290061951, "step": 1118 }, { "epoch": 0.5184155663655317, "grad_norm": 0.9609375, "learning_rate": 2.7512913980681483e-05, "loss": 1.0544780492782593, "step": 1119 }, { "epoch": 0.5188788510539727, "grad_norm": 0.83203125, "learning_rate": 2.7480467838965872e-05, "loss": 1.0132243633270264, "step": 1120 }, { "epoch": 0.5193421357424137, "grad_norm": 0.765625, "learning_rate": 2.744801519805837e-05, "loss": 0.9769008755683899, "step": 1121 }, { "epoch": 0.5198054204308548, "grad_norm": 0.82421875, "learning_rate": 2.7415556118558848e-05, "loss": 0.9457086324691772, "step": 1122 }, { "epoch": 0.5202687051192958, "grad_norm": 0.8984375, "learning_rate": 2.7383090661079215e-05, "loss": 0.9511300921440125, "step": 1123 }, { "epoch": 0.5207319898077368, "grad_norm": 0.80859375, "learning_rate": 2.7350618886243286e-05, "loss": 0.9526143670082092, "step": 1124 }, { "epoch": 0.5211952744961779, "grad_norm": 0.80078125, "learning_rate": 2.731814085468667e-05, "loss": 0.8233035206794739, "step": 1125 }, { "epoch": 0.5216585591846189, "grad_norm": 0.90625, "learning_rate": 2.7285656627056673e-05, "loss": 0.8465626239776611, "step": 1126 }, { "epoch": 0.52212184387306, "grad_norm": 0.8203125, "learning_rate": 2.7253166264012142e-05, "loss": 0.8446078896522522, "step": 1127 }, { "epoch": 0.5225851285615011, "grad_norm": 0.8359375, "learning_rate": 2.7220669826223418e-05, "loss": 0.8863167762756348, "step": 1128 }, { "epoch": 0.5230484132499421, "grad_norm": 0.8984375, "learning_rate": 2.7188167374372155e-05, "loss": 0.9343461394309998, "step": 1129 }, { "epoch": 0.5235116979383831, "grad_norm": 0.85546875, "learning_rate": 2.715565896915125e-05, "loss": 0.9686758518218994, "step": 1130 }, { "epoch": 0.5239749826268242, "grad_norm": 0.84375, "learning_rate": 2.7123144671264715e-05, "loss": 0.9815382361412048, "step": 1131 }, { "epoch": 0.5244382673152652, "grad_norm": 0.80859375, "learning_rate": 2.7090624541427566e-05, "loss": 1.1032721996307373, "step": 1132 }, { "epoch": 0.5249015520037063, "grad_norm": 0.76171875, "learning_rate": 2.7058098640365716e-05, "loss": 0.9356403350830078, "step": 1133 }, { "epoch": 0.5253648366921473, "grad_norm": 0.8515625, "learning_rate": 2.702556702881584e-05, "loss": 0.8864633440971375, "step": 1134 }, { "epoch": 0.5258281213805883, "grad_norm": 0.75, "learning_rate": 2.6993029767525295e-05, "loss": 0.8282592296600342, "step": 1135 }, { "epoch": 0.5262914060690295, "grad_norm": 0.82421875, "learning_rate": 2.6960486917251967e-05, "loss": 0.9786428213119507, "step": 1136 }, { "epoch": 0.5267546907574705, "grad_norm": 0.91796875, "learning_rate": 2.6927938538764197e-05, "loss": 0.8919640183448792, "step": 1137 }, { "epoch": 0.5272179754459115, "grad_norm": 0.8359375, "learning_rate": 2.6895384692840634e-05, "loss": 1.1693425178527832, "step": 1138 }, { "epoch": 0.5276812601343526, "grad_norm": 0.83984375, "learning_rate": 2.6862825440270143e-05, "loss": 0.961223304271698, "step": 1139 }, { "epoch": 0.5281445448227936, "grad_norm": 0.9453125, "learning_rate": 2.683026084185169e-05, "loss": 1.0943102836608887, "step": 1140 }, { "epoch": 0.5286078295112346, "grad_norm": 0.77734375, "learning_rate": 2.679769095839422e-05, "loss": 0.9912340641021729, "step": 1141 }, { "epoch": 0.5290711141996757, "grad_norm": 0.85546875, "learning_rate": 2.6765115850716548e-05, "loss": 1.1055166721343994, "step": 1142 }, { "epoch": 0.5295343988881167, "grad_norm": 0.76171875, "learning_rate": 2.673253557964724e-05, "loss": 0.9419571161270142, "step": 1143 }, { "epoch": 0.5299976835765577, "grad_norm": 0.8828125, "learning_rate": 2.66999502060245e-05, "loss": 0.949909508228302, "step": 1144 }, { "epoch": 0.5304609682649989, "grad_norm": 0.953125, "learning_rate": 2.6667359790696074e-05, "loss": 0.9741145372390747, "step": 1145 }, { "epoch": 0.5309242529534399, "grad_norm": 0.87109375, "learning_rate": 2.6634764394519106e-05, "loss": 0.9066289663314819, "step": 1146 }, { "epoch": 0.531387537641881, "grad_norm": 0.94921875, "learning_rate": 2.660216407836006e-05, "loss": 1.090651512145996, "step": 1147 }, { "epoch": 0.531850822330322, "grad_norm": 0.80859375, "learning_rate": 2.656955890309457e-05, "loss": 1.0257591009140015, "step": 1148 }, { "epoch": 0.532314107018763, "grad_norm": 0.92578125, "learning_rate": 2.6536948929607355e-05, "loss": 1.2080005407333374, "step": 1149 }, { "epoch": 0.5327773917072041, "grad_norm": 0.796875, "learning_rate": 2.6504334218792082e-05, "loss": 0.9306067228317261, "step": 1150 }, { "epoch": 0.5332406763956451, "grad_norm": 0.796875, "learning_rate": 2.6471714831551293e-05, "loss": 1.0611542463302612, "step": 1151 }, { "epoch": 0.5337039610840861, "grad_norm": 0.80859375, "learning_rate": 2.6439090828796214e-05, "loss": 0.9698439240455627, "step": 1152 }, { "epoch": 0.5341672457725272, "grad_norm": 0.8515625, "learning_rate": 2.6406462271446732e-05, "loss": 0.8757132887840271, "step": 1153 }, { "epoch": 0.5346305304609683, "grad_norm": 0.703125, "learning_rate": 2.6373829220431218e-05, "loss": 0.7780014872550964, "step": 1154 }, { "epoch": 0.5350938151494093, "grad_norm": 1.0390625, "learning_rate": 2.6341191736686438e-05, "loss": 1.0552921295166016, "step": 1155 }, { "epoch": 0.5355570998378504, "grad_norm": 0.84765625, "learning_rate": 2.6308549881157446e-05, "loss": 0.8510526418685913, "step": 1156 }, { "epoch": 0.5360203845262914, "grad_norm": 0.7265625, "learning_rate": 2.6275903714797442e-05, "loss": 1.0102177858352661, "step": 1157 }, { "epoch": 0.5364836692147325, "grad_norm": 0.78515625, "learning_rate": 2.624325329856769e-05, "loss": 0.8171606063842773, "step": 1158 }, { "epoch": 0.5369469539031735, "grad_norm": 0.76171875, "learning_rate": 2.621059869343739e-05, "loss": 0.8785892128944397, "step": 1159 }, { "epoch": 0.5374102385916145, "grad_norm": 0.8125, "learning_rate": 2.6177939960383546e-05, "loss": 0.7128728628158569, "step": 1160 }, { "epoch": 0.5378735232800556, "grad_norm": 0.77734375, "learning_rate": 2.6145277160390888e-05, "loss": 0.9567815661430359, "step": 1161 }, { "epoch": 0.5383368079684966, "grad_norm": 0.83984375, "learning_rate": 2.611261035445173e-05, "loss": 1.065600872039795, "step": 1162 }, { "epoch": 0.5388000926569377, "grad_norm": 0.87109375, "learning_rate": 2.6079939603565884e-05, "loss": 0.9629444479942322, "step": 1163 }, { "epoch": 0.5392633773453788, "grad_norm": 0.7265625, "learning_rate": 2.6047264968740505e-05, "loss": 1.0867857933044434, "step": 1164 }, { "epoch": 0.5397266620338198, "grad_norm": 0.83203125, "learning_rate": 2.6014586510990015e-05, "loss": 1.0058200359344482, "step": 1165 }, { "epoch": 0.5401899467222608, "grad_norm": 0.91015625, "learning_rate": 2.598190429133598e-05, "loss": 1.0715608596801758, "step": 1166 }, { "epoch": 0.5406532314107019, "grad_norm": 0.7890625, "learning_rate": 2.5949218370806967e-05, "loss": 0.9926679730415344, "step": 1167 }, { "epoch": 0.5411165160991429, "grad_norm": 0.703125, "learning_rate": 2.591652881043848e-05, "loss": 0.9041029810905457, "step": 1168 }, { "epoch": 0.5415798007875839, "grad_norm": 0.72265625, "learning_rate": 2.5883835671272798e-05, "loss": 0.8764104843139648, "step": 1169 }, { "epoch": 0.542043085476025, "grad_norm": 0.83203125, "learning_rate": 2.5851139014358903e-05, "loss": 0.8071422576904297, "step": 1170 }, { "epoch": 0.542506370164466, "grad_norm": 0.76171875, "learning_rate": 2.581843890075233e-05, "loss": 1.064335584640503, "step": 1171 }, { "epoch": 0.5429696548529072, "grad_norm": 0.8125, "learning_rate": 2.5785735391515083e-05, "loss": 0.9307787418365479, "step": 1172 }, { "epoch": 0.5434329395413482, "grad_norm": 0.73046875, "learning_rate": 2.5753028547715487e-05, "loss": 0.8384194374084473, "step": 1173 }, { "epoch": 0.5438962242297892, "grad_norm": 0.78515625, "learning_rate": 2.572031843042813e-05, "loss": 0.9470226764678955, "step": 1174 }, { "epoch": 0.5443595089182303, "grad_norm": 0.8671875, "learning_rate": 2.5687605100733652e-05, "loss": 1.0152946710586548, "step": 1175 }, { "epoch": 0.5448227936066713, "grad_norm": 0.8046875, "learning_rate": 2.5654888619718762e-05, "loss": 0.8778582811355591, "step": 1176 }, { "epoch": 0.5452860782951123, "grad_norm": 0.80078125, "learning_rate": 2.5622169048475996e-05, "loss": 1.002034306526184, "step": 1177 }, { "epoch": 0.5457493629835534, "grad_norm": 0.77734375, "learning_rate": 2.5589446448103687e-05, "loss": 1.00825834274292, "step": 1178 }, { "epoch": 0.5462126476719944, "grad_norm": 0.734375, "learning_rate": 2.5556720879705834e-05, "loss": 0.8981512784957886, "step": 1179 }, { "epoch": 0.5466759323604354, "grad_norm": 1.1484375, "learning_rate": 2.552399240439195e-05, "loss": 0.9638339281082153, "step": 1180 }, { "epoch": 0.5471392170488766, "grad_norm": 0.82421875, "learning_rate": 2.5491261083277014e-05, "loss": 0.832276463508606, "step": 1181 }, { "epoch": 0.5476025017373176, "grad_norm": 0.8046875, "learning_rate": 2.545852697748127e-05, "loss": 0.7502810955047607, "step": 1182 }, { "epoch": 0.5480657864257586, "grad_norm": 0.66796875, "learning_rate": 2.5425790148130212e-05, "loss": 0.8867089748382568, "step": 1183 }, { "epoch": 0.5485290711141997, "grad_norm": 0.7421875, "learning_rate": 2.5393050656354378e-05, "loss": 1.0290521383285522, "step": 1184 }, { "epoch": 0.5489923558026407, "grad_norm": 0.828125, "learning_rate": 2.5360308563289304e-05, "loss": 0.9290564060211182, "step": 1185 }, { "epoch": 0.5494556404910818, "grad_norm": 0.84765625, "learning_rate": 2.5327563930075376e-05, "loss": 1.1053215265274048, "step": 1186 }, { "epoch": 0.5499189251795228, "grad_norm": 0.9140625, "learning_rate": 2.5294816817857718e-05, "loss": 0.9043922424316406, "step": 1187 }, { "epoch": 0.5503822098679638, "grad_norm": 0.8359375, "learning_rate": 2.5262067287786105e-05, "loss": 0.8395538926124573, "step": 1188 }, { "epoch": 0.5508454945564049, "grad_norm": 0.9296875, "learning_rate": 2.5229315401014793e-05, "loss": 0.9732888340950012, "step": 1189 }, { "epoch": 0.551308779244846, "grad_norm": 0.90625, "learning_rate": 2.5196561218702462e-05, "loss": 0.9241658449172974, "step": 1190 }, { "epoch": 0.551772063933287, "grad_norm": 0.82421875, "learning_rate": 2.5163804802012067e-05, "loss": 0.810673713684082, "step": 1191 }, { "epoch": 0.5522353486217281, "grad_norm": 0.84765625, "learning_rate": 2.5131046212110748e-05, "loss": 0.9831432700157166, "step": 1192 }, { "epoch": 0.5526986333101691, "grad_norm": 0.8359375, "learning_rate": 2.509828551016969e-05, "loss": 0.9300224781036377, "step": 1193 }, { "epoch": 0.5531619179986101, "grad_norm": 0.87890625, "learning_rate": 2.5065522757364026e-05, "loss": 1.0542097091674805, "step": 1194 }, { "epoch": 0.5536252026870512, "grad_norm": 0.890625, "learning_rate": 2.5032758014872737e-05, "loss": 1.0396784543991089, "step": 1195 }, { "epoch": 0.5540884873754922, "grad_norm": 0.765625, "learning_rate": 2.4999991343878476e-05, "loss": 0.999907910823822, "step": 1196 }, { "epoch": 0.5545517720639332, "grad_norm": 0.875, "learning_rate": 2.496722280556755e-05, "loss": 1.0439246892929077, "step": 1197 }, { "epoch": 0.5550150567523743, "grad_norm": 0.8984375, "learning_rate": 2.4934452461129705e-05, "loss": 1.1016960144042969, "step": 1198 }, { "epoch": 0.5554783414408154, "grad_norm": 0.87109375, "learning_rate": 2.49016803717581e-05, "loss": 1.0953267812728882, "step": 1199 }, { "epoch": 0.5559416261292565, "grad_norm": 0.78125, "learning_rate": 2.486890659864912e-05, "loss": 1.0697580575942993, "step": 1200 }, { "epoch": 0.5564049108176975, "grad_norm": 0.77734375, "learning_rate": 2.4836131203002307e-05, "loss": 0.9801178574562073, "step": 1201 }, { "epoch": 0.5568681955061385, "grad_norm": 0.83203125, "learning_rate": 2.4803354246020246e-05, "loss": 0.8415432572364807, "step": 1202 }, { "epoch": 0.5573314801945796, "grad_norm": 0.88671875, "learning_rate": 2.4770575788908413e-05, "loss": 0.8861055374145508, "step": 1203 }, { "epoch": 0.5577947648830206, "grad_norm": 0.77734375, "learning_rate": 2.4737795892875114e-05, "loss": 0.8379161357879639, "step": 1204 }, { "epoch": 0.5582580495714616, "grad_norm": 0.76171875, "learning_rate": 2.4705014619131302e-05, "loss": 0.7862236499786377, "step": 1205 }, { "epoch": 0.5587213342599027, "grad_norm": 0.921875, "learning_rate": 2.4672232028890544e-05, "loss": 1.1108901500701904, "step": 1206 }, { "epoch": 0.5591846189483437, "grad_norm": 0.91015625, "learning_rate": 2.4639448183368848e-05, "loss": 0.9187421798706055, "step": 1207 }, { "epoch": 0.5596479036367848, "grad_norm": 0.80859375, "learning_rate": 2.4606663143784546e-05, "loss": 0.9021953344345093, "step": 1208 }, { "epoch": 0.5601111883252259, "grad_norm": 1.0546875, "learning_rate": 2.457387697135824e-05, "loss": 0.8863352537155151, "step": 1209 }, { "epoch": 0.5605744730136669, "grad_norm": 0.7421875, "learning_rate": 2.454108972731261e-05, "loss": 0.9452582597732544, "step": 1210 }, { "epoch": 0.561037757702108, "grad_norm": 0.859375, "learning_rate": 2.4508301472872372e-05, "loss": 0.9802470803260803, "step": 1211 }, { "epoch": 0.561501042390549, "grad_norm": 0.828125, "learning_rate": 2.4475512269264102e-05, "loss": 1.040086269378662, "step": 1212 }, { "epoch": 0.56196432707899, "grad_norm": 0.8125, "learning_rate": 2.4442722177716155e-05, "loss": 0.8640920519828796, "step": 1213 }, { "epoch": 0.5624276117674311, "grad_norm": 0.96484375, "learning_rate": 2.4409931259458534e-05, "loss": 1.000259518623352, "step": 1214 }, { "epoch": 0.5628908964558721, "grad_norm": 0.88671875, "learning_rate": 2.4377139575722816e-05, "loss": 1.037365198135376, "step": 1215 }, { "epoch": 0.5633541811443131, "grad_norm": 0.84375, "learning_rate": 2.4344347187741992e-05, "loss": 0.8693150281906128, "step": 1216 }, { "epoch": 0.5638174658327543, "grad_norm": 1.09375, "learning_rate": 2.4311554156750342e-05, "loss": 1.0040010213851929, "step": 1217 }, { "epoch": 0.5642807505211953, "grad_norm": 0.87890625, "learning_rate": 2.427876054398339e-05, "loss": 1.1392626762390137, "step": 1218 }, { "epoch": 0.5647440352096363, "grad_norm": 1.2265625, "learning_rate": 2.424596641067772e-05, "loss": 1.005986213684082, "step": 1219 }, { "epoch": 0.5652073198980774, "grad_norm": 0.890625, "learning_rate": 2.4213171818070896e-05, "loss": 0.8679553270339966, "step": 1220 }, { "epoch": 0.5656706045865184, "grad_norm": 0.97265625, "learning_rate": 2.4180376827401326e-05, "loss": 1.0555665493011475, "step": 1221 }, { "epoch": 0.5661338892749594, "grad_norm": 0.7421875, "learning_rate": 2.4147581499908194e-05, "loss": 0.7991371154785156, "step": 1222 }, { "epoch": 0.5665971739634005, "grad_norm": 1.546875, "learning_rate": 2.4114785896831272e-05, "loss": 1.079115390777588, "step": 1223 }, { "epoch": 0.5670604586518415, "grad_norm": 0.7890625, "learning_rate": 2.4081990079410877e-05, "loss": 0.9234097599983215, "step": 1224 }, { "epoch": 0.5675237433402825, "grad_norm": 0.79296875, "learning_rate": 2.4049194108887712e-05, "loss": 0.8750408291816711, "step": 1225 }, { "epoch": 0.5679870280287237, "grad_norm": 0.8359375, "learning_rate": 2.401639804650277e-05, "loss": 0.959878146648407, "step": 1226 }, { "epoch": 0.5684503127171647, "grad_norm": 1.0234375, "learning_rate": 2.398360195349723e-05, "loss": 0.8520339131355286, "step": 1227 }, { "epoch": 0.5689135974056058, "grad_norm": 1.1484375, "learning_rate": 2.3950805891112282e-05, "loss": 0.9484376907348633, "step": 1228 }, { "epoch": 0.5693768820940468, "grad_norm": 0.796875, "learning_rate": 2.391800992058913e-05, "loss": 1.069106101989746, "step": 1229 }, { "epoch": 0.5698401667824878, "grad_norm": 0.76171875, "learning_rate": 2.3885214103168733e-05, "loss": 0.8645256161689758, "step": 1230 }, { "epoch": 0.5703034514709289, "grad_norm": 0.74609375, "learning_rate": 2.3852418500091814e-05, "loss": 0.8843850493431091, "step": 1231 }, { "epoch": 0.5707667361593699, "grad_norm": 0.77734375, "learning_rate": 2.3819623172598675e-05, "loss": 0.848798394203186, "step": 1232 }, { "epoch": 0.5712300208478109, "grad_norm": 0.75390625, "learning_rate": 2.378682818192911e-05, "loss": 0.867169201374054, "step": 1233 }, { "epoch": 0.571693305536252, "grad_norm": 0.87109375, "learning_rate": 2.375403358932229e-05, "loss": 0.9214097857475281, "step": 1234 }, { "epoch": 0.5721565902246931, "grad_norm": 1.0234375, "learning_rate": 2.372123945601661e-05, "loss": 1.0255508422851562, "step": 1235 }, { "epoch": 0.5726198749131342, "grad_norm": 0.83203125, "learning_rate": 2.3688445843249663e-05, "loss": 1.0348937511444092, "step": 1236 }, { "epoch": 0.5730831596015752, "grad_norm": 0.83203125, "learning_rate": 2.3655652812258016e-05, "loss": 0.9052950143814087, "step": 1237 }, { "epoch": 0.5735464442900162, "grad_norm": 0.8046875, "learning_rate": 2.3622860424277185e-05, "loss": 1.0120303630828857, "step": 1238 }, { "epoch": 0.5740097289784573, "grad_norm": 0.8984375, "learning_rate": 2.359006874054147e-05, "loss": 1.0887019634246826, "step": 1239 }, { "epoch": 0.5744730136668983, "grad_norm": 0.921875, "learning_rate": 2.3557277822283853e-05, "loss": 0.9740076065063477, "step": 1240 }, { "epoch": 0.5749362983553393, "grad_norm": 0.83984375, "learning_rate": 2.352448773073591e-05, "loss": 0.9325964450836182, "step": 1241 }, { "epoch": 0.5753995830437804, "grad_norm": 0.8828125, "learning_rate": 2.3491698527127626e-05, "loss": 0.9349949955940247, "step": 1242 }, { "epoch": 0.5758628677322214, "grad_norm": 0.984375, "learning_rate": 2.345891027268739e-05, "loss": 0.8547556400299072, "step": 1243 }, { "epoch": 0.5763261524206625, "grad_norm": 0.87109375, "learning_rate": 2.3426123028641764e-05, "loss": 0.8496311902999878, "step": 1244 }, { "epoch": 0.5767894371091036, "grad_norm": 0.828125, "learning_rate": 2.339333685621546e-05, "loss": 0.8979066610336304, "step": 1245 }, { "epoch": 0.5772527217975446, "grad_norm": 0.87890625, "learning_rate": 2.3360551816631163e-05, "loss": 0.9577649831771851, "step": 1246 }, { "epoch": 0.5777160064859856, "grad_norm": 0.921875, "learning_rate": 2.3327767971109457e-05, "loss": 0.844273567199707, "step": 1247 }, { "epoch": 0.5781792911744267, "grad_norm": 0.890625, "learning_rate": 2.32949853808687e-05, "loss": 0.9688032269477844, "step": 1248 }, { "epoch": 0.5786425758628677, "grad_norm": 0.828125, "learning_rate": 2.326220410712489e-05, "loss": 0.8966050148010254, "step": 1249 }, { "epoch": 0.5791058605513087, "grad_norm": 0.78125, "learning_rate": 2.322942421109159e-05, "loss": 0.8237859010696411, "step": 1250 }, { "epoch": 0.5795691452397498, "grad_norm": 0.8515625, "learning_rate": 2.319664575397975e-05, "loss": 0.998030424118042, "step": 1251 }, { "epoch": 0.5800324299281908, "grad_norm": 0.66796875, "learning_rate": 2.3163868796997697e-05, "loss": 0.8741839528083801, "step": 1252 }, { "epoch": 0.580495714616632, "grad_norm": 0.79296875, "learning_rate": 2.3131093401350886e-05, "loss": 0.8840314745903015, "step": 1253 }, { "epoch": 0.580958999305073, "grad_norm": 0.8046875, "learning_rate": 2.309831962824191e-05, "loss": 0.8226364850997925, "step": 1254 }, { "epoch": 0.581422283993514, "grad_norm": 0.7734375, "learning_rate": 2.30655475388703e-05, "loss": 1.046274185180664, "step": 1255 }, { "epoch": 0.5818855686819551, "grad_norm": 0.76171875, "learning_rate": 2.3032777194432454e-05, "loss": 0.962317943572998, "step": 1256 }, { "epoch": 0.5823488533703961, "grad_norm": 0.8203125, "learning_rate": 2.3000008656121528e-05, "loss": 1.0184528827667236, "step": 1257 }, { "epoch": 0.5828121380588371, "grad_norm": 0.7578125, "learning_rate": 2.2967241985127264e-05, "loss": 0.9843693375587463, "step": 1258 }, { "epoch": 0.5832754227472782, "grad_norm": 0.765625, "learning_rate": 2.2934477242635975e-05, "loss": 0.8737512826919556, "step": 1259 }, { "epoch": 0.5837387074357192, "grad_norm": 0.78515625, "learning_rate": 2.2901714489830313e-05, "loss": 0.9993818998336792, "step": 1260 }, { "epoch": 0.5842019921241604, "grad_norm": 0.76171875, "learning_rate": 2.2868953787889257e-05, "loss": 0.8710107207298279, "step": 1261 }, { "epoch": 0.5846652768126014, "grad_norm": 0.765625, "learning_rate": 2.283619519798794e-05, "loss": 0.9011490345001221, "step": 1262 }, { "epoch": 0.5851285615010424, "grad_norm": 0.8671875, "learning_rate": 2.2803438781297542e-05, "loss": 1.043053150177002, "step": 1263 }, { "epoch": 0.5855918461894835, "grad_norm": 0.8515625, "learning_rate": 2.2770684598985215e-05, "loss": 0.9350489377975464, "step": 1264 }, { "epoch": 0.5860551308779245, "grad_norm": 0.8125, "learning_rate": 2.2737932712213897e-05, "loss": 0.9060037136077881, "step": 1265 }, { "epoch": 0.5865184155663655, "grad_norm": 0.796875, "learning_rate": 2.2705183182142283e-05, "loss": 1.0310790538787842, "step": 1266 }, { "epoch": 0.5869817002548066, "grad_norm": 0.90234375, "learning_rate": 2.2672436069924625e-05, "loss": 1.060436725616455, "step": 1267 }, { "epoch": 0.5874449849432476, "grad_norm": 0.8359375, "learning_rate": 2.2639691436710704e-05, "loss": 0.9399782419204712, "step": 1268 }, { "epoch": 0.5879082696316886, "grad_norm": 0.67578125, "learning_rate": 2.2606949343645633e-05, "loss": 0.7602555155754089, "step": 1269 }, { "epoch": 0.5883715543201298, "grad_norm": 0.8203125, "learning_rate": 2.2574209851869796e-05, "loss": 0.8838884830474854, "step": 1270 }, { "epoch": 0.5888348390085708, "grad_norm": 0.7578125, "learning_rate": 2.2541473022518734e-05, "loss": 0.9206914305686951, "step": 1271 }, { "epoch": 0.5892981236970118, "grad_norm": 0.96484375, "learning_rate": 2.250873891672299e-05, "loss": 1.0698550939559937, "step": 1272 }, { "epoch": 0.5897614083854529, "grad_norm": 0.95703125, "learning_rate": 2.247600759560805e-05, "loss": 1.186781406402588, "step": 1273 }, { "epoch": 0.5902246930738939, "grad_norm": 0.88671875, "learning_rate": 2.2443279120294167e-05, "loss": 0.859512209892273, "step": 1274 }, { "epoch": 0.590687977762335, "grad_norm": 1.0, "learning_rate": 2.2410553551896318e-05, "loss": 1.1249911785125732, "step": 1275 }, { "epoch": 0.591151262450776, "grad_norm": 1.015625, "learning_rate": 2.2377830951524016e-05, "loss": 0.8921389579772949, "step": 1276 }, { "epoch": 0.591614547139217, "grad_norm": 0.7578125, "learning_rate": 2.2345111380281246e-05, "loss": 1.017206072807312, "step": 1277 }, { "epoch": 0.592077831827658, "grad_norm": 0.85546875, "learning_rate": 2.231239489926635e-05, "loss": 0.8455703258514404, "step": 1278 }, { "epoch": 0.5925411165160992, "grad_norm": 0.82421875, "learning_rate": 2.2279681569571873e-05, "loss": 1.062213659286499, "step": 1279 }, { "epoch": 0.5930044012045402, "grad_norm": 0.8515625, "learning_rate": 2.2246971452284514e-05, "loss": 0.9728783965110779, "step": 1280 }, { "epoch": 0.5934676858929813, "grad_norm": 0.76171875, "learning_rate": 2.221426460848491e-05, "loss": 0.9838144779205322, "step": 1281 }, { "epoch": 0.5939309705814223, "grad_norm": 0.8125, "learning_rate": 2.218156109924767e-05, "loss": 0.996467649936676, "step": 1282 }, { "epoch": 0.5943942552698633, "grad_norm": 0.83984375, "learning_rate": 2.2148860985641098e-05, "loss": 0.9236133694648743, "step": 1283 }, { "epoch": 0.5948575399583044, "grad_norm": 0.92578125, "learning_rate": 2.2116164328727207e-05, "loss": 0.9230520129203796, "step": 1284 }, { "epoch": 0.5953208246467454, "grad_norm": 0.8515625, "learning_rate": 2.2083471189561532e-05, "loss": 1.163870096206665, "step": 1285 }, { "epoch": 0.5957841093351864, "grad_norm": 0.8359375, "learning_rate": 2.2050781629193035e-05, "loss": 0.9241350293159485, "step": 1286 }, { "epoch": 0.5962473940236275, "grad_norm": 0.81640625, "learning_rate": 2.2018095708664025e-05, "loss": 0.9975556135177612, "step": 1287 }, { "epoch": 0.5967106787120686, "grad_norm": 0.96875, "learning_rate": 2.198541348900998e-05, "loss": 0.9642297625541687, "step": 1288 }, { "epoch": 0.5971739634005097, "grad_norm": 0.74609375, "learning_rate": 2.19527350312595e-05, "loss": 1.0175257921218872, "step": 1289 }, { "epoch": 0.5976372480889507, "grad_norm": 0.84765625, "learning_rate": 2.1920060396434114e-05, "loss": 0.8721986413002014, "step": 1290 }, { "epoch": 0.5981005327773917, "grad_norm": 0.8359375, "learning_rate": 2.188738964554827e-05, "loss": 0.9678272008895874, "step": 1291 }, { "epoch": 0.5985638174658328, "grad_norm": 0.91015625, "learning_rate": 2.185472283960912e-05, "loss": 0.8769973516464233, "step": 1292 }, { "epoch": 0.5990271021542738, "grad_norm": 0.84765625, "learning_rate": 2.182206003961646e-05, "loss": 0.8562041521072388, "step": 1293 }, { "epoch": 0.5994903868427148, "grad_norm": 0.765625, "learning_rate": 2.1789401306562614e-05, "loss": 0.9333268404006958, "step": 1294 }, { "epoch": 0.5999536715311559, "grad_norm": 0.76171875, "learning_rate": 2.1756746701432304e-05, "loss": 1.0222218036651611, "step": 1295 }, { "epoch": 0.6004169562195969, "grad_norm": 0.80859375, "learning_rate": 2.172409628520256e-05, "loss": 0.9753302931785583, "step": 1296 }, { "epoch": 0.600880240908038, "grad_norm": 0.78125, "learning_rate": 2.169145011884255e-05, "loss": 0.7853343486785889, "step": 1297 }, { "epoch": 0.6013435255964791, "grad_norm": 0.890625, "learning_rate": 2.1658808263313563e-05, "loss": 0.9388110637664795, "step": 1298 }, { "epoch": 0.6018068102849201, "grad_norm": 0.8203125, "learning_rate": 2.162617077956879e-05, "loss": 1.0297507047653198, "step": 1299 }, { "epoch": 0.6022700949733611, "grad_norm": 0.890625, "learning_rate": 2.1593537728553272e-05, "loss": 1.123502492904663, "step": 1300 }, { "epoch": 0.6027333796618022, "grad_norm": 0.85546875, "learning_rate": 2.156090917120379e-05, "loss": 0.987675130367279, "step": 1301 }, { "epoch": 0.6031966643502432, "grad_norm": 0.8046875, "learning_rate": 2.152828516844871e-05, "loss": 0.9241386651992798, "step": 1302 }, { "epoch": 0.6036599490386843, "grad_norm": 0.765625, "learning_rate": 2.149566578120792e-05, "loss": 1.0078705549240112, "step": 1303 }, { "epoch": 0.6041232337271253, "grad_norm": 0.80078125, "learning_rate": 2.1463051070392643e-05, "loss": 1.0738126039505005, "step": 1304 }, { "epoch": 0.6045865184155663, "grad_norm": 0.75390625, "learning_rate": 2.1430441096905437e-05, "loss": 0.9489339590072632, "step": 1305 }, { "epoch": 0.6050498031040075, "grad_norm": 0.9375, "learning_rate": 2.139783592163994e-05, "loss": 0.7799573540687561, "step": 1306 }, { "epoch": 0.6055130877924485, "grad_norm": 0.80859375, "learning_rate": 2.13652356054809e-05, "loss": 0.957494854927063, "step": 1307 }, { "epoch": 0.6059763724808895, "grad_norm": 0.89453125, "learning_rate": 2.1332640209303938e-05, "loss": 0.939851701259613, "step": 1308 }, { "epoch": 0.6064396571693306, "grad_norm": 0.70703125, "learning_rate": 2.1300049793975505e-05, "loss": 0.8805224895477295, "step": 1309 }, { "epoch": 0.6069029418577716, "grad_norm": 0.79296875, "learning_rate": 2.126746442035277e-05, "loss": 0.8852795958518982, "step": 1310 }, { "epoch": 0.6073662265462126, "grad_norm": 1.046875, "learning_rate": 2.1234884149283453e-05, "loss": 1.1347432136535645, "step": 1311 }, { "epoch": 0.6078295112346537, "grad_norm": 0.8515625, "learning_rate": 2.1202309041605784e-05, "loss": 0.8818974494934082, "step": 1312 }, { "epoch": 0.6082927959230947, "grad_norm": 0.90625, "learning_rate": 2.1169739158148307e-05, "loss": 1.2017409801483154, "step": 1313 }, { "epoch": 0.6087560806115357, "grad_norm": 0.73828125, "learning_rate": 2.113717455972986e-05, "loss": 0.8007137179374695, "step": 1314 }, { "epoch": 0.6092193652999769, "grad_norm": 0.85546875, "learning_rate": 2.1104615307159377e-05, "loss": 1.1176743507385254, "step": 1315 }, { "epoch": 0.6096826499884179, "grad_norm": 0.72265625, "learning_rate": 2.1072061461235807e-05, "loss": 1.1391681432724, "step": 1316 }, { "epoch": 0.610145934676859, "grad_norm": 1.0625, "learning_rate": 2.1039513082748037e-05, "loss": 1.0258846282958984, "step": 1317 }, { "epoch": 0.6106092193653, "grad_norm": 0.7578125, "learning_rate": 2.1006970232474706e-05, "loss": 0.8150526881217957, "step": 1318 }, { "epoch": 0.611072504053741, "grad_norm": 0.796875, "learning_rate": 2.0974432971184163e-05, "loss": 0.9746679067611694, "step": 1319 }, { "epoch": 0.6115357887421821, "grad_norm": 0.74609375, "learning_rate": 2.0941901359634282e-05, "loss": 0.9087226390838623, "step": 1320 }, { "epoch": 0.6119990734306231, "grad_norm": 0.75, "learning_rate": 2.0909375458572435e-05, "loss": 0.7601380348205566, "step": 1321 }, { "epoch": 0.6124623581190641, "grad_norm": 0.71875, "learning_rate": 2.0876855328735293e-05, "loss": 0.8713633418083191, "step": 1322 }, { "epoch": 0.6129256428075052, "grad_norm": 0.9296875, "learning_rate": 2.0844341030848755e-05, "loss": 0.8106034398078918, "step": 1323 }, { "epoch": 0.6133889274959463, "grad_norm": 0.890625, "learning_rate": 2.0811832625627853e-05, "loss": 0.9880518913269043, "step": 1324 }, { "epoch": 0.6138522121843873, "grad_norm": 1.0, "learning_rate": 2.0779330173776586e-05, "loss": 1.0225927829742432, "step": 1325 }, { "epoch": 0.6143154968728284, "grad_norm": 0.80078125, "learning_rate": 2.074683373598786e-05, "loss": 0.8777214288711548, "step": 1326 }, { "epoch": 0.6147787815612694, "grad_norm": 0.81640625, "learning_rate": 2.0714343372943328e-05, "loss": 0.9182271957397461, "step": 1327 }, { "epoch": 0.6152420662497105, "grad_norm": 0.87109375, "learning_rate": 2.0681859145313334e-05, "loss": 1.0506292581558228, "step": 1328 }, { "epoch": 0.6157053509381515, "grad_norm": 0.78125, "learning_rate": 2.0649381113756725e-05, "loss": 0.9353680610656738, "step": 1329 }, { "epoch": 0.6161686356265925, "grad_norm": 0.85546875, "learning_rate": 2.0616909338920793e-05, "loss": 0.9634753465652466, "step": 1330 }, { "epoch": 0.6166319203150336, "grad_norm": 0.77734375, "learning_rate": 2.058444388144116e-05, "loss": 0.8841714859008789, "step": 1331 }, { "epoch": 0.6170952050034746, "grad_norm": 0.7265625, "learning_rate": 2.0551984801941635e-05, "loss": 0.8491650819778442, "step": 1332 }, { "epoch": 0.6175584896919157, "grad_norm": 0.86328125, "learning_rate": 2.051953216103413e-05, "loss": 1.1438350677490234, "step": 1333 }, { "epoch": 0.6180217743803568, "grad_norm": 0.80859375, "learning_rate": 2.048708601931852e-05, "loss": 0.9793174862861633, "step": 1334 }, { "epoch": 0.6184850590687978, "grad_norm": 0.9375, "learning_rate": 2.0454646437382557e-05, "loss": 0.933502197265625, "step": 1335 }, { "epoch": 0.6189483437572388, "grad_norm": 0.734375, "learning_rate": 2.0422213475801728e-05, "loss": 1.0236554145812988, "step": 1336 }, { "epoch": 0.6194116284456799, "grad_norm": 0.984375, "learning_rate": 2.0389787195139183e-05, "loss": 1.0013253688812256, "step": 1337 }, { "epoch": 0.6198749131341209, "grad_norm": 0.84375, "learning_rate": 2.035736765594557e-05, "loss": 0.8655804991722107, "step": 1338 }, { "epoch": 0.6203381978225619, "grad_norm": 0.8515625, "learning_rate": 2.0324954918758952e-05, "loss": 0.9141555428504944, "step": 1339 }, { "epoch": 0.620801482511003, "grad_norm": 0.79296875, "learning_rate": 2.0292549044104714e-05, "loss": 0.8629526495933533, "step": 1340 }, { "epoch": 0.621264767199444, "grad_norm": 0.84765625, "learning_rate": 2.0260150092495398e-05, "loss": 0.8775175213813782, "step": 1341 }, { "epoch": 0.6217280518878852, "grad_norm": 0.75390625, "learning_rate": 2.0227758124430642e-05, "loss": 0.9920527935028076, "step": 1342 }, { "epoch": 0.6221913365763262, "grad_norm": 0.8359375, "learning_rate": 2.019537320039701e-05, "loss": 0.9502109289169312, "step": 1343 }, { "epoch": 0.6226546212647672, "grad_norm": 0.73828125, "learning_rate": 2.0162995380867968e-05, "loss": 0.9974714517593384, "step": 1344 }, { "epoch": 0.6231179059532083, "grad_norm": 0.890625, "learning_rate": 2.0130624726303653e-05, "loss": 0.8903458714485168, "step": 1345 }, { "epoch": 0.6235811906416493, "grad_norm": 0.79296875, "learning_rate": 2.0098261297150857e-05, "loss": 0.8124538660049438, "step": 1346 }, { "epoch": 0.6240444753300903, "grad_norm": 0.94140625, "learning_rate": 2.0065905153842885e-05, "loss": 0.925495982170105, "step": 1347 }, { "epoch": 0.6245077600185314, "grad_norm": 0.796875, "learning_rate": 2.0033556356799412e-05, "loss": 1.0063080787658691, "step": 1348 }, { "epoch": 0.6249710447069724, "grad_norm": 0.78125, "learning_rate": 2.0001214966426418e-05, "loss": 0.857069730758667, "step": 1349 }, { "epoch": 0.6254343293954134, "grad_norm": 0.8046875, "learning_rate": 1.9968881043116043e-05, "loss": 1.0339258909225464, "step": 1350 }, { "epoch": 0.6258976140838546, "grad_norm": 0.84375, "learning_rate": 1.993655464724649e-05, "loss": 0.8328740000724792, "step": 1351 }, { "epoch": 0.6263608987722956, "grad_norm": 0.75390625, "learning_rate": 1.9904235839181884e-05, "loss": 1.000057578086853, "step": 1352 }, { "epoch": 0.6268241834607367, "grad_norm": 0.84765625, "learning_rate": 1.98719246792722e-05, "loss": 0.9434694647789001, "step": 1353 }, { "epoch": 0.6272874681491777, "grad_norm": 0.84765625, "learning_rate": 1.983962122785314e-05, "loss": 0.8397997617721558, "step": 1354 }, { "epoch": 0.6277507528376187, "grad_norm": 0.90625, "learning_rate": 1.9807325545245985e-05, "loss": 1.1510478258132935, "step": 1355 }, { "epoch": 0.6282140375260598, "grad_norm": 0.8203125, "learning_rate": 1.9775037691757538e-05, "loss": 0.9587162733078003, "step": 1356 }, { "epoch": 0.6286773222145008, "grad_norm": 0.984375, "learning_rate": 1.9742757727679956e-05, "loss": 0.8667926788330078, "step": 1357 }, { "epoch": 0.6291406069029418, "grad_norm": 0.7890625, "learning_rate": 1.971048571329069e-05, "loss": 0.8282247185707092, "step": 1358 }, { "epoch": 0.6296038915913829, "grad_norm": 0.94921875, "learning_rate": 1.967822170885231e-05, "loss": 1.0788094997406006, "step": 1359 }, { "epoch": 0.630067176279824, "grad_norm": 0.8046875, "learning_rate": 1.964596577461248e-05, "loss": 0.9873729348182678, "step": 1360 }, { "epoch": 0.630530460968265, "grad_norm": 0.8046875, "learning_rate": 1.9613717970803744e-05, "loss": 0.8831279873847961, "step": 1361 }, { "epoch": 0.6309937456567061, "grad_norm": 0.7890625, "learning_rate": 1.9581478357643482e-05, "loss": 0.9821964502334595, "step": 1362 }, { "epoch": 0.6314570303451471, "grad_norm": 0.83984375, "learning_rate": 1.9549246995333805e-05, "loss": 0.9710639119148254, "step": 1363 }, { "epoch": 0.6319203150335881, "grad_norm": 0.828125, "learning_rate": 1.951702394406137e-05, "loss": 0.8819524645805359, "step": 1364 }, { "epoch": 0.6323835997220292, "grad_norm": 0.765625, "learning_rate": 1.948480926399736e-05, "loss": 0.8822925090789795, "step": 1365 }, { "epoch": 0.6328468844104702, "grad_norm": 0.87109375, "learning_rate": 1.9452603015297282e-05, "loss": 0.9902225136756897, "step": 1366 }, { "epoch": 0.6333101690989112, "grad_norm": 0.87890625, "learning_rate": 1.942040525810095e-05, "loss": 0.877446174621582, "step": 1367 }, { "epoch": 0.6337734537873523, "grad_norm": 0.8515625, "learning_rate": 1.938821605253227e-05, "loss": 1.0983047485351562, "step": 1368 }, { "epoch": 0.6342367384757934, "grad_norm": 0.88671875, "learning_rate": 1.9356035458699208e-05, "loss": 1.289228916168213, "step": 1369 }, { "epoch": 0.6347000231642345, "grad_norm": 0.8984375, "learning_rate": 1.9323863536693654e-05, "loss": 0.9632030129432678, "step": 1370 }, { "epoch": 0.6351633078526755, "grad_norm": 0.90625, "learning_rate": 1.929170034659128e-05, "loss": 0.8725213408470154, "step": 1371 }, { "epoch": 0.6356265925411165, "grad_norm": 0.91796875, "learning_rate": 1.925954594845148e-05, "loss": 1.0300366878509521, "step": 1372 }, { "epoch": 0.6360898772295576, "grad_norm": 0.80859375, "learning_rate": 1.9227400402317202e-05, "loss": 1.045912504196167, "step": 1373 }, { "epoch": 0.6365531619179986, "grad_norm": 0.72265625, "learning_rate": 1.9195263768214903e-05, "loss": 0.9979844093322754, "step": 1374 }, { "epoch": 0.6370164466064396, "grad_norm": 0.765625, "learning_rate": 1.9163136106154358e-05, "loss": 0.8099144697189331, "step": 1375 }, { "epoch": 0.6374797312948807, "grad_norm": 0.89453125, "learning_rate": 1.9131017476128604e-05, "loss": 0.9197705388069153, "step": 1376 }, { "epoch": 0.6379430159833217, "grad_norm": 0.72265625, "learning_rate": 1.9098907938113824e-05, "loss": 0.8932551741600037, "step": 1377 }, { "epoch": 0.6384063006717628, "grad_norm": 0.88671875, "learning_rate": 1.90668075520692e-05, "loss": 0.8447968363761902, "step": 1378 }, { "epoch": 0.6388695853602039, "grad_norm": 1.125, "learning_rate": 1.9034716377936855e-05, "loss": 1.2065844535827637, "step": 1379 }, { "epoch": 0.6393328700486449, "grad_norm": 0.87109375, "learning_rate": 1.9002634475641678e-05, "loss": 0.8183916211128235, "step": 1380 }, { "epoch": 0.639796154737086, "grad_norm": 0.86328125, "learning_rate": 1.8970561905091263e-05, "loss": 0.875165581703186, "step": 1381 }, { "epoch": 0.640259439425527, "grad_norm": 0.7421875, "learning_rate": 1.8938498726175775e-05, "loss": 0.9768784642219543, "step": 1382 }, { "epoch": 0.640722724113968, "grad_norm": 0.87109375, "learning_rate": 1.8906444998767847e-05, "loss": 0.8940538763999939, "step": 1383 }, { "epoch": 0.6411860088024091, "grad_norm": 0.80859375, "learning_rate": 1.887440078272245e-05, "loss": 0.8721079230308533, "step": 1384 }, { "epoch": 0.6416492934908501, "grad_norm": 0.828125, "learning_rate": 1.884236613787679e-05, "loss": 0.8797422051429749, "step": 1385 }, { "epoch": 0.6421125781792911, "grad_norm": 0.98046875, "learning_rate": 1.881034112405023e-05, "loss": 0.978660523891449, "step": 1386 }, { "epoch": 0.6425758628677323, "grad_norm": 0.73828125, "learning_rate": 1.8778325801044118e-05, "loss": 0.9717423319816589, "step": 1387 }, { "epoch": 0.6430391475561733, "grad_norm": 0.8125, "learning_rate": 1.8746320228641726e-05, "loss": 0.9432107210159302, "step": 1388 }, { "epoch": 0.6435024322446143, "grad_norm": 0.78515625, "learning_rate": 1.8714324466608103e-05, "loss": 0.7963858246803284, "step": 1389 }, { "epoch": 0.6439657169330554, "grad_norm": 0.8359375, "learning_rate": 1.8682338574690004e-05, "loss": 1.0017811059951782, "step": 1390 }, { "epoch": 0.6444290016214964, "grad_norm": 0.73046875, "learning_rate": 1.8650362612615715e-05, "loss": 0.9127390384674072, "step": 1391 }, { "epoch": 0.6448922863099374, "grad_norm": 0.94140625, "learning_rate": 1.8618396640095e-05, "loss": 1.0621755123138428, "step": 1392 }, { "epoch": 0.6453555709983785, "grad_norm": 0.78125, "learning_rate": 1.8586440716818984e-05, "loss": 0.8622088432312012, "step": 1393 }, { "epoch": 0.6458188556868195, "grad_norm": 0.75390625, "learning_rate": 1.8554494902459995e-05, "loss": 0.8669024705886841, "step": 1394 }, { "epoch": 0.6462821403752605, "grad_norm": 0.734375, "learning_rate": 1.852255925667152e-05, "loss": 1.1172361373901367, "step": 1395 }, { "epoch": 0.6467454250637017, "grad_norm": 0.8046875, "learning_rate": 1.8490633839088025e-05, "loss": 0.879069447517395, "step": 1396 }, { "epoch": 0.6472087097521427, "grad_norm": 0.765625, "learning_rate": 1.84587187093249e-05, "loss": 0.9635634422302246, "step": 1397 }, { "epoch": 0.6476719944405838, "grad_norm": 0.85546875, "learning_rate": 1.8426813926978312e-05, "loss": 1.092876672744751, "step": 1398 }, { "epoch": 0.6481352791290248, "grad_norm": 0.71875, "learning_rate": 1.83949195516251e-05, "loss": 0.7953818440437317, "step": 1399 }, { "epoch": 0.6485985638174658, "grad_norm": 0.87890625, "learning_rate": 1.836303564282269e-05, "loss": 0.9496350288391113, "step": 1400 }, { "epoch": 0.6490618485059069, "grad_norm": 0.796875, "learning_rate": 1.8331162260108945e-05, "loss": 0.9301480054855347, "step": 1401 }, { "epoch": 0.6495251331943479, "grad_norm": 0.8515625, "learning_rate": 1.8299299463002083e-05, "loss": 0.8314633965492249, "step": 1402 }, { "epoch": 0.6499884178827889, "grad_norm": 0.87890625, "learning_rate": 1.826744731100055e-05, "loss": 0.7800553441047668, "step": 1403 }, { "epoch": 0.65045170257123, "grad_norm": 0.86328125, "learning_rate": 1.823560586358292e-05, "loss": 0.8455618619918823, "step": 1404 }, { "epoch": 0.6509149872596711, "grad_norm": 0.8203125, "learning_rate": 1.8203775180207772e-05, "loss": 0.7878734469413757, "step": 1405 }, { "epoch": 0.6513782719481122, "grad_norm": 0.81640625, "learning_rate": 1.8171955320313575e-05, "loss": 0.8364105820655823, "step": 1406 }, { "epoch": 0.6518415566365532, "grad_norm": 0.828125, "learning_rate": 1.814014634331861e-05, "loss": 0.9706379771232605, "step": 1407 }, { "epoch": 0.6523048413249942, "grad_norm": 0.82421875, "learning_rate": 1.8108348308620824e-05, "loss": 0.9855102300643921, "step": 1408 }, { "epoch": 0.6527681260134353, "grad_norm": 0.7265625, "learning_rate": 1.8076561275597727e-05, "loss": 0.8652253746986389, "step": 1409 }, { "epoch": 0.6532314107018763, "grad_norm": 0.85546875, "learning_rate": 1.8044785303606288e-05, "loss": 0.8510443568229675, "step": 1410 }, { "epoch": 0.6536946953903173, "grad_norm": 0.8828125, "learning_rate": 1.8013020451982835e-05, "loss": 0.9615334868431091, "step": 1411 }, { "epoch": 0.6541579800787584, "grad_norm": 0.82421875, "learning_rate": 1.7981266780042904e-05, "loss": 0.8480163812637329, "step": 1412 }, { "epoch": 0.6546212647671994, "grad_norm": 0.81640625, "learning_rate": 1.7949524347081187e-05, "loss": 0.9262988567352295, "step": 1413 }, { "epoch": 0.6550845494556405, "grad_norm": 1.078125, "learning_rate": 1.7917793212371354e-05, "loss": 1.2862350940704346, "step": 1414 }, { "epoch": 0.6555478341440816, "grad_norm": 0.87109375, "learning_rate": 1.7886073435165996e-05, "loss": 1.0281234979629517, "step": 1415 }, { "epoch": 0.6560111188325226, "grad_norm": 0.890625, "learning_rate": 1.785436507469651e-05, "loss": 0.8532906770706177, "step": 1416 }, { "epoch": 0.6564744035209636, "grad_norm": 0.95703125, "learning_rate": 1.7822668190172938e-05, "loss": 0.8838380575180054, "step": 1417 }, { "epoch": 0.6569376882094047, "grad_norm": 0.86328125, "learning_rate": 1.779098284078393e-05, "loss": 0.8856160044670105, "step": 1418 }, { "epoch": 0.6574009728978457, "grad_norm": 0.859375, "learning_rate": 1.775930908569657e-05, "loss": 0.8010708093643188, "step": 1419 }, { "epoch": 0.6578642575862867, "grad_norm": 0.91015625, "learning_rate": 1.772764698405631e-05, "loss": 0.8682946562767029, "step": 1420 }, { "epoch": 0.6583275422747278, "grad_norm": 0.78515625, "learning_rate": 1.7695996594986836e-05, "loss": 1.0009998083114624, "step": 1421 }, { "epoch": 0.6587908269631688, "grad_norm": 0.953125, "learning_rate": 1.7664357977589932e-05, "loss": 0.9323142170906067, "step": 1422 }, { "epoch": 0.65925411165161, "grad_norm": 0.82421875, "learning_rate": 1.7632731190945454e-05, "loss": 0.9634347558021545, "step": 1423 }, { "epoch": 0.659717396340051, "grad_norm": 0.859375, "learning_rate": 1.7601116294111127e-05, "loss": 1.1587272882461548, "step": 1424 }, { "epoch": 0.660180681028492, "grad_norm": 0.87890625, "learning_rate": 1.7569513346122498e-05, "loss": 1.0130343437194824, "step": 1425 }, { "epoch": 0.6606439657169331, "grad_norm": 0.95703125, "learning_rate": 1.7537922405992772e-05, "loss": 0.904670000076294, "step": 1426 }, { "epoch": 0.6611072504053741, "grad_norm": 0.96875, "learning_rate": 1.7506343532712762e-05, "loss": 1.015366554260254, "step": 1427 }, { "epoch": 0.6615705350938151, "grad_norm": 0.828125, "learning_rate": 1.7474776785250742e-05, "loss": 1.0041009187698364, "step": 1428 }, { "epoch": 0.6620338197822562, "grad_norm": 0.84375, "learning_rate": 1.744322222255231e-05, "loss": 0.8108413815498352, "step": 1429 }, { "epoch": 0.6624971044706972, "grad_norm": 0.80859375, "learning_rate": 1.7411679903540366e-05, "loss": 0.8150144815444946, "step": 1430 }, { "epoch": 0.6629603891591382, "grad_norm": 0.81640625, "learning_rate": 1.7380149887114892e-05, "loss": 0.9165526032447815, "step": 1431 }, { "epoch": 0.6634236738475794, "grad_norm": 0.86328125, "learning_rate": 1.7348632232152943e-05, "loss": 0.8653296828269958, "step": 1432 }, { "epoch": 0.6638869585360204, "grad_norm": 0.765625, "learning_rate": 1.7317126997508464e-05, "loss": 0.8013156652450562, "step": 1433 }, { "epoch": 0.6643502432244615, "grad_norm": 0.765625, "learning_rate": 1.7285634242012216e-05, "loss": 0.800615668296814, "step": 1434 }, { "epoch": 0.6648135279129025, "grad_norm": 0.71875, "learning_rate": 1.725415402447165e-05, "loss": 0.8959740996360779, "step": 1435 }, { "epoch": 0.6652768126013435, "grad_norm": 0.77734375, "learning_rate": 1.722268640367083e-05, "loss": 0.9206319451332092, "step": 1436 }, { "epoch": 0.6657400972897846, "grad_norm": 0.8203125, "learning_rate": 1.719123143837025e-05, "loss": 0.9993884563446045, "step": 1437 }, { "epoch": 0.6662033819782256, "grad_norm": 0.97265625, "learning_rate": 1.715978918730681e-05, "loss": 0.9378257393836975, "step": 1438 }, { "epoch": 0.6666666666666666, "grad_norm": 0.87109375, "learning_rate": 1.7128359709193664e-05, "loss": 1.0335369110107422, "step": 1439 }, { "epoch": 0.6671299513551077, "grad_norm": 0.75, "learning_rate": 1.70969430627201e-05, "loss": 0.843043327331543, "step": 1440 }, { "epoch": 0.6675932360435488, "grad_norm": 0.8359375, "learning_rate": 1.7065539306551467e-05, "loss": 0.9367977380752563, "step": 1441 }, { "epoch": 0.6680565207319898, "grad_norm": 0.8984375, "learning_rate": 1.7034148499329014e-05, "loss": 0.9247215390205383, "step": 1442 }, { "epoch": 0.6685198054204309, "grad_norm": 0.8125, "learning_rate": 1.700277069966984e-05, "loss": 0.9778249859809875, "step": 1443 }, { "epoch": 0.6689830901088719, "grad_norm": 0.8125, "learning_rate": 1.6971405966166737e-05, "loss": 0.7912523150444031, "step": 1444 }, { "epoch": 0.669446374797313, "grad_norm": 0.734375, "learning_rate": 1.6940054357388088e-05, "loss": 0.891732394695282, "step": 1445 }, { "epoch": 0.669909659485754, "grad_norm": 0.81640625, "learning_rate": 1.6908715931877802e-05, "loss": 1.0751419067382812, "step": 1446 }, { "epoch": 0.670372944174195, "grad_norm": 0.82421875, "learning_rate": 1.6877390748155137e-05, "loss": 0.952225923538208, "step": 1447 }, { "epoch": 0.670836228862636, "grad_norm": 1.0703125, "learning_rate": 1.6846078864714642e-05, "loss": 0.9902151823043823, "step": 1448 }, { "epoch": 0.6712995135510771, "grad_norm": 0.8515625, "learning_rate": 1.6814780340026027e-05, "loss": 0.9151983261108398, "step": 1449 }, { "epoch": 0.6717627982395182, "grad_norm": 0.8125, "learning_rate": 1.6783495232534053e-05, "loss": 0.81586754322052, "step": 1450 }, { "epoch": 0.6722260829279593, "grad_norm": 0.8203125, "learning_rate": 1.6752223600658437e-05, "loss": 0.8691772818565369, "step": 1451 }, { "epoch": 0.6726893676164003, "grad_norm": 0.94921875, "learning_rate": 1.67209655027937e-05, "loss": 0.7808753848075867, "step": 1452 }, { "epoch": 0.6731526523048413, "grad_norm": 0.9453125, "learning_rate": 1.6689720997309144e-05, "loss": 0.9605461955070496, "step": 1453 }, { "epoch": 0.6736159369932824, "grad_norm": 0.859375, "learning_rate": 1.6658490142548634e-05, "loss": 0.8777569532394409, "step": 1454 }, { "epoch": 0.6740792216817234, "grad_norm": 0.75390625, "learning_rate": 1.6627272996830594e-05, "loss": 0.8905514478683472, "step": 1455 }, { "epoch": 0.6745425063701644, "grad_norm": 0.80078125, "learning_rate": 1.659606961844781e-05, "loss": 1.0816737413406372, "step": 1456 }, { "epoch": 0.6750057910586055, "grad_norm": 0.9296875, "learning_rate": 1.656488006566738e-05, "loss": 0.9656401872634888, "step": 1457 }, { "epoch": 0.6754690757470465, "grad_norm": 0.78125, "learning_rate": 1.6533704396730586e-05, "loss": 0.9407315850257874, "step": 1458 }, { "epoch": 0.6759323604354877, "grad_norm": 0.80078125, "learning_rate": 1.6502542669852762e-05, "loss": 0.9171674847602844, "step": 1459 }, { "epoch": 0.6763956451239287, "grad_norm": 0.8671875, "learning_rate": 1.647139494322325e-05, "loss": 0.8651185035705566, "step": 1460 }, { "epoch": 0.6768589298123697, "grad_norm": 0.9140625, "learning_rate": 1.644026127500519e-05, "loss": 0.9177205562591553, "step": 1461 }, { "epoch": 0.6773222145008108, "grad_norm": 0.85546875, "learning_rate": 1.6409141723335533e-05, "loss": 0.8896920680999756, "step": 1462 }, { "epoch": 0.6777854991892518, "grad_norm": 0.82421875, "learning_rate": 1.637803634632482e-05, "loss": 0.9356686472892761, "step": 1463 }, { "epoch": 0.6782487838776928, "grad_norm": 0.86328125, "learning_rate": 1.6346945202057156e-05, "loss": 0.9953092336654663, "step": 1464 }, { "epoch": 0.6787120685661339, "grad_norm": 0.85546875, "learning_rate": 1.6315868348590043e-05, "loss": 0.9929192066192627, "step": 1465 }, { "epoch": 0.6791753532545749, "grad_norm": 0.8359375, "learning_rate": 1.6284805843954325e-05, "loss": 0.9500339031219482, "step": 1466 }, { "epoch": 0.6796386379430159, "grad_norm": 0.84375, "learning_rate": 1.6253757746154036e-05, "loss": 0.932491660118103, "step": 1467 }, { "epoch": 0.6801019226314571, "grad_norm": 0.8203125, "learning_rate": 1.622272411316629e-05, "loss": 0.9832372665405273, "step": 1468 }, { "epoch": 0.6805652073198981, "grad_norm": 0.80078125, "learning_rate": 1.6191705002941227e-05, "loss": 0.8932191133499146, "step": 1469 }, { "epoch": 0.6810284920083391, "grad_norm": 0.765625, "learning_rate": 1.6160700473401838e-05, "loss": 0.9749459624290466, "step": 1470 }, { "epoch": 0.6814917766967802, "grad_norm": 1.0390625, "learning_rate": 1.6129710582443913e-05, "loss": 1.025862216949463, "step": 1471 }, { "epoch": 0.6819550613852212, "grad_norm": 0.78515625, "learning_rate": 1.609873538793588e-05, "loss": 0.7363277077674866, "step": 1472 }, { "epoch": 0.6824183460736623, "grad_norm": 0.9296875, "learning_rate": 1.6067774947718745e-05, "loss": 0.9932308793067932, "step": 1473 }, { "epoch": 0.6828816307621033, "grad_norm": 0.984375, "learning_rate": 1.6036829319605963e-05, "loss": 1.0912789106369019, "step": 1474 }, { "epoch": 0.6833449154505443, "grad_norm": 0.8046875, "learning_rate": 1.6005898561383296e-05, "loss": 0.8973681330680847, "step": 1475 }, { "epoch": 0.6838082001389854, "grad_norm": 0.88671875, "learning_rate": 1.5974982730808785e-05, "loss": 0.9494137167930603, "step": 1476 }, { "epoch": 0.6842714848274265, "grad_norm": 0.7578125, "learning_rate": 1.5944081885612567e-05, "loss": 1.014467477798462, "step": 1477 }, { "epoch": 0.6847347695158675, "grad_norm": 0.859375, "learning_rate": 1.5913196083496813e-05, "loss": 0.8571305871009827, "step": 1478 }, { "epoch": 0.6851980542043086, "grad_norm": 0.90625, "learning_rate": 1.588232538213559e-05, "loss": 1.0085415840148926, "step": 1479 }, { "epoch": 0.6856613388927496, "grad_norm": 0.8671875, "learning_rate": 1.5851469839174773e-05, "loss": 0.9112807512283325, "step": 1480 }, { "epoch": 0.6861246235811906, "grad_norm": 0.7109375, "learning_rate": 1.5820629512231938e-05, "loss": 0.7599020004272461, "step": 1481 }, { "epoch": 0.6865879082696317, "grad_norm": 0.81640625, "learning_rate": 1.578980445889622e-05, "loss": 1.0665782690048218, "step": 1482 }, { "epoch": 0.6870511929580727, "grad_norm": 1.046875, "learning_rate": 1.5758994736728285e-05, "loss": 0.9333577752113342, "step": 1483 }, { "epoch": 0.6875144776465137, "grad_norm": 0.84375, "learning_rate": 1.5728200403260104e-05, "loss": 1.0278582572937012, "step": 1484 }, { "epoch": 0.6879777623349548, "grad_norm": 0.890625, "learning_rate": 1.5697421515994976e-05, "loss": 0.8942716121673584, "step": 1485 }, { "epoch": 0.6884410470233959, "grad_norm": 0.8203125, "learning_rate": 1.5666658132407308e-05, "loss": 0.8894409537315369, "step": 1486 }, { "epoch": 0.688904331711837, "grad_norm": 0.90234375, "learning_rate": 1.563591030994259e-05, "loss": 0.9284682273864746, "step": 1487 }, { "epoch": 0.689367616400278, "grad_norm": 0.8984375, "learning_rate": 1.560517810601723e-05, "loss": 0.9897350072860718, "step": 1488 }, { "epoch": 0.689830901088719, "grad_norm": 0.8671875, "learning_rate": 1.5574461578018493e-05, "loss": 0.9923799633979797, "step": 1489 }, { "epoch": 0.6902941857771601, "grad_norm": 0.90234375, "learning_rate": 1.554376078330436e-05, "loss": 0.8437385559082031, "step": 1490 }, { "epoch": 0.6907574704656011, "grad_norm": 0.8984375, "learning_rate": 1.5513075779203417e-05, "loss": 1.065632700920105, "step": 1491 }, { "epoch": 0.6912207551540421, "grad_norm": 0.7890625, "learning_rate": 1.5482406623014795e-05, "loss": 0.9187345504760742, "step": 1492 }, { "epoch": 0.6916840398424832, "grad_norm": 0.76171875, "learning_rate": 1.545175337200801e-05, "loss": 0.8924624919891357, "step": 1493 }, { "epoch": 0.6921473245309242, "grad_norm": 0.94921875, "learning_rate": 1.5421116083422887e-05, "loss": 0.9492448568344116, "step": 1494 }, { "epoch": 0.6926106092193653, "grad_norm": 0.80859375, "learning_rate": 1.539049481446944e-05, "loss": 0.738587498664856, "step": 1495 }, { "epoch": 0.6930738939078064, "grad_norm": 0.76953125, "learning_rate": 1.5359889622327773e-05, "loss": 1.0237951278686523, "step": 1496 }, { "epoch": 0.6935371785962474, "grad_norm": 0.82421875, "learning_rate": 1.5329300564147974e-05, "loss": 0.9008263349533081, "step": 1497 }, { "epoch": 0.6940004632846885, "grad_norm": 1.046875, "learning_rate": 1.529872769704997e-05, "loss": 1.008281946182251, "step": 1498 }, { "epoch": 0.6944637479731295, "grad_norm": 0.98828125, "learning_rate": 1.5268171078123503e-05, "loss": 1.0567467212677002, "step": 1499 }, { "epoch": 0.6949270326615705, "grad_norm": 0.90234375, "learning_rate": 1.5237630764427945e-05, "loss": 0.8660717606544495, "step": 1500 }, { "epoch": 0.6953903173500116, "grad_norm": 0.8984375, "learning_rate": 1.5207106812992225e-05, "loss": 0.9844351410865784, "step": 1501 }, { "epoch": 0.6958536020384526, "grad_norm": 0.953125, "learning_rate": 1.5176599280814716e-05, "loss": 0.9823206067085266, "step": 1502 }, { "epoch": 0.6963168867268936, "grad_norm": 0.84765625, "learning_rate": 1.5146108224863147e-05, "loss": 0.8877736330032349, "step": 1503 }, { "epoch": 0.6967801714153348, "grad_norm": 0.765625, "learning_rate": 1.5115633702074463e-05, "loss": 0.8826972842216492, "step": 1504 }, { "epoch": 0.6972434561037758, "grad_norm": 0.9375, "learning_rate": 1.5085175769354723e-05, "loss": 0.9058219194412231, "step": 1505 }, { "epoch": 0.6977067407922168, "grad_norm": 0.8125, "learning_rate": 1.5054734483579058e-05, "loss": 0.9776875972747803, "step": 1506 }, { "epoch": 0.6981700254806579, "grad_norm": 0.89453125, "learning_rate": 1.5024309901591453e-05, "loss": 0.8491601347923279, "step": 1507 }, { "epoch": 0.6986333101690989, "grad_norm": 0.76171875, "learning_rate": 1.4993902080204744e-05, "loss": 0.9605410099029541, "step": 1508 }, { "epoch": 0.6990965948575399, "grad_norm": 0.87890625, "learning_rate": 1.4963511076200446e-05, "loss": 0.8161846995353699, "step": 1509 }, { "epoch": 0.699559879545981, "grad_norm": 0.93359375, "learning_rate": 1.4933136946328686e-05, "loss": 0.992790699005127, "step": 1510 }, { "epoch": 0.700023164234422, "grad_norm": 0.80078125, "learning_rate": 1.4902779747308069e-05, "loss": 0.75239497423172, "step": 1511 }, { "epoch": 0.700486448922863, "grad_norm": 0.82421875, "learning_rate": 1.4872439535825595e-05, "loss": 0.8581488132476807, "step": 1512 }, { "epoch": 0.7009497336113042, "grad_norm": 1.015625, "learning_rate": 1.484211636853654e-05, "loss": 0.9911954998970032, "step": 1513 }, { "epoch": 0.7014130182997452, "grad_norm": 0.8203125, "learning_rate": 1.4811810302064333e-05, "loss": 0.9692468047142029, "step": 1514 }, { "epoch": 0.7018763029881863, "grad_norm": 0.875, "learning_rate": 1.4781521393000504e-05, "loss": 0.9069231152534485, "step": 1515 }, { "epoch": 0.7023395876766273, "grad_norm": 0.85546875, "learning_rate": 1.4751249697904517e-05, "loss": 0.8558336496353149, "step": 1516 }, { "epoch": 0.7028028723650683, "grad_norm": 0.828125, "learning_rate": 1.4720995273303713e-05, "loss": 0.8646467328071594, "step": 1517 }, { "epoch": 0.7032661570535094, "grad_norm": 0.765625, "learning_rate": 1.4690758175693161e-05, "loss": 0.9294448494911194, "step": 1518 }, { "epoch": 0.7037294417419504, "grad_norm": 0.8984375, "learning_rate": 1.46605384615356e-05, "loss": 1.5108270645141602, "step": 1519 }, { "epoch": 0.7041927264303914, "grad_norm": 0.765625, "learning_rate": 1.463033618726129e-05, "loss": 0.7721649408340454, "step": 1520 }, { "epoch": 0.7046560111188325, "grad_norm": 0.921875, "learning_rate": 1.4600151409267915e-05, "loss": 0.9843413233757019, "step": 1521 }, { "epoch": 0.7051192958072736, "grad_norm": 0.77734375, "learning_rate": 1.4569984183920527e-05, "loss": 1.0829393863677979, "step": 1522 }, { "epoch": 0.7055825804957147, "grad_norm": 0.84375, "learning_rate": 1.453983456755135e-05, "loss": 0.8989307284355164, "step": 1523 }, { "epoch": 0.7060458651841557, "grad_norm": 0.89453125, "learning_rate": 1.4509702616459779e-05, "loss": 1.061563491821289, "step": 1524 }, { "epoch": 0.7065091498725967, "grad_norm": 0.7265625, "learning_rate": 1.4479588386912172e-05, "loss": 0.9384868741035461, "step": 1525 }, { "epoch": 0.7069724345610378, "grad_norm": 0.75, "learning_rate": 1.4449491935141836e-05, "loss": 0.7927857637405396, "step": 1526 }, { "epoch": 0.7074357192494788, "grad_norm": 0.9140625, "learning_rate": 1.4419413317348868e-05, "loss": 0.7902013063430786, "step": 1527 }, { "epoch": 0.7078990039379198, "grad_norm": 0.859375, "learning_rate": 1.4389352589700028e-05, "loss": 0.8877607583999634, "step": 1528 }, { "epoch": 0.7083622886263609, "grad_norm": 0.85546875, "learning_rate": 1.4359309808328738e-05, "loss": 0.8904527425765991, "step": 1529 }, { "epoch": 0.7088255733148019, "grad_norm": 0.9453125, "learning_rate": 1.4329285029334844e-05, "loss": 0.9153241515159607, "step": 1530 }, { "epoch": 0.709288858003243, "grad_norm": 0.76171875, "learning_rate": 1.4299278308784614e-05, "loss": 0.8823608756065369, "step": 1531 }, { "epoch": 0.7097521426916841, "grad_norm": 0.66796875, "learning_rate": 1.4269289702710575e-05, "loss": 0.8942029476165771, "step": 1532 }, { "epoch": 0.7102154273801251, "grad_norm": 0.78125, "learning_rate": 1.4239319267111447e-05, "loss": 0.9542202353477478, "step": 1533 }, { "epoch": 0.7106787120685661, "grad_norm": 0.9296875, "learning_rate": 1.4209367057952005e-05, "loss": 0.834882915019989, "step": 1534 }, { "epoch": 0.7111419967570072, "grad_norm": 0.80859375, "learning_rate": 1.4179433131162978e-05, "loss": 0.8554872870445251, "step": 1535 }, { "epoch": 0.7116052814454482, "grad_norm": 0.88671875, "learning_rate": 1.4149517542640996e-05, "loss": 0.9996330738067627, "step": 1536 }, { "epoch": 0.7120685661338892, "grad_norm": 0.953125, "learning_rate": 1.4119620348248392e-05, "loss": 0.8751652240753174, "step": 1537 }, { "epoch": 0.7125318508223303, "grad_norm": 0.8984375, "learning_rate": 1.4089741603813209e-05, "loss": 0.9856979846954346, "step": 1538 }, { "epoch": 0.7129951355107713, "grad_norm": 0.9765625, "learning_rate": 1.4059881365128982e-05, "loss": 0.9445462226867676, "step": 1539 }, { "epoch": 0.7134584201992125, "grad_norm": 0.92578125, "learning_rate": 1.4030039687954728e-05, "loss": 0.7988513112068176, "step": 1540 }, { "epoch": 0.7139217048876535, "grad_norm": 0.8359375, "learning_rate": 1.4000216628014782e-05, "loss": 0.8697713017463684, "step": 1541 }, { "epoch": 0.7143849895760945, "grad_norm": 0.80078125, "learning_rate": 1.3970412240998741e-05, "loss": 1.1426811218261719, "step": 1542 }, { "epoch": 0.7148482742645356, "grad_norm": 0.7734375, "learning_rate": 1.3940626582561308e-05, "loss": 0.932056188583374, "step": 1543 }, { "epoch": 0.7153115589529766, "grad_norm": 0.84765625, "learning_rate": 1.3910859708322204e-05, "loss": 0.9006248116493225, "step": 1544 }, { "epoch": 0.7157748436414176, "grad_norm": 0.73046875, "learning_rate": 1.3881111673866106e-05, "loss": 0.869684636592865, "step": 1545 }, { "epoch": 0.7162381283298587, "grad_norm": 0.84765625, "learning_rate": 1.3851382534742493e-05, "loss": 1.0497184991836548, "step": 1546 }, { "epoch": 0.7167014130182997, "grad_norm": 0.80078125, "learning_rate": 1.3821672346465575e-05, "loss": 0.9837194681167603, "step": 1547 }, { "epoch": 0.7171646977067407, "grad_norm": 0.9453125, "learning_rate": 1.3791981164514141e-05, "loss": 0.9810088872909546, "step": 1548 }, { "epoch": 0.7176279823951819, "grad_norm": 0.8828125, "learning_rate": 1.376230904433153e-05, "loss": 0.9475647211074829, "step": 1549 }, { "epoch": 0.7180912670836229, "grad_norm": 0.87109375, "learning_rate": 1.3732656041325448e-05, "loss": 0.7182843089103699, "step": 1550 }, { "epoch": 0.718554551772064, "grad_norm": 0.78515625, "learning_rate": 1.370302221086793e-05, "loss": 0.8560316562652588, "step": 1551 }, { "epoch": 0.719017836460505, "grad_norm": 0.8125, "learning_rate": 1.3673407608295208e-05, "loss": 0.9808245301246643, "step": 1552 }, { "epoch": 0.719481121148946, "grad_norm": 0.796875, "learning_rate": 1.3643812288907586e-05, "loss": 0.8075791597366333, "step": 1553 }, { "epoch": 0.7199444058373871, "grad_norm": 0.7109375, "learning_rate": 1.3614236307969388e-05, "loss": 0.8571614027023315, "step": 1554 }, { "epoch": 0.7204076905258281, "grad_norm": 0.94140625, "learning_rate": 1.3584679720708808e-05, "loss": 1.0778717994689941, "step": 1555 }, { "epoch": 0.7208709752142691, "grad_norm": 0.703125, "learning_rate": 1.3555142582317846e-05, "loss": 0.726076602935791, "step": 1556 }, { "epoch": 0.7213342599027102, "grad_norm": 0.87109375, "learning_rate": 1.352562494795216e-05, "loss": 0.8657900094985962, "step": 1557 }, { "epoch": 0.7217975445911513, "grad_norm": 0.77734375, "learning_rate": 1.3496126872730989e-05, "loss": 1.003196120262146, "step": 1558 }, { "epoch": 0.7222608292795923, "grad_norm": 0.8046875, "learning_rate": 1.3466648411737065e-05, "loss": 0.990123450756073, "step": 1559 }, { "epoch": 0.7227241139680334, "grad_norm": 0.890625, "learning_rate": 1.3437189620016487e-05, "loss": 0.8534030914306641, "step": 1560 }, { "epoch": 0.7231873986564744, "grad_norm": 0.78515625, "learning_rate": 1.3407750552578635e-05, "loss": 0.9646372199058533, "step": 1561 }, { "epoch": 0.7236506833449154, "grad_norm": 0.8359375, "learning_rate": 1.3378331264396029e-05, "loss": 0.7858661413192749, "step": 1562 }, { "epoch": 0.7241139680333565, "grad_norm": 0.73828125, "learning_rate": 1.3348931810404288e-05, "loss": 0.7996460795402527, "step": 1563 }, { "epoch": 0.7245772527217975, "grad_norm": 0.91796875, "learning_rate": 1.331955224550195e-05, "loss": 0.9326428174972534, "step": 1564 }, { "epoch": 0.7250405374102386, "grad_norm": 0.85546875, "learning_rate": 1.329019262455048e-05, "loss": 1.1491047143936157, "step": 1565 }, { "epoch": 0.7255038220986796, "grad_norm": 0.8359375, "learning_rate": 1.3260853002374042e-05, "loss": 0.8331834077835083, "step": 1566 }, { "epoch": 0.7259671067871207, "grad_norm": 0.92578125, "learning_rate": 1.323153343375947e-05, "loss": 0.920341432094574, "step": 1567 }, { "epoch": 0.7264303914755618, "grad_norm": 0.85546875, "learning_rate": 1.3202233973456163e-05, "loss": 1.0477654933929443, "step": 1568 }, { "epoch": 0.7268936761640028, "grad_norm": 0.89453125, "learning_rate": 1.3172954676175968e-05, "loss": 0.877657949924469, "step": 1569 }, { "epoch": 0.7273569608524438, "grad_norm": 0.91796875, "learning_rate": 1.3143695596593085e-05, "loss": 0.8888318538665771, "step": 1570 }, { "epoch": 0.7278202455408849, "grad_norm": 0.7890625, "learning_rate": 1.3114456789343936e-05, "loss": 1.0591609477996826, "step": 1571 }, { "epoch": 0.7282835302293259, "grad_norm": 0.7421875, "learning_rate": 1.308523830902713e-05, "loss": 0.9316011071205139, "step": 1572 }, { "epoch": 0.7287468149177669, "grad_norm": 0.93359375, "learning_rate": 1.3056040210203272e-05, "loss": 0.9526464343070984, "step": 1573 }, { "epoch": 0.729210099606208, "grad_norm": 0.86328125, "learning_rate": 1.302686254739494e-05, "loss": 0.900551438331604, "step": 1574 }, { "epoch": 0.729673384294649, "grad_norm": 0.88671875, "learning_rate": 1.299770537508655e-05, "loss": 0.9370485544204712, "step": 1575 }, { "epoch": 0.7301366689830902, "grad_norm": 0.890625, "learning_rate": 1.2968568747724228e-05, "loss": 0.9385664463043213, "step": 1576 }, { "epoch": 0.7305999536715312, "grad_norm": 0.82421875, "learning_rate": 1.2939452719715771e-05, "loss": 0.8683898448944092, "step": 1577 }, { "epoch": 0.7310632383599722, "grad_norm": 0.78515625, "learning_rate": 1.291035734543049e-05, "loss": 0.8029768466949463, "step": 1578 }, { "epoch": 0.7315265230484133, "grad_norm": 0.890625, "learning_rate": 1.2881282679199143e-05, "loss": 0.9224568605422974, "step": 1579 }, { "epoch": 0.7319898077368543, "grad_norm": 0.75, "learning_rate": 1.2852228775313794e-05, "loss": 0.8857653737068176, "step": 1580 }, { "epoch": 0.7324530924252953, "grad_norm": 0.8046875, "learning_rate": 1.2823195688027754e-05, "loss": 0.9866558313369751, "step": 1581 }, { "epoch": 0.7329163771137364, "grad_norm": 0.83203125, "learning_rate": 1.2794183471555458e-05, "loss": 0.8291232585906982, "step": 1582 }, { "epoch": 0.7333796618021774, "grad_norm": 0.84765625, "learning_rate": 1.2765192180072382e-05, "loss": 1.0451363325119019, "step": 1583 }, { "epoch": 0.7338429464906184, "grad_norm": 0.7734375, "learning_rate": 1.2736221867714914e-05, "loss": 0.8132802248001099, "step": 1584 }, { "epoch": 0.7343062311790596, "grad_norm": 0.82421875, "learning_rate": 1.2707272588580259e-05, "loss": 0.9321390390396118, "step": 1585 }, { "epoch": 0.7347695158675006, "grad_norm": 0.74609375, "learning_rate": 1.2678344396726367e-05, "loss": 0.8528720140457153, "step": 1586 }, { "epoch": 0.7352328005559416, "grad_norm": 0.7578125, "learning_rate": 1.2649437346171806e-05, "loss": 0.9147552847862244, "step": 1587 }, { "epoch": 0.7356960852443827, "grad_norm": 0.8671875, "learning_rate": 1.262055149089564e-05, "loss": 0.938007652759552, "step": 1588 }, { "epoch": 0.7361593699328237, "grad_norm": 0.83984375, "learning_rate": 1.2591686884837398e-05, "loss": 0.9420212507247925, "step": 1589 }, { "epoch": 0.7366226546212647, "grad_norm": 0.93359375, "learning_rate": 1.2562843581896892e-05, "loss": 0.8320150375366211, "step": 1590 }, { "epoch": 0.7370859393097058, "grad_norm": 0.9375, "learning_rate": 1.2534021635934168e-05, "loss": 0.8456387519836426, "step": 1591 }, { "epoch": 0.7375492239981468, "grad_norm": 0.87890625, "learning_rate": 1.25052211007694e-05, "loss": 0.8855749368667603, "step": 1592 }, { "epoch": 0.7380125086865879, "grad_norm": 0.91015625, "learning_rate": 1.2476442030182779e-05, "loss": 0.9768263101577759, "step": 1593 }, { "epoch": 0.738475793375029, "grad_norm": 0.70703125, "learning_rate": 1.244768447791439e-05, "loss": 0.7757108211517334, "step": 1594 }, { "epoch": 0.73893907806347, "grad_norm": 0.91796875, "learning_rate": 1.2418948497664178e-05, "loss": 0.7897142767906189, "step": 1595 }, { "epoch": 0.7394023627519111, "grad_norm": 0.9140625, "learning_rate": 1.2390234143091761e-05, "loss": 1.009446620941162, "step": 1596 }, { "epoch": 0.7398656474403521, "grad_norm": 0.83203125, "learning_rate": 1.2361541467816402e-05, "loss": 0.7995726466178894, "step": 1597 }, { "epoch": 0.7403289321287931, "grad_norm": 0.92578125, "learning_rate": 1.2332870525416888e-05, "loss": 0.9182596206665039, "step": 1598 }, { "epoch": 0.7407922168172342, "grad_norm": 0.89453125, "learning_rate": 1.2304221369431394e-05, "loss": 0.8457068800926208, "step": 1599 }, { "epoch": 0.7412555015056752, "grad_norm": 0.8046875, "learning_rate": 1.227559405335744e-05, "loss": 1.0693312883377075, "step": 1600 }, { "epoch": 0.7417187861941162, "grad_norm": 0.90234375, "learning_rate": 1.2246988630651752e-05, "loss": 0.9207885265350342, "step": 1601 }, { "epoch": 0.7421820708825573, "grad_norm": 0.79296875, "learning_rate": 1.2218405154730182e-05, "loss": 0.9426785111427307, "step": 1602 }, { "epoch": 0.7426453555709984, "grad_norm": 0.79296875, "learning_rate": 1.2189843678967586e-05, "loss": 0.8506474494934082, "step": 1603 }, { "epoch": 0.7431086402594395, "grad_norm": 0.89453125, "learning_rate": 1.2161304256697735e-05, "loss": 0.8817695379257202, "step": 1604 }, { "epoch": 0.7435719249478805, "grad_norm": 0.8828125, "learning_rate": 1.2132786941213243e-05, "loss": 0.9216139316558838, "step": 1605 }, { "epoch": 0.7440352096363215, "grad_norm": 0.79296875, "learning_rate": 1.2104291785765427e-05, "loss": 0.9422940611839294, "step": 1606 }, { "epoch": 0.7444984943247626, "grad_norm": 0.7578125, "learning_rate": 1.2075818843564235e-05, "loss": 0.8162217140197754, "step": 1607 }, { "epoch": 0.7449617790132036, "grad_norm": 0.85546875, "learning_rate": 1.2047368167778111e-05, "loss": 0.8420689105987549, "step": 1608 }, { "epoch": 0.7454250637016446, "grad_norm": 0.80859375, "learning_rate": 1.2018939811533943e-05, "loss": 0.8831995129585266, "step": 1609 }, { "epoch": 0.7458883483900857, "grad_norm": 0.87109375, "learning_rate": 1.1990533827916945e-05, "loss": 0.9695085287094116, "step": 1610 }, { "epoch": 0.7463516330785267, "grad_norm": 0.83984375, "learning_rate": 1.1962150269970522e-05, "loss": 0.7822751402854919, "step": 1611 }, { "epoch": 0.7468149177669678, "grad_norm": 0.99609375, "learning_rate": 1.1933789190696248e-05, "loss": 1.0877046585083008, "step": 1612 }, { "epoch": 0.7472782024554089, "grad_norm": 1.0078125, "learning_rate": 1.1905450643053673e-05, "loss": 0.9393079280853271, "step": 1613 }, { "epoch": 0.7477414871438499, "grad_norm": 0.83984375, "learning_rate": 1.187713467996031e-05, "loss": 0.8477166295051575, "step": 1614 }, { "epoch": 0.748204771832291, "grad_norm": 0.86328125, "learning_rate": 1.1848841354291486e-05, "loss": 1.0002130270004272, "step": 1615 }, { "epoch": 0.748668056520732, "grad_norm": 0.76953125, "learning_rate": 1.1820570718880265e-05, "loss": 0.8591433167457581, "step": 1616 }, { "epoch": 0.749131341209173, "grad_norm": 0.80859375, "learning_rate": 1.179232282651732e-05, "loss": 0.8319763541221619, "step": 1617 }, { "epoch": 0.749594625897614, "grad_norm": 0.78515625, "learning_rate": 1.1764097729950881e-05, "loss": 0.8276806473731995, "step": 1618 }, { "epoch": 0.7500579105860551, "grad_norm": 0.81640625, "learning_rate": 1.1735895481886583e-05, "loss": 0.940848708152771, "step": 1619 }, { "epoch": 0.7505211952744962, "grad_norm": 0.84765625, "learning_rate": 1.1707716134987416e-05, "loss": 0.8151571154594421, "step": 1620 }, { "epoch": 0.7509844799629373, "grad_norm": 0.81640625, "learning_rate": 1.167955974187361e-05, "loss": 0.8742914795875549, "step": 1621 }, { "epoch": 0.7514477646513783, "grad_norm": 0.8125, "learning_rate": 1.1651426355122506e-05, "loss": 0.8702387809753418, "step": 1622 }, { "epoch": 0.7519110493398193, "grad_norm": 1.0703125, "learning_rate": 1.1623316027268506e-05, "loss": 0.8356806039810181, "step": 1623 }, { "epoch": 0.7523743340282604, "grad_norm": 0.828125, "learning_rate": 1.1595228810802956e-05, "loss": 0.8744298815727234, "step": 1624 }, { "epoch": 0.7528376187167014, "grad_norm": 0.9140625, "learning_rate": 1.156716475817404e-05, "loss": 0.8392553329467773, "step": 1625 }, { "epoch": 0.7533009034051424, "grad_norm": 0.859375, "learning_rate": 1.1539123921786677e-05, "loss": 0.9593334197998047, "step": 1626 }, { "epoch": 0.7537641880935835, "grad_norm": 0.8359375, "learning_rate": 1.1511106354002433e-05, "loss": 0.9393202066421509, "step": 1627 }, { "epoch": 0.7542274727820245, "grad_norm": 0.88671875, "learning_rate": 1.1483112107139444e-05, "loss": 0.8775235414505005, "step": 1628 }, { "epoch": 0.7546907574704657, "grad_norm": 0.796875, "learning_rate": 1.1455141233472282e-05, "loss": 0.9231534004211426, "step": 1629 }, { "epoch": 0.7551540421589067, "grad_norm": 0.8984375, "learning_rate": 1.1427193785231894e-05, "loss": 1.0438674688339233, "step": 1630 }, { "epoch": 0.7556173268473477, "grad_norm": 1.0625, "learning_rate": 1.1399269814605442e-05, "loss": 1.0147441625595093, "step": 1631 }, { "epoch": 0.7560806115357888, "grad_norm": 0.8671875, "learning_rate": 1.1371369373736287e-05, "loss": 1.0305769443511963, "step": 1632 }, { "epoch": 0.7565438962242298, "grad_norm": 0.9375, "learning_rate": 1.1343492514723849e-05, "loss": 0.9958128333091736, "step": 1633 }, { "epoch": 0.7570071809126708, "grad_norm": 0.75390625, "learning_rate": 1.131563928962348e-05, "loss": 0.997496485710144, "step": 1634 }, { "epoch": 0.7574704656011119, "grad_norm": 0.921875, "learning_rate": 1.128780975044644e-05, "loss": 0.885054886341095, "step": 1635 }, { "epoch": 0.7579337502895529, "grad_norm": 0.7734375, "learning_rate": 1.1260003949159729e-05, "loss": 0.9202278256416321, "step": 1636 }, { "epoch": 0.7583970349779939, "grad_norm": 0.73046875, "learning_rate": 1.1232221937686033e-05, "loss": 0.7514294385910034, "step": 1637 }, { "epoch": 0.7588603196664351, "grad_norm": 0.75390625, "learning_rate": 1.1204463767903624e-05, "loss": 0.7889110445976257, "step": 1638 }, { "epoch": 0.7593236043548761, "grad_norm": 0.7890625, "learning_rate": 1.1176729491646248e-05, "loss": 0.8639605045318604, "step": 1639 }, { "epoch": 0.7597868890433171, "grad_norm": 0.87890625, "learning_rate": 1.1149019160703012e-05, "loss": 0.9009301662445068, "step": 1640 }, { "epoch": 0.7602501737317582, "grad_norm": 0.84765625, "learning_rate": 1.1121332826818346e-05, "loss": 0.8193938732147217, "step": 1641 }, { "epoch": 0.7607134584201992, "grad_norm": 0.890625, "learning_rate": 1.1093670541691834e-05, "loss": 0.9459896683692932, "step": 1642 }, { "epoch": 0.7611767431086403, "grad_norm": 0.8125, "learning_rate": 1.1066032356978173e-05, "loss": 1.0018055438995361, "step": 1643 }, { "epoch": 0.7616400277970813, "grad_norm": 0.72265625, "learning_rate": 1.1038418324287065e-05, "loss": 0.9474573731422424, "step": 1644 }, { "epoch": 0.7621033124855223, "grad_norm": 0.8671875, "learning_rate": 1.1010828495183086e-05, "loss": 0.8761516213417053, "step": 1645 }, { "epoch": 0.7625665971739634, "grad_norm": 0.83203125, "learning_rate": 1.0983262921185635e-05, "loss": 0.8465573191642761, "step": 1646 }, { "epoch": 0.7630298818624045, "grad_norm": 0.9921875, "learning_rate": 1.095572165376881e-05, "loss": 1.064576268196106, "step": 1647 }, { "epoch": 0.7634931665508455, "grad_norm": 0.86328125, "learning_rate": 1.0928204744361344e-05, "loss": 0.9846107363700867, "step": 1648 }, { "epoch": 0.7639564512392866, "grad_norm": 0.78515625, "learning_rate": 1.0900712244346447e-05, "loss": 0.9933382272720337, "step": 1649 }, { "epoch": 0.7644197359277276, "grad_norm": 0.96875, "learning_rate": 1.087324420506176e-05, "loss": 1.019972324371338, "step": 1650 }, { "epoch": 0.7648830206161686, "grad_norm": 0.7890625, "learning_rate": 1.0845800677799265e-05, "loss": 0.8567442893981934, "step": 1651 }, { "epoch": 0.7653463053046097, "grad_norm": 0.8359375, "learning_rate": 1.0818381713805164e-05, "loss": 0.833297610282898, "step": 1652 }, { "epoch": 0.7658095899930507, "grad_norm": 0.75, "learning_rate": 1.0790987364279792e-05, "loss": 1.0111546516418457, "step": 1653 }, { "epoch": 0.7662728746814917, "grad_norm": 0.79296875, "learning_rate": 1.0763617680377507e-05, "loss": 0.9289141893386841, "step": 1654 }, { "epoch": 0.7667361593699328, "grad_norm": 0.859375, "learning_rate": 1.0736272713206621e-05, "loss": 0.8431038856506348, "step": 1655 }, { "epoch": 0.7671994440583739, "grad_norm": 0.78125, "learning_rate": 1.0708952513829302e-05, "loss": 0.911212682723999, "step": 1656 }, { "epoch": 0.767662728746815, "grad_norm": 1.109375, "learning_rate": 1.0681657133261436e-05, "loss": 1.0007601976394653, "step": 1657 }, { "epoch": 0.768126013435256, "grad_norm": 0.76171875, "learning_rate": 1.0654386622472605e-05, "loss": 0.7746074795722961, "step": 1658 }, { "epoch": 0.768589298123697, "grad_norm": 0.84765625, "learning_rate": 1.0627141032385904e-05, "loss": 0.8235775828361511, "step": 1659 }, { "epoch": 0.7690525828121381, "grad_norm": 0.84375, "learning_rate": 1.0599920413877935e-05, "loss": 1.0395088195800781, "step": 1660 }, { "epoch": 0.7695158675005791, "grad_norm": 0.796875, "learning_rate": 1.0572724817778647e-05, "loss": 0.9071087837219238, "step": 1661 }, { "epoch": 0.7699791521890201, "grad_norm": 0.86328125, "learning_rate": 1.0545554294871282e-05, "loss": 0.836544930934906, "step": 1662 }, { "epoch": 0.7704424368774612, "grad_norm": 0.765625, "learning_rate": 1.051840889589224e-05, "loss": 0.8565930724143982, "step": 1663 }, { "epoch": 0.7709057215659022, "grad_norm": 0.80078125, "learning_rate": 1.0491288671531e-05, "loss": 0.9731587171554565, "step": 1664 }, { "epoch": 0.7713690062543433, "grad_norm": 0.91796875, "learning_rate": 1.0464193672430078e-05, "loss": 0.9530751705169678, "step": 1665 }, { "epoch": 0.7718322909427844, "grad_norm": 0.75390625, "learning_rate": 1.0437123949184834e-05, "loss": 0.9852890968322754, "step": 1666 }, { "epoch": 0.7722955756312254, "grad_norm": 1.0625, "learning_rate": 1.0410079552343468e-05, "loss": 1.010793924331665, "step": 1667 }, { "epoch": 0.7727588603196665, "grad_norm": 0.67578125, "learning_rate": 1.0383060532406851e-05, "loss": 0.8825767636299133, "step": 1668 }, { "epoch": 0.7732221450081075, "grad_norm": 0.87109375, "learning_rate": 1.0356066939828494e-05, "loss": 0.7823730707168579, "step": 1669 }, { "epoch": 0.7736854296965485, "grad_norm": 1.03125, "learning_rate": 1.0329098825014424e-05, "loss": 0.8294221758842468, "step": 1670 }, { "epoch": 0.7741487143849896, "grad_norm": 1.5234375, "learning_rate": 1.0302156238323093e-05, "loss": 0.9734241962432861, "step": 1671 }, { "epoch": 0.7746119990734306, "grad_norm": 0.8671875, "learning_rate": 1.0275239230065266e-05, "loss": 1.0100374221801758, "step": 1672 }, { "epoch": 0.7750752837618716, "grad_norm": 0.75, "learning_rate": 1.0248347850503954e-05, "loss": 1.160994529724121, "step": 1673 }, { "epoch": 0.7755385684503128, "grad_norm": 0.79296875, "learning_rate": 1.0221482149854319e-05, "loss": 1.0030722618103027, "step": 1674 }, { "epoch": 0.7760018531387538, "grad_norm": 0.8515625, "learning_rate": 1.0194642178283568e-05, "loss": 0.9101303219795227, "step": 1675 }, { "epoch": 0.7764651378271948, "grad_norm": 0.8046875, "learning_rate": 1.0167827985910865e-05, "loss": 0.9005215167999268, "step": 1676 }, { "epoch": 0.7769284225156359, "grad_norm": 0.86328125, "learning_rate": 1.014103962280722e-05, "loss": 0.935279369354248, "step": 1677 }, { "epoch": 0.7773917072040769, "grad_norm": 0.859375, "learning_rate": 1.0114277138995428e-05, "loss": 0.8451523780822754, "step": 1678 }, { "epoch": 0.7778549918925179, "grad_norm": 0.93359375, "learning_rate": 1.0087540584449966e-05, "loss": 0.8488246202468872, "step": 1679 }, { "epoch": 0.778318276580959, "grad_norm": 0.859375, "learning_rate": 1.0060830009096858e-05, "loss": 0.9785677194595337, "step": 1680 }, { "epoch": 0.7787815612694, "grad_norm": 0.8828125, "learning_rate": 1.0034145462813665e-05, "loss": 1.1581677198410034, "step": 1681 }, { "epoch": 0.779244845957841, "grad_norm": 0.87109375, "learning_rate": 1.000748699542929e-05, "loss": 0.9785441160202026, "step": 1682 }, { "epoch": 0.7797081306462822, "grad_norm": 0.7578125, "learning_rate": 9.980854656723977e-06, "loss": 0.9519538879394531, "step": 1683 }, { "epoch": 0.7801714153347232, "grad_norm": 0.921875, "learning_rate": 9.954248496429166e-06, "loss": 0.8969386219978333, "step": 1684 }, { "epoch": 0.7806347000231643, "grad_norm": 0.78125, "learning_rate": 9.927668564227422e-06, "loss": 0.8716106414794922, "step": 1685 }, { "epoch": 0.7810979847116053, "grad_norm": 0.8359375, "learning_rate": 9.901114909752323e-06, "loss": 0.8693481087684631, "step": 1686 }, { "epoch": 0.7815612694000463, "grad_norm": 0.76953125, "learning_rate": 9.874587582588353e-06, "loss": 0.9671769142150879, "step": 1687 }, { "epoch": 0.7820245540884874, "grad_norm": 0.6875, "learning_rate": 9.848086632270901e-06, "loss": 0.8428149223327637, "step": 1688 }, { "epoch": 0.7824878387769284, "grad_norm": 0.78125, "learning_rate": 9.821612108286036e-06, "loss": 1.0409609079360962, "step": 1689 }, { "epoch": 0.7829511234653694, "grad_norm": 0.87109375, "learning_rate": 9.795164060070523e-06, "loss": 0.9725620746612549, "step": 1690 }, { "epoch": 0.7834144081538105, "grad_norm": 0.9609375, "learning_rate": 9.768742537011652e-06, "loss": 0.9622225165367126, "step": 1691 }, { "epoch": 0.7838776928422516, "grad_norm": 0.6796875, "learning_rate": 9.74234758844721e-06, "loss": 0.762078583240509, "step": 1692 }, { "epoch": 0.7843409775306927, "grad_norm": 0.8046875, "learning_rate": 9.715979263665355e-06, "loss": 0.8382387161254883, "step": 1693 }, { "epoch": 0.7848042622191337, "grad_norm": 0.76171875, "learning_rate": 9.689637611904528e-06, "loss": 0.8127785921096802, "step": 1694 }, { "epoch": 0.7852675469075747, "grad_norm": 0.77734375, "learning_rate": 9.663322682353359e-06, "loss": 0.7931768894195557, "step": 1695 }, { "epoch": 0.7857308315960158, "grad_norm": 0.734375, "learning_rate": 9.637034524150567e-06, "loss": 0.9206767678260803, "step": 1696 }, { "epoch": 0.7861941162844568, "grad_norm": 0.97265625, "learning_rate": 9.610773186384898e-06, "loss": 0.9987959265708923, "step": 1697 }, { "epoch": 0.7866574009728978, "grad_norm": 0.89453125, "learning_rate": 9.584538718095019e-06, "loss": 0.9304317235946655, "step": 1698 }, { "epoch": 0.7871206856613389, "grad_norm": 0.875, "learning_rate": 9.558331168269418e-06, "loss": 0.8821865320205688, "step": 1699 }, { "epoch": 0.7875839703497799, "grad_norm": 0.7109375, "learning_rate": 9.532150585846297e-06, "loss": 0.879505455493927, "step": 1700 }, { "epoch": 0.788047255038221, "grad_norm": 0.97265625, "learning_rate": 9.505997019713527e-06, "loss": 0.9673975110054016, "step": 1701 }, { "epoch": 0.7885105397266621, "grad_norm": 0.78515625, "learning_rate": 9.479870518708525e-06, "loss": 0.9111735820770264, "step": 1702 }, { "epoch": 0.7889738244151031, "grad_norm": 0.8515625, "learning_rate": 9.453771131618154e-06, "loss": 1.1614623069763184, "step": 1703 }, { "epoch": 0.7894371091035441, "grad_norm": 0.73046875, "learning_rate": 9.42769890717867e-06, "loss": 0.8665764331817627, "step": 1704 }, { "epoch": 0.7899003937919852, "grad_norm": 0.86328125, "learning_rate": 9.401653894075576e-06, "loss": 0.9338579177856445, "step": 1705 }, { "epoch": 0.7903636784804262, "grad_norm": 0.8046875, "learning_rate": 9.375636140943591e-06, "loss": 1.0184825658798218, "step": 1706 }, { "epoch": 0.7908269631688672, "grad_norm": 0.890625, "learning_rate": 9.349645696366522e-06, "loss": 0.8786851167678833, "step": 1707 }, { "epoch": 0.7912902478573083, "grad_norm": 0.94921875, "learning_rate": 9.32368260887718e-06, "loss": 1.002317190170288, "step": 1708 }, { "epoch": 0.7917535325457493, "grad_norm": 0.80859375, "learning_rate": 9.29774692695729e-06, "loss": 0.9711103439331055, "step": 1709 }, { "epoch": 0.7922168172341905, "grad_norm": 0.86328125, "learning_rate": 9.271838699037386e-06, "loss": 0.8316183090209961, "step": 1710 }, { "epoch": 0.7926801019226315, "grad_norm": 0.86328125, "learning_rate": 9.24595797349678e-06, "loss": 1.0970137119293213, "step": 1711 }, { "epoch": 0.7931433866110725, "grad_norm": 0.87109375, "learning_rate": 9.220104798663383e-06, "loss": 0.8820261359214783, "step": 1712 }, { "epoch": 0.7936066712995136, "grad_norm": 0.9609375, "learning_rate": 9.194279222813689e-06, "loss": 0.9609889388084412, "step": 1713 }, { "epoch": 0.7940699559879546, "grad_norm": 0.875, "learning_rate": 9.168481294172628e-06, "loss": 0.8790441751480103, "step": 1714 }, { "epoch": 0.7945332406763956, "grad_norm": 0.9375, "learning_rate": 9.142711060913529e-06, "loss": 0.8385621309280396, "step": 1715 }, { "epoch": 0.7949965253648367, "grad_norm": 0.78515625, "learning_rate": 9.116968571158004e-06, "loss": 0.7539405822753906, "step": 1716 }, { "epoch": 0.7954598100532777, "grad_norm": 0.86328125, "learning_rate": 9.09125387297583e-06, "loss": 0.8373897075653076, "step": 1717 }, { "epoch": 0.7959230947417187, "grad_norm": 0.88671875, "learning_rate": 9.065567014384927e-06, "loss": 0.8968495726585388, "step": 1718 }, { "epoch": 0.7963863794301599, "grad_norm": 0.94921875, "learning_rate": 9.03990804335119e-06, "loss": 0.779243528842926, "step": 1719 }, { "epoch": 0.7968496641186009, "grad_norm": 0.859375, "learning_rate": 9.014277007788471e-06, "loss": 0.8106821775436401, "step": 1720 }, { "epoch": 0.797312948807042, "grad_norm": 0.75390625, "learning_rate": 8.988673955558443e-06, "loss": 0.8644953370094299, "step": 1721 }, { "epoch": 0.797776233495483, "grad_norm": 0.76171875, "learning_rate": 8.96309893447053e-06, "loss": 0.9038248658180237, "step": 1722 }, { "epoch": 0.798239518183924, "grad_norm": 0.93359375, "learning_rate": 8.937551992281796e-06, "loss": 0.996218204498291, "step": 1723 }, { "epoch": 0.7987028028723651, "grad_norm": 0.859375, "learning_rate": 8.912033176696893e-06, "loss": 0.8666702508926392, "step": 1724 }, { "epoch": 0.7991660875608061, "grad_norm": 0.85546875, "learning_rate": 8.886542535367954e-06, "loss": 0.8723835349082947, "step": 1725 }, { "epoch": 0.7996293722492471, "grad_norm": 0.8515625, "learning_rate": 8.861080115894469e-06, "loss": 0.9064018130302429, "step": 1726 }, { "epoch": 0.8000926569376882, "grad_norm": 0.86328125, "learning_rate": 8.83564596582327e-06, "loss": 0.8690557479858398, "step": 1727 }, { "epoch": 0.8005559416261293, "grad_norm": 0.9453125, "learning_rate": 8.810240132648365e-06, "loss": 0.9081324934959412, "step": 1728 }, { "epoch": 0.8010192263145703, "grad_norm": 1.0, "learning_rate": 8.784862663810909e-06, "loss": 0.8451979160308838, "step": 1729 }, { "epoch": 0.8014825110030114, "grad_norm": 0.88671875, "learning_rate": 8.759513606699077e-06, "loss": 1.040283203125, "step": 1730 }, { "epoch": 0.8019457956914524, "grad_norm": 0.7734375, "learning_rate": 8.734193008648011e-06, "loss": 1.03599214553833, "step": 1731 }, { "epoch": 0.8024090803798934, "grad_norm": 0.7578125, "learning_rate": 8.708900916939685e-06, "loss": 1.0581880807876587, "step": 1732 }, { "epoch": 0.8028723650683345, "grad_norm": 1.0390625, "learning_rate": 8.683637378802835e-06, "loss": 0.9091055989265442, "step": 1733 }, { "epoch": 0.8033356497567755, "grad_norm": 0.76953125, "learning_rate": 8.658402441412928e-06, "loss": 0.7468848824501038, "step": 1734 }, { "epoch": 0.8037989344452166, "grad_norm": 0.99609375, "learning_rate": 8.63319615189197e-06, "loss": 1.0149157047271729, "step": 1735 }, { "epoch": 0.8042622191336576, "grad_norm": 0.8203125, "learning_rate": 8.608018557308506e-06, "loss": 0.825045645236969, "step": 1736 }, { "epoch": 0.8047255038220987, "grad_norm": 0.92578125, "learning_rate": 8.58286970467747e-06, "loss": 0.7038781642913818, "step": 1737 }, { "epoch": 0.8051887885105398, "grad_norm": 0.8515625, "learning_rate": 8.55774964096015e-06, "loss": 1.1522748470306396, "step": 1738 }, { "epoch": 0.8056520731989808, "grad_norm": 0.83203125, "learning_rate": 8.53265841306407e-06, "loss": 0.949603259563446, "step": 1739 }, { "epoch": 0.8061153578874218, "grad_norm": 0.875, "learning_rate": 8.507596067842894e-06, "loss": 0.9490684866905212, "step": 1740 }, { "epoch": 0.8065786425758629, "grad_norm": 0.81640625, "learning_rate": 8.482562652096375e-06, "loss": 0.874863862991333, "step": 1741 }, { "epoch": 0.8070419272643039, "grad_norm": 0.8203125, "learning_rate": 8.457558212570205e-06, "loss": 0.9082040190696716, "step": 1742 }, { "epoch": 0.8075052119527449, "grad_norm": 0.8671875, "learning_rate": 8.432582795956032e-06, "loss": 0.9235297441482544, "step": 1743 }, { "epoch": 0.807968496641186, "grad_norm": 0.765625, "learning_rate": 8.407636448891245e-06, "loss": 0.9147178530693054, "step": 1744 }, { "epoch": 0.808431781329627, "grad_norm": 0.796875, "learning_rate": 8.382719217958996e-06, "loss": 1.0070385932922363, "step": 1745 }, { "epoch": 0.8088950660180682, "grad_norm": 0.79296875, "learning_rate": 8.35783114968803e-06, "loss": 0.8351930975914001, "step": 1746 }, { "epoch": 0.8093583507065092, "grad_norm": 0.77734375, "learning_rate": 8.332972290552663e-06, "loss": 0.8390335440635681, "step": 1747 }, { "epoch": 0.8098216353949502, "grad_norm": 0.796875, "learning_rate": 8.308142686972666e-06, "loss": 0.9591008424758911, "step": 1748 }, { "epoch": 0.8102849200833913, "grad_norm": 0.87890625, "learning_rate": 8.28334238531316e-06, "loss": 1.086327075958252, "step": 1749 }, { "epoch": 0.8107482047718323, "grad_norm": 1.109375, "learning_rate": 8.258571431884575e-06, "loss": 0.960287868976593, "step": 1750 }, { "epoch": 0.8112114894602733, "grad_norm": 0.796875, "learning_rate": 8.233829872942513e-06, "loss": 0.9874426126480103, "step": 1751 }, { "epoch": 0.8116747741487144, "grad_norm": 0.859375, "learning_rate": 8.209117754687708e-06, "loss": 0.9690349102020264, "step": 1752 }, { "epoch": 0.8121380588371554, "grad_norm": 0.82421875, "learning_rate": 8.184435123265906e-06, "loss": 1.0509775876998901, "step": 1753 }, { "epoch": 0.8126013435255964, "grad_norm": 0.94140625, "learning_rate": 8.159782024767808e-06, "loss": 1.0827224254608154, "step": 1754 }, { "epoch": 0.8130646282140376, "grad_norm": 0.921875, "learning_rate": 8.13515850522894e-06, "loss": 1.030488133430481, "step": 1755 }, { "epoch": 0.8135279129024786, "grad_norm": 0.90234375, "learning_rate": 8.110564610629599e-06, "loss": 0.920238733291626, "step": 1756 }, { "epoch": 0.8139911975909196, "grad_norm": 0.83203125, "learning_rate": 8.086000386894804e-06, "loss": 0.8451364040374756, "step": 1757 }, { "epoch": 0.8144544822793607, "grad_norm": 0.96484375, "learning_rate": 8.061465879894107e-06, "loss": 0.893768846988678, "step": 1758 }, { "epoch": 0.8149177669678017, "grad_norm": 0.84765625, "learning_rate": 8.036961135441621e-06, "loss": 0.9750687479972839, "step": 1759 }, { "epoch": 0.8153810516562428, "grad_norm": 0.7578125, "learning_rate": 8.01248619929584e-06, "loss": 1.0192598104476929, "step": 1760 }, { "epoch": 0.8158443363446838, "grad_norm": 0.7421875, "learning_rate": 7.988041117159626e-06, "loss": 1.0063856840133667, "step": 1761 }, { "epoch": 0.8163076210331248, "grad_norm": 0.890625, "learning_rate": 7.96362593468009e-06, "loss": 0.8969213366508484, "step": 1762 }, { "epoch": 0.8167709057215659, "grad_norm": 0.83203125, "learning_rate": 7.939240697448489e-06, "loss": 1.0545355081558228, "step": 1763 }, { "epoch": 0.817234190410007, "grad_norm": 0.796875, "learning_rate": 7.914885451000196e-06, "loss": 0.8668513894081116, "step": 1764 }, { "epoch": 0.817697475098448, "grad_norm": 0.9375, "learning_rate": 7.890560240814532e-06, "loss": 0.9358338713645935, "step": 1765 }, { "epoch": 0.8181607597868891, "grad_norm": 0.77734375, "learning_rate": 7.866265112314799e-06, "loss": 0.8214500546455383, "step": 1766 }, { "epoch": 0.8186240444753301, "grad_norm": 0.86328125, "learning_rate": 7.842000110868062e-06, "loss": 0.970549464225769, "step": 1767 }, { "epoch": 0.8190873291637711, "grad_norm": 0.86328125, "learning_rate": 7.81776528178517e-06, "loss": 0.7393088936805725, "step": 1768 }, { "epoch": 0.8195506138522122, "grad_norm": 0.84765625, "learning_rate": 7.793560670320604e-06, "loss": 1.0626767873764038, "step": 1769 }, { "epoch": 0.8200138985406532, "grad_norm": 0.984375, "learning_rate": 7.769386321672433e-06, "loss": 0.9659113883972168, "step": 1770 }, { "epoch": 0.8204771832290942, "grad_norm": 0.83984375, "learning_rate": 7.74524228098222e-06, "loss": 0.9227487444877625, "step": 1771 }, { "epoch": 0.8209404679175353, "grad_norm": 0.88671875, "learning_rate": 7.72112859333491e-06, "loss": 1.034658670425415, "step": 1772 }, { "epoch": 0.8214037526059764, "grad_norm": 0.984375, "learning_rate": 7.6970453037588e-06, "loss": 0.90932297706604, "step": 1773 }, { "epoch": 0.8218670372944175, "grad_norm": 0.76171875, "learning_rate": 7.672992457225394e-06, "loss": 0.9367461204528809, "step": 1774 }, { "epoch": 0.8223303219828585, "grad_norm": 0.890625, "learning_rate": 7.648970098649369e-06, "loss": 1.1317241191864014, "step": 1775 }, { "epoch": 0.8227936066712995, "grad_norm": 0.82421875, "learning_rate": 7.6249782728884594e-06, "loss": 1.149112343788147, "step": 1776 }, { "epoch": 0.8232568913597406, "grad_norm": 0.88671875, "learning_rate": 7.601017024743406e-06, "loss": 0.9359456300735474, "step": 1777 }, { "epoch": 0.8237201760481816, "grad_norm": 0.80078125, "learning_rate": 7.57708639895781e-06, "loss": 0.9038832783699036, "step": 1778 }, { "epoch": 0.8241834607366226, "grad_norm": 0.8828125, "learning_rate": 7.553186440218129e-06, "loss": 0.9364471435546875, "step": 1779 }, { "epoch": 0.8246467454250637, "grad_norm": 0.83984375, "learning_rate": 7.529317193153543e-06, "loss": 0.8946930170059204, "step": 1780 }, { "epoch": 0.8251100301135047, "grad_norm": 0.71484375, "learning_rate": 7.505478702335871e-06, "loss": 0.9415825009346008, "step": 1781 }, { "epoch": 0.8255733148019458, "grad_norm": 0.83203125, "learning_rate": 7.481671012279523e-06, "loss": 0.9467945694923401, "step": 1782 }, { "epoch": 0.8260365994903869, "grad_norm": 0.90234375, "learning_rate": 7.457894167441365e-06, "loss": 0.9999266266822815, "step": 1783 }, { "epoch": 0.8264998841788279, "grad_norm": 0.875, "learning_rate": 7.434148212220688e-06, "loss": 0.854033350944519, "step": 1784 }, { "epoch": 0.826963168867269, "grad_norm": 0.71484375, "learning_rate": 7.4104331909591e-06, "loss": 0.8315075039863586, "step": 1785 }, { "epoch": 0.82742645355571, "grad_norm": 0.86328125, "learning_rate": 7.3867491479404256e-06, "loss": 0.7753373980522156, "step": 1786 }, { "epoch": 0.827889738244151, "grad_norm": 1.015625, "learning_rate": 7.363096127390672e-06, "loss": 1.0225822925567627, "step": 1787 }, { "epoch": 0.828353022932592, "grad_norm": 0.7578125, "learning_rate": 7.339474173477875e-06, "loss": 0.8440317511558533, "step": 1788 }, { "epoch": 0.8288163076210331, "grad_norm": 0.734375, "learning_rate": 7.315883330312121e-06, "loss": 0.8816229701042175, "step": 1789 }, { "epoch": 0.8292795923094741, "grad_norm": 0.79296875, "learning_rate": 7.292323641945339e-06, "loss": 0.9245011210441589, "step": 1790 }, { "epoch": 0.8297428769979153, "grad_norm": 0.8515625, "learning_rate": 7.268795152371322e-06, "loss": 0.919562816619873, "step": 1791 }, { "epoch": 0.8302061616863563, "grad_norm": 0.7890625, "learning_rate": 7.245297905525582e-06, "loss": 0.9542215466499329, "step": 1792 }, { "epoch": 0.8306694463747973, "grad_norm": 0.8046875, "learning_rate": 7.2218319452853055e-06, "loss": 0.840879499912262, "step": 1793 }, { "epoch": 0.8311327310632384, "grad_norm": 0.80078125, "learning_rate": 7.198397315469257e-06, "loss": 1.072795033454895, "step": 1794 }, { "epoch": 0.8315960157516794, "grad_norm": 0.7578125, "learning_rate": 7.174994059837673e-06, "loss": 0.9962195754051208, "step": 1795 }, { "epoch": 0.8320593004401204, "grad_norm": 0.7890625, "learning_rate": 7.1516222220922425e-06, "loss": 0.8247233033180237, "step": 1796 }, { "epoch": 0.8325225851285615, "grad_norm": 0.87109375, "learning_rate": 7.128281845875946e-06, "loss": 0.8432327508926392, "step": 1797 }, { "epoch": 0.8329858698170025, "grad_norm": 0.953125, "learning_rate": 7.104972974773042e-06, "loss": 1.0490100383758545, "step": 1798 }, { "epoch": 0.8334491545054435, "grad_norm": 0.86328125, "learning_rate": 7.081695652308952e-06, "loss": 1.007668375968933, "step": 1799 }, { "epoch": 0.8339124391938847, "grad_norm": 0.99609375, "learning_rate": 7.058449921950193e-06, "loss": 0.9167599081993103, "step": 1800 }, { "epoch": 0.8343757238823257, "grad_norm": 0.81640625, "learning_rate": 7.035235827104265e-06, "loss": 0.930167019367218, "step": 1801 }, { "epoch": 0.8348390085707668, "grad_norm": 1.1328125, "learning_rate": 7.012053411119619e-06, "loss": 0.8546901941299438, "step": 1802 }, { "epoch": 0.8353022932592078, "grad_norm": 0.8515625, "learning_rate": 6.988902717285545e-06, "loss": 0.9504755139350891, "step": 1803 }, { "epoch": 0.8357655779476488, "grad_norm": 0.84375, "learning_rate": 6.9657837888320815e-06, "loss": 0.9580096006393433, "step": 1804 }, { "epoch": 0.8362288626360899, "grad_norm": 0.796875, "learning_rate": 6.94269666892998e-06, "loss": 0.7468809485435486, "step": 1805 }, { "epoch": 0.8366921473245309, "grad_norm": 0.88671875, "learning_rate": 6.919641400690559e-06, "loss": 0.8644688129425049, "step": 1806 }, { "epoch": 0.8371554320129719, "grad_norm": 0.83984375, "learning_rate": 6.896618027165684e-06, "loss": 0.9328237771987915, "step": 1807 }, { "epoch": 0.837618716701413, "grad_norm": 0.97265625, "learning_rate": 6.873626591347671e-06, "loss": 1.1448862552642822, "step": 1808 }, { "epoch": 0.8380820013898541, "grad_norm": 0.84765625, "learning_rate": 6.850667136169164e-06, "loss": 0.8983963131904602, "step": 1809 }, { "epoch": 0.8385452860782951, "grad_norm": 0.91015625, "learning_rate": 6.8277397045031205e-06, "loss": 0.8409160375595093, "step": 1810 }, { "epoch": 0.8390085707667362, "grad_norm": 0.73046875, "learning_rate": 6.804844339162666e-06, "loss": 0.8944919109344482, "step": 1811 }, { "epoch": 0.8394718554551772, "grad_norm": 1.140625, "learning_rate": 6.781981082901101e-06, "loss": 0.9417099952697754, "step": 1812 }, { "epoch": 0.8399351401436183, "grad_norm": 0.91796875, "learning_rate": 6.759149978411709e-06, "loss": 0.8804126977920532, "step": 1813 }, { "epoch": 0.8403984248320593, "grad_norm": 0.796875, "learning_rate": 6.736351068327776e-06, "loss": 0.8906807899475098, "step": 1814 }, { "epoch": 0.8408617095205003, "grad_norm": 0.89453125, "learning_rate": 6.713584395222441e-06, "loss": 0.9027203917503357, "step": 1815 }, { "epoch": 0.8413249942089414, "grad_norm": 0.8046875, "learning_rate": 6.690850001608671e-06, "loss": 0.8666508793830872, "step": 1816 }, { "epoch": 0.8417882788973824, "grad_norm": 1.296875, "learning_rate": 6.668147929939147e-06, "loss": 0.8996185064315796, "step": 1817 }, { "epoch": 0.8422515635858235, "grad_norm": 0.8515625, "learning_rate": 6.645478222606184e-06, "loss": 0.8643486499786377, "step": 1818 }, { "epoch": 0.8427148482742646, "grad_norm": 0.78515625, "learning_rate": 6.622840921941684e-06, "loss": 0.8814486265182495, "step": 1819 }, { "epoch": 0.8431781329627056, "grad_norm": 0.81640625, "learning_rate": 6.600236070216997e-06, "loss": 0.9077647924423218, "step": 1820 }, { "epoch": 0.8436414176511466, "grad_norm": 0.796875, "learning_rate": 6.577663709642938e-06, "loss": 0.9069101810455322, "step": 1821 }, { "epoch": 0.8441047023395877, "grad_norm": 0.84765625, "learning_rate": 6.555123882369596e-06, "loss": 0.7405815720558167, "step": 1822 }, { "epoch": 0.8445679870280287, "grad_norm": 0.92578125, "learning_rate": 6.532616630486341e-06, "loss": 0.929868757724762, "step": 1823 }, { "epoch": 0.8450312717164697, "grad_norm": 0.89453125, "learning_rate": 6.5101419960216925e-06, "loss": 0.9069142937660217, "step": 1824 }, { "epoch": 0.8454945564049108, "grad_norm": 0.99609375, "learning_rate": 6.48770002094328e-06, "loss": 0.9167734980583191, "step": 1825 }, { "epoch": 0.8459578410933518, "grad_norm": 0.83984375, "learning_rate": 6.465290747157745e-06, "loss": 0.8654367923736572, "step": 1826 }, { "epoch": 0.846421125781793, "grad_norm": 0.7578125, "learning_rate": 6.442914216510651e-06, "loss": 0.7541770935058594, "step": 1827 }, { "epoch": 0.846884410470234, "grad_norm": 0.84765625, "learning_rate": 6.420570470786438e-06, "loss": 0.8677940964698792, "step": 1828 }, { "epoch": 0.847347695158675, "grad_norm": 0.8671875, "learning_rate": 6.3982595517083064e-06, "loss": 0.8606151342391968, "step": 1829 }, { "epoch": 0.8478109798471161, "grad_norm": 0.890625, "learning_rate": 6.375981500938173e-06, "loss": 0.9796140193939209, "step": 1830 }, { "epoch": 0.8482742645355571, "grad_norm": 0.921875, "learning_rate": 6.353736360076578e-06, "loss": 0.921321451663971, "step": 1831 }, { "epoch": 0.8487375492239981, "grad_norm": 0.921875, "learning_rate": 6.3315241706625946e-06, "loss": 0.9960170388221741, "step": 1832 }, { "epoch": 0.8492008339124392, "grad_norm": 0.73046875, "learning_rate": 6.309344974173784e-06, "loss": 0.783862292766571, "step": 1833 }, { "epoch": 0.8496641186008802, "grad_norm": 0.83203125, "learning_rate": 6.287198812026068e-06, "loss": 0.9817046523094177, "step": 1834 }, { "epoch": 0.8501274032893212, "grad_norm": 0.90625, "learning_rate": 6.265085725573732e-06, "loss": 0.9676863551139832, "step": 1835 }, { "epoch": 0.8505906879777624, "grad_norm": 0.68359375, "learning_rate": 6.243005756109246e-06, "loss": 0.926174521446228, "step": 1836 }, { "epoch": 0.8510539726662034, "grad_norm": 0.8828125, "learning_rate": 6.220958944863276e-06, "loss": 0.889807939529419, "step": 1837 }, { "epoch": 0.8515172573546445, "grad_norm": 0.98828125, "learning_rate": 6.198945333004545e-06, "loss": 0.9411275386810303, "step": 1838 }, { "epoch": 0.8519805420430855, "grad_norm": 0.83203125, "learning_rate": 6.176964961639795e-06, "loss": 0.8610736131668091, "step": 1839 }, { "epoch": 0.8524438267315265, "grad_norm": 0.76171875, "learning_rate": 6.1550178718137095e-06, "loss": 0.8684600591659546, "step": 1840 }, { "epoch": 0.8529071114199676, "grad_norm": 0.83984375, "learning_rate": 6.13310410450879e-06, "loss": 0.8963911533355713, "step": 1841 }, { "epoch": 0.8533703961084086, "grad_norm": 0.91015625, "learning_rate": 6.111223700645352e-06, "loss": 1.0184478759765625, "step": 1842 }, { "epoch": 0.8538336807968496, "grad_norm": 0.8984375, "learning_rate": 6.089376701081368e-06, "loss": 0.9415737390518188, "step": 1843 }, { "epoch": 0.8542969654852907, "grad_norm": 0.83203125, "learning_rate": 6.067563146612489e-06, "loss": 0.9422698020935059, "step": 1844 }, { "epoch": 0.8547602501737318, "grad_norm": 0.921875, "learning_rate": 6.045783077971863e-06, "loss": 0.8872048854827881, "step": 1845 }, { "epoch": 0.8552235348621728, "grad_norm": 0.82421875, "learning_rate": 6.024036535830124e-06, "loss": 0.7562741041183472, "step": 1846 }, { "epoch": 0.8556868195506139, "grad_norm": 0.93359375, "learning_rate": 6.002323560795314e-06, "loss": 0.8720545172691345, "step": 1847 }, { "epoch": 0.8561501042390549, "grad_norm": 0.82421875, "learning_rate": 5.980644193412778e-06, "loss": 0.9384455680847168, "step": 1848 }, { "epoch": 0.856613388927496, "grad_norm": 0.765625, "learning_rate": 5.958998474165121e-06, "loss": 0.8861362934112549, "step": 1849 }, { "epoch": 0.857076673615937, "grad_norm": 0.87890625, "learning_rate": 5.937386443472092e-06, "loss": 1.1345970630645752, "step": 1850 }, { "epoch": 0.857539958304378, "grad_norm": 0.734375, "learning_rate": 5.915808141690556e-06, "loss": 0.8685821294784546, "step": 1851 }, { "epoch": 0.858003242992819, "grad_norm": 0.83984375, "learning_rate": 5.894263609114378e-06, "loss": 0.996828019618988, "step": 1852 }, { "epoch": 0.8584665276812601, "grad_norm": 0.86328125, "learning_rate": 5.872752885974371e-06, "loss": 0.9989946484565735, "step": 1853 }, { "epoch": 0.8589298123697012, "grad_norm": 0.87890625, "learning_rate": 5.851276012438224e-06, "loss": 0.7909801006317139, "step": 1854 }, { "epoch": 0.8593930970581423, "grad_norm": 0.8046875, "learning_rate": 5.829833028610395e-06, "loss": 1.1088160276412964, "step": 1855 }, { "epoch": 0.8598563817465833, "grad_norm": 0.80859375, "learning_rate": 5.80842397453208e-06, "loss": 0.8807237148284912, "step": 1856 }, { "epoch": 0.8603196664350243, "grad_norm": 0.8359375, "learning_rate": 5.787048890181105e-06, "loss": 0.8608243465423584, "step": 1857 }, { "epoch": 0.8607829511234654, "grad_norm": 0.8203125, "learning_rate": 5.765707815471878e-06, "loss": 0.9504100680351257, "step": 1858 }, { "epoch": 0.8612462358119064, "grad_norm": 0.9296875, "learning_rate": 5.744400790255271e-06, "loss": 0.9452154040336609, "step": 1859 }, { "epoch": 0.8617095205003474, "grad_norm": 0.82421875, "learning_rate": 5.72312785431861e-06, "loss": 0.8969765305519104, "step": 1860 }, { "epoch": 0.8621728051887885, "grad_norm": 0.9453125, "learning_rate": 5.701889047385529e-06, "loss": 0.996848464012146, "step": 1861 }, { "epoch": 0.8626360898772295, "grad_norm": 0.8984375, "learning_rate": 5.68068440911596e-06, "loss": 0.8867621421813965, "step": 1862 }, { "epoch": 0.8630993745656707, "grad_norm": 0.875, "learning_rate": 5.6595139791060246e-06, "loss": 1.0145070552825928, "step": 1863 }, { "epoch": 0.8635626592541117, "grad_norm": 0.8515625, "learning_rate": 5.63837779688795e-06, "loss": 0.8787609934806824, "step": 1864 }, { "epoch": 0.8640259439425527, "grad_norm": 0.84375, "learning_rate": 5.617275901930037e-06, "loss": 0.9451928734779358, "step": 1865 }, { "epoch": 0.8644892286309938, "grad_norm": 0.80078125, "learning_rate": 5.596208333636525e-06, "loss": 0.9350622892379761, "step": 1866 }, { "epoch": 0.8649525133194348, "grad_norm": 0.86328125, "learning_rate": 5.5751751313476055e-06, "loss": 0.7591818571090698, "step": 1867 }, { "epoch": 0.8654157980078758, "grad_norm": 0.77734375, "learning_rate": 5.554176334339251e-06, "loss": 0.995162844657898, "step": 1868 }, { "epoch": 0.8658790826963169, "grad_norm": 0.97265625, "learning_rate": 5.533211981823204e-06, "loss": 1.090124487876892, "step": 1869 }, { "epoch": 0.8663423673847579, "grad_norm": 0.84765625, "learning_rate": 5.512282112946889e-06, "loss": 0.8698755502700806, "step": 1870 }, { "epoch": 0.8668056520731989, "grad_norm": 0.7734375, "learning_rate": 5.4913867667933405e-06, "loss": 0.8936692476272583, "step": 1871 }, { "epoch": 0.8672689367616401, "grad_norm": 0.80859375, "learning_rate": 5.470525982381133e-06, "loss": 0.9556330442428589, "step": 1872 }, { "epoch": 0.8677322214500811, "grad_norm": 0.7734375, "learning_rate": 5.449699798664276e-06, "loss": 0.7762373089790344, "step": 1873 }, { "epoch": 0.8681955061385221, "grad_norm": 0.84765625, "learning_rate": 5.428908254532204e-06, "loss": 0.8480014204978943, "step": 1874 }, { "epoch": 0.8686587908269632, "grad_norm": 0.85546875, "learning_rate": 5.4081513888096335e-06, "loss": 1.0431307554244995, "step": 1875 }, { "epoch": 0.8691220755154042, "grad_norm": 0.85546875, "learning_rate": 5.3874292402565515e-06, "loss": 0.9694292545318604, "step": 1876 }, { "epoch": 0.8695853602038452, "grad_norm": 0.93359375, "learning_rate": 5.366741847568112e-06, "loss": 0.9653794765472412, "step": 1877 }, { "epoch": 0.8700486448922863, "grad_norm": 0.77734375, "learning_rate": 5.346089249374549e-06, "loss": 0.8758606314659119, "step": 1878 }, { "epoch": 0.8705119295807273, "grad_norm": 0.76953125, "learning_rate": 5.325471484241144e-06, "loss": 0.8884270191192627, "step": 1879 }, { "epoch": 0.8709752142691684, "grad_norm": 0.8046875, "learning_rate": 5.304888590668126e-06, "loss": 0.9315338730812073, "step": 1880 }, { "epoch": 0.8714384989576095, "grad_norm": 0.79296875, "learning_rate": 5.284340607090616e-06, "loss": 0.874808132648468, "step": 1881 }, { "epoch": 0.8719017836460505, "grad_norm": 0.890625, "learning_rate": 5.263827571878527e-06, "loss": 0.95280921459198, "step": 1882 }, { "epoch": 0.8723650683344916, "grad_norm": 0.80859375, "learning_rate": 5.243349523336532e-06, "loss": 0.7937729954719543, "step": 1883 }, { "epoch": 0.8728283530229326, "grad_norm": 0.8359375, "learning_rate": 5.222906499703955e-06, "loss": 0.8925231695175171, "step": 1884 }, { "epoch": 0.8732916377113736, "grad_norm": 0.9375, "learning_rate": 5.20249853915473e-06, "loss": 0.7958294153213501, "step": 1885 }, { "epoch": 0.8737549223998147, "grad_norm": 0.78125, "learning_rate": 5.1821256797973185e-06, "loss": 0.8661794066429138, "step": 1886 }, { "epoch": 0.8742182070882557, "grad_norm": 0.83203125, "learning_rate": 5.1617879596746155e-06, "loss": 0.8948233723640442, "step": 1887 }, { "epoch": 0.8746814917766967, "grad_norm": 1.0703125, "learning_rate": 5.141485416763928e-06, "loss": 0.9799279570579529, "step": 1888 }, { "epoch": 0.8751447764651378, "grad_norm": 0.77734375, "learning_rate": 5.121218088976843e-06, "loss": 0.8897976279258728, "step": 1889 }, { "epoch": 0.8756080611535789, "grad_norm": 0.8515625, "learning_rate": 5.1009860141592314e-06, "loss": 1.034111738204956, "step": 1890 }, { "epoch": 0.87607134584202, "grad_norm": 0.80078125, "learning_rate": 5.080789230091099e-06, "loss": 0.8257846236228943, "step": 1891 }, { "epoch": 0.876534630530461, "grad_norm": 0.734375, "learning_rate": 5.060627774486557e-06, "loss": 0.9181721210479736, "step": 1892 }, { "epoch": 0.876997915218902, "grad_norm": 0.828125, "learning_rate": 5.04050168499376e-06, "loss": 0.8174453377723694, "step": 1893 }, { "epoch": 0.8774611999073431, "grad_norm": 0.93359375, "learning_rate": 5.020410999194815e-06, "loss": 0.9571653604507446, "step": 1894 }, { "epoch": 0.8779244845957841, "grad_norm": 0.8828125, "learning_rate": 5.0003557546057275e-06, "loss": 0.8483561277389526, "step": 1895 }, { "epoch": 0.8783877692842251, "grad_norm": 0.90234375, "learning_rate": 4.9803359886763e-06, "loss": 0.9033476114273071, "step": 1896 }, { "epoch": 0.8788510539726662, "grad_norm": 0.875, "learning_rate": 4.960351738790113e-06, "loss": 0.9668432474136353, "step": 1897 }, { "epoch": 0.8793143386611072, "grad_norm": 0.83203125, "learning_rate": 4.9404030422644e-06, "loss": 0.8885450959205627, "step": 1898 }, { "epoch": 0.8797776233495483, "grad_norm": 1.0078125, "learning_rate": 4.92048993635002e-06, "loss": 0.7515835762023926, "step": 1899 }, { "epoch": 0.8802409080379894, "grad_norm": 0.8828125, "learning_rate": 4.9006124582313825e-06, "loss": 0.8569181561470032, "step": 1900 }, { "epoch": 0.8807041927264304, "grad_norm": 0.80078125, "learning_rate": 4.880770645026336e-06, "loss": 0.8861024975776672, "step": 1901 }, { "epoch": 0.8811674774148714, "grad_norm": 0.84375, "learning_rate": 4.8609645337861615e-06, "loss": 0.884182333946228, "step": 1902 }, { "epoch": 0.8816307621033125, "grad_norm": 0.72265625, "learning_rate": 4.841194161495456e-06, "loss": 0.8558884263038635, "step": 1903 }, { "epoch": 0.8820940467917535, "grad_norm": 0.76953125, "learning_rate": 4.8214595650720945e-06, "loss": 0.9538986682891846, "step": 1904 }, { "epoch": 0.8825573314801946, "grad_norm": 0.83203125, "learning_rate": 4.8017607813671255e-06, "loss": 0.9202069640159607, "step": 1905 }, { "epoch": 0.8830206161686356, "grad_norm": 0.78515625, "learning_rate": 4.782097847164745e-06, "loss": 0.8663555383682251, "step": 1906 }, { "epoch": 0.8834839008570766, "grad_norm": 0.83203125, "learning_rate": 4.762470799182182e-06, "loss": 0.8842315673828125, "step": 1907 }, { "epoch": 0.8839471855455178, "grad_norm": 0.90625, "learning_rate": 4.7428796740696775e-06, "loss": 1.0153151750564575, "step": 1908 }, { "epoch": 0.8844104702339588, "grad_norm": 0.76953125, "learning_rate": 4.723324508410386e-06, "loss": 0.8607885837554932, "step": 1909 }, { "epoch": 0.8848737549223998, "grad_norm": 0.86328125, "learning_rate": 4.703805338720301e-06, "loss": 0.8658420443534851, "step": 1910 }, { "epoch": 0.8853370396108409, "grad_norm": 0.7578125, "learning_rate": 4.684322201448219e-06, "loss": 0.814765453338623, "step": 1911 }, { "epoch": 0.8858003242992819, "grad_norm": 0.88671875, "learning_rate": 4.664875132975623e-06, "loss": 1.0322341918945312, "step": 1912 }, { "epoch": 0.8862636089877229, "grad_norm": 0.8046875, "learning_rate": 4.645464169616691e-06, "loss": 0.8110833168029785, "step": 1913 }, { "epoch": 0.886726893676164, "grad_norm": 0.8203125, "learning_rate": 4.6260893476181384e-06, "loss": 0.836736798286438, "step": 1914 }, { "epoch": 0.887190178364605, "grad_norm": 0.97265625, "learning_rate": 4.606750703159197e-06, "loss": 0.9669207334518433, "step": 1915 }, { "epoch": 0.887653463053046, "grad_norm": 0.95703125, "learning_rate": 4.587448272351564e-06, "loss": 0.8839113116264343, "step": 1916 }, { "epoch": 0.8881167477414872, "grad_norm": 0.9296875, "learning_rate": 4.568182091239298e-06, "loss": 0.8875067234039307, "step": 1917 }, { "epoch": 0.8885800324299282, "grad_norm": 0.75390625, "learning_rate": 4.548952195798783e-06, "loss": 0.8245463967323303, "step": 1918 }, { "epoch": 0.8890433171183693, "grad_norm": 1.0234375, "learning_rate": 4.529758621938616e-06, "loss": 0.9345543384552002, "step": 1919 }, { "epoch": 0.8895066018068103, "grad_norm": 0.8515625, "learning_rate": 4.510601405499605e-06, "loss": 0.841739296913147, "step": 1920 }, { "epoch": 0.8899698864952513, "grad_norm": 0.8671875, "learning_rate": 4.491480582254634e-06, "loss": 0.8611487746238708, "step": 1921 }, { "epoch": 0.8904331711836924, "grad_norm": 0.90234375, "learning_rate": 4.472396187908652e-06, "loss": 0.7870234251022339, "step": 1922 }, { "epoch": 0.8908964558721334, "grad_norm": 0.95703125, "learning_rate": 4.453348258098582e-06, "loss": 1.1207804679870605, "step": 1923 }, { "epoch": 0.8913597405605744, "grad_norm": 0.796875, "learning_rate": 4.434336828393233e-06, "loss": 1.0150240659713745, "step": 1924 }, { "epoch": 0.8918230252490155, "grad_norm": 0.96484375, "learning_rate": 4.415361934293283e-06, "loss": 1.0550124645233154, "step": 1925 }, { "epoch": 0.8922863099374566, "grad_norm": 0.80078125, "learning_rate": 4.396423611231171e-06, "loss": 0.8985774517059326, "step": 1926 }, { "epoch": 0.8927495946258976, "grad_norm": 0.8515625, "learning_rate": 4.377521894571057e-06, "loss": 0.9433072209358215, "step": 1927 }, { "epoch": 0.8932128793143387, "grad_norm": 0.82421875, "learning_rate": 4.358656819608728e-06, "loss": 0.9555040001869202, "step": 1928 }, { "epoch": 0.8936761640027797, "grad_norm": 1.015625, "learning_rate": 4.339828421571566e-06, "loss": 0.9342141151428223, "step": 1929 }, { "epoch": 0.8941394486912208, "grad_norm": 0.90625, "learning_rate": 4.321036735618446e-06, "loss": 0.9744370579719543, "step": 1930 }, { "epoch": 0.8946027333796618, "grad_norm": 0.8203125, "learning_rate": 4.302281796839706e-06, "loss": 0.8748108744621277, "step": 1931 }, { "epoch": 0.8950660180681028, "grad_norm": 0.83984375, "learning_rate": 4.283563640257069e-06, "loss": 0.9074385762214661, "step": 1932 }, { "epoch": 0.8955293027565439, "grad_norm": 0.8125, "learning_rate": 4.2648823008235475e-06, "loss": 0.8706763982772827, "step": 1933 }, { "epoch": 0.8959925874449849, "grad_norm": 0.8359375, "learning_rate": 4.246237813423425e-06, "loss": 0.9113630652427673, "step": 1934 }, { "epoch": 0.896455872133426, "grad_norm": 0.796875, "learning_rate": 4.227630212872168e-06, "loss": 1.0912119150161743, "step": 1935 }, { "epoch": 0.8969191568218671, "grad_norm": 0.94921875, "learning_rate": 4.2090595339163665e-06, "loss": 0.8499932289123535, "step": 1936 }, { "epoch": 0.8973824415103081, "grad_norm": 0.953125, "learning_rate": 4.190525811233652e-06, "loss": 0.9822698831558228, "step": 1937 }, { "epoch": 0.8978457261987491, "grad_norm": 1.09375, "learning_rate": 4.172029079432648e-06, "loss": 0.9892884492874146, "step": 1938 }, { "epoch": 0.8983090108871902, "grad_norm": 0.8046875, "learning_rate": 4.153569373052913e-06, "loss": 1.048028588294983, "step": 1939 }, { "epoch": 0.8987722955756312, "grad_norm": 0.73046875, "learning_rate": 4.135146726564865e-06, "loss": 0.7311965227127075, "step": 1940 }, { "epoch": 0.8992355802640722, "grad_norm": 0.76171875, "learning_rate": 4.116761174369723e-06, "loss": 0.967644989490509, "step": 1941 }, { "epoch": 0.8996988649525133, "grad_norm": 0.8046875, "learning_rate": 4.098412750799421e-06, "loss": 0.9470330476760864, "step": 1942 }, { "epoch": 0.9001621496409543, "grad_norm": 0.9921875, "learning_rate": 4.080101490116581e-06, "loss": 0.9476629495620728, "step": 1943 }, { "epoch": 0.9006254343293955, "grad_norm": 0.85546875, "learning_rate": 4.061827426514416e-06, "loss": 0.8855443000793457, "step": 1944 }, { "epoch": 0.9010887190178365, "grad_norm": 0.74609375, "learning_rate": 4.043590594116685e-06, "loss": 1.0060893297195435, "step": 1945 }, { "epoch": 0.9015520037062775, "grad_norm": 0.8671875, "learning_rate": 4.025391026977633e-06, "loss": 0.9967565536499023, "step": 1946 }, { "epoch": 0.9020152883947186, "grad_norm": 0.91015625, "learning_rate": 4.007228759081898e-06, "loss": 1.0718729496002197, "step": 1947 }, { "epoch": 0.9024785730831596, "grad_norm": 0.84375, "learning_rate": 3.989103824344483e-06, "loss": 0.7362527251243591, "step": 1948 }, { "epoch": 0.9029418577716006, "grad_norm": 0.80078125, "learning_rate": 3.971016256610675e-06, "loss": 0.7745991945266724, "step": 1949 }, { "epoch": 0.9034051424600417, "grad_norm": 0.78515625, "learning_rate": 3.95296608965599e-06, "loss": 0.9183497428894043, "step": 1950 }, { "epoch": 0.9038684271484827, "grad_norm": 0.77734375, "learning_rate": 3.934953357186084e-06, "loss": 0.9153457880020142, "step": 1951 }, { "epoch": 0.9043317118369237, "grad_norm": 0.8671875, "learning_rate": 3.916978092836737e-06, "loss": 0.9395539164543152, "step": 1952 }, { "epoch": 0.9047949965253649, "grad_norm": 0.88671875, "learning_rate": 3.899040330173741e-06, "loss": 0.9092686176300049, "step": 1953 }, { "epoch": 0.9052582812138059, "grad_norm": 0.828125, "learning_rate": 3.881140102692869e-06, "loss": 0.957666277885437, "step": 1954 }, { "epoch": 0.905721565902247, "grad_norm": 0.99609375, "learning_rate": 3.863277443819814e-06, "loss": 0.8853251934051514, "step": 1955 }, { "epoch": 0.906184850590688, "grad_norm": 0.79296875, "learning_rate": 3.845452386910094e-06, "loss": 0.9027367234230042, "step": 1956 }, { "epoch": 0.906648135279129, "grad_norm": 0.91015625, "learning_rate": 3.827664965249025e-06, "loss": 0.8307380080223083, "step": 1957 }, { "epoch": 0.90711141996757, "grad_norm": 0.7578125, "learning_rate": 3.8099152120516485e-06, "loss": 0.9248343110084534, "step": 1958 }, { "epoch": 0.9075747046560111, "grad_norm": 0.94140625, "learning_rate": 3.792203160462667e-06, "loss": 0.9491377472877502, "step": 1959 }, { "epoch": 0.9080379893444521, "grad_norm": 0.953125, "learning_rate": 3.7745288435563653e-06, "loss": 0.9673945307731628, "step": 1960 }, { "epoch": 0.9085012740328932, "grad_norm": 1.0234375, "learning_rate": 3.7568922943365755e-06, "loss": 0.9584711790084839, "step": 1961 }, { "epoch": 0.9089645587213343, "grad_norm": 0.87109375, "learning_rate": 3.7392935457366088e-06, "loss": 0.9805790781974792, "step": 1962 }, { "epoch": 0.9094278434097753, "grad_norm": 0.9453125, "learning_rate": 3.7217326306191865e-06, "loss": 0.8713183403015137, "step": 1963 }, { "epoch": 0.9098911280982164, "grad_norm": 0.8125, "learning_rate": 3.704209581776387e-06, "loss": 0.9644355177879333, "step": 1964 }, { "epoch": 0.9103544127866574, "grad_norm": 0.87890625, "learning_rate": 3.686724431929563e-06, "loss": 0.9322176575660706, "step": 1965 }, { "epoch": 0.9108176974750984, "grad_norm": 0.75, "learning_rate": 3.6692772137293233e-06, "loss": 0.8894251585006714, "step": 1966 }, { "epoch": 0.9112809821635395, "grad_norm": 0.74609375, "learning_rate": 3.6518679597554196e-06, "loss": 0.8077326416969299, "step": 1967 }, { "epoch": 0.9117442668519805, "grad_norm": 0.79296875, "learning_rate": 3.634496702516724e-06, "loss": 0.8675779104232788, "step": 1968 }, { "epoch": 0.9122075515404215, "grad_norm": 1.046875, "learning_rate": 3.6171634744511705e-06, "loss": 1.0307801961898804, "step": 1969 }, { "epoch": 0.9126708362288626, "grad_norm": 0.80859375, "learning_rate": 3.5998683079256456e-06, "loss": 0.8632080554962158, "step": 1970 }, { "epoch": 0.9131341209173037, "grad_norm": 1.3046875, "learning_rate": 3.5826112352359906e-06, "loss": 0.9748227596282959, "step": 1971 }, { "epoch": 0.9135974056057448, "grad_norm": 0.7734375, "learning_rate": 3.5653922886069056e-06, "loss": 1.0766937732696533, "step": 1972 }, { "epoch": 0.9140606902941858, "grad_norm": 0.8125, "learning_rate": 3.548211500191897e-06, "loss": 0.9421984553337097, "step": 1973 }, { "epoch": 0.9145239749826268, "grad_norm": 0.890625, "learning_rate": 3.5310689020732137e-06, "loss": 0.9485040903091431, "step": 1974 }, { "epoch": 0.9149872596710679, "grad_norm": 0.9375, "learning_rate": 3.513964526261783e-06, "loss": 0.7310665249824524, "step": 1975 }, { "epoch": 0.9154505443595089, "grad_norm": 0.79296875, "learning_rate": 3.496898404697176e-06, "loss": 0.8767428398132324, "step": 1976 }, { "epoch": 0.9159138290479499, "grad_norm": 0.875, "learning_rate": 3.4798705692475193e-06, "loss": 0.7951971292495728, "step": 1977 }, { "epoch": 0.916377113736391, "grad_norm": 0.80859375, "learning_rate": 3.4628810517094586e-06, "loss": 1.0523958206176758, "step": 1978 }, { "epoch": 0.9168403984248321, "grad_norm": 0.88671875, "learning_rate": 3.445929883808061e-06, "loss": 0.9560039639472961, "step": 1979 }, { "epoch": 0.9173036831132731, "grad_norm": 0.7578125, "learning_rate": 3.4290170971968125e-06, "loss": 0.9428179860115051, "step": 1980 }, { "epoch": 0.9177669678017142, "grad_norm": 0.7734375, "learning_rate": 3.4121427234575058e-06, "loss": 0.8995485901832581, "step": 1981 }, { "epoch": 0.9182302524901552, "grad_norm": 0.9921875, "learning_rate": 3.3953067941002262e-06, "loss": 0.8385268449783325, "step": 1982 }, { "epoch": 0.9186935371785963, "grad_norm": 0.78125, "learning_rate": 3.3785093405632497e-06, "loss": 0.8148472309112549, "step": 1983 }, { "epoch": 0.9191568218670373, "grad_norm": 0.83984375, "learning_rate": 3.3617503942130034e-06, "loss": 0.8488112688064575, "step": 1984 }, { "epoch": 0.9196201065554783, "grad_norm": 0.875, "learning_rate": 3.34502998634403e-06, "loss": 0.9861905574798584, "step": 1985 }, { "epoch": 0.9200833912439194, "grad_norm": 0.890625, "learning_rate": 3.3283481481788926e-06, "loss": 0.9687526226043701, "step": 1986 }, { "epoch": 0.9205466759323604, "grad_norm": 0.6953125, "learning_rate": 3.3117049108681425e-06, "loss": 0.7825883030891418, "step": 1987 }, { "epoch": 0.9210099606208015, "grad_norm": 0.76171875, "learning_rate": 3.295100305490232e-06, "loss": 0.89034503698349, "step": 1988 }, { "epoch": 0.9214732453092426, "grad_norm": 0.7578125, "learning_rate": 3.2785343630514944e-06, "loss": 0.8503515720367432, "step": 1989 }, { "epoch": 0.9219365299976836, "grad_norm": 0.76953125, "learning_rate": 3.2620071144860517e-06, "loss": 0.887080192565918, "step": 1990 }, { "epoch": 0.9223998146861246, "grad_norm": 0.82421875, "learning_rate": 3.2455185906557792e-06, "loss": 0.9105535745620728, "step": 1991 }, { "epoch": 0.9228630993745657, "grad_norm": 0.7578125, "learning_rate": 3.2290688223502485e-06, "loss": 0.9910966157913208, "step": 1992 }, { "epoch": 0.9233263840630067, "grad_norm": 0.83203125, "learning_rate": 3.212657840286637e-06, "loss": 0.9041646122932434, "step": 1993 }, { "epoch": 0.9237896687514477, "grad_norm": 0.8203125, "learning_rate": 3.196285675109717e-06, "loss": 0.8051247596740723, "step": 1994 }, { "epoch": 0.9242529534398888, "grad_norm": 0.7734375, "learning_rate": 3.1799523573917726e-06, "loss": 0.9509384036064148, "step": 1995 }, { "epoch": 0.9247162381283298, "grad_norm": 0.7265625, "learning_rate": 3.1636579176325494e-06, "loss": 0.8491644859313965, "step": 1996 }, { "epoch": 0.925179522816771, "grad_norm": 0.8671875, "learning_rate": 3.1474023862591808e-06, "loss": 0.9533796906471252, "step": 1997 }, { "epoch": 0.925642807505212, "grad_norm": 0.8984375, "learning_rate": 3.1311857936261537e-06, "loss": 0.9359503984451294, "step": 1998 }, { "epoch": 0.926106092193653, "grad_norm": 0.7890625, "learning_rate": 3.115008170015248e-06, "loss": 0.9185171723365784, "step": 1999 }, { "epoch": 0.9265693768820941, "grad_norm": 0.8046875, "learning_rate": 3.098869545635469e-06, "loss": 0.9881656169891357, "step": 2000 }, { "epoch": 0.9270326615705351, "grad_norm": 0.80078125, "learning_rate": 3.082769950623007e-06, "loss": 0.7174080610275269, "step": 2001 }, { "epoch": 0.9274959462589761, "grad_norm": 0.81640625, "learning_rate": 3.066709415041155e-06, "loss": 0.8646811246871948, "step": 2002 }, { "epoch": 0.9279592309474172, "grad_norm": 0.82421875, "learning_rate": 3.0506879688802826e-06, "loss": 0.6516255736351013, "step": 2003 }, { "epoch": 0.9284225156358582, "grad_norm": 0.765625, "learning_rate": 3.0347056420577633e-06, "loss": 0.8709309101104736, "step": 2004 }, { "epoch": 0.9288858003242992, "grad_norm": 0.83984375, "learning_rate": 3.0187624644179235e-06, "loss": 0.9930815696716309, "step": 2005 }, { "epoch": 0.9293490850127404, "grad_norm": 0.984375, "learning_rate": 3.002858465731981e-06, "loss": 0.8484547138214111, "step": 2006 }, { "epoch": 0.9298123697011814, "grad_norm": 0.9296875, "learning_rate": 2.9869936756979873e-06, "loss": 0.9493208527565002, "step": 2007 }, { "epoch": 0.9302756543896225, "grad_norm": 0.92578125, "learning_rate": 2.9711681239407924e-06, "loss": 0.8300023078918457, "step": 2008 }, { "epoch": 0.9307389390780635, "grad_norm": 0.94140625, "learning_rate": 2.9553818400119676e-06, "loss": 1.0500361919403076, "step": 2009 }, { "epoch": 0.9312022237665045, "grad_norm": 1.0, "learning_rate": 2.939634853389765e-06, "loss": 1.172685980796814, "step": 2010 }, { "epoch": 0.9316655084549456, "grad_norm": 0.8984375, "learning_rate": 2.923927193479039e-06, "loss": 0.966960608959198, "step": 2011 }, { "epoch": 0.9321287931433866, "grad_norm": 0.875, "learning_rate": 2.908258889611223e-06, "loss": 0.9223648905754089, "step": 2012 }, { "epoch": 0.9325920778318276, "grad_norm": 0.9921875, "learning_rate": 2.892629971044265e-06, "loss": 1.0637054443359375, "step": 2013 }, { "epoch": 0.9330553625202687, "grad_norm": 0.890625, "learning_rate": 2.8770404669625426e-06, "loss": 0.9099516272544861, "step": 2014 }, { "epoch": 0.9335186472087098, "grad_norm": 0.859375, "learning_rate": 2.8614904064768603e-06, "loss": 0.8674840927124023, "step": 2015 }, { "epoch": 0.9339819318971508, "grad_norm": 0.91796875, "learning_rate": 2.8459798186243478e-06, "loss": 0.9098578691482544, "step": 2016 }, { "epoch": 0.9344452165855919, "grad_norm": 0.78125, "learning_rate": 2.8305087323684396e-06, "loss": 0.9073185920715332, "step": 2017 }, { "epoch": 0.9349085012740329, "grad_norm": 0.84765625, "learning_rate": 2.8150771765988054e-06, "loss": 1.0626872777938843, "step": 2018 }, { "epoch": 0.935371785962474, "grad_norm": 1.265625, "learning_rate": 2.799685180131296e-06, "loss": 1.0506995916366577, "step": 2019 }, { "epoch": 0.935835070650915, "grad_norm": 0.8359375, "learning_rate": 2.7843327717078906e-06, "loss": 0.9011653065681458, "step": 2020 }, { "epoch": 0.936298355339356, "grad_norm": 1.015625, "learning_rate": 2.7690199799966412e-06, "loss": 0.8372233510017395, "step": 2021 }, { "epoch": 0.936761640027797, "grad_norm": 0.9296875, "learning_rate": 2.7537468335916275e-06, "loss": 0.8183858394622803, "step": 2022 }, { "epoch": 0.9372249247162381, "grad_norm": 0.83984375, "learning_rate": 2.7385133610129018e-06, "loss": 0.9930511713027954, "step": 2023 }, { "epoch": 0.9376882094046792, "grad_norm": 0.8671875, "learning_rate": 2.7233195907064297e-06, "loss": 0.8853211998939514, "step": 2024 }, { "epoch": 0.9381514940931203, "grad_norm": 1.265625, "learning_rate": 2.708165551044031e-06, "loss": 0.8356503844261169, "step": 2025 }, { "epoch": 0.9386147787815613, "grad_norm": 0.921875, "learning_rate": 2.6930512703233423e-06, "loss": 1.003953218460083, "step": 2026 }, { "epoch": 0.9390780634700023, "grad_norm": 0.8515625, "learning_rate": 2.677976776767765e-06, "loss": 0.950794517993927, "step": 2027 }, { "epoch": 0.9395413481584434, "grad_norm": 0.84765625, "learning_rate": 2.6629420985263856e-06, "loss": 0.9656116962432861, "step": 2028 }, { "epoch": 0.9400046328468844, "grad_norm": 0.86328125, "learning_rate": 2.6479472636739592e-06, "loss": 0.825056791305542, "step": 2029 }, { "epoch": 0.9404679175353254, "grad_norm": 0.81640625, "learning_rate": 2.632992300210825e-06, "loss": 0.9464682340621948, "step": 2030 }, { "epoch": 0.9409312022237665, "grad_norm": 0.91796875, "learning_rate": 2.6180772360628837e-06, "loss": 0.8668578863143921, "step": 2031 }, { "epoch": 0.9413944869122075, "grad_norm": 0.84765625, "learning_rate": 2.6032020990815257e-06, "loss": 0.887437105178833, "step": 2032 }, { "epoch": 0.9418577716006487, "grad_norm": 0.8359375, "learning_rate": 2.588366917043583e-06, "loss": 0.9934899806976318, "step": 2033 }, { "epoch": 0.9423210562890897, "grad_norm": 0.98046875, "learning_rate": 2.5735717176512722e-06, "loss": 0.9330331683158875, "step": 2034 }, { "epoch": 0.9427843409775307, "grad_norm": 0.84375, "learning_rate": 2.5588165285321597e-06, "loss": 0.8642808198928833, "step": 2035 }, { "epoch": 0.9432476256659718, "grad_norm": 0.83984375, "learning_rate": 2.5441013772390964e-06, "loss": 1.014452576637268, "step": 2036 }, { "epoch": 0.9437109103544128, "grad_norm": 0.765625, "learning_rate": 2.5294262912501636e-06, "loss": 1.0355772972106934, "step": 2037 }, { "epoch": 0.9441741950428538, "grad_norm": 0.890625, "learning_rate": 2.5147912979686352e-06, "loss": 0.9281973838806152, "step": 2038 }, { "epoch": 0.9446374797312949, "grad_norm": 0.74609375, "learning_rate": 2.5001964247229074e-06, "loss": 1.0396404266357422, "step": 2039 }, { "epoch": 0.9451007644197359, "grad_norm": 0.921875, "learning_rate": 2.4856416987664723e-06, "loss": 0.8407167196273804, "step": 2040 }, { "epoch": 0.9455640491081769, "grad_norm": 1.140625, "learning_rate": 2.471127147277846e-06, "loss": 0.9255853891372681, "step": 2041 }, { "epoch": 0.9460273337966181, "grad_norm": 0.8828125, "learning_rate": 2.4566527973605314e-06, "loss": 0.9377841353416443, "step": 2042 }, { "epoch": 0.9464906184850591, "grad_norm": 0.86328125, "learning_rate": 2.4422186760429565e-06, "loss": 0.8124240636825562, "step": 2043 }, { "epoch": 0.9469539031735001, "grad_norm": 0.73828125, "learning_rate": 2.4278248102784187e-06, "loss": 0.7741151452064514, "step": 2044 }, { "epoch": 0.9474171878619412, "grad_norm": 0.78515625, "learning_rate": 2.4134712269450693e-06, "loss": 0.8896522521972656, "step": 2045 }, { "epoch": 0.9478804725503822, "grad_norm": 0.75390625, "learning_rate": 2.3991579528458198e-06, "loss": 0.8712372779846191, "step": 2046 }, { "epoch": 0.9483437572388232, "grad_norm": 0.87890625, "learning_rate": 2.3848850147083223e-06, "loss": 0.8915302753448486, "step": 2047 }, { "epoch": 0.9488070419272643, "grad_norm": 0.875, "learning_rate": 2.3706524391848946e-06, "loss": 0.9843184947967529, "step": 2048 }, { "epoch": 0.9492703266157053, "grad_norm": 0.85546875, "learning_rate": 2.3564602528524985e-06, "loss": 1.0059175491333008, "step": 2049 }, { "epoch": 0.9497336113041464, "grad_norm": 1.125, "learning_rate": 2.3423084822126735e-06, "loss": 0.9545824527740479, "step": 2050 }, { "epoch": 0.9501968959925875, "grad_norm": 0.9765625, "learning_rate": 2.3281971536914734e-06, "loss": 1.022434115409851, "step": 2051 }, { "epoch": 0.9506601806810285, "grad_norm": 0.85546875, "learning_rate": 2.3141262936394595e-06, "loss": 0.8228369355201721, "step": 2052 }, { "epoch": 0.9511234653694696, "grad_norm": 0.96875, "learning_rate": 2.3000959283315955e-06, "loss": 0.9236583709716797, "step": 2053 }, { "epoch": 0.9515867500579106, "grad_norm": 0.796875, "learning_rate": 2.2861060839672546e-06, "loss": 0.8788405656814575, "step": 2054 }, { "epoch": 0.9520500347463516, "grad_norm": 0.87109375, "learning_rate": 2.27215678667013e-06, "loss": 0.7980551719665527, "step": 2055 }, { "epoch": 0.9525133194347927, "grad_norm": 1.0078125, "learning_rate": 2.258248062488206e-06, "loss": 0.8565947413444519, "step": 2056 }, { "epoch": 0.9529766041232337, "grad_norm": 0.84375, "learning_rate": 2.244379937393691e-06, "loss": 0.8685353994369507, "step": 2057 }, { "epoch": 0.9534398888116747, "grad_norm": 0.796875, "learning_rate": 2.230552437282996e-06, "loss": 0.7926799654960632, "step": 2058 }, { "epoch": 0.9539031735001158, "grad_norm": 0.78125, "learning_rate": 2.2167655879766687e-06, "loss": 0.9113929271697998, "step": 2059 }, { "epoch": 0.9543664581885569, "grad_norm": 0.828125, "learning_rate": 2.20301941521934e-06, "loss": 0.8047410845756531, "step": 2060 }, { "epoch": 0.954829742876998, "grad_norm": 0.8125, "learning_rate": 2.1893139446796958e-06, "loss": 0.718073844909668, "step": 2061 }, { "epoch": 0.955293027565439, "grad_norm": 0.9140625, "learning_rate": 2.175649201950405e-06, "loss": 0.822567880153656, "step": 2062 }, { "epoch": 0.95575631225388, "grad_norm": 0.8046875, "learning_rate": 2.1620252125480936e-06, "loss": 0.8844413757324219, "step": 2063 }, { "epoch": 0.9562195969423211, "grad_norm": 0.875, "learning_rate": 2.1484420019132813e-06, "loss": 0.9247415065765381, "step": 2064 }, { "epoch": 0.9566828816307621, "grad_norm": 0.78125, "learning_rate": 2.134899595410353e-06, "loss": 0.9057773351669312, "step": 2065 }, { "epoch": 0.9571461663192031, "grad_norm": 0.8515625, "learning_rate": 2.1213980183274828e-06, "loss": 0.8755348324775696, "step": 2066 }, { "epoch": 0.9576094510076442, "grad_norm": 0.8046875, "learning_rate": 2.1079372958766046e-06, "loss": 0.9061083197593689, "step": 2067 }, { "epoch": 0.9580727356960852, "grad_norm": 0.6953125, "learning_rate": 2.0945174531933697e-06, "loss": 0.8380372524261475, "step": 2068 }, { "epoch": 0.9585360203845263, "grad_norm": 0.82421875, "learning_rate": 2.0811385153370924e-06, "loss": 1.0740100145339966, "step": 2069 }, { "epoch": 0.9589993050729674, "grad_norm": 0.84765625, "learning_rate": 2.0678005072907108e-06, "loss": 0.7675211429595947, "step": 2070 }, { "epoch": 0.9594625897614084, "grad_norm": 0.80859375, "learning_rate": 2.0545034539607104e-06, "loss": 0.8730876445770264, "step": 2071 }, { "epoch": 0.9599258744498494, "grad_norm": 0.99609375, "learning_rate": 2.0412473801771247e-06, "loss": 0.9389110207557678, "step": 2072 }, { "epoch": 0.9603891591382905, "grad_norm": 1.0078125, "learning_rate": 2.0280323106934574e-06, "loss": 0.9151057600975037, "step": 2073 }, { "epoch": 0.9608524438267315, "grad_norm": 0.8671875, "learning_rate": 2.0148582701866327e-06, "loss": 0.8602491617202759, "step": 2074 }, { "epoch": 0.9613157285151726, "grad_norm": 0.921875, "learning_rate": 2.0017252832569802e-06, "loss": 0.7774481773376465, "step": 2075 }, { "epoch": 0.9617790132036136, "grad_norm": 0.80859375, "learning_rate": 1.9886333744281473e-06, "loss": 0.9065883755683899, "step": 2076 }, { "epoch": 0.9622422978920546, "grad_norm": 0.8515625, "learning_rate": 1.9755825681470903e-06, "loss": 0.842190146446228, "step": 2077 }, { "epoch": 0.9627055825804958, "grad_norm": 0.82421875, "learning_rate": 1.962572888784009e-06, "loss": 0.7789183259010315, "step": 2078 }, { "epoch": 0.9631688672689368, "grad_norm": 0.83984375, "learning_rate": 1.9496043606323098e-06, "loss": 0.9603561162948608, "step": 2079 }, { "epoch": 0.9636321519573778, "grad_norm": 0.81640625, "learning_rate": 1.936677007908539e-06, "loss": 0.9474771022796631, "step": 2080 }, { "epoch": 0.9640954366458189, "grad_norm": 0.82421875, "learning_rate": 1.9237908547523742e-06, "loss": 0.8616044521331787, "step": 2081 }, { "epoch": 0.9645587213342599, "grad_norm": 0.84375, "learning_rate": 1.910945925226553e-06, "loss": 1.0991566181182861, "step": 2082 }, { "epoch": 0.9650220060227009, "grad_norm": 1.4140625, "learning_rate": 1.8981422433168307e-06, "loss": 0.8651741147041321, "step": 2083 }, { "epoch": 0.965485290711142, "grad_norm": 0.83203125, "learning_rate": 1.8853798329319515e-06, "loss": 0.9337427616119385, "step": 2084 }, { "epoch": 0.965948575399583, "grad_norm": 0.84375, "learning_rate": 1.872658717903569e-06, "loss": 0.7772614359855652, "step": 2085 }, { "epoch": 0.966411860088024, "grad_norm": 1.0546875, "learning_rate": 1.8599789219862499e-06, "loss": 0.7997364401817322, "step": 2086 }, { "epoch": 0.9668751447764652, "grad_norm": 0.77734375, "learning_rate": 1.8473404688573876e-06, "loss": 0.7531715631484985, "step": 2087 }, { "epoch": 0.9673384294649062, "grad_norm": 0.84375, "learning_rate": 1.8347433821171917e-06, "loss": 0.8086212277412415, "step": 2088 }, { "epoch": 0.9678017141533473, "grad_norm": 1.0078125, "learning_rate": 1.822187685288606e-06, "loss": 0.8939145803451538, "step": 2089 }, { "epoch": 0.9682649988417883, "grad_norm": 0.84375, "learning_rate": 1.809673401817289e-06, "loss": 0.9865738749504089, "step": 2090 }, { "epoch": 0.9687282835302293, "grad_norm": 0.83984375, "learning_rate": 1.7972005550715907e-06, "loss": 0.9553055763244629, "step": 2091 }, { "epoch": 0.9691915682186704, "grad_norm": 0.75, "learning_rate": 1.7847691683424535e-06, "loss": 0.9253486394882202, "step": 2092 }, { "epoch": 0.9696548529071114, "grad_norm": 0.96875, "learning_rate": 1.7723792648434237e-06, "loss": 0.8802915811538696, "step": 2093 }, { "epoch": 0.9701181375955524, "grad_norm": 0.828125, "learning_rate": 1.760030867710567e-06, "loss": 0.8802918791770935, "step": 2094 }, { "epoch": 0.9705814222839935, "grad_norm": 0.8203125, "learning_rate": 1.7477240000024547e-06, "loss": 0.843493640422821, "step": 2095 }, { "epoch": 0.9710447069724346, "grad_norm": 0.9296875, "learning_rate": 1.7354586847001068e-06, "loss": 0.9937857389450073, "step": 2096 }, { "epoch": 0.9715079916608756, "grad_norm": 0.83984375, "learning_rate": 1.7232349447069462e-06, "loss": 0.8251986503601074, "step": 2097 }, { "epoch": 0.9719712763493167, "grad_norm": 0.890625, "learning_rate": 1.7110528028487676e-06, "loss": 0.9000308513641357, "step": 2098 }, { "epoch": 0.9724345610377577, "grad_norm": 0.79296875, "learning_rate": 1.6989122818736754e-06, "loss": 1.0388299226760864, "step": 2099 }, { "epoch": 0.9728978457261988, "grad_norm": 0.87890625, "learning_rate": 1.6868134044520744e-06, "loss": 1.0854367017745972, "step": 2100 }, { "epoch": 0.9733611304146398, "grad_norm": 0.87890625, "learning_rate": 1.674756193176588e-06, "loss": 1.0435681343078613, "step": 2101 }, { "epoch": 0.9738244151030808, "grad_norm": 0.8203125, "learning_rate": 1.6627406705620516e-06, "loss": 0.972065806388855, "step": 2102 }, { "epoch": 0.9742876997915219, "grad_norm": 1.03125, "learning_rate": 1.6507668590454375e-06, "loss": 1.1784340143203735, "step": 2103 }, { "epoch": 0.9747509844799629, "grad_norm": 0.73046875, "learning_rate": 1.6388347809858335e-06, "loss": 0.9283071756362915, "step": 2104 }, { "epoch": 0.975214269168404, "grad_norm": 0.83203125, "learning_rate": 1.6269444586644113e-06, "loss": 0.8603734970092773, "step": 2105 }, { "epoch": 0.9756775538568451, "grad_norm": 0.8046875, "learning_rate": 1.6150959142843543e-06, "loss": 0.9831134080886841, "step": 2106 }, { "epoch": 0.9761408385452861, "grad_norm": 0.79296875, "learning_rate": 1.6032891699708412e-06, "loss": 0.986380934715271, "step": 2107 }, { "epoch": 0.9766041232337271, "grad_norm": 1.0234375, "learning_rate": 1.591524247770991e-06, "loss": 0.9720912575721741, "step": 2108 }, { "epoch": 0.9770674079221682, "grad_norm": 0.8125, "learning_rate": 1.5798011696538277e-06, "loss": 0.8218910098075867, "step": 2109 }, { "epoch": 0.9775306926106092, "grad_norm": 0.953125, "learning_rate": 1.568119957510243e-06, "loss": 0.9122781753540039, "step": 2110 }, { "epoch": 0.9779939772990502, "grad_norm": 0.83203125, "learning_rate": 1.5564806331529538e-06, "loss": 1.0062001943588257, "step": 2111 }, { "epoch": 0.9784572619874913, "grad_norm": 0.8125, "learning_rate": 1.5448832183164436e-06, "loss": 0.9109072685241699, "step": 2112 }, { "epoch": 0.9789205466759323, "grad_norm": 0.9140625, "learning_rate": 1.5333277346569414e-06, "loss": 0.9397470951080322, "step": 2113 }, { "epoch": 0.9793838313643735, "grad_norm": 0.8203125, "learning_rate": 1.5218142037523973e-06, "loss": 0.8526613712310791, "step": 2114 }, { "epoch": 0.9798471160528145, "grad_norm": 0.828125, "learning_rate": 1.5103426471023944e-06, "loss": 0.9518367052078247, "step": 2115 }, { "epoch": 0.9803104007412555, "grad_norm": 1.0859375, "learning_rate": 1.4989130861281527e-06, "loss": 1.002701997756958, "step": 2116 }, { "epoch": 0.9807736854296966, "grad_norm": 0.8671875, "learning_rate": 1.4875255421724579e-06, "loss": 0.9528016448020935, "step": 2117 }, { "epoch": 0.9812369701181376, "grad_norm": 0.78125, "learning_rate": 1.4761800364996524e-06, "loss": 0.870173454284668, "step": 2118 }, { "epoch": 0.9817002548065786, "grad_norm": 0.7734375, "learning_rate": 1.4648765902955763e-06, "loss": 0.8694907426834106, "step": 2119 }, { "epoch": 0.9821635394950197, "grad_norm": 1.0078125, "learning_rate": 1.453615224667513e-06, "loss": 0.9634788036346436, "step": 2120 }, { "epoch": 0.9826268241834607, "grad_norm": 0.85546875, "learning_rate": 1.4423959606441911e-06, "loss": 0.8832241296768188, "step": 2121 }, { "epoch": 0.9830901088719017, "grad_norm": 0.84375, "learning_rate": 1.4312188191757027e-06, "loss": 0.9558946490287781, "step": 2122 }, { "epoch": 0.9835533935603429, "grad_norm": 1.0703125, "learning_rate": 1.4200838211334962e-06, "loss": 0.9503135085105896, "step": 2123 }, { "epoch": 0.9840166782487839, "grad_norm": 0.86328125, "learning_rate": 1.4089909873103181e-06, "loss": 1.1282013654708862, "step": 2124 }, { "epoch": 0.984479962937225, "grad_norm": 0.9140625, "learning_rate": 1.3979403384201828e-06, "loss": 0.8594451546669006, "step": 2125 }, { "epoch": 0.984943247625666, "grad_norm": 0.83984375, "learning_rate": 1.3869318950983276e-06, "loss": 0.9713156819343567, "step": 2126 }, { "epoch": 0.985406532314107, "grad_norm": 0.80078125, "learning_rate": 1.3759656779011786e-06, "loss": 0.8094725012779236, "step": 2127 }, { "epoch": 0.985869817002548, "grad_norm": 1.1328125, "learning_rate": 1.3650417073063208e-06, "loss": 0.8664292097091675, "step": 2128 }, { "epoch": 0.9863331016909891, "grad_norm": 0.7734375, "learning_rate": 1.3541600037124343e-06, "loss": 0.85650235414505, "step": 2129 }, { "epoch": 0.9867963863794301, "grad_norm": 0.80859375, "learning_rate": 1.3433205874392886e-06, "loss": 0.9315167665481567, "step": 2130 }, { "epoch": 0.9872596710678712, "grad_norm": 0.828125, "learning_rate": 1.3325234787276746e-06, "loss": 0.9233248233795166, "step": 2131 }, { "epoch": 0.9877229557563123, "grad_norm": 0.75390625, "learning_rate": 1.321768697739392e-06, "loss": 0.8415129780769348, "step": 2132 }, { "epoch": 0.9881862404447533, "grad_norm": 0.78515625, "learning_rate": 1.3110562645571954e-06, "loss": 1.0400590896606445, "step": 2133 }, { "epoch": 0.9886495251331944, "grad_norm": 0.78515625, "learning_rate": 1.3003861991847687e-06, "loss": 0.8513540029525757, "step": 2134 }, { "epoch": 0.9891128098216354, "grad_norm": 0.80859375, "learning_rate": 1.2897585215466699e-06, "loss": 0.8044668436050415, "step": 2135 }, { "epoch": 0.9895760945100764, "grad_norm": 0.96875, "learning_rate": 1.2791732514883067e-06, "loss": 0.9986090660095215, "step": 2136 }, { "epoch": 0.9900393791985175, "grad_norm": 0.76171875, "learning_rate": 1.2686304087759108e-06, "loss": 0.756338894367218, "step": 2137 }, { "epoch": 0.9905026638869585, "grad_norm": 0.7734375, "learning_rate": 1.2581300130964728e-06, "loss": 0.7462416887283325, "step": 2138 }, { "epoch": 0.9909659485753995, "grad_norm": 0.78125, "learning_rate": 1.2476720840577294e-06, "loss": 0.981809675693512, "step": 2139 }, { "epoch": 0.9914292332638406, "grad_norm": 0.7890625, "learning_rate": 1.23725664118811e-06, "loss": 0.9728780388832092, "step": 2140 }, { "epoch": 0.9918925179522817, "grad_norm": 0.95703125, "learning_rate": 1.226883703936716e-06, "loss": 1.043047308921814, "step": 2141 }, { "epoch": 0.9923558026407228, "grad_norm": 0.734375, "learning_rate": 1.2165532916732768e-06, "loss": 0.8684824705123901, "step": 2142 }, { "epoch": 0.9928190873291638, "grad_norm": 0.85546875, "learning_rate": 1.206265423688106e-06, "loss": 0.9691533446311951, "step": 2143 }, { "epoch": 0.9932823720176048, "grad_norm": 0.7734375, "learning_rate": 1.196020119192082e-06, "loss": 0.8577444553375244, "step": 2144 }, { "epoch": 0.9937456567060459, "grad_norm": 0.7578125, "learning_rate": 1.1858173973165886e-06, "loss": 0.824654757976532, "step": 2145 }, { "epoch": 0.9942089413944869, "grad_norm": 0.8203125, "learning_rate": 1.1756572771135146e-06, "loss": 1.0085506439208984, "step": 2146 }, { "epoch": 0.9946722260829279, "grad_norm": 0.83203125, "learning_rate": 1.165539777555182e-06, "loss": 0.8890453577041626, "step": 2147 }, { "epoch": 0.995135510771369, "grad_norm": 0.85546875, "learning_rate": 1.1554649175343316e-06, "loss": 0.8785421848297119, "step": 2148 }, { "epoch": 0.99559879545981, "grad_norm": 0.76171875, "learning_rate": 1.1454327158640743e-06, "loss": 0.9128347039222717, "step": 2149 }, { "epoch": 0.9960620801482511, "grad_norm": 0.796875, "learning_rate": 1.1354431912778758e-06, "loss": 0.7984659671783447, "step": 2150 }, { "epoch": 0.9965253648366922, "grad_norm": 0.94921875, "learning_rate": 1.1254963624295052e-06, "loss": 0.8814120292663574, "step": 2151 }, { "epoch": 0.9969886495251332, "grad_norm": 0.91796875, "learning_rate": 1.1155922478929928e-06, "loss": 0.9477824568748474, "step": 2152 }, { "epoch": 0.9974519342135743, "grad_norm": 0.83984375, "learning_rate": 1.10573086616263e-06, "loss": 0.9535715579986572, "step": 2153 }, { "epoch": 0.9979152189020153, "grad_norm": 1.546875, "learning_rate": 1.0959122356528868e-06, "loss": 0.9569465517997742, "step": 2154 }, { "epoch": 0.9983785035904563, "grad_norm": 0.9609375, "learning_rate": 1.0861363746984196e-06, "loss": 0.8506355285644531, "step": 2155 }, { "epoch": 0.9988417882788974, "grad_norm": 0.88671875, "learning_rate": 1.0764033015540182e-06, "loss": 0.9563447833061218, "step": 2156 }, { "epoch": 0.9993050729673384, "grad_norm": 0.8515625, "learning_rate": 1.0667130343945627e-06, "loss": 0.8507230281829834, "step": 2157 }, { "epoch": 0.9997683576557794, "grad_norm": 0.90234375, "learning_rate": 1.0570655913150135e-06, "loss": 0.9006310701370239, "step": 2158 }, { "epoch": 1.0, "grad_norm": 1.1171875, "learning_rate": 1.0474609903303493e-06, "loss": 1.0268394947052002, "step": 2159 }, { "epoch": 1.0004632846884411, "grad_norm": 0.86328125, "learning_rate": 1.0378992493755704e-06, "loss": 0.6635380387306213, "step": 2160 }, { "epoch": 1.000926569376882, "grad_norm": 0.87890625, "learning_rate": 1.0283803863056181e-06, "loss": 0.6334539651870728, "step": 2161 }, { "epoch": 1.0013898540653232, "grad_norm": 0.953125, "learning_rate": 1.0189044188953833e-06, "loss": 0.7979657053947449, "step": 2162 }, { "epoch": 1.0018531387537641, "grad_norm": 0.80078125, "learning_rate": 1.0094713648396478e-06, "loss": 0.7369372844696045, "step": 2163 }, { "epoch": 1.0023164234422053, "grad_norm": 0.82421875, "learning_rate": 1.0000812417530654e-06, "loss": 0.7056367993354797, "step": 2164 }, { "epoch": 1.0027797081306462, "grad_norm": 0.86328125, "learning_rate": 9.907340671701244e-07, "loss": 0.6825569868087769, "step": 2165 }, { "epoch": 1.0032429928190874, "grad_norm": 0.84765625, "learning_rate": 9.81429858545103e-07, "loss": 0.7204247117042542, "step": 2166 }, { "epoch": 1.0037062775075283, "grad_norm": 0.8828125, "learning_rate": 9.721686332520658e-07, "loss": 0.8663711547851562, "step": 2167 }, { "epoch": 1.0041695621959694, "grad_norm": 0.8359375, "learning_rate": 9.629504085847903e-07, "loss": 0.8728551864624023, "step": 2168 }, { "epoch": 1.0046328468844106, "grad_norm": 0.75390625, "learning_rate": 9.537752017567814e-07, "loss": 0.7198824882507324, "step": 2169 }, { "epoch": 1.0050961315728515, "grad_norm": 0.890625, "learning_rate": 9.446430299011981e-07, "loss": 0.9775660037994385, "step": 2170 }, { "epoch": 1.0055594162612926, "grad_norm": 0.80859375, "learning_rate": 9.355539100708504e-07, "loss": 0.7355836033821106, "step": 2171 }, { "epoch": 1.0060227009497336, "grad_norm": 0.80078125, "learning_rate": 9.265078592381402e-07, "loss": 0.8342987298965454, "step": 2172 }, { "epoch": 1.0064859856381747, "grad_norm": 1.0, "learning_rate": 9.175048942950647e-07, "loss": 0.8058943748474121, "step": 2173 }, { "epoch": 1.0069492703266156, "grad_norm": 0.9453125, "learning_rate": 9.08545032053155e-07, "loss": 0.7417223453521729, "step": 2174 }, { "epoch": 1.0074125550150568, "grad_norm": 0.734375, "learning_rate": 8.996282892434513e-07, "loss": 0.6943836212158203, "step": 2175 }, { "epoch": 1.0078758397034977, "grad_norm": 0.83203125, "learning_rate": 8.907546825164854e-07, "loss": 0.8752128481864929, "step": 2176 }, { "epoch": 1.0083391243919388, "grad_norm": 0.765625, "learning_rate": 8.819242284422267e-07, "loss": 0.7245913743972778, "step": 2177 }, { "epoch": 1.00880240908038, "grad_norm": 0.75, "learning_rate": 8.731369435100796e-07, "loss": 0.7406237125396729, "step": 2178 }, { "epoch": 1.009265693768821, "grad_norm": 0.81640625, "learning_rate": 8.643928441288331e-07, "loss": 0.7131381034851074, "step": 2179 }, { "epoch": 1.009728978457262, "grad_norm": 0.9921875, "learning_rate": 8.556919466266182e-07, "loss": 0.865633487701416, "step": 2180 }, { "epoch": 1.010192263145703, "grad_norm": 0.87109375, "learning_rate": 8.470342672509208e-07, "loss": 0.8932554721832275, "step": 2181 }, { "epoch": 1.0106555478341441, "grad_norm": 0.9296875, "learning_rate": 8.384198221684942e-07, "loss": 0.827540397644043, "step": 2182 }, { "epoch": 1.011118832522585, "grad_norm": 0.86328125, "learning_rate": 8.298486274653935e-07, "loss": 0.7067131996154785, "step": 2183 }, { "epoch": 1.0115821172110262, "grad_norm": 0.80078125, "learning_rate": 8.213206991468747e-07, "loss": 0.7793404459953308, "step": 2184 }, { "epoch": 1.0120454018994671, "grad_norm": 0.8828125, "learning_rate": 8.128360531374313e-07, "loss": 0.6501293182373047, "step": 2185 }, { "epoch": 1.0125086865879083, "grad_norm": 0.859375, "learning_rate": 8.043947052807124e-07, "loss": 0.8105236887931824, "step": 2186 }, { "epoch": 1.0129719712763494, "grad_norm": 0.82421875, "learning_rate": 7.959966713395304e-07, "loss": 0.6882289052009583, "step": 2187 }, { "epoch": 1.0134352559647903, "grad_norm": 0.859375, "learning_rate": 7.876419669958077e-07, "loss": 0.6595849394798279, "step": 2188 }, { "epoch": 1.0138985406532315, "grad_norm": 0.859375, "learning_rate": 7.793306078505529e-07, "loss": 0.8440772294998169, "step": 2189 }, { "epoch": 1.0143618253416724, "grad_norm": 0.796875, "learning_rate": 7.710626094238498e-07, "loss": 0.685444712638855, "step": 2190 }, { "epoch": 1.0148251100301136, "grad_norm": 0.83984375, "learning_rate": 7.628379871547937e-07, "loss": 0.7895556688308716, "step": 2191 }, { "epoch": 1.0152883947185545, "grad_norm": 0.85546875, "learning_rate": 7.546567564014994e-07, "loss": 0.6867796182632446, "step": 2192 }, { "epoch": 1.0157516794069956, "grad_norm": 0.890625, "learning_rate": 7.465189324410427e-07, "loss": 0.7532359957695007, "step": 2193 }, { "epoch": 1.0162149640954365, "grad_norm": 0.8828125, "learning_rate": 7.384245304694544e-07, "loss": 0.9113466739654541, "step": 2194 }, { "epoch": 1.0166782487838777, "grad_norm": 0.80859375, "learning_rate": 7.303735656016705e-07, "loss": 0.830723226070404, "step": 2195 }, { "epoch": 1.0171415334723188, "grad_norm": 0.796875, "learning_rate": 7.223660528715268e-07, "loss": 0.712505578994751, "step": 2196 }, { "epoch": 1.0176048181607598, "grad_norm": 0.875, "learning_rate": 7.144020072317181e-07, "loss": 0.7443718314170837, "step": 2197 }, { "epoch": 1.018068102849201, "grad_norm": 0.95703125, "learning_rate": 7.064814435537592e-07, "loss": 0.7996273040771484, "step": 2198 }, { "epoch": 1.0185313875376418, "grad_norm": 0.70703125, "learning_rate": 6.98604376627987e-07, "loss": 0.783358097076416, "step": 2199 }, { "epoch": 1.018994672226083, "grad_norm": 0.84375, "learning_rate": 6.907708211635022e-07, "loss": 0.917162299156189, "step": 2200 }, { "epoch": 1.019457956914524, "grad_norm": 0.83203125, "learning_rate": 6.829807917881609e-07, "loss": 0.7591511607170105, "step": 2201 }, { "epoch": 1.019921241602965, "grad_norm": 0.7578125, "learning_rate": 6.752343030485433e-07, "loss": 0.8324123620986938, "step": 2202 }, { "epoch": 1.020384526291406, "grad_norm": 0.84765625, "learning_rate": 6.675313694099208e-07, "loss": 0.7730574607849121, "step": 2203 }, { "epoch": 1.0208478109798471, "grad_norm": 0.890625, "learning_rate": 6.598720052562328e-07, "loss": 0.8956565856933594, "step": 2204 }, { "epoch": 1.0213110956682883, "grad_norm": 0.890625, "learning_rate": 6.522562248900652e-07, "loss": 0.7237347364425659, "step": 2205 }, { "epoch": 1.0217743803567292, "grad_norm": 0.76171875, "learning_rate": 6.446840425326128e-07, "loss": 0.9014825224876404, "step": 2206 }, { "epoch": 1.0222376650451703, "grad_norm": 0.828125, "learning_rate": 6.371554723236583e-07, "loss": 0.8383098244667053, "step": 2207 }, { "epoch": 1.0227009497336113, "grad_norm": 0.7890625, "learning_rate": 6.296705283215509e-07, "loss": 0.5707777142524719, "step": 2208 }, { "epoch": 1.0231642344220524, "grad_norm": 0.93359375, "learning_rate": 6.222292245031715e-07, "loss": 0.7602838277816772, "step": 2209 }, { "epoch": 1.0236275191104933, "grad_norm": 0.9765625, "learning_rate": 6.14831574763909e-07, "loss": 0.8218472003936768, "step": 2210 }, { "epoch": 1.0240908037989345, "grad_norm": 0.83984375, "learning_rate": 6.074775929176442e-07, "loss": 0.8414373397827148, "step": 2211 }, { "epoch": 1.0245540884873754, "grad_norm": 0.87890625, "learning_rate": 6.001672926967015e-07, "loss": 0.8075520396232605, "step": 2212 }, { "epoch": 1.0250173731758165, "grad_norm": 0.8125, "learning_rate": 5.929006877518494e-07, "loss": 0.757814347743988, "step": 2213 }, { "epoch": 1.0254806578642577, "grad_norm": 0.796875, "learning_rate": 5.856777916522526e-07, "loss": 0.7999564409255981, "step": 2214 }, { "epoch": 1.0259439425526986, "grad_norm": 0.79296875, "learning_rate": 5.784986178854688e-07, "loss": 0.8249316215515137, "step": 2215 }, { "epoch": 1.0264072272411398, "grad_norm": 1.234375, "learning_rate": 5.713631798574008e-07, "loss": 0.8913872241973877, "step": 2216 }, { "epoch": 1.0268705119295807, "grad_norm": 0.98046875, "learning_rate": 5.642714908922866e-07, "loss": 0.7781076431274414, "step": 2217 }, { "epoch": 1.0273337966180218, "grad_norm": 0.8671875, "learning_rate": 5.572235642326718e-07, "loss": 0.7786509990692139, "step": 2218 }, { "epoch": 1.0277970813064627, "grad_norm": 0.79296875, "learning_rate": 5.502194130393807e-07, "loss": 0.6516589522361755, "step": 2219 }, { "epoch": 1.028260365994904, "grad_norm": 0.953125, "learning_rate": 5.432590503914954e-07, "loss": 0.8788017630577087, "step": 2220 }, { "epoch": 1.0287236506833448, "grad_norm": 0.80078125, "learning_rate": 5.363424892863255e-07, "loss": 0.793433666229248, "step": 2221 }, { "epoch": 1.029186935371786, "grad_norm": 0.91796875, "learning_rate": 5.294697426393986e-07, "loss": 0.7199576497077942, "step": 2222 }, { "epoch": 1.029650220060227, "grad_norm": 0.9921875, "learning_rate": 5.22640823284414e-07, "loss": 0.6732587218284607, "step": 2223 }, { "epoch": 1.030113504748668, "grad_norm": 0.8203125, "learning_rate": 5.158557439732432e-07, "loss": 0.7081973552703857, "step": 2224 }, { "epoch": 1.0305767894371092, "grad_norm": 0.8515625, "learning_rate": 5.091145173758873e-07, "loss": 0.7844109535217285, "step": 2225 }, { "epoch": 1.03104007412555, "grad_norm": 0.953125, "learning_rate": 5.024171560804529e-07, "loss": 0.7437909841537476, "step": 2226 }, { "epoch": 1.0315033588139912, "grad_norm": 0.73046875, "learning_rate": 4.957636725931493e-07, "loss": 0.8256717920303345, "step": 2227 }, { "epoch": 1.0319666435024322, "grad_norm": 0.98828125, "learning_rate": 4.891540793382436e-07, "loss": 0.872868001461029, "step": 2228 }, { "epoch": 1.0324299281908733, "grad_norm": 0.86328125, "learning_rate": 4.825883886580469e-07, "loss": 0.8539603352546692, "step": 2229 }, { "epoch": 1.0328932128793142, "grad_norm": 0.859375, "learning_rate": 4.7606661281288523e-07, "loss": 0.9318234920501709, "step": 2230 }, { "epoch": 1.0333564975677554, "grad_norm": 0.81640625, "learning_rate": 4.695887639810916e-07, "loss": 0.7605723142623901, "step": 2231 }, { "epoch": 1.0338197822561965, "grad_norm": 0.76171875, "learning_rate": 4.631548542589581e-07, "loss": 0.6197681427001953, "step": 2232 }, { "epoch": 1.0342830669446375, "grad_norm": 0.91796875, "learning_rate": 4.567648956607382e-07, "loss": 0.8866512775421143, "step": 2233 }, { "epoch": 1.0347463516330786, "grad_norm": 0.8359375, "learning_rate": 4.5041890011861517e-07, "loss": 0.7138317823410034, "step": 2234 }, { "epoch": 1.0352096363215195, "grad_norm": 0.80078125, "learning_rate": 4.4411687948267266e-07, "loss": 0.7621663212776184, "step": 2235 }, { "epoch": 1.0356729210099607, "grad_norm": 0.87890625, "learning_rate": 4.3785884552087854e-07, "loss": 0.720496654510498, "step": 2236 }, { "epoch": 1.0361362056984016, "grad_norm": 0.8515625, "learning_rate": 4.316448099190664e-07, "loss": 0.7380560636520386, "step": 2237 }, { "epoch": 1.0365994903868427, "grad_norm": 1.078125, "learning_rate": 4.254747842809117e-07, "loss": 0.8146321773529053, "step": 2238 }, { "epoch": 1.0370627750752837, "grad_norm": 0.85546875, "learning_rate": 4.193487801279021e-07, "loss": 0.8686697483062744, "step": 2239 }, { "epoch": 1.0375260597637248, "grad_norm": 0.80078125, "learning_rate": 4.132668088993299e-07, "loss": 0.8822686672210693, "step": 2240 }, { "epoch": 1.037989344452166, "grad_norm": 0.82421875, "learning_rate": 4.0722888195225693e-07, "loss": 0.6353955864906311, "step": 2241 }, { "epoch": 1.0384526291406069, "grad_norm": 0.78515625, "learning_rate": 4.012350105615017e-07, "loss": 0.692233681678772, "step": 2242 }, { "epoch": 1.038915913829048, "grad_norm": 1.0546875, "learning_rate": 3.9528520591962305e-07, "loss": 0.8289405703544617, "step": 2243 }, { "epoch": 1.039379198517489, "grad_norm": 0.82421875, "learning_rate": 3.8937947913688034e-07, "loss": 0.7374635934829712, "step": 2244 }, { "epoch": 1.03984248320593, "grad_norm": 0.7421875, "learning_rate": 3.8351784124123343e-07, "loss": 0.6225665807723999, "step": 2245 }, { "epoch": 1.040305767894371, "grad_norm": 0.8671875, "learning_rate": 3.777003031783055e-07, "loss": 0.9105774164199829, "step": 2246 }, { "epoch": 1.0407690525828122, "grad_norm": 0.8203125, "learning_rate": 3.7192687581138807e-07, "loss": 0.7380335927009583, "step": 2247 }, { "epoch": 1.041232337271253, "grad_norm": 0.80859375, "learning_rate": 3.661975699213853e-07, "loss": 0.8130779266357422, "step": 2248 }, { "epoch": 1.0416956219596942, "grad_norm": 0.79296875, "learning_rate": 3.6051239620681665e-07, "loss": 0.682541012763977, "step": 2249 }, { "epoch": 1.0421589066481354, "grad_norm": 0.7578125, "learning_rate": 3.548713652837954e-07, "loss": 0.8330915570259094, "step": 2250 }, { "epoch": 1.0426221913365763, "grad_norm": 0.9609375, "learning_rate": 3.4927448768600476e-07, "loss": 0.8342495560646057, "step": 2251 }, { "epoch": 1.0430854760250174, "grad_norm": 0.80859375, "learning_rate": 3.4372177386467673e-07, "loss": 0.7605646848678589, "step": 2252 }, { "epoch": 1.0435487607134584, "grad_norm": 0.93359375, "learning_rate": 3.3821323418857843e-07, "loss": 0.7368906140327454, "step": 2253 }, { "epoch": 1.0440120454018995, "grad_norm": 0.859375, "learning_rate": 3.32748878943983e-07, "loss": 0.8219422698020935, "step": 2254 }, { "epoch": 1.0444753300903404, "grad_norm": 0.91015625, "learning_rate": 3.2732871833466427e-07, "loss": 0.8227874040603638, "step": 2255 }, { "epoch": 1.0449386147787816, "grad_norm": 0.8359375, "learning_rate": 3.219527624818621e-07, "loss": 0.780153751373291, "step": 2256 }, { "epoch": 1.0454018994672225, "grad_norm": 0.828125, "learning_rate": 3.1662102142427974e-07, "loss": 0.6628552079200745, "step": 2257 }, { "epoch": 1.0458651841556637, "grad_norm": 0.73828125, "learning_rate": 3.1133350511804905e-07, "loss": 0.8577846884727478, "step": 2258 }, { "epoch": 1.0463284688441048, "grad_norm": 0.7890625, "learning_rate": 3.06090223436728e-07, "loss": 0.721747875213623, "step": 2259 }, { "epoch": 1.0467917535325457, "grad_norm": 0.80859375, "learning_rate": 3.0089118617126064e-07, "loss": 0.8224932551383972, "step": 2260 }, { "epoch": 1.0472550382209869, "grad_norm": 0.79296875, "learning_rate": 2.957364030299852e-07, "loss": 0.8007409572601318, "step": 2261 }, { "epoch": 1.0477183229094278, "grad_norm": 0.78125, "learning_rate": 2.9062588363859645e-07, "loss": 0.756182849407196, "step": 2262 }, { "epoch": 1.048181607597869, "grad_norm": 0.8671875, "learning_rate": 2.855596375401381e-07, "loss": 0.7461752891540527, "step": 2263 }, { "epoch": 1.0486448922863099, "grad_norm": 0.89453125, "learning_rate": 2.8053767419497076e-07, "loss": 0.8917368054389954, "step": 2264 }, { "epoch": 1.049108176974751, "grad_norm": 0.953125, "learning_rate": 2.755600029807797e-07, "loss": 0.9534367918968201, "step": 2265 }, { "epoch": 1.049571461663192, "grad_norm": 0.7734375, "learning_rate": 2.706266331925269e-07, "loss": 0.8556983470916748, "step": 2266 }, { "epoch": 1.050034746351633, "grad_norm": 0.9296875, "learning_rate": 2.657375740424621e-07, "loss": 0.8406637907028198, "step": 2267 }, { "epoch": 1.0504980310400742, "grad_norm": 0.90625, "learning_rate": 2.6089283466007987e-07, "loss": 0.765261173248291, "step": 2268 }, { "epoch": 1.0509613157285151, "grad_norm": 0.81640625, "learning_rate": 2.560924240921221e-07, "loss": 0.7666542530059814, "step": 2269 }, { "epoch": 1.0514246004169563, "grad_norm": 0.73046875, "learning_rate": 2.513363513025597e-07, "loss": 0.6919992566108704, "step": 2270 }, { "epoch": 1.0518878851053972, "grad_norm": 0.82421875, "learning_rate": 2.466246251725579e-07, "loss": 0.7279144525527954, "step": 2271 }, { "epoch": 1.0523511697938384, "grad_norm": 0.86328125, "learning_rate": 2.4195725450047865e-07, "loss": 0.7998260855674744, "step": 2272 }, { "epoch": 1.0528144544822793, "grad_norm": 0.82421875, "learning_rate": 2.373342480018543e-07, "loss": 0.9555582404136658, "step": 2273 }, { "epoch": 1.0532777391707204, "grad_norm": 0.9453125, "learning_rate": 2.3275561430937942e-07, "loss": 0.7288169860839844, "step": 2274 }, { "epoch": 1.0537410238591614, "grad_norm": 0.86328125, "learning_rate": 2.282213619728868e-07, "loss": 0.8385715484619141, "step": 2275 }, { "epoch": 1.0542043085476025, "grad_norm": 0.796875, "learning_rate": 2.2373149945933423e-07, "loss": 0.8468941450119019, "step": 2276 }, { "epoch": 1.0546675932360436, "grad_norm": 0.83984375, "learning_rate": 2.1928603515279388e-07, "loss": 0.6845361590385437, "step": 2277 }, { "epoch": 1.0551308779244846, "grad_norm": 0.94140625, "learning_rate": 2.148849773544175e-07, "loss": 0.8109369277954102, "step": 2278 }, { "epoch": 1.0555941626129257, "grad_norm": 0.7890625, "learning_rate": 2.1052833428245244e-07, "loss": 0.7616801857948303, "step": 2279 }, { "epoch": 1.0560574473013666, "grad_norm": 0.80078125, "learning_rate": 2.062161140721992e-07, "loss": 0.8856765031814575, "step": 2280 }, { "epoch": 1.0565207319898078, "grad_norm": 0.8671875, "learning_rate": 2.0194832477600856e-07, "loss": 0.6820222735404968, "step": 2281 }, { "epoch": 1.0569840166782487, "grad_norm": 0.83984375, "learning_rate": 1.97724974363263e-07, "loss": 0.7769261598587036, "step": 2282 }, { "epoch": 1.0574473013666899, "grad_norm": 0.87890625, "learning_rate": 1.9354607072036335e-07, "loss": 0.8834435343742371, "step": 2283 }, { "epoch": 1.0579105860551308, "grad_norm": 0.99609375, "learning_rate": 1.8941162165071557e-07, "loss": 0.8411494493484497, "step": 2284 }, { "epoch": 1.058373870743572, "grad_norm": 0.9375, "learning_rate": 1.85321634874712e-07, "loss": 0.7289970517158508, "step": 2285 }, { "epoch": 1.058837155432013, "grad_norm": 0.8203125, "learning_rate": 1.8127611802971534e-07, "loss": 0.7000952959060669, "step": 2286 }, { "epoch": 1.059300440120454, "grad_norm": 0.765625, "learning_rate": 1.7727507867005343e-07, "loss": 0.7273321151733398, "step": 2287 }, { "epoch": 1.0597637248088951, "grad_norm": 0.76171875, "learning_rate": 1.7331852426700057e-07, "loss": 0.629833459854126, "step": 2288 }, { "epoch": 1.060227009497336, "grad_norm": 0.8046875, "learning_rate": 1.694064622087641e-07, "loss": 0.9701935052871704, "step": 2289 }, { "epoch": 1.0606902941857772, "grad_norm": 0.859375, "learning_rate": 1.6553889980045788e-07, "loss": 0.8223315477371216, "step": 2290 }, { "epoch": 1.0611535788742181, "grad_norm": 0.8671875, "learning_rate": 1.617158442641129e-07, "loss": 0.8183466196060181, "step": 2291 }, { "epoch": 1.0616168635626593, "grad_norm": 0.8828125, "learning_rate": 1.5793730273864527e-07, "loss": 0.6966171264648438, "step": 2292 }, { "epoch": 1.0620801482511002, "grad_norm": 0.953125, "learning_rate": 1.542032822798509e-07, "loss": 0.8441136479377747, "step": 2293 }, { "epoch": 1.0625434329395413, "grad_norm": 0.80078125, "learning_rate": 1.50513789860387e-07, "loss": 0.9092806577682495, "step": 2294 }, { "epoch": 1.0630067176279825, "grad_norm": 0.9296875, "learning_rate": 1.4686883236976378e-07, "loss": 0.6524341702461243, "step": 2295 }, { "epoch": 1.0634700023164234, "grad_norm": 0.83984375, "learning_rate": 1.432684166143341e-07, "loss": 0.759156346321106, "step": 2296 }, { "epoch": 1.0639332870048646, "grad_norm": 1.15625, "learning_rate": 1.3971254931726663e-07, "loss": 0.7915992736816406, "step": 2297 }, { "epoch": 1.0643965716933055, "grad_norm": 0.85546875, "learning_rate": 1.362012371185539e-07, "loss": 0.9682400226593018, "step": 2298 }, { "epoch": 1.0648598563817466, "grad_norm": 0.87109375, "learning_rate": 1.3273448657498044e-07, "loss": 0.7774080038070679, "step": 2299 }, { "epoch": 1.0653231410701876, "grad_norm": 1.0625, "learning_rate": 1.2931230416012785e-07, "loss": 0.8881627917289734, "step": 2300 }, { "epoch": 1.0657864257586287, "grad_norm": 0.9140625, "learning_rate": 1.2593469626434573e-07, "loss": 0.6603068709373474, "step": 2301 }, { "epoch": 1.0662497104470696, "grad_norm": 0.87890625, "learning_rate": 1.2260166919475423e-07, "loss": 0.953567624092102, "step": 2302 }, { "epoch": 1.0667129951355108, "grad_norm": 0.8125, "learning_rate": 1.1931322917522548e-07, "loss": 0.7024879455566406, "step": 2303 }, { "epoch": 1.067176279823952, "grad_norm": 0.859375, "learning_rate": 1.160693823463701e-07, "loss": 0.7515615224838257, "step": 2304 }, { "epoch": 1.0676395645123928, "grad_norm": 1.4375, "learning_rate": 1.1287013476552943e-07, "loss": 0.7856634855270386, "step": 2305 }, { "epoch": 1.068102849200834, "grad_norm": 0.82421875, "learning_rate": 1.0971549240676204e-07, "loss": 0.8335432410240173, "step": 2306 }, { "epoch": 1.068566133889275, "grad_norm": 0.90234375, "learning_rate": 1.0660546116083847e-07, "loss": 0.7041558027267456, "step": 2307 }, { "epoch": 1.069029418577716, "grad_norm": 0.8984375, "learning_rate": 1.0354004683522256e-07, "loss": 0.7460358738899231, "step": 2308 }, { "epoch": 1.069492703266157, "grad_norm": 0.8515625, "learning_rate": 1.0051925515405813e-07, "loss": 0.7642945647239685, "step": 2309 }, { "epoch": 1.0699559879545981, "grad_norm": 0.73046875, "learning_rate": 9.754309175817166e-08, "loss": 0.603370189666748, "step": 2310 }, { "epoch": 1.070419272643039, "grad_norm": 1.0, "learning_rate": 9.461156220505363e-08, "loss": 0.7628229856491089, "step": 2311 }, { "epoch": 1.0708825573314802, "grad_norm": 0.92578125, "learning_rate": 9.172467196884249e-08, "loss": 0.7144607901573181, "step": 2312 }, { "epoch": 1.0713458420199213, "grad_norm": 0.79296875, "learning_rate": 8.888242644032207e-08, "loss": 0.7531965970993042, "step": 2313 }, { "epoch": 1.0718091267083623, "grad_norm": 0.796875, "learning_rate": 8.608483092691355e-08, "loss": 0.7036784887313843, "step": 2314 }, { "epoch": 1.0722724113968034, "grad_norm": 0.96484375, "learning_rate": 8.333189065265679e-08, "loss": 0.9241542220115662, "step": 2315 }, { "epoch": 1.0727356960852443, "grad_norm": 0.72265625, "learning_rate": 8.062361075821034e-08, "loss": 0.6981607675552368, "step": 2316 }, { "epoch": 1.0731989807736855, "grad_norm": 0.74609375, "learning_rate": 7.795999630083017e-08, "loss": 0.8028992414474487, "step": 2317 }, { "epoch": 1.0736622654621264, "grad_norm": 0.73828125, "learning_rate": 7.534105225437493e-08, "loss": 0.9001232385635376, "step": 2318 }, { "epoch": 1.0741255501505675, "grad_norm": 0.8046875, "learning_rate": 7.276678350927934e-08, "loss": 0.778639554977417, "step": 2319 }, { "epoch": 1.0745888348390085, "grad_norm": 0.91015625, "learning_rate": 7.023719487256752e-08, "loss": 0.8548743724822998, "step": 2320 }, { "epoch": 1.0750521195274496, "grad_norm": 0.96484375, "learning_rate": 6.775229106781832e-08, "loss": 0.8301196098327637, "step": 2321 }, { "epoch": 1.0755154042158908, "grad_norm": 0.84765625, "learning_rate": 6.531207673517603e-08, "loss": 0.6911361813545227, "step": 2322 }, { "epoch": 1.0759786889043317, "grad_norm": 0.859375, "learning_rate": 6.291655643133165e-08, "loss": 0.872600257396698, "step": 2323 }, { "epoch": 1.0764419735927728, "grad_norm": 0.81640625, "learning_rate": 6.056573462952032e-08, "loss": 0.8005569577217102, "step": 2324 }, { "epoch": 1.0769052582812138, "grad_norm": 0.85546875, "learning_rate": 5.825961571950789e-08, "loss": 0.8710095882415771, "step": 2325 }, { "epoch": 1.077368542969655, "grad_norm": 0.87890625, "learning_rate": 5.5998204007585705e-08, "loss": 0.8382307291030884, "step": 2326 }, { "epoch": 1.0778318276580958, "grad_norm": 0.765625, "learning_rate": 5.378150371656254e-08, "loss": 0.7570433616638184, "step": 2327 }, { "epoch": 1.078295112346537, "grad_norm": 0.796875, "learning_rate": 5.160951898575661e-08, "loss": 0.8490878343582153, "step": 2328 }, { "epoch": 1.078758397034978, "grad_norm": 0.8046875, "learning_rate": 4.9482253870984925e-08, "loss": 0.7219923734664917, "step": 2329 }, { "epoch": 1.079221681723419, "grad_norm": 0.953125, "learning_rate": 4.7399712344560643e-08, "loss": 0.8974231481552124, "step": 2330 }, { "epoch": 1.0796849664118602, "grad_norm": 0.80859375, "learning_rate": 4.536189829528237e-08, "loss": 0.797977089881897, "step": 2331 }, { "epoch": 1.080148251100301, "grad_norm": 0.79296875, "learning_rate": 4.3368815528423536e-08, "loss": 0.802757203578949, "step": 2332 }, { "epoch": 1.0806115357887423, "grad_norm": 0.921875, "learning_rate": 4.142046776573771e-08, "loss": 0.753846287727356, "step": 2333 }, { "epoch": 1.0810748204771832, "grad_norm": 0.83984375, "learning_rate": 3.95168586454373e-08, "loss": 0.7284574508666992, "step": 2334 }, { "epoch": 1.0815381051656243, "grad_norm": 0.75390625, "learning_rate": 3.7657991722190865e-08, "loss": 0.7171909213066101, "step": 2335 }, { "epoch": 1.0820013898540652, "grad_norm": 0.84765625, "learning_rate": 3.5843870467125784e-08, "loss": 0.7073402404785156, "step": 2336 }, { "epoch": 1.0824646745425064, "grad_norm": 0.8125, "learning_rate": 3.4074498267809614e-08, "loss": 0.8231452107429504, "step": 2337 }, { "epoch": 1.0829279592309473, "grad_norm": 0.8359375, "learning_rate": 3.234987842824744e-08, "loss": 0.8806526064872742, "step": 2338 }, { "epoch": 1.0833912439193885, "grad_norm": 0.87109375, "learning_rate": 3.067001416887916e-08, "loss": 0.8457326292991638, "step": 2339 }, { "epoch": 1.0838545286078296, "grad_norm": 0.91796875, "learning_rate": 2.9034908626571545e-08, "loss": 0.6565863490104675, "step": 2340 }, { "epoch": 1.0843178132962705, "grad_norm": 0.87890625, "learning_rate": 2.7444564854607558e-08, "loss": 0.8216748237609863, "step": 2341 }, { "epoch": 1.0847810979847117, "grad_norm": 0.96875, "learning_rate": 2.5898985822694343e-08, "loss": 0.9930620789527893, "step": 2342 }, { "epoch": 1.0852443826731526, "grad_norm": 0.80859375, "learning_rate": 2.4398174416936593e-08, "loss": 0.8227108716964722, "step": 2343 }, { "epoch": 1.0857076673615937, "grad_norm": 0.84765625, "learning_rate": 2.294213343985252e-08, "loss": 0.8761816024780273, "step": 2344 }, { "epoch": 1.0861709520500347, "grad_norm": 1.0078125, "learning_rate": 2.1530865610352555e-08, "loss": 0.6705437302589417, "step": 2345 }, { "epoch": 1.0866342367384758, "grad_norm": 1.0625, "learning_rate": 2.0164373563744675e-08, "loss": 0.8744497895240784, "step": 2346 }, { "epoch": 1.0870975214269167, "grad_norm": 0.9765625, "learning_rate": 1.8842659851723732e-08, "loss": 0.8473079800605774, "step": 2347 }, { "epoch": 1.0875608061153579, "grad_norm": 0.79296875, "learning_rate": 1.7565726942371464e-08, "loss": 0.6917478442192078, "step": 2348 }, { "epoch": 1.088024090803799, "grad_norm": 0.91796875, "learning_rate": 1.633357722014317e-08, "loss": 0.6745292544364929, "step": 2349 }, { "epoch": 1.08848737549224, "grad_norm": 0.82421875, "learning_rate": 1.5146212985875706e-08, "loss": 0.7799302935600281, "step": 2350 }, { "epoch": 1.088950660180681, "grad_norm": 0.9296875, "learning_rate": 1.4003636456771496e-08, "loss": 0.6096100807189941, "step": 2351 }, { "epoch": 1.089413944869122, "grad_norm": 0.78515625, "learning_rate": 1.2905849766401189e-08, "loss": 0.6739075183868408, "step": 2352 }, { "epoch": 1.0898772295575632, "grad_norm": 0.8125, "learning_rate": 1.1852854964698346e-08, "loss": 0.6615080833435059, "step": 2353 }, { "epoch": 1.090340514246004, "grad_norm": 0.90625, "learning_rate": 1.0844654017956757e-08, "loss": 0.8795668482780457, "step": 2354 }, { "epoch": 1.0908037989344452, "grad_norm": 0.84765625, "learning_rate": 9.881248808817134e-09, "loss": 0.6835682392120361, "step": 2355 }, { "epoch": 1.0912670836228862, "grad_norm": 0.953125, "learning_rate": 8.962641136285754e-09, "loss": 0.8570435047149658, "step": 2356 }, { "epoch": 1.0917303683113273, "grad_norm": 0.6875, "learning_rate": 8.088832715702487e-09, "loss": 0.653952956199646, "step": 2357 }, { "epoch": 1.0921936529997684, "grad_norm": 0.8359375, "learning_rate": 7.259825178759449e-09, "loss": 0.7819563150405884, "step": 2358 }, { "epoch": 1.0926569376882094, "grad_norm": 0.8125, "learning_rate": 6.475620073493005e-09, "loss": 0.7712017893791199, "step": 2359 }, { "epoch": 1.0931202223766505, "grad_norm": 0.921875, "learning_rate": 5.736218864273113e-09, "loss": 0.7736300230026245, "step": 2360 }, { "epoch": 1.0935835070650914, "grad_norm": 0.80859375, "learning_rate": 5.041622931805989e-09, "loss": 0.8023624420166016, "step": 2361 }, { "epoch": 1.0940467917535326, "grad_norm": 0.9375, "learning_rate": 4.391833573136772e-09, "loss": 0.7673137187957764, "step": 2362 }, { "epoch": 1.0945100764419735, "grad_norm": 0.92578125, "learning_rate": 3.786852001636198e-09, "loss": 0.7695264220237732, "step": 2363 }, { "epoch": 1.0949733611304147, "grad_norm": 0.90234375, "learning_rate": 3.2266793470085986e-09, "loss": 0.8211096525192261, "step": 2364 }, { "epoch": 1.0954366458188556, "grad_norm": 0.85546875, "learning_rate": 2.7113166552812375e-09, "loss": 0.7800441980361938, "step": 2365 }, { "epoch": 1.0958999305072967, "grad_norm": 0.875, "learning_rate": 2.2407648888069786e-09, "loss": 1.0416970252990723, "step": 2366 }, { "epoch": 1.0963632151957379, "grad_norm": 0.84375, "learning_rate": 1.8150249262616214e-09, "loss": 0.6914156675338745, "step": 2367 }, { "epoch": 1.0968264998841788, "grad_norm": 0.859375, "learning_rate": 1.4340975626465635e-09, "loss": 0.8113836050033569, "step": 2368 }, { "epoch": 1.09728978457262, "grad_norm": 0.8203125, "learning_rate": 1.0979835092808087e-09, "loss": 0.7729677557945251, "step": 2369 }, { "epoch": 1.0977530692610609, "grad_norm": 0.921875, "learning_rate": 8.066833937956375e-10, "loss": 0.6993372440338135, "step": 2370 }, { "epoch": 1.098216353949502, "grad_norm": 0.81640625, "learning_rate": 5.601977601505936e-10, "loss": 0.8089796304702759, "step": 2371 }, { "epoch": 1.098679638637943, "grad_norm": 0.80078125, "learning_rate": 3.585270686121689e-10, "loss": 0.7753397822380066, "step": 2372 }, { "epoch": 1.099142923326384, "grad_norm": 0.79296875, "learning_rate": 2.0167169577245402e-10, "loss": 0.9297423362731934, "step": 2373 }, { "epoch": 1.099606208014825, "grad_norm": 0.86328125, "learning_rate": 8.963193452515838e-11, "loss": 0.8522340655326843, "step": 2374 }, { "epoch": 1.1000694927032661, "grad_norm": 0.80859375, "learning_rate": 2.2407994092255023e-11, "loss": 0.8018955588340759, "step": 2375 } ], "logging_steps": 1, "max_steps": 2375, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.087985967887275e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }