diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -2,9 +2,9 @@ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.3333333333333333, + "epoch": 0.6666666666666666, "eval_steps": 500, - "global_step": 10000, + "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -6008,6 +6008,6006 @@ "learning_rate": 7.961620348094241e-05, "loss": 0.0682, "step": 10000 + }, + { + "grad_norm": 0.33132708072662354, + "learning_rate": 7.957177881899577e-05, + "loss": 0.0563, + "step": 10010 + }, + { + "grad_norm": 0.32278794050216675, + "learning_rate": 7.952731822454944e-05, + "loss": 0.0556, + "step": 10020 + }, + { + "grad_norm": 0.2934204339981079, + "learning_rate": 7.948282175162722e-05, + "loss": 0.0448, + "step": 10030 + }, + { + "grad_norm": 0.2591647803783417, + "learning_rate": 7.943828945429652e-05, + "loss": 0.0723, + "step": 10040 + }, + { + "grad_norm": 0.32507428526878357, + "learning_rate": 7.939372138666827e-05, + "loss": 0.0613, + "step": 10050 + }, + { + "grad_norm": 0.32368388772010803, + "learning_rate": 7.934911760289692e-05, + "loss": 0.0534, + "step": 10060 + }, + { + "grad_norm": 0.27588149905204773, + "learning_rate": 7.930447815718022e-05, + "loss": 0.0429, + "step": 10070 + }, + { + "grad_norm": 0.27428266406059265, + "learning_rate": 7.925980310375934e-05, + "loss": 0.0594, + "step": 10080 + }, + { + "grad_norm": 0.40983086824417114, + "learning_rate": 7.921509249691865e-05, + "loss": 0.0565, + "step": 10090 + }, + { + "grad_norm": 0.24723628163337708, + "learning_rate": 7.917034639098579e-05, + "loss": 0.0618, + "step": 10100 + }, + { + "grad_norm": 0.26453569531440735, + "learning_rate": 7.912556484033146e-05, + "loss": 0.0387, + "step": 10110 + }, + { + "grad_norm": 0.3762427568435669, + "learning_rate": 7.908074789936952e-05, + "loss": 0.0468, + "step": 10120 + }, + { + "grad_norm": 0.32893022894859314, + "learning_rate": 7.903589562255673e-05, + "loss": 0.0546, + "step": 10130 + }, + { + "grad_norm": 0.32823359966278076, + "learning_rate": 7.899100806439285e-05, + "loss": 0.052, + "step": 10140 + }, + { + "grad_norm": 0.2984393835067749, + "learning_rate": 7.894608527942049e-05, + "loss": 0.064, + "step": 10150 + }, + { + "grad_norm": 0.3259609639644623, + "learning_rate": 7.89011273222251e-05, + "loss": 0.057, + "step": 10160 + }, + { + "grad_norm": 0.35673999786376953, + "learning_rate": 7.885613424743482e-05, + "loss": 0.049, + "step": 10170 + }, + { + "grad_norm": 0.24055549502372742, + "learning_rate": 7.881110610972044e-05, + "loss": 0.0688, + "step": 10180 + }, + { + "grad_norm": 0.36448580026626587, + "learning_rate": 7.876604296379544e-05, + "loss": 0.0674, + "step": 10190 + }, + { + "grad_norm": 0.2905661463737488, + "learning_rate": 7.872094486441579e-05, + "loss": 0.0654, + "step": 10200 + }, + { + "grad_norm": 0.2830527424812317, + "learning_rate": 7.867581186637991e-05, + "loss": 0.0523, + "step": 10210 + }, + { + "grad_norm": 0.22366714477539062, + "learning_rate": 7.863064402452866e-05, + "loss": 0.0497, + "step": 10220 + }, + { + "grad_norm": 0.31298044323921204, + "learning_rate": 7.858544139374524e-05, + "loss": 0.0582, + "step": 10230 + }, + { + "grad_norm": 0.2092851996421814, + "learning_rate": 7.854020402895508e-05, + "loss": 0.051, + "step": 10240 + }, + { + "grad_norm": 0.2408243864774704, + "learning_rate": 7.849493198512587e-05, + "loss": 0.0509, + "step": 10250 + }, + { + "grad_norm": 0.2615901529788971, + "learning_rate": 7.84496253172674e-05, + "loss": 0.0362, + "step": 10260 + }, + { + "grad_norm": 0.31269392371177673, + "learning_rate": 7.840428408043155e-05, + "loss": 0.0527, + "step": 10270 + }, + { + "grad_norm": 0.34652313590049744, + "learning_rate": 7.835890832971218e-05, + "loss": 0.0595, + "step": 10280 + }, + { + "grad_norm": 0.31965941190719604, + "learning_rate": 7.831349812024512e-05, + "loss": 0.062, + "step": 10290 + }, + { + "grad_norm": 0.29599615931510925, + "learning_rate": 7.826805350720807e-05, + "loss": 0.0537, + "step": 10300 + }, + { + "grad_norm": 0.24278312921524048, + "learning_rate": 7.822257454582049e-05, + "loss": 0.0463, + "step": 10310 + }, + { + "grad_norm": 0.35328978300094604, + "learning_rate": 7.817706129134363e-05, + "loss": 0.0468, + "step": 10320 + }, + { + "grad_norm": 0.24585260450839996, + "learning_rate": 7.813151379908036e-05, + "loss": 0.0538, + "step": 10330 + }, + { + "grad_norm": 0.332724392414093, + "learning_rate": 7.808593212437523e-05, + "loss": 0.0557, + "step": 10340 + }, + { + "grad_norm": 0.3731438219547272, + "learning_rate": 7.804031632261421e-05, + "loss": 0.0659, + "step": 10350 + }, + { + "grad_norm": 0.3650723695755005, + "learning_rate": 7.799466644922484e-05, + "loss": 0.0422, + "step": 10360 + }, + { + "grad_norm": 0.29940640926361084, + "learning_rate": 7.794898255967602e-05, + "loss": 0.0497, + "step": 10370 + }, + { + "grad_norm": 0.3471282124519348, + "learning_rate": 7.790326470947795e-05, + "loss": 0.0527, + "step": 10380 + }, + { + "grad_norm": 0.28322187066078186, + "learning_rate": 7.785751295418217e-05, + "loss": 0.0556, + "step": 10390 + }, + { + "grad_norm": 0.2890983819961548, + "learning_rate": 7.781172734938136e-05, + "loss": 0.0596, + "step": 10400 + }, + { + "grad_norm": 0.27639293670654297, + "learning_rate": 7.776590795070933e-05, + "loss": 0.0554, + "step": 10410 + }, + { + "grad_norm": 0.23798902332782745, + "learning_rate": 7.772005481384099e-05, + "loss": 0.0539, + "step": 10420 + }, + { + "grad_norm": 0.2751089334487915, + "learning_rate": 7.767416799449223e-05, + "loss": 0.0422, + "step": 10430 + }, + { + "grad_norm": 0.3559429347515106, + "learning_rate": 7.762824754841984e-05, + "loss": 0.0452, + "step": 10440 + }, + { + "grad_norm": 0.24810270965099335, + "learning_rate": 7.758229353142152e-05, + "loss": 0.0661, + "step": 10450 + }, + { + "grad_norm": 0.21435704827308655, + "learning_rate": 7.753630599933572e-05, + "loss": 0.0435, + "step": 10460 + }, + { + "grad_norm": 0.21953807771205902, + "learning_rate": 7.749028500804161e-05, + "loss": 0.0411, + "step": 10470 + }, + { + "grad_norm": 0.20052248239517212, + "learning_rate": 7.744423061345906e-05, + "loss": 0.0344, + "step": 10480 + }, + { + "grad_norm": 0.1935633271932602, + "learning_rate": 7.73981428715485e-05, + "loss": 0.0372, + "step": 10490 + }, + { + "grad_norm": 0.2296011596918106, + "learning_rate": 7.735202183831085e-05, + "loss": 0.0403, + "step": 10500 + }, + { + "grad_norm": 0.33665189146995544, + "learning_rate": 7.730586756978757e-05, + "loss": 0.0553, + "step": 10510 + }, + { + "grad_norm": 0.23534074425697327, + "learning_rate": 7.72596801220604e-05, + "loss": 0.0317, + "step": 10520 + }, + { + "grad_norm": 0.27294039726257324, + "learning_rate": 7.721345955125147e-05, + "loss": 0.044, + "step": 10530 + }, + { + "grad_norm": 0.20685341954231262, + "learning_rate": 7.71672059135231e-05, + "loss": 0.0324, + "step": 10540 + }, + { + "grad_norm": 0.2650996744632721, + "learning_rate": 7.712091926507787e-05, + "loss": 0.0329, + "step": 10550 + }, + { + "grad_norm": 0.24565280973911285, + "learning_rate": 7.70745996621584e-05, + "loss": 0.0351, + "step": 10560 + }, + { + "grad_norm": 0.3496357500553131, + "learning_rate": 7.702824716104735e-05, + "loss": 0.0359, + "step": 10570 + }, + { + "grad_norm": 0.20169053971767426, + "learning_rate": 7.698186181806743e-05, + "loss": 0.0464, + "step": 10580 + }, + { + "grad_norm": 0.28644654154777527, + "learning_rate": 7.693544368958116e-05, + "loss": 0.0523, + "step": 10590 + }, + { + "grad_norm": 0.24216139316558838, + "learning_rate": 7.688899283199096e-05, + "loss": 0.0537, + "step": 10600 + }, + { + "grad_norm": 0.30106598138809204, + "learning_rate": 7.684250930173901e-05, + "loss": 0.0681, + "step": 10610 + }, + { + "grad_norm": 0.43666359782218933, + "learning_rate": 7.679599315530718e-05, + "loss": 0.0639, + "step": 10620 + }, + { + "grad_norm": 0.3166155219078064, + "learning_rate": 7.674944444921695e-05, + "loss": 0.0439, + "step": 10630 + }, + { + "grad_norm": 0.2645878791809082, + "learning_rate": 7.670286324002944e-05, + "loss": 0.0512, + "step": 10640 + }, + { + "grad_norm": 0.3639654517173767, + "learning_rate": 7.665624958434514e-05, + "loss": 0.0602, + "step": 10650 + }, + { + "grad_norm": 0.3224162757396698, + "learning_rate": 7.66096035388041e-05, + "loss": 0.0512, + "step": 10660 + }, + { + "grad_norm": 0.31354862451553345, + "learning_rate": 7.656292516008563e-05, + "loss": 0.0558, + "step": 10670 + }, + { + "grad_norm": 0.3189253807067871, + "learning_rate": 7.651621450490837e-05, + "loss": 0.0332, + "step": 10680 + }, + { + "grad_norm": 0.2652711570262909, + "learning_rate": 7.646947163003017e-05, + "loss": 0.0415, + "step": 10690 + }, + { + "grad_norm": 0.3182506263256073, + "learning_rate": 7.642269659224803e-05, + "loss": 0.0681, + "step": 10700 + }, + { + "grad_norm": 0.29999294877052307, + "learning_rate": 7.637588944839803e-05, + "loss": 0.0546, + "step": 10710 + }, + { + "grad_norm": 0.19786204397678375, + "learning_rate": 7.632905025535528e-05, + "loss": 0.0525, + "step": 10720 + }, + { + "grad_norm": 0.25497713685035706, + "learning_rate": 7.628217907003378e-05, + "loss": 0.0407, + "step": 10730 + }, + { + "grad_norm": 0.2241908758878708, + "learning_rate": 7.623527594938649e-05, + "loss": 0.033, + "step": 10740 + }, + { + "grad_norm": 0.32697877287864685, + "learning_rate": 7.618834095040509e-05, + "loss": 0.0453, + "step": 10750 + }, + { + "grad_norm": 0.2551109790802002, + "learning_rate": 7.614137413012e-05, + "loss": 0.0505, + "step": 10760 + }, + { + "grad_norm": 0.3612462878227234, + "learning_rate": 7.609437554560042e-05, + "loss": 0.0515, + "step": 10770 + }, + { + "grad_norm": 0.2806759774684906, + "learning_rate": 7.604734525395398e-05, + "loss": 0.0491, + "step": 10780 + }, + { + "grad_norm": 0.27127817273139954, + "learning_rate": 7.600028331232699e-05, + "loss": 0.048, + "step": 10790 + }, + { + "grad_norm": 0.2838228642940521, + "learning_rate": 7.595318977790408e-05, + "loss": 0.0457, + "step": 10800 + }, + { + "grad_norm": 0.3736947178840637, + "learning_rate": 7.590606470790836e-05, + "loss": 0.0455, + "step": 10810 + }, + { + "grad_norm": 0.19184522330760956, + "learning_rate": 7.585890815960125e-05, + "loss": 0.0451, + "step": 10820 + }, + { + "grad_norm": 0.25505003333091736, + "learning_rate": 7.581172019028238e-05, + "loss": 0.0379, + "step": 10830 + }, + { + "grad_norm": 0.21059545874595642, + "learning_rate": 7.576450085728959e-05, + "loss": 0.0391, + "step": 10840 + }, + { + "grad_norm": 0.3221857249736786, + "learning_rate": 7.571725021799885e-05, + "loss": 0.0504, + "step": 10850 + }, + { + "grad_norm": 0.2043369561433792, + "learning_rate": 7.566996832982409e-05, + "loss": 0.0345, + "step": 10860 + }, + { + "grad_norm": 0.2110092043876648, + "learning_rate": 7.56226552502173e-05, + "loss": 0.0428, + "step": 10870 + }, + { + "grad_norm": 0.23808524012565613, + "learning_rate": 7.557531103666833e-05, + "loss": 0.0536, + "step": 10880 + }, + { + "grad_norm": 0.24966995418071747, + "learning_rate": 7.552793574670485e-05, + "loss": 0.0448, + "step": 10890 + }, + { + "grad_norm": 0.24053335189819336, + "learning_rate": 7.54805294378923e-05, + "loss": 0.0499, + "step": 10900 + }, + { + "grad_norm": 0.31211426854133606, + "learning_rate": 7.543309216783384e-05, + "loss": 0.0404, + "step": 10910 + }, + { + "grad_norm": 0.3212166428565979, + "learning_rate": 7.53856239941702e-05, + "loss": 0.0492, + "step": 10920 + }, + { + "grad_norm": 0.33951273560523987, + "learning_rate": 7.533812497457972e-05, + "loss": 0.0568, + "step": 10930 + }, + { + "grad_norm": 0.316363126039505, + "learning_rate": 7.529059516677814e-05, + "loss": 0.0446, + "step": 10940 + }, + { + "grad_norm": 0.20694230496883392, + "learning_rate": 7.524303462851872e-05, + "loss": 0.0469, + "step": 10950 + }, + { + "grad_norm": 0.2670970857143402, + "learning_rate": 7.519544341759192e-05, + "loss": 0.0421, + "step": 10960 + }, + { + "grad_norm": 0.3025738000869751, + "learning_rate": 7.514782159182562e-05, + "loss": 0.0481, + "step": 10970 + }, + { + "grad_norm": 0.2301190346479416, + "learning_rate": 7.510016920908481e-05, + "loss": 0.0495, + "step": 10980 + }, + { + "grad_norm": 0.234406977891922, + "learning_rate": 7.505248632727158e-05, + "loss": 0.0393, + "step": 10990 + }, + { + "grad_norm": 0.22023805975914001, + "learning_rate": 7.50047730043252e-05, + "loss": 0.0415, + "step": 11000 + }, + { + "grad_norm": 0.3040928840637207, + "learning_rate": 7.495702929822183e-05, + "loss": 0.0423, + "step": 11010 + }, + { + "grad_norm": 0.23407776653766632, + "learning_rate": 7.490925526697455e-05, + "loss": 0.046, + "step": 11020 + }, + { + "grad_norm": 0.30900394916534424, + "learning_rate": 7.486145096863334e-05, + "loss": 0.0547, + "step": 11030 + }, + { + "grad_norm": 0.2530072331428528, + "learning_rate": 7.481361646128491e-05, + "loss": 0.0471, + "step": 11040 + }, + { + "grad_norm": 0.3361715078353882, + "learning_rate": 7.476575180305271e-05, + "loss": 0.0399, + "step": 11050 + }, + { + "grad_norm": 0.24628327786922455, + "learning_rate": 7.471785705209682e-05, + "loss": 0.0576, + "step": 11060 + }, + { + "grad_norm": 0.28060218691825867, + "learning_rate": 7.466993226661387e-05, + "loss": 0.0424, + "step": 11070 + }, + { + "grad_norm": 0.2367982268333435, + "learning_rate": 7.462197750483699e-05, + "loss": 0.0508, + "step": 11080 + }, + { + "grad_norm": 0.3303638696670532, + "learning_rate": 7.457399282503574e-05, + "loss": 0.0475, + "step": 11090 + }, + { + "grad_norm": 0.2684824466705322, + "learning_rate": 7.452597828551604e-05, + "loss": 0.0545, + "step": 11100 + }, + { + "grad_norm": 0.33203116059303284, + "learning_rate": 7.447793394462006e-05, + "loss": 0.0661, + "step": 11110 + }, + { + "grad_norm": 0.27813443541526794, + "learning_rate": 7.442985986072624e-05, + "loss": 0.0485, + "step": 11120 + }, + { + "grad_norm": 0.20397348701953888, + "learning_rate": 7.438175609224908e-05, + "loss": 0.0651, + "step": 11130 + }, + { + "grad_norm": 0.29070982336997986, + "learning_rate": 7.433362269763924e-05, + "loss": 0.0483, + "step": 11140 + }, + { + "grad_norm": 0.30380284786224365, + "learning_rate": 7.428545973538329e-05, + "loss": 0.0362, + "step": 11150 + }, + { + "grad_norm": 0.21173126995563507, + "learning_rate": 7.42372672640038e-05, + "loss": 0.0441, + "step": 11160 + }, + { + "grad_norm": 0.29563266038894653, + "learning_rate": 7.418904534205917e-05, + "loss": 0.0475, + "step": 11170 + }, + { + "grad_norm": 0.30811402201652527, + "learning_rate": 7.414079402814356e-05, + "loss": 0.0569, + "step": 11180 + }, + { + "grad_norm": 0.3020748496055603, + "learning_rate": 7.409251338088687e-05, + "loss": 0.0615, + "step": 11190 + }, + { + "grad_norm": 0.27788203954696655, + "learning_rate": 7.404420345895467e-05, + "loss": 0.0526, + "step": 11200 + }, + { + "grad_norm": 0.3455301821231842, + "learning_rate": 7.399586432104804e-05, + "loss": 0.0603, + "step": 11210 + }, + { + "grad_norm": 0.23239581286907196, + "learning_rate": 7.394749602590358e-05, + "loss": 0.0494, + "step": 11220 + }, + { + "grad_norm": 0.2696135342121124, + "learning_rate": 7.389909863229336e-05, + "loss": 0.0632, + "step": 11230 + }, + { + "grad_norm": 0.2566918134689331, + "learning_rate": 7.385067219902477e-05, + "loss": 0.0502, + "step": 11240 + }, + { + "grad_norm": 0.2538367211818695, + "learning_rate": 7.380221678494049e-05, + "loss": 0.0372, + "step": 11250 + }, + { + "grad_norm": 0.24516938626766205, + "learning_rate": 7.37537324489184e-05, + "loss": 0.0384, + "step": 11260 + }, + { + "grad_norm": 0.22987090051174164, + "learning_rate": 7.370521924987155e-05, + "loss": 0.05, + "step": 11270 + }, + { + "grad_norm": 0.3568144142627716, + "learning_rate": 7.365667724674806e-05, + "loss": 0.0676, + "step": 11280 + }, + { + "grad_norm": 0.3328545093536377, + "learning_rate": 7.360810649853105e-05, + "loss": 0.0571, + "step": 11290 + }, + { + "grad_norm": 0.2935340404510498, + "learning_rate": 7.355950706423853e-05, + "loss": 0.0539, + "step": 11300 + }, + { + "grad_norm": 0.28663113713264465, + "learning_rate": 7.351087900292342e-05, + "loss": 0.0582, + "step": 11310 + }, + { + "grad_norm": 0.24561890959739685, + "learning_rate": 7.346222237367339e-05, + "loss": 0.0526, + "step": 11320 + }, + { + "grad_norm": 0.2559717297554016, + "learning_rate": 7.341353723561081e-05, + "loss": 0.0526, + "step": 11330 + }, + { + "grad_norm": 0.17693690955638885, + "learning_rate": 7.336482364789277e-05, + "loss": 0.0296, + "step": 11340 + }, + { + "grad_norm": 0.22005753219127655, + "learning_rate": 7.331608166971082e-05, + "loss": 0.0572, + "step": 11350 + }, + { + "grad_norm": 0.26099079847335815, + "learning_rate": 7.326731136029108e-05, + "loss": 0.0399, + "step": 11360 + }, + { + "grad_norm": 0.23998157680034637, + "learning_rate": 7.321851277889408e-05, + "loss": 0.0394, + "step": 11370 + }, + { + "grad_norm": 0.236002117395401, + "learning_rate": 7.316968598481469e-05, + "loss": 0.041, + "step": 11380 + }, + { + "grad_norm": 0.29364362359046936, + "learning_rate": 7.312083103738206e-05, + "loss": 0.0585, + "step": 11390 + }, + { + "grad_norm": 0.26120659708976746, + "learning_rate": 7.307194799595958e-05, + "loss": 0.0564, + "step": 11400 + }, + { + "grad_norm": 0.2911699116230011, + "learning_rate": 7.302303691994473e-05, + "loss": 0.0483, + "step": 11410 + }, + { + "grad_norm": 0.21139676868915558, + "learning_rate": 7.29740978687691e-05, + "loss": 0.0577, + "step": 11420 + }, + { + "grad_norm": 0.23853741586208344, + "learning_rate": 7.292513090189825e-05, + "loss": 0.039, + "step": 11430 + }, + { + "grad_norm": 0.25128620862960815, + "learning_rate": 7.287613607883163e-05, + "loss": 0.0396, + "step": 11440 + }, + { + "grad_norm": 0.25657713413238525, + "learning_rate": 7.282711345910263e-05, + "loss": 0.0418, + "step": 11450 + }, + { + "grad_norm": 0.27613696455955505, + "learning_rate": 7.27780631022783e-05, + "loss": 0.0486, + "step": 11460 + }, + { + "grad_norm": 0.3475509583950043, + "learning_rate": 7.272898506795948e-05, + "loss": 0.043, + "step": 11470 + }, + { + "grad_norm": 0.20536865293979645, + "learning_rate": 7.267987941578058e-05, + "loss": 0.0372, + "step": 11480 + }, + { + "grad_norm": 0.2695615589618683, + "learning_rate": 7.263074620540962e-05, + "loss": 0.0543, + "step": 11490 + }, + { + "grad_norm": 0.46835342049598694, + "learning_rate": 7.25815854965481e-05, + "loss": 0.0447, + "step": 11500 + }, + { + "grad_norm": 0.20317630469799042, + "learning_rate": 7.253239734893089e-05, + "loss": 0.0347, + "step": 11510 + }, + { + "grad_norm": 0.21950574219226837, + "learning_rate": 7.248318182232623e-05, + "loss": 0.0443, + "step": 11520 + }, + { + "grad_norm": 0.2229817807674408, + "learning_rate": 7.243393897653565e-05, + "loss": 0.0389, + "step": 11530 + }, + { + "grad_norm": 0.23480597138404846, + "learning_rate": 7.23846688713938e-05, + "loss": 0.0431, + "step": 11540 + }, + { + "grad_norm": 0.2717709243297577, + "learning_rate": 7.233537156676854e-05, + "loss": 0.0509, + "step": 11550 + }, + { + "grad_norm": 0.22844643890857697, + "learning_rate": 7.228604712256076e-05, + "loss": 0.0433, + "step": 11560 + }, + { + "grad_norm": 0.2866854667663574, + "learning_rate": 7.223669559870426e-05, + "loss": 0.0377, + "step": 11570 + }, + { + "grad_norm": 0.2501957416534424, + "learning_rate": 7.218731705516585e-05, + "loss": 0.044, + "step": 11580 + }, + { + "grad_norm": 0.29818347096443176, + "learning_rate": 7.21379115519451e-05, + "loss": 0.0497, + "step": 11590 + }, + { + "grad_norm": 0.2898947298526764, + "learning_rate": 7.20884791490743e-05, + "loss": 0.0443, + "step": 11600 + }, + { + "grad_norm": 0.20588861405849457, + "learning_rate": 7.203901990661857e-05, + "loss": 0.0505, + "step": 11610 + }, + { + "grad_norm": 0.2993820905685425, + "learning_rate": 7.198953388467549e-05, + "loss": 0.0558, + "step": 11620 + }, + { + "grad_norm": 0.2154010832309723, + "learning_rate": 7.194002114337526e-05, + "loss": 0.0597, + "step": 11630 + }, + { + "grad_norm": 0.25284940004348755, + "learning_rate": 7.189048174288054e-05, + "loss": 0.0369, + "step": 11640 + }, + { + "grad_norm": 0.26337292790412903, + "learning_rate": 7.184091574338636e-05, + "loss": 0.0403, + "step": 11650 + }, + { + "grad_norm": 0.30319035053253174, + "learning_rate": 7.179132320512009e-05, + "loss": 0.035, + "step": 11660 + }, + { + "grad_norm": 0.2818997800350189, + "learning_rate": 7.174170418834134e-05, + "loss": 0.0383, + "step": 11670 + }, + { + "grad_norm": 0.24978923797607422, + "learning_rate": 7.169205875334189e-05, + "loss": 0.0478, + "step": 11680 + }, + { + "grad_norm": 0.28280794620513916, + "learning_rate": 7.164238696044562e-05, + "loss": 0.0449, + "step": 11690 + }, + { + "grad_norm": 0.22627778351306915, + "learning_rate": 7.159268887000846e-05, + "loss": 0.0427, + "step": 11700 + }, + { + "grad_norm": 0.24511271715164185, + "learning_rate": 7.154296454241827e-05, + "loss": 0.0323, + "step": 11710 + }, + { + "grad_norm": 0.2262425273656845, + "learning_rate": 7.149321403809479e-05, + "loss": 0.0398, + "step": 11720 + }, + { + "grad_norm": 0.19349408149719238, + "learning_rate": 7.144343741748956e-05, + "loss": 0.0287, + "step": 11730 + }, + { + "grad_norm": 0.20254862308502197, + "learning_rate": 7.139363474108589e-05, + "loss": 0.0389, + "step": 11740 + }, + { + "grad_norm": 0.355533629655838, + "learning_rate": 7.134380606939874e-05, + "loss": 0.0459, + "step": 11750 + }, + { + "grad_norm": 0.21760597825050354, + "learning_rate": 7.12939514629746e-05, + "loss": 0.0339, + "step": 11760 + }, + { + "grad_norm": 0.19992859661579132, + "learning_rate": 7.124407098239155e-05, + "loss": 0.0534, + "step": 11770 + }, + { + "grad_norm": 0.2798827290534973, + "learning_rate": 7.119416468825908e-05, + "loss": 0.0336, + "step": 11780 + }, + { + "grad_norm": 0.25921034812927246, + "learning_rate": 7.114423264121804e-05, + "loss": 0.0396, + "step": 11790 + }, + { + "grad_norm": 0.20574228465557098, + "learning_rate": 7.109427490194056e-05, + "loss": 0.0449, + "step": 11800 + }, + { + "grad_norm": 0.21406352519989014, + "learning_rate": 7.104429153113001e-05, + "loss": 0.0572, + "step": 11810 + }, + { + "grad_norm": 0.24189870059490204, + "learning_rate": 7.099428258952092e-05, + "loss": 0.0423, + "step": 11820 + }, + { + "grad_norm": 0.26482176780700684, + "learning_rate": 7.094424813787883e-05, + "loss": 0.0447, + "step": 11830 + }, + { + "grad_norm": 0.3032524585723877, + "learning_rate": 7.089418823700034e-05, + "loss": 0.0387, + "step": 11840 + }, + { + "grad_norm": 0.17934875190258026, + "learning_rate": 7.084410294771298e-05, + "loss": 0.042, + "step": 11850 + }, + { + "grad_norm": 0.2308870404958725, + "learning_rate": 7.079399233087504e-05, + "loss": 0.0548, + "step": 11860 + }, + { + "grad_norm": 0.2603908181190491, + "learning_rate": 7.074385644737568e-05, + "loss": 0.0456, + "step": 11870 + }, + { + "grad_norm": 0.198285773396492, + "learning_rate": 7.069369535813473e-05, + "loss": 0.0541, + "step": 11880 + }, + { + "grad_norm": 0.19239330291748047, + "learning_rate": 7.06435091241026e-05, + "loss": 0.0409, + "step": 11890 + }, + { + "grad_norm": 0.2985738217830658, + "learning_rate": 7.059329780626034e-05, + "loss": 0.0548, + "step": 11900 + }, + { + "grad_norm": 0.270248681306839, + "learning_rate": 7.054306146561944e-05, + "loss": 0.0487, + "step": 11910 + }, + { + "grad_norm": 0.2968447804450989, + "learning_rate": 7.049280016322178e-05, + "loss": 0.0448, + "step": 11920 + }, + { + "grad_norm": 0.309084415435791, + "learning_rate": 7.044251396013957e-05, + "loss": 0.0478, + "step": 11930 + }, + { + "grad_norm": 0.29188257455825806, + "learning_rate": 7.039220291747528e-05, + "loss": 0.0404, + "step": 11940 + }, + { + "grad_norm": 0.22276915609836578, + "learning_rate": 7.034186709636159e-05, + "loss": 0.0391, + "step": 11950 + }, + { + "grad_norm": 0.24379786849021912, + "learning_rate": 7.029150655796129e-05, + "loss": 0.0444, + "step": 11960 + }, + { + "grad_norm": 0.29198282957077026, + "learning_rate": 7.024112136346712e-05, + "loss": 0.0495, + "step": 11970 + }, + { + "grad_norm": 0.29257962107658386, + "learning_rate": 7.01907115741019e-05, + "loss": 0.0494, + "step": 11980 + }, + { + "grad_norm": 0.19931025803089142, + "learning_rate": 7.014027725111825e-05, + "loss": 0.0542, + "step": 11990 + }, + { + "grad_norm": 0.30612480640411377, + "learning_rate": 7.008981845579864e-05, + "loss": 0.0532, + "step": 12000 + }, + { + "grad_norm": 0.23519417643547058, + "learning_rate": 7.003933524945528e-05, + "loss": 0.0463, + "step": 12010 + }, + { + "grad_norm": 0.32575276494026184, + "learning_rate": 6.998882769342998e-05, + "loss": 0.0405, + "step": 12020 + }, + { + "grad_norm": 0.20692406594753265, + "learning_rate": 6.993829584909423e-05, + "loss": 0.0465, + "step": 12030 + }, + { + "grad_norm": 0.3243485689163208, + "learning_rate": 6.988773977784895e-05, + "loss": 0.0542, + "step": 12040 + }, + { + "grad_norm": 0.35817739367485046, + "learning_rate": 6.983715954112454e-05, + "loss": 0.0603, + "step": 12050 + }, + { + "grad_norm": 0.3146810233592987, + "learning_rate": 6.978655520038079e-05, + "loss": 0.0587, + "step": 12060 + }, + { + "grad_norm": 0.24292369186878204, + "learning_rate": 6.97359268171067e-05, + "loss": 0.0753, + "step": 12070 + }, + { + "grad_norm": 0.3394567370414734, + "learning_rate": 6.968527445282056e-05, + "loss": 0.052, + "step": 12080 + }, + { + "grad_norm": 0.20344555377960205, + "learning_rate": 6.963459816906977e-05, + "loss": 0.0434, + "step": 12090 + }, + { + "grad_norm": 0.26246607303619385, + "learning_rate": 6.958389802743077e-05, + "loss": 0.0605, + "step": 12100 + }, + { + "grad_norm": 0.3500259220600128, + "learning_rate": 6.953317408950903e-05, + "loss": 0.0553, + "step": 12110 + }, + { + "grad_norm": 0.2843182384967804, + "learning_rate": 6.94824264169389e-05, + "loss": 0.0496, + "step": 12120 + }, + { + "grad_norm": 0.2245684415102005, + "learning_rate": 6.94316550713836e-05, + "loss": 0.0563, + "step": 12130 + }, + { + "grad_norm": 0.2734261155128479, + "learning_rate": 6.938086011453513e-05, + "loss": 0.0493, + "step": 12140 + }, + { + "grad_norm": 0.27631089091300964, + "learning_rate": 6.93300416081141e-05, + "loss": 0.055, + "step": 12150 + }, + { + "grad_norm": 0.2564859688282013, + "learning_rate": 6.927919961386984e-05, + "loss": 0.0466, + "step": 12160 + }, + { + "grad_norm": 0.27902746200561523, + "learning_rate": 6.922833419358013e-05, + "loss": 0.0496, + "step": 12170 + }, + { + "grad_norm": 0.281280517578125, + "learning_rate": 6.917744540905124e-05, + "loss": 0.0658, + "step": 12180 + }, + { + "grad_norm": 0.3388251066207886, + "learning_rate": 6.912653332211787e-05, + "loss": 0.0503, + "step": 12190 + }, + { + "grad_norm": 0.25451093912124634, + "learning_rate": 6.9075597994643e-05, + "loss": 0.0726, + "step": 12200 + }, + { + "grad_norm": 0.20979827642440796, + "learning_rate": 6.902463948851787e-05, + "loss": 0.0761, + "step": 12210 + }, + { + "grad_norm": 0.29862359166145325, + "learning_rate": 6.897365786566184e-05, + "loss": 0.0512, + "step": 12220 + }, + { + "grad_norm": 0.22309713065624237, + "learning_rate": 6.892265318802242e-05, + "loss": 0.0585, + "step": 12230 + }, + { + "grad_norm": 0.23608781397342682, + "learning_rate": 6.887162551757507e-05, + "loss": 0.0467, + "step": 12240 + }, + { + "grad_norm": 0.223038449883461, + "learning_rate": 6.882057491632326e-05, + "loss": 0.0648, + "step": 12250 + }, + { + "grad_norm": 0.3235296607017517, + "learning_rate": 6.876950144629824e-05, + "loss": 0.0674, + "step": 12260 + }, + { + "grad_norm": 0.2999056875705719, + "learning_rate": 6.87184051695591e-05, + "loss": 0.0492, + "step": 12270 + }, + { + "grad_norm": 0.27466267347335815, + "learning_rate": 6.866728614819268e-05, + "loss": 0.0505, + "step": 12280 + }, + { + "grad_norm": 0.2942052483558655, + "learning_rate": 6.861614444431337e-05, + "loss": 0.0544, + "step": 12290 + }, + { + "grad_norm": 0.2369472086429596, + "learning_rate": 6.856498012006318e-05, + "loss": 0.0391, + "step": 12300 + }, + { + "grad_norm": 0.2866470217704773, + "learning_rate": 6.851379323761157e-05, + "loss": 0.0501, + "step": 12310 + }, + { + "grad_norm": 0.28425905108451843, + "learning_rate": 6.846258385915545e-05, + "loss": 0.0558, + "step": 12320 + }, + { + "grad_norm": 0.17640024423599243, + "learning_rate": 6.841135204691902e-05, + "loss": 0.0427, + "step": 12330 + }, + { + "grad_norm": 0.26754412055015564, + "learning_rate": 6.836009786315377e-05, + "loss": 0.0525, + "step": 12340 + }, + { + "grad_norm": 0.25522381067276, + "learning_rate": 6.830882137013839e-05, + "loss": 0.0454, + "step": 12350 + }, + { + "grad_norm": 0.2924649119377136, + "learning_rate": 6.825752263017863e-05, + "loss": 0.0604, + "step": 12360 + }, + { + "grad_norm": 0.2609576880931854, + "learning_rate": 6.820620170560731e-05, + "loss": 0.0429, + "step": 12370 + }, + { + "grad_norm": 0.2827361226081848, + "learning_rate": 6.815485865878418e-05, + "loss": 0.0522, + "step": 12380 + }, + { + "grad_norm": 0.31728923320770264, + "learning_rate": 6.810349355209588e-05, + "loss": 0.0574, + "step": 12390 + }, + { + "grad_norm": 0.2583361864089966, + "learning_rate": 6.805210644795588e-05, + "loss": 0.0594, + "step": 12400 + }, + { + "grad_norm": 0.34379732608795166, + "learning_rate": 6.800069740880433e-05, + "loss": 0.0645, + "step": 12410 + }, + { + "grad_norm": 0.2525900900363922, + "learning_rate": 6.794926649710807e-05, + "loss": 0.0433, + "step": 12420 + }, + { + "grad_norm": 0.36740368604660034, + "learning_rate": 6.789781377536052e-05, + "loss": 0.0633, + "step": 12430 + }, + { + "grad_norm": 0.30210140347480774, + "learning_rate": 6.784633930608158e-05, + "loss": 0.0622, + "step": 12440 + }, + { + "grad_norm": 0.2558841109275818, + "learning_rate": 6.779484315181759e-05, + "loss": 0.044, + "step": 12450 + }, + { + "grad_norm": 0.22508147358894348, + "learning_rate": 6.774332537514122e-05, + "loss": 0.0532, + "step": 12460 + }, + { + "grad_norm": 0.3017764389514923, + "learning_rate": 6.769178603865143e-05, + "loss": 0.0478, + "step": 12470 + }, + { + "grad_norm": 0.2352668195962906, + "learning_rate": 6.764022520497337e-05, + "loss": 0.0396, + "step": 12480 + }, + { + "grad_norm": 0.22109867632389069, + "learning_rate": 6.758864293675833e-05, + "loss": 0.0408, + "step": 12490 + }, + { + "grad_norm": 0.24820923805236816, + "learning_rate": 6.753703929668362e-05, + "loss": 0.048, + "step": 12500 + }, + { + "grad_norm": 0.2098865956068039, + "learning_rate": 6.748541434745253e-05, + "loss": 0.0599, + "step": 12510 + }, + { + "grad_norm": 0.31755331158638, + "learning_rate": 6.743376815179424e-05, + "loss": 0.0452, + "step": 12520 + }, + { + "grad_norm": 0.21638964116573334, + "learning_rate": 6.738210077246376e-05, + "loss": 0.0441, + "step": 12530 + }, + { + "grad_norm": 0.3185419738292694, + "learning_rate": 6.733041227224181e-05, + "loss": 0.0487, + "step": 12540 + }, + { + "grad_norm": 0.28742408752441406, + "learning_rate": 6.72787027139348e-05, + "loss": 0.0435, + "step": 12550 + }, + { + "grad_norm": 0.2878827452659607, + "learning_rate": 6.72269721603747e-05, + "loss": 0.0409, + "step": 12560 + }, + { + "grad_norm": 0.21383638679981232, + "learning_rate": 6.717522067441904e-05, + "loss": 0.0428, + "step": 12570 + }, + { + "grad_norm": 0.2186751812696457, + "learning_rate": 6.712344831895074e-05, + "loss": 0.0423, + "step": 12580 + }, + { + "grad_norm": 0.20645152032375336, + "learning_rate": 6.707165515687811e-05, + "loss": 0.0475, + "step": 12590 + }, + { + "grad_norm": 0.2761061489582062, + "learning_rate": 6.70198412511347e-05, + "loss": 0.0414, + "step": 12600 + }, + { + "grad_norm": 0.30351167917251587, + "learning_rate": 6.69680066646793e-05, + "loss": 0.044, + "step": 12610 + }, + { + "grad_norm": 0.22715266048908234, + "learning_rate": 6.691615146049583e-05, + "loss": 0.0395, + "step": 12620 + }, + { + "grad_norm": 0.176068514585495, + "learning_rate": 6.686427570159324e-05, + "loss": 0.029, + "step": 12630 + }, + { + "grad_norm": 0.295550674200058, + "learning_rate": 6.681237945100548e-05, + "loss": 0.0534, + "step": 12640 + }, + { + "grad_norm": 0.2280937284231186, + "learning_rate": 6.676046277179139e-05, + "loss": 0.0363, + "step": 12650 + }, + { + "grad_norm": 0.24965718388557434, + "learning_rate": 6.670852572703462e-05, + "loss": 0.0508, + "step": 12660 + }, + { + "grad_norm": 0.21874406933784485, + "learning_rate": 6.665656837984359e-05, + "loss": 0.043, + "step": 12670 + }, + { + "grad_norm": 0.262835830450058, + "learning_rate": 6.660459079335135e-05, + "loss": 0.0557, + "step": 12680 + }, + { + "grad_norm": 0.26292774081230164, + "learning_rate": 6.655259303071558e-05, + "loss": 0.0458, + "step": 12690 + }, + { + "grad_norm": 0.3098023533821106, + "learning_rate": 6.650057515511848e-05, + "loss": 0.0428, + "step": 12700 + }, + { + "grad_norm": 0.16780833899974823, + "learning_rate": 6.644853722976667e-05, + "loss": 0.0353, + "step": 12710 + }, + { + "grad_norm": 0.2477322518825531, + "learning_rate": 6.639647931789113e-05, + "loss": 0.0459, + "step": 12720 + }, + { + "grad_norm": 0.2533622980117798, + "learning_rate": 6.634440148274713e-05, + "loss": 0.0369, + "step": 12730 + }, + { + "grad_norm": 0.2946830689907074, + "learning_rate": 6.629230378761415e-05, + "loss": 0.0466, + "step": 12740 + }, + { + "grad_norm": 0.2583003044128418, + "learning_rate": 6.624018629579581e-05, + "loss": 0.0367, + "step": 12750 + }, + { + "grad_norm": 0.3376270532608032, + "learning_rate": 6.618804907061976e-05, + "loss": 0.0665, + "step": 12760 + }, + { + "grad_norm": 0.1828286349773407, + "learning_rate": 6.613589217543766e-05, + "loss": 0.0579, + "step": 12770 + }, + { + "grad_norm": 0.3600698411464691, + "learning_rate": 6.608371567362504e-05, + "loss": 0.0501, + "step": 12780 + }, + { + "grad_norm": 0.2311152219772339, + "learning_rate": 6.60315196285813e-05, + "loss": 0.045, + "step": 12790 + }, + { + "grad_norm": 0.2380017638206482, + "learning_rate": 6.597930410372954e-05, + "loss": 0.0466, + "step": 12800 + }, + { + "grad_norm": 0.1733928918838501, + "learning_rate": 6.592706916251653e-05, + "loss": 0.0452, + "step": 12810 + }, + { + "grad_norm": 0.23156650364398956, + "learning_rate": 6.587481486841267e-05, + "loss": 0.0483, + "step": 12820 + }, + { + "grad_norm": 0.2455449253320694, + "learning_rate": 6.582254128491184e-05, + "loss": 0.0415, + "step": 12830 + }, + { + "grad_norm": 0.27493777871131897, + "learning_rate": 6.577024847553139e-05, + "loss": 0.038, + "step": 12840 + }, + { + "grad_norm": 0.17757564783096313, + "learning_rate": 6.571793650381201e-05, + "loss": 0.0312, + "step": 12850 + }, + { + "grad_norm": 0.2842480540275574, + "learning_rate": 6.566560543331766e-05, + "loss": 0.0525, + "step": 12860 + }, + { + "grad_norm": 0.2238079160451889, + "learning_rate": 6.561325532763554e-05, + "loss": 0.0309, + "step": 12870 + }, + { + "grad_norm": 0.26320672035217285, + "learning_rate": 6.556088625037597e-05, + "loss": 0.033, + "step": 12880 + }, + { + "grad_norm": 0.18543098866939545, + "learning_rate": 6.55084982651723e-05, + "loss": 0.0371, + "step": 12890 + }, + { + "grad_norm": 0.26198095083236694, + "learning_rate": 6.54560914356809e-05, + "loss": 0.0393, + "step": 12900 + }, + { + "grad_norm": 0.17788954079151154, + "learning_rate": 6.540366582558097e-05, + "loss": 0.0296, + "step": 12910 + }, + { + "grad_norm": 0.209189772605896, + "learning_rate": 6.53512214985746e-05, + "loss": 0.0279, + "step": 12920 + }, + { + "grad_norm": 0.21414706110954285, + "learning_rate": 6.529875851838658e-05, + "loss": 0.0421, + "step": 12930 + }, + { + "grad_norm": 0.34116730093955994, + "learning_rate": 6.52462769487644e-05, + "loss": 0.0455, + "step": 12940 + }, + { + "grad_norm": 0.15826067328453064, + "learning_rate": 6.519377685347807e-05, + "loss": 0.0353, + "step": 12950 + }, + { + "grad_norm": 0.2656620442867279, + "learning_rate": 6.514125829632021e-05, + "loss": 0.0328, + "step": 12960 + }, + { + "grad_norm": 0.20195116102695465, + "learning_rate": 6.508872134110577e-05, + "loss": 0.0412, + "step": 12970 + }, + { + "grad_norm": 0.16112206876277924, + "learning_rate": 6.503616605167213e-05, + "loss": 0.0338, + "step": 12980 + }, + { + "grad_norm": 0.15051321685314178, + "learning_rate": 6.498359249187893e-05, + "loss": 0.0354, + "step": 12990 + }, + { + "grad_norm": 0.22857053577899933, + "learning_rate": 6.493100072560799e-05, + "loss": 0.0309, + "step": 13000 + }, + { + "grad_norm": 0.27193185687065125, + "learning_rate": 6.487839081676327e-05, + "loss": 0.06, + "step": 13010 + }, + { + "grad_norm": 0.22366248071193695, + "learning_rate": 6.482576282927076e-05, + "loss": 0.0419, + "step": 13020 + }, + { + "grad_norm": 0.3100625276565552, + "learning_rate": 6.477311682707845e-05, + "loss": 0.0566, + "step": 13030 + }, + { + "grad_norm": 0.2141428142786026, + "learning_rate": 6.472045287415616e-05, + "loss": 0.0348, + "step": 13040 + }, + { + "grad_norm": 0.25038474798202515, + "learning_rate": 6.466777103449558e-05, + "loss": 0.029, + "step": 13050 + }, + { + "grad_norm": 0.20111817121505737, + "learning_rate": 6.461507137211012e-05, + "loss": 0.0343, + "step": 13060 + }, + { + "grad_norm": 0.2026173621416092, + "learning_rate": 6.456235395103483e-05, + "loss": 0.0529, + "step": 13070 + }, + { + "grad_norm": 0.2870531678199768, + "learning_rate": 6.450961883532634e-05, + "loss": 0.0358, + "step": 13080 + }, + { + "grad_norm": 0.22877317667007446, + "learning_rate": 6.445686608906283e-05, + "loss": 0.0438, + "step": 13090 + }, + { + "grad_norm": 0.17588594555854797, + "learning_rate": 6.44040957763438e-05, + "loss": 0.0327, + "step": 13100 + }, + { + "grad_norm": 0.17877669632434845, + "learning_rate": 6.435130796129018e-05, + "loss": 0.0318, + "step": 13110 + }, + { + "grad_norm": 0.23388399183750153, + "learning_rate": 6.429850270804416e-05, + "loss": 0.0315, + "step": 13120 + }, + { + "grad_norm": 0.26680952310562134, + "learning_rate": 6.424568008076909e-05, + "loss": 0.0463, + "step": 13130 + }, + { + "grad_norm": 0.2454134076833725, + "learning_rate": 6.419284014364943e-05, + "loss": 0.0407, + "step": 13140 + }, + { + "grad_norm": 0.2529073655605316, + "learning_rate": 6.41399829608907e-05, + "loss": 0.0359, + "step": 13150 + }, + { + "grad_norm": 0.1678391844034195, + "learning_rate": 6.408710859671938e-05, + "loss": 0.0242, + "step": 13160 + }, + { + "grad_norm": 0.2166416347026825, + "learning_rate": 6.403421711538278e-05, + "loss": 0.0385, + "step": 13170 + }, + { + "grad_norm": 0.1579716056585312, + "learning_rate": 6.398130858114903e-05, + "loss": 0.0391, + "step": 13180 + }, + { + "grad_norm": 0.16446398198604584, + "learning_rate": 6.392838305830701e-05, + "loss": 0.0304, + "step": 13190 + }, + { + "grad_norm": 0.24959097802639008, + "learning_rate": 6.387544061116621e-05, + "loss": 0.0456, + "step": 13200 + }, + { + "grad_norm": 0.1959265172481537, + "learning_rate": 6.382248130405671e-05, + "loss": 0.0251, + "step": 13210 + }, + { + "grad_norm": 0.2797122895717621, + "learning_rate": 6.376950520132905e-05, + "loss": 0.0341, + "step": 13220 + }, + { + "grad_norm": 0.23010745644569397, + "learning_rate": 6.371651236735418e-05, + "loss": 0.0352, + "step": 13230 + }, + { + "grad_norm": 0.30470213294029236, + "learning_rate": 6.366350286652341e-05, + "loss": 0.0436, + "step": 13240 + }, + { + "grad_norm": 0.26293066143989563, + "learning_rate": 6.361047676324827e-05, + "loss": 0.0326, + "step": 13250 + }, + { + "grad_norm": 0.2388153076171875, + "learning_rate": 6.355743412196047e-05, + "loss": 0.0344, + "step": 13260 + }, + { + "grad_norm": 0.2736150622367859, + "learning_rate": 6.350437500711184e-05, + "loss": 0.0354, + "step": 13270 + }, + { + "grad_norm": 0.25424832105636597, + "learning_rate": 6.345129948317418e-05, + "loss": 0.0366, + "step": 13280 + }, + { + "grad_norm": 0.23282268643379211, + "learning_rate": 6.33982076146393e-05, + "loss": 0.036, + "step": 13290 + }, + { + "grad_norm": 0.19793224334716797, + "learning_rate": 6.334509946601879e-05, + "loss": 0.0331, + "step": 13300 + }, + { + "grad_norm": 0.22775475680828094, + "learning_rate": 6.329197510184406e-05, + "loss": 0.0344, + "step": 13310 + }, + { + "grad_norm": 0.3382323086261749, + "learning_rate": 6.323883458666624e-05, + "loss": 0.0475, + "step": 13320 + }, + { + "grad_norm": 0.24822965264320374, + "learning_rate": 6.318567798505605e-05, + "loss": 0.0299, + "step": 13330 + }, + { + "grad_norm": 0.30606934428215027, + "learning_rate": 6.313250536160378e-05, + "loss": 0.0379, + "step": 13340 + }, + { + "grad_norm": 0.2354898601770401, + "learning_rate": 6.307931678091917e-05, + "loss": 0.027, + "step": 13350 + }, + { + "grad_norm": 0.14283324778079987, + "learning_rate": 6.302611230763138e-05, + "loss": 0.0337, + "step": 13360 + }, + { + "grad_norm": 0.2780652940273285, + "learning_rate": 6.297289200638887e-05, + "loss": 0.034, + "step": 13370 + }, + { + "grad_norm": 0.3744414448738098, + "learning_rate": 6.29196559418593e-05, + "loss": 0.0373, + "step": 13380 + }, + { + "grad_norm": 0.27270567417144775, + "learning_rate": 6.286640417872951e-05, + "loss": 0.0363, + "step": 13390 + }, + { + "grad_norm": 0.19098936021327972, + "learning_rate": 6.281313678170542e-05, + "loss": 0.0422, + "step": 13400 + }, + { + "grad_norm": 0.21677039563655853, + "learning_rate": 6.275985381551194e-05, + "loss": 0.0326, + "step": 13410 + }, + { + "grad_norm": 0.18409425020217896, + "learning_rate": 6.270655534489292e-05, + "loss": 0.031, + "step": 13420 + }, + { + "grad_norm": 0.2627837359905243, + "learning_rate": 6.265324143461098e-05, + "loss": 0.0234, + "step": 13430 + }, + { + "grad_norm": 0.18557144701480865, + "learning_rate": 6.259991214944758e-05, + "loss": 0.0307, + "step": 13440 + }, + { + "grad_norm": 0.3057144582271576, + "learning_rate": 6.254656755420283e-05, + "loss": 0.048, + "step": 13450 + }, + { + "grad_norm": 0.1474284678697586, + "learning_rate": 6.249320771369545e-05, + "loss": 0.0496, + "step": 13460 + }, + { + "grad_norm": 0.18485069274902344, + "learning_rate": 6.243983269276263e-05, + "loss": 0.0564, + "step": 13470 + }, + { + "grad_norm": 0.22214427590370178, + "learning_rate": 6.238644255626012e-05, + "loss": 0.0455, + "step": 13480 + }, + { + "grad_norm": 0.1736307591199875, + "learning_rate": 6.233303736906193e-05, + "loss": 0.0355, + "step": 13490 + }, + { + "grad_norm": 0.20857229828834534, + "learning_rate": 6.227961719606039e-05, + "loss": 0.0286, + "step": 13500 + }, + { + "grad_norm": 0.20561181008815765, + "learning_rate": 6.222618210216608e-05, + "loss": 0.041, + "step": 13510 + }, + { + "grad_norm": 0.1499837338924408, + "learning_rate": 6.217273215230767e-05, + "loss": 0.0374, + "step": 13520 + }, + { + "grad_norm": 0.18792781233787537, + "learning_rate": 6.211926741143188e-05, + "loss": 0.0489, + "step": 13530 + }, + { + "grad_norm": 0.2103004902601242, + "learning_rate": 6.20657879445034e-05, + "loss": 0.0426, + "step": 13540 + }, + { + "grad_norm": 0.2041206657886505, + "learning_rate": 6.201229381650485e-05, + "loss": 0.0265, + "step": 13550 + }, + { + "grad_norm": 0.2430030256509781, + "learning_rate": 6.195878509243661e-05, + "loss": 0.0431, + "step": 13560 + }, + { + "grad_norm": 0.22608217597007751, + "learning_rate": 6.190526183731685e-05, + "loss": 0.0364, + "step": 13570 + }, + { + "grad_norm": 0.23398171365261078, + "learning_rate": 6.185172411618138e-05, + "loss": 0.0337, + "step": 13580 + }, + { + "grad_norm": 0.21345075964927673, + "learning_rate": 6.179817199408355e-05, + "loss": 0.039, + "step": 13590 + }, + { + "grad_norm": 0.21894922852516174, + "learning_rate": 6.174460553609426e-05, + "loss": 0.0675, + "step": 13600 + }, + { + "grad_norm": 0.20099259912967682, + "learning_rate": 6.16910248073018e-05, + "loss": 0.0277, + "step": 13610 + }, + { + "grad_norm": 0.22411102056503296, + "learning_rate": 6.16374298728118e-05, + "loss": 0.0502, + "step": 13620 + }, + { + "grad_norm": 0.18892306089401245, + "learning_rate": 6.158382079774716e-05, + "loss": 0.0356, + "step": 13630 + }, + { + "grad_norm": 0.23678375780582428, + "learning_rate": 6.153019764724799e-05, + "loss": 0.0339, + "step": 13640 + }, + { + "grad_norm": 0.17481276392936707, + "learning_rate": 6.147656048647144e-05, + "loss": 0.0258, + "step": 13650 + }, + { + "grad_norm": 0.32484325766563416, + "learning_rate": 6.142290938059173e-05, + "loss": 0.0435, + "step": 13660 + }, + { + "grad_norm": 0.20631511509418488, + "learning_rate": 6.136924439480001e-05, + "loss": 0.0308, + "step": 13670 + }, + { + "grad_norm": 0.2821146845817566, + "learning_rate": 6.13155655943043e-05, + "loss": 0.0435, + "step": 13680 + }, + { + "grad_norm": 0.3278275728225708, + "learning_rate": 6.126187304432941e-05, + "loss": 0.0445, + "step": 13690 + }, + { + "grad_norm": 0.25184667110443115, + "learning_rate": 6.120816681011684e-05, + "loss": 0.0344, + "step": 13700 + }, + { + "grad_norm": 0.2556883692741394, + "learning_rate": 6.115444695692474e-05, + "loss": 0.0392, + "step": 13710 + }, + { + "grad_norm": 0.162822425365448, + "learning_rate": 6.110071355002779e-05, + "loss": 0.0299, + "step": 13720 + }, + { + "grad_norm": 0.20809780061244965, + "learning_rate": 6.104696665471714e-05, + "loss": 0.031, + "step": 13730 + }, + { + "grad_norm": 0.19980672001838684, + "learning_rate": 6.099320633630036e-05, + "loss": 0.036, + "step": 13740 + }, + { + "grad_norm": 0.22772574424743652, + "learning_rate": 6.093943266010128e-05, + "loss": 0.0346, + "step": 13750 + }, + { + "grad_norm": 0.26900139451026917, + "learning_rate": 6.088564569146e-05, + "loss": 0.052, + "step": 13760 + }, + { + "grad_norm": 0.23240606486797333, + "learning_rate": 6.083184549573274e-05, + "loss": 0.0384, + "step": 13770 + }, + { + "grad_norm": 0.19179531931877136, + "learning_rate": 6.077803213829184e-05, + "loss": 0.0355, + "step": 13780 + }, + { + "grad_norm": 0.24418708682060242, + "learning_rate": 6.0724205684525594e-05, + "loss": 0.0396, + "step": 13790 + }, + { + "grad_norm": 0.2597847878932953, + "learning_rate": 6.0670366199838215e-05, + "loss": 0.0332, + "step": 13800 + }, + { + "grad_norm": 0.16759580373764038, + "learning_rate": 6.061651374964974e-05, + "loss": 0.0428, + "step": 13810 + }, + { + "grad_norm": 0.16963791847229004, + "learning_rate": 6.0562648399396004e-05, + "loss": 0.0362, + "step": 13820 + }, + { + "grad_norm": 0.23696628212928772, + "learning_rate": 6.050877021452845e-05, + "loss": 0.0468, + "step": 13830 + }, + { + "grad_norm": 0.220236673951149, + "learning_rate": 6.04548792605142e-05, + "loss": 0.0413, + "step": 13840 + }, + { + "grad_norm": 0.2609077990055084, + "learning_rate": 6.04009756028358e-05, + "loss": 0.0325, + "step": 13850 + }, + { + "grad_norm": 0.1692759245634079, + "learning_rate": 6.0347059306991294e-05, + "loss": 0.0422, + "step": 13860 + }, + { + "grad_norm": 0.21413341164588928, + "learning_rate": 6.029313043849407e-05, + "loss": 0.0441, + "step": 13870 + }, + { + "grad_norm": 0.19262272119522095, + "learning_rate": 6.023918906287279e-05, + "loss": 0.0328, + "step": 13880 + }, + { + "grad_norm": 0.33383285999298096, + "learning_rate": 6.01852352456713e-05, + "loss": 0.0574, + "step": 13890 + }, + { + "grad_norm": 0.19612327218055725, + "learning_rate": 6.013126905244858e-05, + "loss": 0.0324, + "step": 13900 + }, + { + "grad_norm": 0.2641826272010803, + "learning_rate": 6.007729054877864e-05, + "loss": 0.0453, + "step": 13910 + }, + { + "grad_norm": 0.2316683828830719, + "learning_rate": 6.0023299800250466e-05, + "loss": 0.0443, + "step": 13920 + }, + { + "grad_norm": 0.29425105452537537, + "learning_rate": 5.9969296872467894e-05, + "loss": 0.0424, + "step": 13930 + }, + { + "grad_norm": 0.23025363683700562, + "learning_rate": 5.9915281831049585e-05, + "loss": 0.0418, + "step": 13940 + }, + { + "grad_norm": 0.24371972680091858, + "learning_rate": 5.98612547416289e-05, + "loss": 0.0461, + "step": 13950 + }, + { + "grad_norm": 0.2405940145254135, + "learning_rate": 5.9807215669853855e-05, + "loss": 0.0511, + "step": 13960 + }, + { + "grad_norm": 0.2209792286157608, + "learning_rate": 5.9753164681387e-05, + "loss": 0.066, + "step": 13970 + }, + { + "grad_norm": 0.25722190737724304, + "learning_rate": 5.969910184190539e-05, + "loss": 0.051, + "step": 13980 + }, + { + "grad_norm": 0.22887277603149414, + "learning_rate": 5.9645027217100477e-05, + "loss": 0.0515, + "step": 13990 + }, + { + "grad_norm": 0.24642027914524078, + "learning_rate": 5.959094087267804e-05, + "loss": 0.0544, + "step": 14000 + }, + { + "grad_norm": 0.18771304190158844, + "learning_rate": 5.9536842874358065e-05, + "loss": 0.0442, + "step": 14010 + }, + { + "grad_norm": 0.2672604024410248, + "learning_rate": 5.9482733287874734e-05, + "loss": 0.0498, + "step": 14020 + }, + { + "grad_norm": 0.24038641154766083, + "learning_rate": 5.942861217897631e-05, + "loss": 0.0634, + "step": 14030 + }, + { + "grad_norm": 0.27619436383247375, + "learning_rate": 5.9374479613425e-05, + "loss": 0.042, + "step": 14040 + }, + { + "grad_norm": 0.23460906744003296, + "learning_rate": 5.932033565699704e-05, + "loss": 0.0389, + "step": 14050 + }, + { + "grad_norm": 0.15453466773033142, + "learning_rate": 5.926618037548237e-05, + "loss": 0.0505, + "step": 14060 + }, + { + "grad_norm": 0.25585466623306274, + "learning_rate": 5.9212013834684824e-05, + "loss": 0.0533, + "step": 14070 + }, + { + "grad_norm": 0.25934070348739624, + "learning_rate": 5.9157836100421824e-05, + "loss": 0.0479, + "step": 14080 + }, + { + "grad_norm": 0.20040905475616455, + "learning_rate": 5.910364723852444e-05, + "loss": 0.0428, + "step": 14090 + }, + { + "grad_norm": 0.1816982477903366, + "learning_rate": 5.904944731483723e-05, + "loss": 0.0509, + "step": 14100 + }, + { + "grad_norm": 0.2069862335920334, + "learning_rate": 5.899523639521825e-05, + "loss": 0.0439, + "step": 14110 + }, + { + "grad_norm": 0.22540567815303802, + "learning_rate": 5.894101454553883e-05, + "loss": 0.0418, + "step": 14120 + }, + { + "grad_norm": 0.33092325925827026, + "learning_rate": 5.888678183168368e-05, + "loss": 0.0533, + "step": 14130 + }, + { + "grad_norm": 0.24165059626102448, + "learning_rate": 5.883253831955061e-05, + "loss": 0.0537, + "step": 14140 + }, + { + "grad_norm": 0.23912487924098969, + "learning_rate": 5.8778284075050625e-05, + "loss": 0.0391, + "step": 14150 + }, + { + "grad_norm": 0.2884124517440796, + "learning_rate": 5.872401916410777e-05, + "loss": 0.056, + "step": 14160 + }, + { + "grad_norm": 0.23026712238788605, + "learning_rate": 5.8669743652659014e-05, + "loss": 0.048, + "step": 14170 + }, + { + "grad_norm": 0.1751270890235901, + "learning_rate": 5.86154576066542e-05, + "loss": 0.0447, + "step": 14180 + }, + { + "grad_norm": 0.2757657766342163, + "learning_rate": 5.856116109205602e-05, + "loss": 0.0525, + "step": 14190 + }, + { + "grad_norm": 0.2318994551897049, + "learning_rate": 5.850685417483983e-05, + "loss": 0.0443, + "step": 14200 + }, + { + "grad_norm": 0.202314093708992, + "learning_rate": 5.8452536920993684e-05, + "loss": 0.0478, + "step": 14210 + }, + { + "grad_norm": 0.2020840346813202, + "learning_rate": 5.839820939651817e-05, + "loss": 0.0546, + "step": 14220 + }, + { + "grad_norm": 0.2128271758556366, + "learning_rate": 5.834387166742632e-05, + "loss": 0.0474, + "step": 14230 + }, + { + "grad_norm": 0.26261764764785767, + "learning_rate": 5.828952379974364e-05, + "loss": 0.046, + "step": 14240 + }, + { + "grad_norm": 0.2828563451766968, + "learning_rate": 5.8235165859507864e-05, + "loss": 0.0548, + "step": 14250 + }, + { + "grad_norm": 0.21041357517242432, + "learning_rate": 5.818079791276907e-05, + "loss": 0.0435, + "step": 14260 + }, + { + "grad_norm": 0.1936400830745697, + "learning_rate": 5.8126420025589415e-05, + "loss": 0.0536, + "step": 14270 + }, + { + "grad_norm": 0.2391810268163681, + "learning_rate": 5.807203226404313e-05, + "loss": 0.0367, + "step": 14280 + }, + { + "grad_norm": 0.18931831419467926, + "learning_rate": 5.801763469421652e-05, + "loss": 0.0315, + "step": 14290 + }, + { + "grad_norm": 0.19822530448436737, + "learning_rate": 5.796322738220774e-05, + "loss": 0.0463, + "step": 14300 + }, + { + "grad_norm": 0.18052971363067627, + "learning_rate": 5.79088103941268e-05, + "loss": 0.0319, + "step": 14310 + }, + { + "grad_norm": 0.18759305775165558, + "learning_rate": 5.785438379609549e-05, + "loss": 0.0529, + "step": 14320 + }, + { + "grad_norm": 0.24209752678871155, + "learning_rate": 5.779994765424724e-05, + "loss": 0.0379, + "step": 14330 + }, + { + "grad_norm": 0.2872958481311798, + "learning_rate": 5.7745502034727114e-05, + "loss": 0.0387, + "step": 14340 + }, + { + "grad_norm": 0.23387618362903595, + "learning_rate": 5.769104700369164e-05, + "loss": 0.0408, + "step": 14350 + }, + { + "grad_norm": 0.2177293300628662, + "learning_rate": 5.7636582627308854e-05, + "loss": 0.0376, + "step": 14360 + }, + { + "grad_norm": 0.16894161701202393, + "learning_rate": 5.7582108971758095e-05, + "loss": 0.0457, + "step": 14370 + }, + { + "grad_norm": 0.28059402108192444, + "learning_rate": 5.752762610322999e-05, + "loss": 0.043, + "step": 14380 + }, + { + "grad_norm": 0.2401747703552246, + "learning_rate": 5.747313408792636e-05, + "loss": 0.0571, + "step": 14390 + }, + { + "grad_norm": 0.22574836015701294, + "learning_rate": 5.741863299206014e-05, + "loss": 0.0514, + "step": 14400 + }, + { + "grad_norm": 0.1901606172323227, + "learning_rate": 5.73641228818553e-05, + "loss": 0.0415, + "step": 14410 + }, + { + "grad_norm": 0.23655609786510468, + "learning_rate": 5.730960382354676e-05, + "loss": 0.0427, + "step": 14420 + }, + { + "grad_norm": 0.20032235980033875, + "learning_rate": 5.725507588338035e-05, + "loss": 0.0469, + "step": 14430 + }, + { + "grad_norm": 0.2565443217754364, + "learning_rate": 5.7200539127612604e-05, + "loss": 0.0412, + "step": 14440 + }, + { + "grad_norm": 0.2287723422050476, + "learning_rate": 5.714599362251087e-05, + "loss": 0.0448, + "step": 14450 + }, + { + "grad_norm": 0.20677787065505981, + "learning_rate": 5.709143943435307e-05, + "loss": 0.0455, + "step": 14460 + }, + { + "grad_norm": 0.2300729900598526, + "learning_rate": 5.7036876629427646e-05, + "loss": 0.0484, + "step": 14470 + }, + { + "grad_norm": 0.2511281371116638, + "learning_rate": 5.698230527403361e-05, + "loss": 0.0412, + "step": 14480 + }, + { + "grad_norm": 0.2553737163543701, + "learning_rate": 5.6927725434480274e-05, + "loss": 0.0403, + "step": 14490 + }, + { + "grad_norm": 0.2141558974981308, + "learning_rate": 5.687313717708728e-05, + "loss": 0.06, + "step": 14500 + }, + { + "grad_norm": 0.19750818610191345, + "learning_rate": 5.681854056818453e-05, + "loss": 0.0447, + "step": 14510 + }, + { + "grad_norm": 0.27140673995018005, + "learning_rate": 5.676393567411205e-05, + "loss": 0.0391, + "step": 14520 + }, + { + "grad_norm": 0.2691078186035156, + "learning_rate": 5.670932256121991e-05, + "loss": 0.0657, + "step": 14530 + }, + { + "grad_norm": 0.3461399972438812, + "learning_rate": 5.6654701295868216e-05, + "loss": 0.0375, + "step": 14540 + }, + { + "grad_norm": 0.20540601015090942, + "learning_rate": 5.660007194442697e-05, + "loss": 0.0359, + "step": 14550 + }, + { + "grad_norm": 0.23647332191467285, + "learning_rate": 5.6545434573275946e-05, + "loss": 0.0474, + "step": 14560 + }, + { + "grad_norm": 0.17516471445560455, + "learning_rate": 5.649078924880472e-05, + "loss": 0.035, + "step": 14570 + }, + { + "grad_norm": 0.281095951795578, + "learning_rate": 5.643613603741252e-05, + "loss": 0.0524, + "step": 14580 + }, + { + "grad_norm": 0.2130465805530548, + "learning_rate": 5.6381475005508154e-05, + "loss": 0.0599, + "step": 14590 + }, + { + "grad_norm": 0.30053281784057617, + "learning_rate": 5.632680621950992e-05, + "loss": 0.0473, + "step": 14600 + }, + { + "grad_norm": 0.21987885236740112, + "learning_rate": 5.627212974584555e-05, + "loss": 0.0353, + "step": 14610 + }, + { + "grad_norm": 0.3048321604728699, + "learning_rate": 5.62174456509521e-05, + "loss": 0.0377, + "step": 14620 + }, + { + "grad_norm": 0.2118852138519287, + "learning_rate": 5.616275400127594e-05, + "loss": 0.0363, + "step": 14630 + }, + { + "grad_norm": 0.22018705308437347, + "learning_rate": 5.610805486327254e-05, + "loss": 0.0331, + "step": 14640 + }, + { + "grad_norm": 0.24166084825992584, + "learning_rate": 5.6053348303406536e-05, + "loss": 0.0435, + "step": 14650 + }, + { + "grad_norm": 0.2740503251552582, + "learning_rate": 5.599863438815156e-05, + "loss": 0.0393, + "step": 14660 + }, + { + "grad_norm": 0.18368415534496307, + "learning_rate": 5.594391318399017e-05, + "loss": 0.0469, + "step": 14670 + }, + { + "grad_norm": 0.2346373349428177, + "learning_rate": 5.588918475741377e-05, + "loss": 0.0371, + "step": 14680 + }, + { + "grad_norm": 0.21182793378829956, + "learning_rate": 5.5834449174922586e-05, + "loss": 0.0388, + "step": 14690 + }, + { + "grad_norm": 0.25930121541023254, + "learning_rate": 5.57797065030255e-05, + "loss": 0.0447, + "step": 14700 + }, + { + "grad_norm": 0.24734878540039062, + "learning_rate": 5.5724956808240016e-05, + "loss": 0.0519, + "step": 14710 + }, + { + "grad_norm": 0.22324860095977783, + "learning_rate": 5.567020015709219e-05, + "loss": 0.0431, + "step": 14720 + }, + { + "grad_norm": 0.17215146124362946, + "learning_rate": 5.561543661611649e-05, + "loss": 0.0381, + "step": 14730 + }, + { + "grad_norm": 0.21854068338871002, + "learning_rate": 5.556066625185583e-05, + "loss": 0.0447, + "step": 14740 + }, + { + "grad_norm": 0.15432070195674896, + "learning_rate": 5.550588913086131e-05, + "loss": 0.0462, + "step": 14750 + }, + { + "grad_norm": 0.25642073154449463, + "learning_rate": 5.545110531969234e-05, + "loss": 0.0449, + "step": 14760 + }, + { + "grad_norm": 0.15306852757930756, + "learning_rate": 5.539631488491641e-05, + "loss": 0.036, + "step": 14770 + }, + { + "grad_norm": 0.22211147844791412, + "learning_rate": 5.534151789310904e-05, + "loss": 0.0353, + "step": 14780 + }, + { + "grad_norm": 0.20247460901737213, + "learning_rate": 5.528671441085376e-05, + "loss": 0.026, + "step": 14790 + }, + { + "grad_norm": 0.19772638380527496, + "learning_rate": 5.523190450474197e-05, + "loss": 0.0287, + "step": 14800 + }, + { + "grad_norm": 0.3126679062843323, + "learning_rate": 5.5177088241372865e-05, + "loss": 0.0395, + "step": 14810 + }, + { + "grad_norm": 0.27556517720222473, + "learning_rate": 5.512226568735338e-05, + "loss": 0.037, + "step": 14820 + }, + { + "grad_norm": 0.20253241062164307, + "learning_rate": 5.506743690929809e-05, + "loss": 0.0427, + "step": 14830 + }, + { + "grad_norm": 0.22430184483528137, + "learning_rate": 5.501260197382912e-05, + "loss": 0.0442, + "step": 14840 + }, + { + "grad_norm": 0.2609712481498718, + "learning_rate": 5.49577609475761e-05, + "loss": 0.0552, + "step": 14850 + }, + { + "grad_norm": 0.18670204281806946, + "learning_rate": 5.490291389717603e-05, + "loss": 0.0323, + "step": 14860 + }, + { + "grad_norm": 0.13871395587921143, + "learning_rate": 5.484806088927329e-05, + "loss": 0.0301, + "step": 14870 + }, + { + "grad_norm": 0.2550557851791382, + "learning_rate": 5.479320199051942e-05, + "loss": 0.0378, + "step": 14880 + }, + { + "grad_norm": 0.2323555052280426, + "learning_rate": 5.473833726757314e-05, + "loss": 0.0362, + "step": 14890 + }, + { + "grad_norm": 0.2695874571800232, + "learning_rate": 5.4683466787100304e-05, + "loss": 0.0366, + "step": 14900 + }, + { + "grad_norm": 0.22652754187583923, + "learning_rate": 5.462859061577369e-05, + "loss": 0.0285, + "step": 14910 + }, + { + "grad_norm": 0.22265128791332245, + "learning_rate": 5.4573708820273026e-05, + "loss": 0.0296, + "step": 14920 + }, + { + "grad_norm": 0.2621091902256012, + "learning_rate": 5.451882146728488e-05, + "loss": 0.0311, + "step": 14930 + }, + { + "grad_norm": 0.1606391966342926, + "learning_rate": 5.446392862350255e-05, + "loss": 0.0387, + "step": 14940 + }, + { + "grad_norm": 0.21763649582862854, + "learning_rate": 5.440903035562603e-05, + "loss": 0.0515, + "step": 14950 + }, + { + "grad_norm": 0.1955883949995041, + "learning_rate": 5.435412673036188e-05, + "loss": 0.0295, + "step": 14960 + }, + { + "grad_norm": 0.30634036660194397, + "learning_rate": 5.429921781442318e-05, + "loss": 0.0413, + "step": 14970 + }, + { + "grad_norm": 0.1893945336341858, + "learning_rate": 5.424430367452945e-05, + "loss": 0.0384, + "step": 14980 + }, + { + "grad_norm": 0.2609647214412689, + "learning_rate": 5.418938437740655e-05, + "loss": 0.0408, + "step": 14990 + }, + { + "grad_norm": 0.22139419615268707, + "learning_rate": 5.413445998978658e-05, + "loss": 0.0397, + "step": 15000 + }, + { + "grad_norm": 0.24316708743572235, + "learning_rate": 5.407953057840789e-05, + "loss": 0.0361, + "step": 15010 + }, + { + "grad_norm": 0.19356046617031097, + "learning_rate": 5.4024596210014853e-05, + "loss": 0.0326, + "step": 15020 + }, + { + "grad_norm": 0.27790606021881104, + "learning_rate": 5.3969656951357935e-05, + "loss": 0.034, + "step": 15030 + }, + { + "grad_norm": 0.25830790400505066, + "learning_rate": 5.3914712869193504e-05, + "loss": 0.0458, + "step": 15040 + }, + { + "grad_norm": 0.18981517851352692, + "learning_rate": 5.385976403028381e-05, + "loss": 0.0549, + "step": 15050 + }, + { + "grad_norm": 0.20615150034427643, + "learning_rate": 5.3804810501396875e-05, + "loss": 0.0426, + "step": 15060 + }, + { + "grad_norm": 0.1733548492193222, + "learning_rate": 5.374985234930639e-05, + "loss": 0.0356, + "step": 15070 + }, + { + "grad_norm": 0.2139529287815094, + "learning_rate": 5.3694889640791724e-05, + "loss": 0.0423, + "step": 15080 + }, + { + "grad_norm": 0.25642672181129456, + "learning_rate": 5.3639922442637736e-05, + "loss": 0.0474, + "step": 15090 + }, + { + "grad_norm": 0.23717907071113586, + "learning_rate": 5.358495082163476e-05, + "loss": 0.0464, + "step": 15100 + }, + { + "grad_norm": 0.24115340411663055, + "learning_rate": 5.35299748445785e-05, + "loss": 0.037, + "step": 15110 + }, + { + "grad_norm": 0.21627314388751984, + "learning_rate": 5.3474994578269944e-05, + "loss": 0.0277, + "step": 15120 + }, + { + "grad_norm": 0.21980252861976624, + "learning_rate": 5.34200100895153e-05, + "loss": 0.0323, + "step": 15130 + }, + { + "grad_norm": 0.18250951170921326, + "learning_rate": 5.3365021445125916e-05, + "loss": 0.0225, + "step": 15140 + }, + { + "grad_norm": 0.23111744225025177, + "learning_rate": 5.331002871191817e-05, + "loss": 0.0447, + "step": 15150 + }, + { + "grad_norm": 0.19363923370838165, + "learning_rate": 5.3255031956713443e-05, + "loss": 0.0318, + "step": 15160 + }, + { + "grad_norm": 0.207148015499115, + "learning_rate": 5.320003124633795e-05, + "loss": 0.0377, + "step": 15170 + }, + { + "grad_norm": 0.20350833237171173, + "learning_rate": 5.314502664762274e-05, + "loss": 0.0305, + "step": 15180 + }, + { + "grad_norm": 0.19085563719272614, + "learning_rate": 5.3090018227403604e-05, + "loss": 0.0327, + "step": 15190 + }, + { + "grad_norm": 0.2257489413022995, + "learning_rate": 5.3035006052520955e-05, + "loss": 0.0473, + "step": 15200 + }, + { + "grad_norm": 0.19037660956382751, + "learning_rate": 5.297999018981976e-05, + "loss": 0.033, + "step": 15210 + }, + { + "grad_norm": 0.22943733632564545, + "learning_rate": 5.2924970706149505e-05, + "loss": 0.0293, + "step": 15220 + }, + { + "grad_norm": 0.17537996172904968, + "learning_rate": 5.2869947668364014e-05, + "loss": 0.0237, + "step": 15230 + }, + { + "grad_norm": 0.1470334827899933, + "learning_rate": 5.2814921143321504e-05, + "loss": 0.0316, + "step": 15240 + }, + { + "grad_norm": 0.31675368547439575, + "learning_rate": 5.275989119788436e-05, + "loss": 0.0496, + "step": 15250 + }, + { + "grad_norm": 0.22736841440200806, + "learning_rate": 5.270485789891919e-05, + "loss": 0.0424, + "step": 15260 + }, + { + "grad_norm": 0.23021981120109558, + "learning_rate": 5.264982131329661e-05, + "loss": 0.0317, + "step": 15270 + }, + { + "grad_norm": 0.17361243069171906, + "learning_rate": 5.259478150789128e-05, + "loss": 0.0296, + "step": 15280 + }, + { + "grad_norm": 0.25106680393218994, + "learning_rate": 5.253973854958173e-05, + "loss": 0.0489, + "step": 15290 + }, + { + "grad_norm": 0.21685688197612762, + "learning_rate": 5.2484692505250376e-05, + "loss": 0.0373, + "step": 15300 + }, + { + "grad_norm": 0.18189367651939392, + "learning_rate": 5.242964344178333e-05, + "loss": 0.0382, + "step": 15310 + }, + { + "grad_norm": 0.19547368586063385, + "learning_rate": 5.237459142607041e-05, + "loss": 0.0257, + "step": 15320 + }, + { + "grad_norm": 0.20228630304336548, + "learning_rate": 5.231953652500498e-05, + "loss": 0.0315, + "step": 15330 + }, + { + "grad_norm": 0.140726238489151, + "learning_rate": 5.2264478805483976e-05, + "loss": 0.0352, + "step": 15340 + }, + { + "grad_norm": 0.13349156081676483, + "learning_rate": 5.220941833440768e-05, + "loss": 0.0315, + "step": 15350 + }, + { + "grad_norm": 0.194534033536911, + "learning_rate": 5.215435517867977e-05, + "loss": 0.0237, + "step": 15360 + }, + { + "grad_norm": 0.14328493177890778, + "learning_rate": 5.209928940520719e-05, + "loss": 0.0261, + "step": 15370 + }, + { + "grad_norm": 0.12424099445343018, + "learning_rate": 5.204422108090004e-05, + "loss": 0.0325, + "step": 15380 + }, + { + "grad_norm": 0.22946031391620636, + "learning_rate": 5.19891502726715e-05, + "loss": 0.0215, + "step": 15390 + }, + { + "grad_norm": 0.1508287787437439, + "learning_rate": 5.193407704743782e-05, + "loss": 0.0268, + "step": 15400 + }, + { + "grad_norm": 0.17863206565380096, + "learning_rate": 5.187900147211815e-05, + "loss": 0.02, + "step": 15410 + }, + { + "grad_norm": 0.18380852043628693, + "learning_rate": 5.182392361363453e-05, + "loss": 0.0305, + "step": 15420 + }, + { + "grad_norm": 0.24871328473091125, + "learning_rate": 5.176884353891172e-05, + "loss": 0.0528, + "step": 15430 + }, + { + "grad_norm": 0.17719528079032898, + "learning_rate": 5.171376131487722e-05, + "loss": 0.0286, + "step": 15440 + }, + { + "grad_norm": 0.1915503293275833, + "learning_rate": 5.165867700846113e-05, + "loss": 0.0382, + "step": 15450 + }, + { + "grad_norm": 0.2137172967195511, + "learning_rate": 5.160359068659606e-05, + "loss": 0.0302, + "step": 15460 + }, + { + "grad_norm": 0.24854391813278198, + "learning_rate": 5.154850241621712e-05, + "loss": 0.0258, + "step": 15470 + }, + { + "grad_norm": 0.18732157349586487, + "learning_rate": 5.149341226426172e-05, + "loss": 0.0292, + "step": 15480 + }, + { + "grad_norm": 0.22397463023662567, + "learning_rate": 5.14383202976696e-05, + "loss": 0.0313, + "step": 15490 + }, + { + "grad_norm": 0.20543788373470306, + "learning_rate": 5.1383226583382684e-05, + "loss": 0.0229, + "step": 15500 + }, + { + "grad_norm": 0.25860118865966797, + "learning_rate": 5.132813118834504e-05, + "loss": 0.0377, + "step": 15510 + }, + { + "grad_norm": 0.2248518019914627, + "learning_rate": 5.1273034179502777e-05, + "loss": 0.0208, + "step": 15520 + }, + { + "grad_norm": 0.26968827843666077, + "learning_rate": 5.1217935623803945e-05, + "loss": 0.0286, + "step": 15530 + }, + { + "grad_norm": 0.22853006422519684, + "learning_rate": 5.1162835588198476e-05, + "loss": 0.0319, + "step": 15540 + }, + { + "grad_norm": 0.1584775149822235, + "learning_rate": 5.110773413963813e-05, + "loss": 0.0312, + "step": 15550 + }, + { + "grad_norm": 0.2278273105621338, + "learning_rate": 5.105263134507636e-05, + "loss": 0.0311, + "step": 15560 + }, + { + "grad_norm": 0.2809124290943146, + "learning_rate": 5.099752727146824e-05, + "loss": 0.041, + "step": 15570 + }, + { + "grad_norm": 0.18524205684661865, + "learning_rate": 5.094242198577042e-05, + "loss": 0.0344, + "step": 15580 + }, + { + "grad_norm": 0.13688844442367554, + "learning_rate": 5.088731555494102e-05, + "loss": 0.0278, + "step": 15590 + }, + { + "grad_norm": 0.19823113083839417, + "learning_rate": 5.0832208045939556e-05, + "loss": 0.0329, + "step": 15600 + }, + { + "grad_norm": 0.23817162215709686, + "learning_rate": 5.0777099525726844e-05, + "loss": 0.0332, + "step": 15610 + }, + { + "grad_norm": 0.1601613610982895, + "learning_rate": 5.072199006126494e-05, + "loss": 0.0238, + "step": 15620 + }, + { + "grad_norm": 0.4294241964817047, + "learning_rate": 5.0666879719517026e-05, + "loss": 0.0338, + "step": 15630 + }, + { + "grad_norm": 0.19021837413311005, + "learning_rate": 5.061176856744737e-05, + "loss": 0.0297, + "step": 15640 + }, + { + "grad_norm": 0.12740124762058258, + "learning_rate": 5.0556656672021205e-05, + "loss": 0.0205, + "step": 15650 + }, + { + "grad_norm": 0.2737583816051483, + "learning_rate": 5.050154410020472e-05, + "loss": 0.0363, + "step": 15660 + }, + { + "grad_norm": 0.17844900488853455, + "learning_rate": 5.044643091896485e-05, + "loss": 0.0325, + "step": 15670 + }, + { + "grad_norm": 0.19980019330978394, + "learning_rate": 5.039131719526932e-05, + "loss": 0.0225, + "step": 15680 + }, + { + "grad_norm": 0.20457105338573456, + "learning_rate": 5.03362029960865e-05, + "loss": 0.0296, + "step": 15690 + }, + { + "grad_norm": 0.22462224960327148, + "learning_rate": 5.028108838838533e-05, + "loss": 0.0221, + "step": 15700 + }, + { + "grad_norm": 0.23789110779762268, + "learning_rate": 5.0225973439135276e-05, + "loss": 0.0306, + "step": 15710 + }, + { + "grad_norm": 0.31748542189598083, + "learning_rate": 5.017085821530617e-05, + "loss": 0.0262, + "step": 15720 + }, + { + "grad_norm": 0.24862779676914215, + "learning_rate": 5.011574278386822e-05, + "loss": 0.0404, + "step": 15730 + }, + { + "grad_norm": 0.22122421860694885, + "learning_rate": 5.006062721179189e-05, + "loss": 0.0275, + "step": 15740 + }, + { + "grad_norm": 0.17756973206996918, + "learning_rate": 5.000551156604777e-05, + "loss": 0.0322, + "step": 15750 + }, + { + "grad_norm": 0.18257424235343933, + "learning_rate": 4.9950395913606594e-05, + "loss": 0.0292, + "step": 15760 + }, + { + "grad_norm": 0.30035004019737244, + "learning_rate": 4.989528032143903e-05, + "loss": 0.0261, + "step": 15770 + }, + { + "grad_norm": 0.2337464839220047, + "learning_rate": 4.984016485651578e-05, + "loss": 0.0306, + "step": 15780 + }, + { + "grad_norm": 0.3577556908130646, + "learning_rate": 4.9785049585807274e-05, + "loss": 0.0433, + "step": 15790 + }, + { + "grad_norm": 0.20973412692546844, + "learning_rate": 4.972993457628381e-05, + "loss": 0.0371, + "step": 15800 + }, + { + "grad_norm": 0.18818731606006622, + "learning_rate": 4.9674819894915306e-05, + "loss": 0.0291, + "step": 15810 + }, + { + "grad_norm": 0.21062414348125458, + "learning_rate": 4.9619705608671265e-05, + "loss": 0.0335, + "step": 15820 + }, + { + "grad_norm": 0.20797160267829895, + "learning_rate": 4.956459178452079e-05, + "loss": 0.0322, + "step": 15830 + }, + { + "grad_norm": 0.18805018067359924, + "learning_rate": 4.950947848943235e-05, + "loss": 0.0313, + "step": 15840 + }, + { + "grad_norm": 0.1985694169998169, + "learning_rate": 4.9454365790373805e-05, + "loss": 0.0323, + "step": 15850 + }, + { + "grad_norm": 0.18034890294075012, + "learning_rate": 4.939925375431226e-05, + "loss": 0.038, + "step": 15860 + }, + { + "grad_norm": 0.18013574182987213, + "learning_rate": 4.9344142448214046e-05, + "loss": 0.0379, + "step": 15870 + }, + { + "grad_norm": 0.18831756711006165, + "learning_rate": 4.928903193904461e-05, + "loss": 0.031, + "step": 15880 + }, + { + "grad_norm": 0.2789424955844879, + "learning_rate": 4.923392229376842e-05, + "loss": 0.0416, + "step": 15890 + }, + { + "grad_norm": 0.2016260176897049, + "learning_rate": 4.9178813579348865e-05, + "loss": 0.0268, + "step": 15900 + }, + { + "grad_norm": 0.2038709968328476, + "learning_rate": 4.9123705862748253e-05, + "loss": 0.0288, + "step": 15910 + }, + { + "grad_norm": 0.1835741251707077, + "learning_rate": 4.9068599210927627e-05, + "loss": 0.0336, + "step": 15920 + }, + { + "grad_norm": 0.14978674054145813, + "learning_rate": 4.901349369084681e-05, + "loss": 0.0378, + "step": 15930 + }, + { + "grad_norm": 0.1662510633468628, + "learning_rate": 4.8958389369464156e-05, + "loss": 0.0337, + "step": 15940 + }, + { + "grad_norm": 0.197488933801651, + "learning_rate": 4.890328631373666e-05, + "loss": 0.0289, + "step": 15950 + }, + { + "grad_norm": 0.1613176167011261, + "learning_rate": 4.88481845906197e-05, + "loss": 0.0318, + "step": 15960 + }, + { + "grad_norm": 0.15729044377803802, + "learning_rate": 4.879308426706707e-05, + "loss": 0.0418, + "step": 15970 + }, + { + "grad_norm": 0.16356970369815826, + "learning_rate": 4.873798541003084e-05, + "loss": 0.0229, + "step": 15980 + }, + { + "grad_norm": 0.22156411409378052, + "learning_rate": 4.868288808646136e-05, + "loss": 0.0466, + "step": 15990 + }, + { + "grad_norm": 0.19999949634075165, + "learning_rate": 4.862779236330705e-05, + "loss": 0.0354, + "step": 16000 + }, + { + "grad_norm": 0.23114009201526642, + "learning_rate": 4.8572698307514395e-05, + "loss": 0.0473, + "step": 16010 + }, + { + "grad_norm": 0.25993239879608154, + "learning_rate": 4.85176059860279e-05, + "loss": 0.0434, + "step": 16020 + }, + { + "grad_norm": 0.20156438648700714, + "learning_rate": 4.846251546578989e-05, + "loss": 0.0474, + "step": 16030 + }, + { + "grad_norm": 0.17206701636314392, + "learning_rate": 4.840742681374058e-05, + "loss": 0.0413, + "step": 16040 + }, + { + "grad_norm": 0.22952349483966827, + "learning_rate": 4.835234009681787e-05, + "loss": 0.038, + "step": 16050 + }, + { + "grad_norm": 0.2570783793926239, + "learning_rate": 4.8297255381957296e-05, + "loss": 0.0381, + "step": 16060 + }, + { + "grad_norm": 0.25052279233932495, + "learning_rate": 4.824217273609198e-05, + "loss": 0.038, + "step": 16070 + }, + { + "grad_norm": 0.1992686539888382, + "learning_rate": 4.8187092226152555e-05, + "loss": 0.0355, + "step": 16080 + }, + { + "grad_norm": 0.25340571999549866, + "learning_rate": 4.8132013919067016e-05, + "loss": 0.042, + "step": 16090 + }, + { + "grad_norm": 0.1953749805688858, + "learning_rate": 4.807693788176071e-05, + "loss": 0.0428, + "step": 16100 + }, + { + "grad_norm": 0.20592743158340454, + "learning_rate": 4.8021864181156214e-05, + "loss": 0.0423, + "step": 16110 + }, + { + "grad_norm": 0.24996092915534973, + "learning_rate": 4.796679288417326e-05, + "loss": 0.0553, + "step": 16120 + }, + { + "grad_norm": 0.2823004722595215, + "learning_rate": 4.791172405772866e-05, + "loss": 0.0502, + "step": 16130 + }, + { + "grad_norm": 0.24368084967136383, + "learning_rate": 4.785665776873625e-05, + "loss": 0.0339, + "step": 16140 + }, + { + "grad_norm": 0.1881355196237564, + "learning_rate": 4.7801594084106763e-05, + "loss": 0.049, + "step": 16150 + }, + { + "grad_norm": 0.18709814548492432, + "learning_rate": 4.774653307074775e-05, + "loss": 0.041, + "step": 16160 + }, + { + "grad_norm": 0.22618357837200165, + "learning_rate": 4.769147479556355e-05, + "loss": 0.0341, + "step": 16170 + }, + { + "grad_norm": 0.21321657299995422, + "learning_rate": 4.763641932545515e-05, + "loss": 0.0299, + "step": 16180 + }, + { + "grad_norm": 0.2529887855052948, + "learning_rate": 4.758136672732013e-05, + "loss": 0.0443, + "step": 16190 + }, + { + "grad_norm": 0.16545626521110535, + "learning_rate": 4.752631706805261e-05, + "loss": 0.0362, + "step": 16200 + }, + { + "grad_norm": 0.18176037073135376, + "learning_rate": 4.7471270414543114e-05, + "loss": 0.0538, + "step": 16210 + }, + { + "grad_norm": 0.19996878504753113, + "learning_rate": 4.741622683367849e-05, + "loss": 0.0353, + "step": 16220 + }, + { + "grad_norm": 0.20248398184776306, + "learning_rate": 4.736118639234191e-05, + "loss": 0.0556, + "step": 16230 + }, + { + "grad_norm": 0.23256130516529083, + "learning_rate": 4.730614915741267e-05, + "loss": 0.0513, + "step": 16240 + }, + { + "grad_norm": 0.17439186573028564, + "learning_rate": 4.7251115195766234e-05, + "loss": 0.0418, + "step": 16250 + }, + { + "grad_norm": 0.17631998658180237, + "learning_rate": 4.719608457427404e-05, + "loss": 0.0411, + "step": 16260 + }, + { + "grad_norm": 0.23560506105422974, + "learning_rate": 4.714105735980347e-05, + "loss": 0.034, + "step": 16270 + }, + { + "grad_norm": 0.16569863259792328, + "learning_rate": 4.70860336192178e-05, + "loss": 0.0404, + "step": 16280 + }, + { + "grad_norm": 0.1920316517353058, + "learning_rate": 4.7031013419376034e-05, + "loss": 0.0414, + "step": 16290 + }, + { + "grad_norm": 0.1938362717628479, + "learning_rate": 4.6975996827132926e-05, + "loss": 0.029, + "step": 16300 + }, + { + "grad_norm": 0.23465505242347717, + "learning_rate": 4.692098390933883e-05, + "loss": 0.0454, + "step": 16310 + }, + { + "grad_norm": 0.22367005050182343, + "learning_rate": 4.6865974732839615e-05, + "loss": 0.0355, + "step": 16320 + }, + { + "grad_norm": 0.24409650266170502, + "learning_rate": 4.6810969364476615e-05, + "loss": 0.0567, + "step": 16330 + }, + { + "grad_norm": 0.196989968419075, + "learning_rate": 4.675596787108653e-05, + "loss": 0.0377, + "step": 16340 + }, + { + "grad_norm": 0.21227724850177765, + "learning_rate": 4.670097031950139e-05, + "loss": 0.0329, + "step": 16350 + }, + { + "grad_norm": 0.1497187614440918, + "learning_rate": 4.664597677654838e-05, + "loss": 0.0403, + "step": 16360 + }, + { + "grad_norm": 0.28369638323783875, + "learning_rate": 4.659098730904986e-05, + "loss": 0.0472, + "step": 16370 + }, + { + "grad_norm": 0.1766453981399536, + "learning_rate": 4.6536001983823204e-05, + "loss": 0.0376, + "step": 16380 + }, + { + "grad_norm": 0.20155072212219238, + "learning_rate": 4.648102086768078e-05, + "loss": 0.058, + "step": 16390 + }, + { + "grad_norm": 0.2339235544204712, + "learning_rate": 4.6426044027429786e-05, + "loss": 0.0458, + "step": 16400 + }, + { + "grad_norm": 0.29515689611434937, + "learning_rate": 4.637107152987234e-05, + "loss": 0.04, + "step": 16410 + }, + { + "grad_norm": 0.17282435297966003, + "learning_rate": 4.631610344180515e-05, + "loss": 0.0327, + "step": 16420 + }, + { + "grad_norm": 0.18782255053520203, + "learning_rate": 4.626113983001965e-05, + "loss": 0.0451, + "step": 16430 + }, + { + "grad_norm": 0.22838972508907318, + "learning_rate": 4.620618076130182e-05, + "loss": 0.0373, + "step": 16440 + }, + { + "grad_norm": 0.13673453032970428, + "learning_rate": 4.6151226302432074e-05, + "loss": 0.0361, + "step": 16450 + }, + { + "grad_norm": 0.1780867576599121, + "learning_rate": 4.6096276520185304e-05, + "loss": 0.0293, + "step": 16460 + }, + { + "grad_norm": 0.22124846279621124, + "learning_rate": 4.604133148133066e-05, + "loss": 0.0336, + "step": 16470 + }, + { + "grad_norm": 0.19676972925662994, + "learning_rate": 4.598639125263155e-05, + "loss": 0.0454, + "step": 16480 + }, + { + "grad_norm": 0.199634850025177, + "learning_rate": 4.593145590084553e-05, + "loss": 0.0478, + "step": 16490 + }, + { + "grad_norm": 0.22204986214637756, + "learning_rate": 4.58765254927242e-05, + "loss": 0.0288, + "step": 16500 + }, + { + "grad_norm": 0.17784124612808228, + "learning_rate": 4.582160009501323e-05, + "loss": 0.0329, + "step": 16510 + }, + { + "grad_norm": 0.22105737030506134, + "learning_rate": 4.5766679774452143e-05, + "loss": 0.0419, + "step": 16520 + }, + { + "grad_norm": 0.20960526168346405, + "learning_rate": 4.571176459777431e-05, + "loss": 0.0394, + "step": 16530 + }, + { + "grad_norm": 0.18201638758182526, + "learning_rate": 4.5656854631706846e-05, + "loss": 0.037, + "step": 16540 + }, + { + "grad_norm": 0.19521449506282806, + "learning_rate": 4.560194994297054e-05, + "loss": 0.0432, + "step": 16550 + }, + { + "grad_norm": 0.18197879195213318, + "learning_rate": 4.554705059827974e-05, + "loss": 0.0402, + "step": 16560 + }, + { + "grad_norm": 0.24373213946819305, + "learning_rate": 4.5492156664342365e-05, + "loss": 0.0442, + "step": 16570 + }, + { + "grad_norm": 0.22170308232307434, + "learning_rate": 4.5437268207859695e-05, + "loss": 0.0482, + "step": 16580 + }, + { + "grad_norm": 0.17027218639850616, + "learning_rate": 4.538238529552641e-05, + "loss": 0.0253, + "step": 16590 + }, + { + "grad_norm": 0.15954935550689697, + "learning_rate": 4.5327507994030394e-05, + "loss": 0.0291, + "step": 16600 + }, + { + "grad_norm": 0.19129158556461334, + "learning_rate": 4.527263637005273e-05, + "loss": 0.0289, + "step": 16610 + }, + { + "grad_norm": 0.14617279171943665, + "learning_rate": 4.521777049026766e-05, + "loss": 0.0275, + "step": 16620 + }, + { + "grad_norm": 0.18538282811641693, + "learning_rate": 4.5162910421342386e-05, + "loss": 0.0283, + "step": 16630 + }, + { + "grad_norm": 0.30161309242248535, + "learning_rate": 4.510805622993706e-05, + "loss": 0.0341, + "step": 16640 + }, + { + "grad_norm": 0.2234259992837906, + "learning_rate": 4.505320798270467e-05, + "loss": 0.0472, + "step": 16650 + }, + { + "grad_norm": 0.15000714361667633, + "learning_rate": 4.499836574629105e-05, + "loss": 0.035, + "step": 16660 + }, + { + "grad_norm": 0.13758152723312378, + "learning_rate": 4.494352958733465e-05, + "loss": 0.0494, + "step": 16670 + }, + { + "grad_norm": 0.2439495176076889, + "learning_rate": 4.488869957246663e-05, + "loss": 0.0423, + "step": 16680 + }, + { + "grad_norm": 0.20449699461460114, + "learning_rate": 4.4833875768310575e-05, + "loss": 0.0385, + "step": 16690 + }, + { + "grad_norm": 0.21864454448223114, + "learning_rate": 4.47790582414826e-05, + "loss": 0.0337, + "step": 16700 + }, + { + "grad_norm": 0.21230706572532654, + "learning_rate": 4.472424705859114e-05, + "loss": 0.0404, + "step": 16710 + }, + { + "grad_norm": 0.20778273046016693, + "learning_rate": 4.466944228623701e-05, + "loss": 0.0479, + "step": 16720 + }, + { + "grad_norm": 0.22977210581302643, + "learning_rate": 4.461464399101312e-05, + "loss": 0.0434, + "step": 16730 + }, + { + "grad_norm": 0.14157815277576447, + "learning_rate": 4.45598522395046e-05, + "loss": 0.0363, + "step": 16740 + }, + { + "grad_norm": 0.23720505833625793, + "learning_rate": 4.4505067098288577e-05, + "loss": 0.0432, + "step": 16750 + }, + { + "grad_norm": 0.1773872822523117, + "learning_rate": 4.445028863393417e-05, + "loss": 0.0307, + "step": 16760 + }, + { + "grad_norm": 0.21853715181350708, + "learning_rate": 4.439551691300236e-05, + "loss": 0.0329, + "step": 16770 + }, + { + "grad_norm": 0.2459811568260193, + "learning_rate": 4.434075200204599e-05, + "loss": 0.0378, + "step": 16780 + }, + { + "grad_norm": 0.16335107386112213, + "learning_rate": 4.428599396760956e-05, + "loss": 0.0315, + "step": 16790 + }, + { + "grad_norm": 0.19703884422779083, + "learning_rate": 4.423124287622926e-05, + "loss": 0.0322, + "step": 16800 + }, + { + "grad_norm": 0.20233698189258575, + "learning_rate": 4.417649879443282e-05, + "loss": 0.0315, + "step": 16810 + }, + { + "grad_norm": 0.2190602719783783, + "learning_rate": 4.4121761788739444e-05, + "loss": 0.0376, + "step": 16820 + }, + { + "grad_norm": 0.18399833142757416, + "learning_rate": 4.4067031925659805e-05, + "loss": 0.0359, + "step": 16830 + }, + { + "grad_norm": 0.1622905433177948, + "learning_rate": 4.4012309271695816e-05, + "loss": 0.0305, + "step": 16840 + }, + { + "grad_norm": 0.194683238863945, + "learning_rate": 4.395759389334067e-05, + "loss": 0.0372, + "step": 16850 + }, + { + "grad_norm": 0.16187703609466553, + "learning_rate": 4.3902885857078685e-05, + "loss": 0.0448, + "step": 16860 + }, + { + "grad_norm": 0.16982007026672363, + "learning_rate": 4.384818522938531e-05, + "loss": 0.0292, + "step": 16870 + }, + { + "grad_norm": 0.21310696005821228, + "learning_rate": 4.379349207672695e-05, + "loss": 0.0284, + "step": 16880 + }, + { + "grad_norm": 0.21731770038604736, + "learning_rate": 4.373880646556098e-05, + "loss": 0.0352, + "step": 16890 + }, + { + "grad_norm": 0.2401011884212494, + "learning_rate": 4.3684128462335533e-05, + "loss": 0.0269, + "step": 16900 + }, + { + "grad_norm": 0.19142329692840576, + "learning_rate": 4.362945813348955e-05, + "loss": 0.0313, + "step": 16910 + }, + { + "grad_norm": 0.17992989718914032, + "learning_rate": 4.357479554545262e-05, + "loss": 0.0355, + "step": 16920 + }, + { + "grad_norm": 0.1972612589597702, + "learning_rate": 4.352014076464499e-05, + "loss": 0.039, + "step": 16930 + }, + { + "grad_norm": 0.175844207406044, + "learning_rate": 4.346549385747734e-05, + "loss": 0.0279, + "step": 16940 + }, + { + "grad_norm": 0.15819713473320007, + "learning_rate": 4.34108548903508e-05, + "loss": 0.0372, + "step": 16950 + }, + { + "grad_norm": 0.2354213446378708, + "learning_rate": 4.335622392965689e-05, + "loss": 0.0332, + "step": 16960 + }, + { + "grad_norm": 0.3464144468307495, + "learning_rate": 4.330160104177738e-05, + "loss": 0.0342, + "step": 16970 + }, + { + "grad_norm": 0.11107303947210312, + "learning_rate": 4.32469862930842e-05, + "loss": 0.0364, + "step": 16980 + }, + { + "grad_norm": 0.17501556873321533, + "learning_rate": 4.319237974993946e-05, + "loss": 0.0392, + "step": 16990 + }, + { + "grad_norm": 0.2296830266714096, + "learning_rate": 4.3137781478695236e-05, + "loss": 0.0307, + "step": 17000 + }, + { + "grad_norm": 0.19337046146392822, + "learning_rate": 4.308319154569358e-05, + "loss": 0.0449, + "step": 17010 + }, + { + "grad_norm": 0.17625300586223602, + "learning_rate": 4.302861001726641e-05, + "loss": 0.0412, + "step": 17020 + }, + { + "grad_norm": 0.15634265542030334, + "learning_rate": 4.2974036959735415e-05, + "loss": 0.0298, + "step": 17030 + }, + { + "grad_norm": 0.2172774374485016, + "learning_rate": 4.2919472439412034e-05, + "loss": 0.0233, + "step": 17040 + }, + { + "grad_norm": 0.21264608204364777, + "learning_rate": 4.286491652259729e-05, + "loss": 0.0303, + "step": 17050 + }, + { + "grad_norm": 0.4149863123893738, + "learning_rate": 4.281036927558178e-05, + "loss": 0.0361, + "step": 17060 + }, + { + "grad_norm": 0.18217696249485016, + "learning_rate": 4.275583076464552e-05, + "loss": 0.02, + "step": 17070 + }, + { + "grad_norm": 0.2094411700963974, + "learning_rate": 4.270130105605794e-05, + "loss": 0.0254, + "step": 17080 + }, + { + "grad_norm": 0.15377216041088104, + "learning_rate": 4.264678021607782e-05, + "loss": 0.0316, + "step": 17090 + }, + { + "grad_norm": 0.1521397829055786, + "learning_rate": 4.25922683109531e-05, + "loss": 0.0326, + "step": 17100 + }, + { + "grad_norm": 0.22790682315826416, + "learning_rate": 4.25377654069209e-05, + "loss": 0.035, + "step": 17110 + }, + { + "grad_norm": 0.19866004586219788, + "learning_rate": 4.248327157020737e-05, + "loss": 0.0357, + "step": 17120 + }, + { + "grad_norm": 0.20211586356163025, + "learning_rate": 4.242878686702763e-05, + "loss": 0.0316, + "step": 17130 + }, + { + "grad_norm": 0.16573238372802734, + "learning_rate": 4.23743113635858e-05, + "loss": 0.0225, + "step": 17140 + }, + { + "grad_norm": 0.20053517818450928, + "learning_rate": 4.2319845126074705e-05, + "loss": 0.0409, + "step": 17150 + }, + { + "grad_norm": 0.5372312068939209, + "learning_rate": 4.226538822067598e-05, + "loss": 0.0412, + "step": 17160 + }, + { + "grad_norm": 0.18660536408424377, + "learning_rate": 4.2210940713559896e-05, + "loss": 0.045, + "step": 17170 + }, + { + "grad_norm": 0.23725625872612, + "learning_rate": 4.21565026708853e-05, + "loss": 0.0338, + "step": 17180 + }, + { + "grad_norm": 0.26244524121284485, + "learning_rate": 4.2102074158799535e-05, + "loss": 0.0322, + "step": 17190 + }, + { + "grad_norm": 0.14086462557315826, + "learning_rate": 4.204765524343841e-05, + "loss": 0.0347, + "step": 17200 + }, + { + "grad_norm": 0.21544791758060455, + "learning_rate": 4.1993245990926036e-05, + "loss": 0.0315, + "step": 17210 + }, + { + "grad_norm": 0.1665424108505249, + "learning_rate": 4.193884646737474e-05, + "loss": 0.0319, + "step": 17220 + }, + { + "grad_norm": 0.2505476772785187, + "learning_rate": 4.1884456738885124e-05, + "loss": 0.0315, + "step": 17230 + }, + { + "grad_norm": 0.22755497694015503, + "learning_rate": 4.1830076871545795e-05, + "loss": 0.0373, + "step": 17240 + }, + { + "grad_norm": 0.21797364950180054, + "learning_rate": 4.177570693143346e-05, + "loss": 0.0489, + "step": 17250 + }, + { + "grad_norm": 0.17442183196544647, + "learning_rate": 4.172134698461271e-05, + "loss": 0.0248, + "step": 17260 + }, + { + "grad_norm": 0.19967007637023926, + "learning_rate": 4.166699709713599e-05, + "loss": 0.0305, + "step": 17270 + }, + { + "grad_norm": 0.1873466670513153, + "learning_rate": 4.1612657335043555e-05, + "loss": 0.0266, + "step": 17280 + }, + { + "grad_norm": 0.22969257831573486, + "learning_rate": 4.155832776436331e-05, + "loss": 0.0375, + "step": 17290 + }, + { + "grad_norm": 0.1707921177148819, + "learning_rate": 4.1504008451110845e-05, + "loss": 0.0309, + "step": 17300 + }, + { + "grad_norm": 0.23129211366176605, + "learning_rate": 4.1449699461289225e-05, + "loss": 0.0333, + "step": 17310 + }, + { + "grad_norm": 0.1480247676372528, + "learning_rate": 4.139540086088901e-05, + "loss": 0.0253, + "step": 17320 + }, + { + "grad_norm": 0.220368430018425, + "learning_rate": 4.1341112715888106e-05, + "loss": 0.0294, + "step": 17330 + }, + { + "grad_norm": 0.14880087971687317, + "learning_rate": 4.128683509225172e-05, + "loss": 0.0345, + "step": 17340 + }, + { + "grad_norm": 0.1543043702840805, + "learning_rate": 4.123256805593231e-05, + "loss": 0.0288, + "step": 17350 + }, + { + "grad_norm": 0.2201840728521347, + "learning_rate": 4.117831167286943e-05, + "loss": 0.0376, + "step": 17360 + }, + { + "grad_norm": 0.2439708560705185, + "learning_rate": 4.1124066008989685e-05, + "loss": 0.0338, + "step": 17370 + }, + { + "grad_norm": 0.09443800151348114, + "learning_rate": 4.106983113020669e-05, + "loss": 0.0312, + "step": 17380 + }, + { + "grad_norm": 0.15740253031253815, + "learning_rate": 4.101560710242094e-05, + "loss": 0.0295, + "step": 17390 + }, + { + "grad_norm": 0.22322557866573334, + "learning_rate": 4.096139399151971e-05, + "loss": 0.0357, + "step": 17400 + }, + { + "grad_norm": 0.20545728504657745, + "learning_rate": 4.090719186337709e-05, + "loss": 0.0262, + "step": 17410 + }, + { + "grad_norm": 0.2304871529340744, + "learning_rate": 4.0853000783853746e-05, + "loss": 0.0397, + "step": 17420 + }, + { + "grad_norm": 0.2542682886123657, + "learning_rate": 4.0798820818796956e-05, + "loss": 0.0292, + "step": 17430 + }, + { + "grad_norm": 0.21827110648155212, + "learning_rate": 4.074465203404048e-05, + "loss": 0.0361, + "step": 17440 + }, + { + "grad_norm": 0.2567199766635895, + "learning_rate": 4.0690494495404495e-05, + "loss": 0.04, + "step": 17450 + }, + { + "grad_norm": 0.1915086954832077, + "learning_rate": 4.0636348268695535e-05, + "loss": 0.0312, + "step": 17460 + }, + { + "grad_norm": 0.1676570624113083, + "learning_rate": 4.058221341970637e-05, + "loss": 0.0305, + "step": 17470 + }, + { + "grad_norm": 0.22271402180194855, + "learning_rate": 4.0528090014215945e-05, + "loss": 0.0396, + "step": 17480 + }, + { + "grad_norm": 0.2074212282896042, + "learning_rate": 4.047397811798929e-05, + "loss": 0.026, + "step": 17490 + }, + { + "grad_norm": 0.19638590514659882, + "learning_rate": 4.041987779677745e-05, + "loss": 0.03, + "step": 17500 + }, + { + "grad_norm": 0.2510003447532654, + "learning_rate": 4.036578911631745e-05, + "loss": 0.0348, + "step": 17510 + }, + { + "grad_norm": 0.17145125567913055, + "learning_rate": 4.031171214233211e-05, + "loss": 0.0403, + "step": 17520 + }, + { + "grad_norm": 0.18289557099342346, + "learning_rate": 4.025764694053008e-05, + "loss": 0.0358, + "step": 17530 + }, + { + "grad_norm": 0.19655637443065643, + "learning_rate": 4.020359357660566e-05, + "loss": 0.0444, + "step": 17540 + }, + { + "grad_norm": 0.20548003911972046, + "learning_rate": 4.014955211623875e-05, + "loss": 0.0341, + "step": 17550 + }, + { + "grad_norm": 0.23746474087238312, + "learning_rate": 4.0095522625094895e-05, + "loss": 0.0508, + "step": 17560 + }, + { + "grad_norm": 0.2323760688304901, + "learning_rate": 4.004150516882497e-05, + "loss": 0.0471, + "step": 17570 + }, + { + "grad_norm": 0.18428923189640045, + "learning_rate": 3.99874998130653e-05, + "loss": 0.043, + "step": 17580 + }, + { + "grad_norm": 0.30359897017478943, + "learning_rate": 3.9933506623437455e-05, + "loss": 0.0423, + "step": 17590 + }, + { + "grad_norm": 0.21229343116283417, + "learning_rate": 3.9879525665548276e-05, + "loss": 0.0613, + "step": 17600 + }, + { + "grad_norm": 0.31361615657806396, + "learning_rate": 3.9825557004989704e-05, + "loss": 0.0416, + "step": 17610 + }, + { + "grad_norm": 0.1819619983434677, + "learning_rate": 3.977160070733877e-05, + "loss": 0.0415, + "step": 17620 + }, + { + "grad_norm": 0.2578895688056946, + "learning_rate": 3.971765683815746e-05, + "loss": 0.0306, + "step": 17630 + }, + { + "grad_norm": 0.21937401592731476, + "learning_rate": 3.966372546299265e-05, + "loss": 0.0446, + "step": 17640 + }, + { + "grad_norm": 0.3193115293979645, + "learning_rate": 3.960980664737604e-05, + "loss": 0.0364, + "step": 17650 + }, + { + "grad_norm": 0.23718389868736267, + "learning_rate": 3.955590045682408e-05, + "loss": 0.0497, + "step": 17660 + }, + { + "grad_norm": 0.2850209176540375, + "learning_rate": 3.950200695683788e-05, + "loss": 0.0442, + "step": 17670 + }, + { + "grad_norm": 0.2615636885166168, + "learning_rate": 3.944812621290314e-05, + "loss": 0.0447, + "step": 17680 + }, + { + "grad_norm": 0.23840856552124023, + "learning_rate": 3.939425829049002e-05, + "loss": 0.0416, + "step": 17690 + }, + { + "grad_norm": 0.18407590687274933, + "learning_rate": 3.9340403255053135e-05, + "loss": 0.0387, + "step": 17700 + }, + { + "grad_norm": 0.21182048320770264, + "learning_rate": 3.9286561172031407e-05, + "loss": 0.0391, + "step": 17710 + }, + { + "grad_norm": 0.17676414549350739, + "learning_rate": 3.923273210684809e-05, + "loss": 0.0409, + "step": 17720 + }, + { + "grad_norm": 0.20453956723213196, + "learning_rate": 3.9178916124910555e-05, + "loss": 0.0308, + "step": 17730 + }, + { + "grad_norm": 0.2249411940574646, + "learning_rate": 3.9125113291610274e-05, + "loss": 0.0481, + "step": 17740 + }, + { + "grad_norm": 0.27679717540740967, + "learning_rate": 3.907132367232278e-05, + "loss": 0.0564, + "step": 17750 + }, + { + "grad_norm": 0.22610853612422943, + "learning_rate": 3.901754733240752e-05, + "loss": 0.0484, + "step": 17760 + }, + { + "grad_norm": 0.24631579220294952, + "learning_rate": 3.896378433720785e-05, + "loss": 0.0284, + "step": 17770 + }, + { + "grad_norm": 0.19653761386871338, + "learning_rate": 3.891003475205086e-05, + "loss": 0.0512, + "step": 17780 + }, + { + "grad_norm": 0.1597522348165512, + "learning_rate": 3.885629864224736e-05, + "loss": 0.0205, + "step": 17790 + }, + { + "grad_norm": 0.19200357794761658, + "learning_rate": 3.880257607309178e-05, + "loss": 0.0354, + "step": 17800 + }, + { + "grad_norm": 0.2562716007232666, + "learning_rate": 3.874886710986213e-05, + "loss": 0.0495, + "step": 17810 + }, + { + "grad_norm": 0.23644733428955078, + "learning_rate": 3.8695171817819834e-05, + "loss": 0.0369, + "step": 17820 + }, + { + "grad_norm": 0.20975366234779358, + "learning_rate": 3.8641490262209774e-05, + "loss": 0.0369, + "step": 17830 + }, + { + "grad_norm": 0.2481648325920105, + "learning_rate": 3.8587822508260084e-05, + "loss": 0.0342, + "step": 17840 + }, + { + "grad_norm": 0.2557702362537384, + "learning_rate": 3.853416862118214e-05, + "loss": 0.0313, + "step": 17850 + }, + { + "grad_norm": 0.18996906280517578, + "learning_rate": 3.848052866617049e-05, + "loss": 0.0374, + "step": 17860 + }, + { + "grad_norm": 0.26094773411750793, + "learning_rate": 3.84269027084027e-05, + "loss": 0.0383, + "step": 17870 + }, + { + "grad_norm": 0.2058882862329483, + "learning_rate": 3.83732908130394e-05, + "loss": 0.0372, + "step": 17880 + }, + { + "grad_norm": 0.2173628807067871, + "learning_rate": 3.831969304522409e-05, + "loss": 0.0352, + "step": 17890 + }, + { + "grad_norm": 0.23674464225769043, + "learning_rate": 3.826610947008312e-05, + "loss": 0.0383, + "step": 17900 + }, + { + "grad_norm": 0.25689318776130676, + "learning_rate": 3.821254015272559e-05, + "loss": 0.0403, + "step": 17910 + }, + { + "grad_norm": 0.17689329385757446, + "learning_rate": 3.8158985158243214e-05, + "loss": 0.0316, + "step": 17920 + }, + { + "grad_norm": 0.16930650174617767, + "learning_rate": 3.810544455171044e-05, + "loss": 0.0387, + "step": 17930 + }, + { + "grad_norm": 0.24286694824695587, + "learning_rate": 3.8051918398184114e-05, + "loss": 0.0374, + "step": 17940 + }, + { + "grad_norm": 0.17894239723682404, + "learning_rate": 3.799840676270356e-05, + "loss": 0.0441, + "step": 17950 + }, + { + "grad_norm": 0.2311716377735138, + "learning_rate": 3.794490971029048e-05, + "loss": 0.0489, + "step": 17960 + }, + { + "grad_norm": 0.23548994958400726, + "learning_rate": 3.789142730594881e-05, + "loss": 0.0303, + "step": 17970 + }, + { + "grad_norm": 0.2196691781282425, + "learning_rate": 3.783795961466471e-05, + "loss": 0.0503, + "step": 17980 + }, + { + "grad_norm": 0.2409685105085373, + "learning_rate": 3.778450670140651e-05, + "loss": 0.0285, + "step": 17990 + }, + { + "grad_norm": 0.16938459873199463, + "learning_rate": 3.773106863112451e-05, + "loss": 0.0421, + "step": 18000 + }, + { + "grad_norm": 0.18907026946544647, + "learning_rate": 3.7677645468751e-05, + "loss": 0.0506, + "step": 18010 + }, + { + "grad_norm": 0.1788053810596466, + "learning_rate": 3.762423727920018e-05, + "loss": 0.0409, + "step": 18020 + }, + { + "grad_norm": 0.18449702858924866, + "learning_rate": 3.7570844127367995e-05, + "loss": 0.0449, + "step": 18030 + }, + { + "grad_norm": 0.18287159502506256, + "learning_rate": 3.751746607813221e-05, + "loss": 0.044, + "step": 18040 + }, + { + "grad_norm": 0.19219540059566498, + "learning_rate": 3.746410319635217e-05, + "loss": 0.0402, + "step": 18050 + }, + { + "grad_norm": 0.18559890985488892, + "learning_rate": 3.7410755546868806e-05, + "loss": 0.0348, + "step": 18060 + }, + { + "grad_norm": 0.20342591404914856, + "learning_rate": 3.735742319450454e-05, + "loss": 0.0403, + "step": 18070 + }, + { + "grad_norm": 0.19558192789554596, + "learning_rate": 3.7304106204063184e-05, + "loss": 0.0347, + "step": 18080 + }, + { + "grad_norm": 0.16370651125907898, + "learning_rate": 3.725080464032996e-05, + "loss": 0.0383, + "step": 18090 + }, + { + "grad_norm": 0.16821178793907166, + "learning_rate": 3.719751856807125e-05, + "loss": 0.0439, + "step": 18100 + }, + { + "grad_norm": 0.2235502302646637, + "learning_rate": 3.71442480520347e-05, + "loss": 0.0333, + "step": 18110 + }, + { + "grad_norm": 0.1981503814458847, + "learning_rate": 3.709099315694897e-05, + "loss": 0.0375, + "step": 18120 + }, + { + "grad_norm": 0.2684493362903595, + "learning_rate": 3.703775394752378e-05, + "loss": 0.0391, + "step": 18130 + }, + { + "grad_norm": 0.1816844344139099, + "learning_rate": 3.698453048844983e-05, + "loss": 0.03, + "step": 18140 + }, + { + "grad_norm": 0.25211620330810547, + "learning_rate": 3.693132284439861e-05, + "loss": 0.0413, + "step": 18150 + }, + { + "grad_norm": 0.16730985045433044, + "learning_rate": 3.6878131080022414e-05, + "loss": 0.0364, + "step": 18160 + }, + { + "grad_norm": 0.20597617328166962, + "learning_rate": 3.682495525995429e-05, + "loss": 0.0355, + "step": 18170 + }, + { + "grad_norm": 0.20978334546089172, + "learning_rate": 3.6771795448807846e-05, + "loss": 0.0341, + "step": 18180 + }, + { + "grad_norm": 0.12720029056072235, + "learning_rate": 3.671865171117724e-05, + "loss": 0.0385, + "step": 18190 + }, + { + "grad_norm": 0.17334280908107758, + "learning_rate": 3.666552411163718e-05, + "loss": 0.0342, + "step": 18200 + }, + { + "grad_norm": 0.19893065094947815, + "learning_rate": 3.661241271474269e-05, + "loss": 0.0321, + "step": 18210 + }, + { + "grad_norm": 0.19480782747268677, + "learning_rate": 3.6559317585029116e-05, + "loss": 0.0302, + "step": 18220 + }, + { + "grad_norm": 0.17633108794689178, + "learning_rate": 3.650623878701204e-05, + "loss": 0.0329, + "step": 18230 + }, + { + "grad_norm": 0.27699360251426697, + "learning_rate": 3.645317638518721e-05, + "loss": 0.0348, + "step": 18240 + }, + { + "grad_norm": 0.2831765413284302, + "learning_rate": 3.640013044403046e-05, + "loss": 0.0406, + "step": 18250 + }, + { + "grad_norm": 0.2677629590034485, + "learning_rate": 3.634710102799761e-05, + "loss": 0.0337, + "step": 18260 + }, + { + "grad_norm": 0.2703036069869995, + "learning_rate": 3.6294088201524394e-05, + "loss": 0.0393, + "step": 18270 + }, + { + "grad_norm": 0.1464790552854538, + "learning_rate": 3.62410920290264e-05, + "loss": 0.0253, + "step": 18280 + }, + { + "grad_norm": 0.25031083822250366, + "learning_rate": 3.6188112574898954e-05, + "loss": 0.035, + "step": 18290 + }, + { + "grad_norm": 0.2124224454164505, + "learning_rate": 3.6135149903517115e-05, + "loss": 0.0267, + "step": 18300 + }, + { + "grad_norm": 0.1874246448278427, + "learning_rate": 3.608220407923552e-05, + "loss": 0.0371, + "step": 18310 + }, + { + "grad_norm": 0.17868053913116455, + "learning_rate": 3.602927516638833e-05, + "loss": 0.0366, + "step": 18320 + }, + { + "grad_norm": 0.15576161444187164, + "learning_rate": 3.5976363229289165e-05, + "loss": 0.0351, + "step": 18330 + }, + { + "grad_norm": 0.2119443565607071, + "learning_rate": 3.5923468332231e-05, + "loss": 0.0391, + "step": 18340 + }, + { + "grad_norm": 0.26227131485939026, + "learning_rate": 3.587059053948616e-05, + "loss": 0.0351, + "step": 18350 + }, + { + "grad_norm": 0.2130778580904007, + "learning_rate": 3.5817729915306134e-05, + "loss": 0.0346, + "step": 18360 + }, + { + "grad_norm": 0.17493723332881927, + "learning_rate": 3.5764886523921566e-05, + "loss": 0.0291, + "step": 18370 + }, + { + "grad_norm": 0.2187938541173935, + "learning_rate": 3.571206042954214e-05, + "loss": 0.0344, + "step": 18380 + }, + { + "grad_norm": 0.13787372410297394, + "learning_rate": 3.5659251696356566e-05, + "loss": 0.029, + "step": 18390 + }, + { + "grad_norm": 0.19648106396198273, + "learning_rate": 3.56064603885324e-05, + "loss": 0.0433, + "step": 18400 + }, + { + "grad_norm": 0.22102369368076324, + "learning_rate": 3.5553686570216115e-05, + "loss": 0.0442, + "step": 18410 + }, + { + "grad_norm": 0.17229019105434418, + "learning_rate": 3.550093030553284e-05, + "loss": 0.0408, + "step": 18420 + }, + { + "grad_norm": 0.26873594522476196, + "learning_rate": 3.544819165858642e-05, + "loss": 0.0331, + "step": 18430 + }, + { + "grad_norm": 0.22624897956848145, + "learning_rate": 3.539547069345926e-05, + "loss": 0.0286, + "step": 18440 + }, + { + "grad_norm": 0.14645111560821533, + "learning_rate": 3.5342767474212344e-05, + "loss": 0.0429, + "step": 18450 + }, + { + "grad_norm": 0.2993749678134918, + "learning_rate": 3.529008206488502e-05, + "loss": 0.0341, + "step": 18460 + }, + { + "grad_norm": 0.3248251974582672, + "learning_rate": 3.5237414529495055e-05, + "loss": 0.0465, + "step": 18470 + }, + { + "grad_norm": 0.11484508216381073, + "learning_rate": 3.5184764932038454e-05, + "loss": 0.0327, + "step": 18480 + }, + { + "grad_norm": 0.1629406213760376, + "learning_rate": 3.513213333648945e-05, + "loss": 0.0275, + "step": 18490 + }, + { + "grad_norm": 0.23732957243919373, + "learning_rate": 3.5079519806800374e-05, + "loss": 0.0491, + "step": 18500 + }, + { + "grad_norm": 0.24712564051151276, + "learning_rate": 3.502692440690165e-05, + "loss": 0.0385, + "step": 18510 + }, + { + "grad_norm": 0.15794216096401215, + "learning_rate": 3.497434720070165e-05, + "loss": 0.0335, + "step": 18520 + }, + { + "grad_norm": 0.21695983409881592, + "learning_rate": 3.4921788252086616e-05, + "loss": 0.0393, + "step": 18530 + }, + { + "grad_norm": 0.10693687200546265, + "learning_rate": 3.486924762492065e-05, + "loss": 0.0289, + "step": 18540 + }, + { + "grad_norm": 0.16470690071582794, + "learning_rate": 3.4816725383045534e-05, + "loss": 0.0344, + "step": 18550 + }, + { + "grad_norm": 0.15006233751773834, + "learning_rate": 3.476422159028079e-05, + "loss": 0.0547, + "step": 18560 + }, + { + "grad_norm": 0.21596089005470276, + "learning_rate": 3.471173631042345e-05, + "loss": 0.0357, + "step": 18570 + }, + { + "grad_norm": 0.17482729256153107, + "learning_rate": 3.465926960724808e-05, + "loss": 0.0274, + "step": 18580 + }, + { + "grad_norm": 0.24598614871501923, + "learning_rate": 3.460682154450666e-05, + "loss": 0.0356, + "step": 18590 + }, + { + "grad_norm": 0.22643035650253296, + "learning_rate": 3.4554392185928564e-05, + "loss": 0.0523, + "step": 18600 + }, + { + "grad_norm": 0.17603425681591034, + "learning_rate": 3.450198159522037e-05, + "loss": 0.0298, + "step": 18610 + }, + { + "grad_norm": 0.15921419858932495, + "learning_rate": 3.444958983606592e-05, + "loss": 0.053, + "step": 18620 + }, + { + "grad_norm": 0.3005395531654358, + "learning_rate": 3.439721697212612e-05, + "loss": 0.0641, + "step": 18630 + }, + { + "grad_norm": 0.16808553040027618, + "learning_rate": 3.4344863067038954e-05, + "loss": 0.0321, + "step": 18640 + }, + { + "grad_norm": 0.24982233345508575, + "learning_rate": 3.429252818441935e-05, + "loss": 0.0367, + "step": 18650 + }, + { + "grad_norm": 0.17254509031772614, + "learning_rate": 3.4240212387859094e-05, + "loss": 0.0389, + "step": 18660 + }, + { + "grad_norm": 0.20754341781139374, + "learning_rate": 3.418791574092686e-05, + "loss": 0.0391, + "step": 18670 + }, + { + "grad_norm": 0.25050297379493713, + "learning_rate": 3.413563830716796e-05, + "loss": 0.0398, + "step": 18680 + }, + { + "grad_norm": 0.1877581626176834, + "learning_rate": 3.408338015010445e-05, + "loss": 0.0316, + "step": 18690 + }, + { + "grad_norm": 0.12823662161827087, + "learning_rate": 3.40311413332349e-05, + "loss": 0.023, + "step": 18700 + }, + { + "grad_norm": 0.1968139111995697, + "learning_rate": 3.3978921920034365e-05, + "loss": 0.0267, + "step": 18710 + }, + { + "grad_norm": 0.12501156330108643, + "learning_rate": 3.392672197395441e-05, + "loss": 0.0203, + "step": 18720 + }, + { + "grad_norm": 0.1986612230539322, + "learning_rate": 3.387454155842287e-05, + "loss": 0.0687, + "step": 18730 + }, + { + "grad_norm": 0.1545063555240631, + "learning_rate": 3.382238073684386e-05, + "loss": 0.0222, + "step": 18740 + }, + { + "grad_norm": 0.18074332177639008, + "learning_rate": 3.377023957259771e-05, + "loss": 0.0247, + "step": 18750 + }, + { + "grad_norm": 0.2664492130279541, + "learning_rate": 3.3718118129040835e-05, + "loss": 0.0336, + "step": 18760 + }, + { + "grad_norm": 0.16315147280693054, + "learning_rate": 3.3666016469505724e-05, + "loss": 0.0403, + "step": 18770 + }, + { + "grad_norm": 0.24761256575584412, + "learning_rate": 3.361393465730079e-05, + "loss": 0.0451, + "step": 18780 + }, + { + "grad_norm": 0.1949421763420105, + "learning_rate": 3.3561872755710366e-05, + "loss": 0.0304, + "step": 18790 + }, + { + "grad_norm": 0.21247145533561707, + "learning_rate": 3.350983082799456e-05, + "loss": 0.0302, + "step": 18800 + }, + { + "grad_norm": 0.22408461570739746, + "learning_rate": 3.34578089373892e-05, + "loss": 0.033, + "step": 18810 + }, + { + "grad_norm": 0.2062862664461136, + "learning_rate": 3.340580714710581e-05, + "loss": 0.0304, + "step": 18820 + }, + { + "grad_norm": 0.20906749367713928, + "learning_rate": 3.3353825520331466e-05, + "loss": 0.0309, + "step": 18830 + }, + { + "grad_norm": 0.14874252676963806, + "learning_rate": 3.330186412022876e-05, + "loss": 0.0321, + "step": 18840 + }, + { + "grad_norm": 0.17219728231430054, + "learning_rate": 3.324992300993568e-05, + "loss": 0.0263, + "step": 18850 + }, + { + "grad_norm": 0.14920870959758759, + "learning_rate": 3.319800225256556e-05, + "loss": 0.0317, + "step": 18860 + }, + { + "grad_norm": 0.20687468349933624, + "learning_rate": 3.314610191120702e-05, + "loss": 0.0387, + "step": 18870 + }, + { + "grad_norm": 0.1286058872938156, + "learning_rate": 3.30942220489239e-05, + "loss": 0.0277, + "step": 18880 + }, + { + "grad_norm": 0.20597349107265472, + "learning_rate": 3.3042362728755086e-05, + "loss": 0.0352, + "step": 18890 + }, + { + "grad_norm": 0.12942515313625336, + "learning_rate": 3.299052401371456e-05, + "loss": 0.0466, + "step": 18900 + }, + { + "grad_norm": 0.1755361109972, + "learning_rate": 3.293870596679125e-05, + "loss": 0.0243, + "step": 18910 + }, + { + "grad_norm": 0.18293610215187073, + "learning_rate": 3.288690865094895e-05, + "loss": 0.0263, + "step": 18920 + }, + { + "grad_norm": 0.19544711709022522, + "learning_rate": 3.283513212912632e-05, + "loss": 0.0376, + "step": 18930 + }, + { + "grad_norm": 0.18266679346561432, + "learning_rate": 3.2783376464236684e-05, + "loss": 0.024, + "step": 18940 + }, + { + "grad_norm": 0.1919373720884323, + "learning_rate": 3.273164171916806e-05, + "loss": 0.0232, + "step": 18950 + }, + { + "grad_norm": 0.16968359053134918, + "learning_rate": 3.267992795678306e-05, + "loss": 0.0308, + "step": 18960 + }, + { + "grad_norm": 0.19283561408519745, + "learning_rate": 3.2628235239918744e-05, + "loss": 0.0374, + "step": 18970 + }, + { + "grad_norm": 0.1951591521501541, + "learning_rate": 3.2576563631386695e-05, + "loss": 0.0372, + "step": 18980 + }, + { + "grad_norm": 0.1839912086725235, + "learning_rate": 3.252491319397275e-05, + "loss": 0.0256, + "step": 18990 + }, + { + "grad_norm": 0.2229239046573639, + "learning_rate": 3.247328399043706e-05, + "loss": 0.0443, + "step": 19000 + }, + { + "grad_norm": 0.20551182329654694, + "learning_rate": 3.242167608351399e-05, + "loss": 0.041, + "step": 19010 + }, + { + "grad_norm": 0.2610750198364258, + "learning_rate": 3.2370089535911986e-05, + "loss": 0.0314, + "step": 19020 + }, + { + "grad_norm": 0.19975534081459045, + "learning_rate": 3.23185244103136e-05, + "loss": 0.0266, + "step": 19030 + }, + { + "grad_norm": 0.17642317712306976, + "learning_rate": 3.22669807693753e-05, + "loss": 0.0418, + "step": 19040 + }, + { + "grad_norm": 0.189057856798172, + "learning_rate": 3.2215458675727495e-05, + "loss": 0.0348, + "step": 19050 + }, + { + "grad_norm": 0.22612616419792175, + "learning_rate": 3.216395819197438e-05, + "loss": 0.0387, + "step": 19060 + }, + { + "grad_norm": 0.22631070017814636, + "learning_rate": 3.211247938069387e-05, + "loss": 0.0439, + "step": 19070 + }, + { + "grad_norm": 0.23374705016613007, + "learning_rate": 3.206102230443759e-05, + "loss": 0.0278, + "step": 19080 + }, + { + "grad_norm": 0.20651166141033173, + "learning_rate": 3.2009587025730764e-05, + "loss": 0.039, + "step": 19090 + }, + { + "grad_norm": 0.17833474278450012, + "learning_rate": 3.195817360707207e-05, + "loss": 0.0445, + "step": 19100 + }, + { + "grad_norm": 0.25085994601249695, + "learning_rate": 3.19067821109337e-05, + "loss": 0.0311, + "step": 19110 + }, + { + "grad_norm": 0.14792053401470184, + "learning_rate": 3.185541259976114e-05, + "loss": 0.022, + "step": 19120 + }, + { + "grad_norm": 0.16901874542236328, + "learning_rate": 3.180406513597316e-05, + "loss": 0.045, + "step": 19130 + }, + { + "grad_norm": 0.18258601427078247, + "learning_rate": 3.1752739781961835e-05, + "loss": 0.0255, + "step": 19140 + }, + { + "grad_norm": 0.19481147825717926, + "learning_rate": 3.170143660009228e-05, + "loss": 0.0182, + "step": 19150 + }, + { + "grad_norm": 0.22523626685142517, + "learning_rate": 3.16501556527027e-05, + "loss": 0.0347, + "step": 19160 + }, + { + "grad_norm": 0.19510923326015472, + "learning_rate": 3.1598897002104265e-05, + "loss": 0.0266, + "step": 19170 + }, + { + "grad_norm": 0.17791448533535004, + "learning_rate": 3.154766071058108e-05, + "loss": 0.029, + "step": 19180 + }, + { + "grad_norm": 0.1949159801006317, + "learning_rate": 3.149644684039008e-05, + "loss": 0.0473, + "step": 19190 + }, + { + "grad_norm": 0.20815108716487885, + "learning_rate": 3.144525545376095e-05, + "loss": 0.027, + "step": 19200 + }, + { + "grad_norm": 0.23945747315883636, + "learning_rate": 3.139408661289603e-05, + "loss": 0.0297, + "step": 19210 + }, + { + "grad_norm": 0.2272346317768097, + "learning_rate": 3.134294037997032e-05, + "loss": 0.0277, + "step": 19220 + }, + { + "grad_norm": 0.24184994399547577, + "learning_rate": 3.129181681713127e-05, + "loss": 0.0324, + "step": 19230 + }, + { + "grad_norm": 0.20474806427955627, + "learning_rate": 3.1240715986498855e-05, + "loss": 0.0303, + "step": 19240 + }, + { + "grad_norm": 0.1872473955154419, + "learning_rate": 3.1189637950165394e-05, + "loss": 0.0358, + "step": 19250 + }, + { + "grad_norm": 0.2087792456150055, + "learning_rate": 3.1138582770195544e-05, + "loss": 0.0267, + "step": 19260 + }, + { + "grad_norm": 0.14861610531806946, + "learning_rate": 3.108755050862615e-05, + "loss": 0.0231, + "step": 19270 + }, + { + "grad_norm": 0.22141896188259125, + "learning_rate": 3.1036541227466204e-05, + "loss": 0.026, + "step": 19280 + }, + { + "grad_norm": 0.2618032693862915, + "learning_rate": 3.0985554988696786e-05, + "loss": 0.0453, + "step": 19290 + }, + { + "grad_norm": 0.17633269727230072, + "learning_rate": 3.093459185427102e-05, + "loss": 0.0224, + "step": 19300 + }, + { + "grad_norm": 0.29034292697906494, + "learning_rate": 3.0883651886113905e-05, + "loss": 0.0304, + "step": 19310 + }, + { + "grad_norm": 0.1869911551475525, + "learning_rate": 3.0832735146122294e-05, + "loss": 0.0479, + "step": 19320 + }, + { + "grad_norm": 0.20567278563976288, + "learning_rate": 3.078184169616485e-05, + "loss": 0.0477, + "step": 19330 + }, + { + "grad_norm": 0.23613309860229492, + "learning_rate": 3.073097159808187e-05, + "loss": 0.0499, + "step": 19340 + }, + { + "grad_norm": 0.17235489189624786, + "learning_rate": 3.068012491368537e-05, + "loss": 0.041, + "step": 19350 + }, + { + "grad_norm": 0.17517909407615662, + "learning_rate": 3.062930170475885e-05, + "loss": 0.0208, + "step": 19360 + }, + { + "grad_norm": 0.2294636368751526, + "learning_rate": 3.057850203305729e-05, + "loss": 0.0288, + "step": 19370 + }, + { + "grad_norm": 0.23882533609867096, + "learning_rate": 3.052772596030708e-05, + "loss": 0.0366, + "step": 19380 + }, + { + "grad_norm": 0.10134632885456085, + "learning_rate": 3.0476973548205943e-05, + "loss": 0.0296, + "step": 19390 + }, + { + "grad_norm": 0.1951560378074646, + "learning_rate": 3.0426244858422847e-05, + "loss": 0.0301, + "step": 19400 + }, + { + "grad_norm": 0.11982168257236481, + "learning_rate": 3.0375539952597943e-05, + "loss": 0.0184, + "step": 19410 + }, + { + "grad_norm": 0.2145329862833023, + "learning_rate": 3.0324858892342468e-05, + "loss": 0.0215, + "step": 19420 + }, + { + "grad_norm": 0.5094833970069885, + "learning_rate": 3.0274201739238672e-05, + "loss": 0.0401, + "step": 19430 + }, + { + "grad_norm": 0.22126208245754242, + "learning_rate": 3.0223568554839786e-05, + "loss": 0.0242, + "step": 19440 + }, + { + "grad_norm": 0.17152267694473267, + "learning_rate": 3.0172959400669886e-05, + "loss": 0.0223, + "step": 19450 + }, + { + "grad_norm": 0.16729439795017242, + "learning_rate": 3.0122374338223902e-05, + "loss": 0.0206, + "step": 19460 + }, + { + "grad_norm": 0.1498456448316574, + "learning_rate": 3.0071813428967427e-05, + "loss": 0.0329, + "step": 19470 + }, + { + "grad_norm": 0.2324570268392563, + "learning_rate": 3.0021276734336746e-05, + "loss": 0.0259, + "step": 19480 + }, + { + "grad_norm": 0.16636350750923157, + "learning_rate": 2.997076431573871e-05, + "loss": 0.036, + "step": 19490 + }, + { + "grad_norm": 0.25444895029067993, + "learning_rate": 2.9920276234550636e-05, + "loss": 0.0244, + "step": 19500 + }, + { + "grad_norm": 0.1194058507680893, + "learning_rate": 2.986981255212035e-05, + "loss": 0.0407, + "step": 19510 + }, + { + "grad_norm": 0.19864346086978912, + "learning_rate": 2.9819373329765977e-05, + "loss": 0.0267, + "step": 19520 + }, + { + "grad_norm": 0.16163180768489838, + "learning_rate": 2.9768958628775902e-05, + "loss": 0.0178, + "step": 19530 + }, + { + "grad_norm": 0.19096767902374268, + "learning_rate": 2.9718568510408762e-05, + "loss": 0.0218, + "step": 19540 + }, + { + "grad_norm": 0.18430452048778534, + "learning_rate": 2.9668203035893272e-05, + "loss": 0.0268, + "step": 19550 + }, + { + "grad_norm": 0.1986236423254013, + "learning_rate": 2.9617862266428287e-05, + "loss": 0.0361, + "step": 19560 + }, + { + "grad_norm": 0.16380731761455536, + "learning_rate": 2.9567546263182556e-05, + "loss": 0.0258, + "step": 19570 + }, + { + "grad_norm": 0.19214165210723877, + "learning_rate": 2.951725508729476e-05, + "loss": 0.033, + "step": 19580 + }, + { + "grad_norm": 0.17830154299736023, + "learning_rate": 2.946698879987344e-05, + "loss": 0.0318, + "step": 19590 + }, + { + "grad_norm": 0.2021605372428894, + "learning_rate": 2.9416747461996853e-05, + "loss": 0.0325, + "step": 19600 + }, + { + "grad_norm": 0.1445900946855545, + "learning_rate": 2.9366531134712973e-05, + "loss": 0.0246, + "step": 19610 + }, + { + "grad_norm": 0.21563471853733063, + "learning_rate": 2.9316339879039367e-05, + "loss": 0.028, + "step": 19620 + }, + { + "grad_norm": 0.12324675917625427, + "learning_rate": 2.9266173755963167e-05, + "loss": 0.0267, + "step": 19630 + }, + { + "grad_norm": 0.18537619709968567, + "learning_rate": 2.9216032826440926e-05, + "loss": 0.0241, + "step": 19640 + }, + { + "grad_norm": 0.1994611769914627, + "learning_rate": 2.9165917151398592e-05, + "loss": 0.0263, + "step": 19650 + }, + { + "grad_norm": 0.1934840977191925, + "learning_rate": 2.9115826791731428e-05, + "loss": 0.024, + "step": 19660 + }, + { + "grad_norm": 0.15007077157497406, + "learning_rate": 2.906576180830398e-05, + "loss": 0.0205, + "step": 19670 + }, + { + "grad_norm": 0.22179685533046722, + "learning_rate": 2.9015722261949917e-05, + "loss": 0.0252, + "step": 19680 + }, + { + "grad_norm": 0.17887121438980103, + "learning_rate": 2.8965708213471986e-05, + "loss": 0.0396, + "step": 19690 + }, + { + "grad_norm": 0.25737902522087097, + "learning_rate": 2.8915719723641975e-05, + "loss": 0.0304, + "step": 19700 + }, + { + "grad_norm": 0.11517304927110672, + "learning_rate": 2.8865756853200604e-05, + "loss": 0.0321, + "step": 19710 + }, + { + "grad_norm": 0.17800123989582062, + "learning_rate": 2.8815819662857503e-05, + "loss": 0.0332, + "step": 19720 + }, + { + "grad_norm": 0.2051839530467987, + "learning_rate": 2.876590821329105e-05, + "loss": 0.0274, + "step": 19730 + }, + { + "grad_norm": 0.173445463180542, + "learning_rate": 2.871602256514836e-05, + "loss": 0.0267, + "step": 19740 + }, + { + "grad_norm": 0.15847386419773102, + "learning_rate": 2.8666162779045203e-05, + "loss": 0.0187, + "step": 19750 + }, + { + "grad_norm": 0.1442551612854004, + "learning_rate": 2.8616328915565904e-05, + "loss": 0.0436, + "step": 19760 + }, + { + "grad_norm": 0.15275998413562775, + "learning_rate": 2.856652103526334e-05, + "loss": 0.0178, + "step": 19770 + }, + { + "grad_norm": 0.25260835886001587, + "learning_rate": 2.8516739198658755e-05, + "loss": 0.0373, + "step": 19780 + }, + { + "grad_norm": 0.19478574395179749, + "learning_rate": 2.846698346624177e-05, + "loss": 0.0367, + "step": 19790 + }, + { + "grad_norm": 0.17324167490005493, + "learning_rate": 2.841725389847032e-05, + "loss": 0.0289, + "step": 19800 + }, + { + "grad_norm": 0.1577959954738617, + "learning_rate": 2.8367550555770506e-05, + "loss": 0.0237, + "step": 19810 + }, + { + "grad_norm": 0.14876766502857208, + "learning_rate": 2.831787349853655e-05, + "loss": 0.0331, + "step": 19820 + }, + { + "grad_norm": 0.15258604288101196, + "learning_rate": 2.8268222787130806e-05, + "loss": 0.0178, + "step": 19830 + }, + { + "grad_norm": 0.17461040616035461, + "learning_rate": 2.821859848188355e-05, + "loss": 0.0228, + "step": 19840 + }, + { + "grad_norm": 0.2127096801996231, + "learning_rate": 2.816900064309299e-05, + "loss": 0.04, + "step": 19850 + }, + { + "grad_norm": 0.2279936522245407, + "learning_rate": 2.811942933102517e-05, + "loss": 0.0407, + "step": 19860 + }, + { + "grad_norm": 0.19612039625644684, + "learning_rate": 2.806988460591391e-05, + "loss": 0.0237, + "step": 19870 + }, + { + "grad_norm": 0.21209228038787842, + "learning_rate": 2.802036652796074e-05, + "loss": 0.0315, + "step": 19880 + }, + { + "grad_norm": 0.12661439180374146, + "learning_rate": 2.797087515733478e-05, + "loss": 0.0328, + "step": 19890 + }, + { + "grad_norm": 0.2136591374874115, + "learning_rate": 2.7921410554172723e-05, + "loss": 0.0538, + "step": 19900 + }, + { + "grad_norm": 0.1741955578327179, + "learning_rate": 2.787197277857871e-05, + "loss": 0.0237, + "step": 19910 + }, + { + "grad_norm": 0.25654467940330505, + "learning_rate": 2.782256189062429e-05, + "loss": 0.029, + "step": 19920 + }, + { + "grad_norm": 0.18921031057834625, + "learning_rate": 2.777317795034839e-05, + "loss": 0.0386, + "step": 19930 + }, + { + "grad_norm": 0.1802884042263031, + "learning_rate": 2.7723821017757112e-05, + "loss": 0.0292, + "step": 19940 + }, + { + "grad_norm": 0.18317203223705292, + "learning_rate": 2.7674491152823822e-05, + "loss": 0.023, + "step": 19950 + }, + { + "grad_norm": 0.20081117749214172, + "learning_rate": 2.7625188415488944e-05, + "loss": 0.0302, + "step": 19960 + }, + { + "grad_norm": 0.17144300043582916, + "learning_rate": 2.7575912865659924e-05, + "loss": 0.0329, + "step": 19970 + }, + { + "grad_norm": 0.25255900621414185, + "learning_rate": 2.7526664563211245e-05, + "loss": 0.0361, + "step": 19980 + }, + { + "grad_norm": 0.20729579031467438, + "learning_rate": 2.7477443567984224e-05, + "loss": 0.0351, + "step": 19990 + }, + { + "grad_norm": 0.23493489623069763, + "learning_rate": 2.7428249939787e-05, + "loss": 0.0377, + "step": 20000 } ], "logging_steps": 10,