| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "grad_norm": 1.3594739437103271, |
| "learning_rate": 9e-07, |
| "loss": 1.1913, |
| "step": 10 |
| }, |
| { |
| "grad_norm": 1.0572824478149414, |
| "learning_rate": 1.9e-06, |
| "loss": 1.1841, |
| "step": 20 |
| }, |
| { |
| "grad_norm": 0.5717663764953613, |
| "learning_rate": 2.9e-06, |
| "loss": 1.1508, |
| "step": 30 |
| }, |
| { |
| "grad_norm": 0.3898443877696991, |
| "learning_rate": 3.9e-06, |
| "loss": 1.1205, |
| "step": 40 |
| }, |
| { |
| "grad_norm": 0.28664326667785645, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.0888, |
| "step": 50 |
| }, |
| { |
| "grad_norm": 0.1729290783405304, |
| "learning_rate": 5.9e-06, |
| "loss": 1.0782, |
| "step": 60 |
| }, |
| { |
| "grad_norm": 0.17002208530902863, |
| "learning_rate": 6.900000000000001e-06, |
| "loss": 1.0691, |
| "step": 70 |
| }, |
| { |
| "grad_norm": 0.2152942717075348, |
| "learning_rate": 7.9e-06, |
| "loss": 1.0562, |
| "step": 80 |
| }, |
| { |
| "grad_norm": 0.19103780388832092, |
| "learning_rate": 8.9e-06, |
| "loss": 1.0479, |
| "step": 90 |
| }, |
| { |
| "grad_norm": 0.3243984878063202, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 1.0372, |
| "step": 100 |
| }, |
| { |
| "grad_norm": 0.1820673942565918, |
| "learning_rate": 1.09e-05, |
| "loss": 1.0272, |
| "step": 110 |
| }, |
| { |
| "grad_norm": 0.21819084882736206, |
| "learning_rate": 1.19e-05, |
| "loss": 1.0236, |
| "step": 120 |
| }, |
| { |
| "grad_norm": 0.20377595722675323, |
| "learning_rate": 1.29e-05, |
| "loss": 1.0237, |
| "step": 130 |
| }, |
| { |
| "grad_norm": 0.20572194457054138, |
| "learning_rate": 1.3900000000000002e-05, |
| "loss": 1.0228, |
| "step": 140 |
| }, |
| { |
| "grad_norm": 0.20157840847969055, |
| "learning_rate": 1.49e-05, |
| "loss": 1.0217, |
| "step": 150 |
| }, |
| { |
| "grad_norm": 0.23459017276763916, |
| "learning_rate": 1.59e-05, |
| "loss": 1.0192, |
| "step": 160 |
| }, |
| { |
| "grad_norm": 0.32469043135643005, |
| "learning_rate": 1.69e-05, |
| "loss": 1.0063, |
| "step": 170 |
| }, |
| { |
| "grad_norm": 0.36008527874946594, |
| "learning_rate": 1.79e-05, |
| "loss": 0.9873, |
| "step": 180 |
| }, |
| { |
| "grad_norm": 0.5633573532104492, |
| "learning_rate": 1.8900000000000002e-05, |
| "loss": 0.9672, |
| "step": 190 |
| }, |
| { |
| "grad_norm": 0.7019369006156921, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 0.9315, |
| "step": 200 |
| }, |
| { |
| "grad_norm": 0.5538105964660645, |
| "learning_rate": 2.09e-05, |
| "loss": 0.8958, |
| "step": 210 |
| }, |
| { |
| "grad_norm": 0.5306029319763184, |
| "learning_rate": 2.19e-05, |
| "loss": 0.8707, |
| "step": 220 |
| }, |
| { |
| "grad_norm": 0.6606974005699158, |
| "learning_rate": 2.29e-05, |
| "loss": 0.8479, |
| "step": 230 |
| }, |
| { |
| "grad_norm": 0.8058410882949829, |
| "learning_rate": 2.39e-05, |
| "loss": 0.8169, |
| "step": 240 |
| }, |
| { |
| "grad_norm": 0.7277475595474243, |
| "learning_rate": 2.4900000000000002e-05, |
| "loss": 0.77, |
| "step": 250 |
| }, |
| { |
| "grad_norm": 0.6617355942726135, |
| "learning_rate": 2.5900000000000003e-05, |
| "loss": 0.7456, |
| "step": 260 |
| }, |
| { |
| "grad_norm": 0.8156651258468628, |
| "learning_rate": 2.6900000000000003e-05, |
| "loss": 0.6984, |
| "step": 270 |
| }, |
| { |
| "grad_norm": 0.7090954780578613, |
| "learning_rate": 2.7900000000000004e-05, |
| "loss": 0.6774, |
| "step": 280 |
| }, |
| { |
| "grad_norm": 0.8667084574699402, |
| "learning_rate": 2.8899999999999998e-05, |
| "loss": 0.6429, |
| "step": 290 |
| }, |
| { |
| "grad_norm": 0.946596622467041, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 0.6052, |
| "step": 300 |
| }, |
| { |
| "grad_norm": 0.8120863437652588, |
| "learning_rate": 3.09e-05, |
| "loss": 0.5681, |
| "step": 310 |
| }, |
| { |
| "grad_norm": 0.9630921483039856, |
| "learning_rate": 3.19e-05, |
| "loss": 0.5267, |
| "step": 320 |
| }, |
| { |
| "grad_norm": 0.9185823798179626, |
| "learning_rate": 3.29e-05, |
| "loss": 0.497, |
| "step": 330 |
| }, |
| { |
| "grad_norm": 0.9909350872039795, |
| "learning_rate": 3.3900000000000004e-05, |
| "loss": 0.4704, |
| "step": 340 |
| }, |
| { |
| "grad_norm": 0.7408623695373535, |
| "learning_rate": 3.49e-05, |
| "loss": 0.4463, |
| "step": 350 |
| }, |
| { |
| "grad_norm": 0.8417967557907104, |
| "learning_rate": 3.59e-05, |
| "loss": 0.4515, |
| "step": 360 |
| }, |
| { |
| "grad_norm": 0.9200495481491089, |
| "learning_rate": 3.69e-05, |
| "loss": 0.417, |
| "step": 370 |
| }, |
| { |
| "grad_norm": 1.146302342414856, |
| "learning_rate": 3.79e-05, |
| "loss": 0.3937, |
| "step": 380 |
| }, |
| { |
| "grad_norm": 1.0057293176651, |
| "learning_rate": 3.8900000000000004e-05, |
| "loss": 0.3773, |
| "step": 390 |
| }, |
| { |
| "grad_norm": 1.112216591835022, |
| "learning_rate": 3.99e-05, |
| "loss": 0.348, |
| "step": 400 |
| }, |
| { |
| "grad_norm": 1.0176512002944946, |
| "learning_rate": 4.09e-05, |
| "loss": 0.3392, |
| "step": 410 |
| }, |
| { |
| "grad_norm": 1.0310163497924805, |
| "learning_rate": 4.19e-05, |
| "loss": 0.3065, |
| "step": 420 |
| }, |
| { |
| "grad_norm": 1.022374153137207, |
| "learning_rate": 4.29e-05, |
| "loss": 0.2808, |
| "step": 430 |
| }, |
| { |
| "grad_norm": 1.368080735206604, |
| "learning_rate": 4.39e-05, |
| "loss": 0.2624, |
| "step": 440 |
| }, |
| { |
| "grad_norm": 1.1092591285705566, |
| "learning_rate": 4.49e-05, |
| "loss": 0.2405, |
| "step": 450 |
| }, |
| { |
| "grad_norm": 0.9738430380821228, |
| "learning_rate": 4.5900000000000004e-05, |
| "loss": 0.2254, |
| "step": 460 |
| }, |
| { |
| "grad_norm": 1.033246636390686, |
| "learning_rate": 4.69e-05, |
| "loss": 0.2162, |
| "step": 470 |
| }, |
| { |
| "grad_norm": 0.9855560064315796, |
| "learning_rate": 4.79e-05, |
| "loss": 0.2088, |
| "step": 480 |
| }, |
| { |
| "grad_norm": 1.0313360691070557, |
| "learning_rate": 4.89e-05, |
| "loss": 0.2188, |
| "step": 490 |
| }, |
| { |
| "grad_norm": 1.100176215171814, |
| "learning_rate": 4.99e-05, |
| "loss": 0.2007, |
| "step": 500 |
| }, |
| { |
| "grad_norm": 1.0784265995025635, |
| "learning_rate": 5.0900000000000004e-05, |
| "loss": 0.2016, |
| "step": 510 |
| }, |
| { |
| "grad_norm": 1.0822303295135498, |
| "learning_rate": 5.19e-05, |
| "loss": 0.1961, |
| "step": 520 |
| }, |
| { |
| "grad_norm": 1.067589282989502, |
| "learning_rate": 5.2900000000000005e-05, |
| "loss": 0.1801, |
| "step": 530 |
| }, |
| { |
| "grad_norm": 1.1917147636413574, |
| "learning_rate": 5.390000000000001e-05, |
| "loss": 0.1705, |
| "step": 540 |
| }, |
| { |
| "grad_norm": 1.3141072988510132, |
| "learning_rate": 5.4900000000000006e-05, |
| "loss": 0.1851, |
| "step": 550 |
| }, |
| { |
| "grad_norm": 1.002855658531189, |
| "learning_rate": 5.590000000000001e-05, |
| "loss": 0.1663, |
| "step": 560 |
| }, |
| { |
| "grad_norm": 1.167011022567749, |
| "learning_rate": 5.69e-05, |
| "loss": 0.1741, |
| "step": 570 |
| }, |
| { |
| "grad_norm": 1.0936863422393799, |
| "learning_rate": 5.79e-05, |
| "loss": 0.1661, |
| "step": 580 |
| }, |
| { |
| "grad_norm": 0.9669778347015381, |
| "learning_rate": 5.89e-05, |
| "loss": 0.1648, |
| "step": 590 |
| }, |
| { |
| "grad_norm": 0.9405611753463745, |
| "learning_rate": 5.99e-05, |
| "loss": 0.1627, |
| "step": 600 |
| }, |
| { |
| "grad_norm": 1.0284767150878906, |
| "learning_rate": 6.09e-05, |
| "loss": 0.1496, |
| "step": 610 |
| }, |
| { |
| "grad_norm": 1.1097605228424072, |
| "learning_rate": 6.19e-05, |
| "loss": 0.1628, |
| "step": 620 |
| }, |
| { |
| "grad_norm": 0.9104214310646057, |
| "learning_rate": 6.29e-05, |
| "loss": 0.1302, |
| "step": 630 |
| }, |
| { |
| "grad_norm": 0.8578998446464539, |
| "learning_rate": 6.390000000000001e-05, |
| "loss": 0.1326, |
| "step": 640 |
| }, |
| { |
| "grad_norm": 1.1287304162979126, |
| "learning_rate": 6.49e-05, |
| "loss": 0.1127, |
| "step": 650 |
| }, |
| { |
| "grad_norm": 0.8655268549919128, |
| "learning_rate": 6.59e-05, |
| "loss": 0.1202, |
| "step": 660 |
| }, |
| { |
| "grad_norm": 0.9937160015106201, |
| "learning_rate": 6.690000000000001e-05, |
| "loss": 0.1198, |
| "step": 670 |
| }, |
| { |
| "grad_norm": 0.9691420197486877, |
| "learning_rate": 6.790000000000001e-05, |
| "loss": 0.1096, |
| "step": 680 |
| }, |
| { |
| "grad_norm": 1.0945252180099487, |
| "learning_rate": 6.89e-05, |
| "loss": 0.105, |
| "step": 690 |
| }, |
| { |
| "grad_norm": 1.0388752222061157, |
| "learning_rate": 6.99e-05, |
| "loss": 0.1027, |
| "step": 700 |
| }, |
| { |
| "grad_norm": 0.881949245929718, |
| "learning_rate": 7.09e-05, |
| "loss": 0.1044, |
| "step": 710 |
| }, |
| { |
| "grad_norm": 0.8678519129753113, |
| "learning_rate": 7.19e-05, |
| "loss": 0.0842, |
| "step": 720 |
| }, |
| { |
| "grad_norm": 1.2314260005950928, |
| "learning_rate": 7.29e-05, |
| "loss": 0.0841, |
| "step": 730 |
| }, |
| { |
| "grad_norm": 0.7337191700935364, |
| "learning_rate": 7.390000000000001e-05, |
| "loss": 0.0771, |
| "step": 740 |
| }, |
| { |
| "grad_norm": 1.194354772567749, |
| "learning_rate": 7.49e-05, |
| "loss": 0.0791, |
| "step": 750 |
| }, |
| { |
| "grad_norm": 1.0703870058059692, |
| "learning_rate": 7.59e-05, |
| "loss": 0.0697, |
| "step": 760 |
| }, |
| { |
| "grad_norm": 0.9820927977561951, |
| "learning_rate": 7.69e-05, |
| "loss": 0.0798, |
| "step": 770 |
| }, |
| { |
| "grad_norm": 1.099042534828186, |
| "learning_rate": 7.790000000000001e-05, |
| "loss": 0.0736, |
| "step": 780 |
| }, |
| { |
| "grad_norm": 0.9056155681610107, |
| "learning_rate": 7.890000000000001e-05, |
| "loss": 0.0756, |
| "step": 790 |
| }, |
| { |
| "grad_norm": 0.8292648792266846, |
| "learning_rate": 7.99e-05, |
| "loss": 0.0796, |
| "step": 800 |
| }, |
| { |
| "grad_norm": 0.9507290720939636, |
| "learning_rate": 8.090000000000001e-05, |
| "loss": 0.0829, |
| "step": 810 |
| }, |
| { |
| "grad_norm": 0.9466397762298584, |
| "learning_rate": 8.19e-05, |
| "loss": 0.0688, |
| "step": 820 |
| }, |
| { |
| "grad_norm": 0.7956731915473938, |
| "learning_rate": 8.29e-05, |
| "loss": 0.0747, |
| "step": 830 |
| }, |
| { |
| "grad_norm": 0.7995853424072266, |
| "learning_rate": 8.39e-05, |
| "loss": 0.0634, |
| "step": 840 |
| }, |
| { |
| "grad_norm": 0.7665478587150574, |
| "learning_rate": 8.49e-05, |
| "loss": 0.0661, |
| "step": 850 |
| }, |
| { |
| "grad_norm": 0.9283880591392517, |
| "learning_rate": 8.59e-05, |
| "loss": 0.0702, |
| "step": 860 |
| }, |
| { |
| "grad_norm": 1.126967191696167, |
| "learning_rate": 8.69e-05, |
| "loss": 0.0716, |
| "step": 870 |
| }, |
| { |
| "grad_norm": 0.8662194609642029, |
| "learning_rate": 8.790000000000001e-05, |
| "loss": 0.0667, |
| "step": 880 |
| }, |
| { |
| "grad_norm": 0.9572857022285461, |
| "learning_rate": 8.89e-05, |
| "loss": 0.0791, |
| "step": 890 |
| }, |
| { |
| "grad_norm": 0.9036967158317566, |
| "learning_rate": 8.99e-05, |
| "loss": 0.0745, |
| "step": 900 |
| }, |
| { |
| "grad_norm": 0.7550048828125, |
| "learning_rate": 9.090000000000001e-05, |
| "loss": 0.0746, |
| "step": 910 |
| }, |
| { |
| "grad_norm": 0.9990408420562744, |
| "learning_rate": 9.190000000000001e-05, |
| "loss": 0.0648, |
| "step": 920 |
| }, |
| { |
| "grad_norm": 0.8286410570144653, |
| "learning_rate": 9.290000000000001e-05, |
| "loss": 0.0697, |
| "step": 930 |
| }, |
| { |
| "grad_norm": 0.9783310890197754, |
| "learning_rate": 9.39e-05, |
| "loss": 0.0749, |
| "step": 940 |
| }, |
| { |
| "grad_norm": 0.9899768233299255, |
| "learning_rate": 9.49e-05, |
| "loss": 0.0722, |
| "step": 950 |
| }, |
| { |
| "grad_norm": 0.7450554370880127, |
| "learning_rate": 9.59e-05, |
| "loss": 0.0599, |
| "step": 960 |
| }, |
| { |
| "grad_norm": 0.7791635394096375, |
| "learning_rate": 9.69e-05, |
| "loss": 0.0654, |
| "step": 970 |
| }, |
| { |
| "grad_norm": 0.7614015340805054, |
| "learning_rate": 9.790000000000001e-05, |
| "loss": 0.0558, |
| "step": 980 |
| }, |
| { |
| "grad_norm": 0.9096309542655945, |
| "learning_rate": 9.89e-05, |
| "loss": 0.0581, |
| "step": 990 |
| }, |
| { |
| "grad_norm": 0.668950080871582, |
| "learning_rate": 9.99e-05, |
| "loss": 0.0652, |
| "step": 1000 |
| }, |
| { |
| "grad_norm": 0.8658283948898315, |
| "learning_rate": 9.999994463727085e-05, |
| "loss": 0.0529, |
| "step": 1010 |
| }, |
| { |
| "grad_norm": 0.7495288848876953, |
| "learning_rate": 9.999975326009292e-05, |
| "loss": 0.059, |
| "step": 1020 |
| }, |
| { |
| "grad_norm": 0.9980189204216003, |
| "learning_rate": 9.999942518549879e-05, |
| "loss": 0.0638, |
| "step": 1030 |
| }, |
| { |
| "grad_norm": 0.7826606035232544, |
| "learning_rate": 9.999896041438544e-05, |
| "loss": 0.0546, |
| "step": 1040 |
| }, |
| { |
| "grad_norm": 0.6360778212547302, |
| "learning_rate": 9.999835894802353e-05, |
| "loss": 0.054, |
| "step": 1050 |
| }, |
| { |
| "grad_norm": 0.7757160067558289, |
| "learning_rate": 9.999762078805743e-05, |
| "loss": 0.0591, |
| "step": 1060 |
| }, |
| { |
| "grad_norm": 0.7390689849853516, |
| "learning_rate": 9.999674593650526e-05, |
| "loss": 0.0595, |
| "step": 1070 |
| }, |
| { |
| "grad_norm": 0.6460424065589905, |
| "learning_rate": 9.99957343957588e-05, |
| "loss": 0.0658, |
| "step": 1080 |
| }, |
| { |
| "grad_norm": 0.8082983493804932, |
| "learning_rate": 9.99945861685836e-05, |
| "loss": 0.0596, |
| "step": 1090 |
| }, |
| { |
| "grad_norm": 0.7415626645088196, |
| "learning_rate": 9.999330125811884e-05, |
| "loss": 0.0483, |
| "step": 1100 |
| }, |
| { |
| "grad_norm": 0.8829818367958069, |
| "learning_rate": 9.999187966787744e-05, |
| "loss": 0.0619, |
| "step": 1110 |
| }, |
| { |
| "grad_norm": 0.8239393830299377, |
| "learning_rate": 9.999032140174595e-05, |
| "loss": 0.0528, |
| "step": 1120 |
| }, |
| { |
| "grad_norm": 0.8529507517814636, |
| "learning_rate": 9.998862646398464e-05, |
| "loss": 0.0654, |
| "step": 1130 |
| }, |
| { |
| "grad_norm": 0.7502208948135376, |
| "learning_rate": 9.998679485922739e-05, |
| "loss": 0.0526, |
| "step": 1140 |
| }, |
| { |
| "grad_norm": 0.6970030069351196, |
| "learning_rate": 9.998482659248174e-05, |
| "loss": 0.0547, |
| "step": 1150 |
| }, |
| { |
| "grad_norm": 0.9376399517059326, |
| "learning_rate": 9.998272166912883e-05, |
| "loss": 0.0557, |
| "step": 1160 |
| }, |
| { |
| "grad_norm": 0.7249330282211304, |
| "learning_rate": 9.998048009492347e-05, |
| "loss": 0.0504, |
| "step": 1170 |
| }, |
| { |
| "grad_norm": 0.8968970775604248, |
| "learning_rate": 9.997810187599403e-05, |
| "loss": 0.0526, |
| "step": 1180 |
| }, |
| { |
| "grad_norm": 0.7676458358764648, |
| "learning_rate": 9.997558701884249e-05, |
| "loss": 0.0506, |
| "step": 1190 |
| }, |
| { |
| "grad_norm": 0.6501711010932922, |
| "learning_rate": 9.997293553034433e-05, |
| "loss": 0.061, |
| "step": 1200 |
| }, |
| { |
| "grad_norm": 0.677116870880127, |
| "learning_rate": 9.997014741774866e-05, |
| "loss": 0.0462, |
| "step": 1210 |
| }, |
| { |
| "grad_norm": 0.8147766590118408, |
| "learning_rate": 9.996722268867803e-05, |
| "loss": 0.0486, |
| "step": 1220 |
| }, |
| { |
| "grad_norm": 0.706069827079773, |
| "learning_rate": 9.996416135112858e-05, |
| "loss": 0.0511, |
| "step": 1230 |
| }, |
| { |
| "grad_norm": 0.6159539818763733, |
| "learning_rate": 9.996096341346988e-05, |
| "loss": 0.0492, |
| "step": 1240 |
| }, |
| { |
| "grad_norm": 0.6369336843490601, |
| "learning_rate": 9.995762888444495e-05, |
| "loss": 0.0479, |
| "step": 1250 |
| }, |
| { |
| "grad_norm": 0.7543830275535583, |
| "learning_rate": 9.995415777317027e-05, |
| "loss": 0.0493, |
| "step": 1260 |
| }, |
| { |
| "grad_norm": 0.7505154609680176, |
| "learning_rate": 9.995055008913574e-05, |
| "loss": 0.053, |
| "step": 1270 |
| }, |
| { |
| "grad_norm": 0.5397493243217468, |
| "learning_rate": 9.994680584220463e-05, |
| "loss": 0.0432, |
| "step": 1280 |
| }, |
| { |
| "grad_norm": 0.6707198619842529, |
| "learning_rate": 9.994292504261355e-05, |
| "loss": 0.0472, |
| "step": 1290 |
| }, |
| { |
| "grad_norm": 0.8792182803153992, |
| "learning_rate": 9.993890770097247e-05, |
| "loss": 0.0453, |
| "step": 1300 |
| }, |
| { |
| "grad_norm": 0.7324561476707458, |
| "learning_rate": 9.993475382826467e-05, |
| "loss": 0.0479, |
| "step": 1310 |
| }, |
| { |
| "grad_norm": 0.8385289907455444, |
| "learning_rate": 9.993046343584664e-05, |
| "loss": 0.0549, |
| "step": 1320 |
| }, |
| { |
| "grad_norm": 0.5908923745155334, |
| "learning_rate": 9.992603653544816e-05, |
| "loss": 0.0483, |
| "step": 1330 |
| }, |
| { |
| "grad_norm": 0.63700932264328, |
| "learning_rate": 9.992147313917222e-05, |
| "loss": 0.0485, |
| "step": 1340 |
| }, |
| { |
| "grad_norm": 0.7525864839553833, |
| "learning_rate": 9.991677325949497e-05, |
| "loss": 0.0469, |
| "step": 1350 |
| }, |
| { |
| "grad_norm": 0.5628486275672913, |
| "learning_rate": 9.991193690926568e-05, |
| "loss": 0.0459, |
| "step": 1360 |
| }, |
| { |
| "grad_norm": 0.795554518699646, |
| "learning_rate": 9.990696410170678e-05, |
| "loss": 0.0467, |
| "step": 1370 |
| }, |
| { |
| "grad_norm": 0.7957155704498291, |
| "learning_rate": 9.990185485041371e-05, |
| "loss": 0.0481, |
| "step": 1380 |
| }, |
| { |
| "grad_norm": 0.5773254632949829, |
| "learning_rate": 9.989660916935498e-05, |
| "loss": 0.0471, |
| "step": 1390 |
| }, |
| { |
| "grad_norm": 0.6150880455970764, |
| "learning_rate": 9.989122707287208e-05, |
| "loss": 0.0426, |
| "step": 1400 |
| }, |
| { |
| "grad_norm": 0.7106145620346069, |
| "learning_rate": 9.988570857567945e-05, |
| "loss": 0.0537, |
| "step": 1410 |
| }, |
| { |
| "grad_norm": 0.9491516947746277, |
| "learning_rate": 9.988005369286446e-05, |
| "loss": 0.0525, |
| "step": 1420 |
| }, |
| { |
| "grad_norm": 0.6860232353210449, |
| "learning_rate": 9.987426243988734e-05, |
| "loss": 0.0429, |
| "step": 1430 |
| }, |
| { |
| "grad_norm": 0.7841853499412537, |
| "learning_rate": 9.986833483258114e-05, |
| "loss": 0.0524, |
| "step": 1440 |
| }, |
| { |
| "grad_norm": 0.6175568103790283, |
| "learning_rate": 9.986227088715173e-05, |
| "loss": 0.0385, |
| "step": 1450 |
| }, |
| { |
| "grad_norm": 0.5932314991950989, |
| "learning_rate": 9.98560706201777e-05, |
| "loss": 0.0408, |
| "step": 1460 |
| }, |
| { |
| "grad_norm": 0.7410153150558472, |
| "learning_rate": 9.984973404861036e-05, |
| "loss": 0.043, |
| "step": 1470 |
| }, |
| { |
| "grad_norm": 0.8330276608467102, |
| "learning_rate": 9.984326118977361e-05, |
| "loss": 0.051, |
| "step": 1480 |
| }, |
| { |
| "grad_norm": 0.7202706933021545, |
| "learning_rate": 9.983665206136406e-05, |
| "loss": 0.0493, |
| "step": 1490 |
| }, |
| { |
| "grad_norm": 0.574433445930481, |
| "learning_rate": 9.982990668145075e-05, |
| "loss": 0.0466, |
| "step": 1500 |
| }, |
| { |
| "grad_norm": 0.7351802587509155, |
| "learning_rate": 9.982302506847534e-05, |
| "loss": 0.057, |
| "step": 1510 |
| }, |
| { |
| "grad_norm": 0.819564163684845, |
| "learning_rate": 9.981600724125189e-05, |
| "loss": 0.0555, |
| "step": 1520 |
| }, |
| { |
| "grad_norm": 0.6065496206283569, |
| "learning_rate": 9.980885321896685e-05, |
| "loss": 0.0509, |
| "step": 1530 |
| }, |
| { |
| "grad_norm": 0.6572223901748657, |
| "learning_rate": 9.980156302117905e-05, |
| "loss": 0.044, |
| "step": 1540 |
| }, |
| { |
| "grad_norm": 0.6978927254676819, |
| "learning_rate": 9.979413666781963e-05, |
| "loss": 0.0465, |
| "step": 1550 |
| }, |
| { |
| "grad_norm": 0.5508580803871155, |
| "learning_rate": 9.978657417919193e-05, |
| "loss": 0.0452, |
| "step": 1560 |
| }, |
| { |
| "grad_norm": 0.5769541263580322, |
| "learning_rate": 9.977887557597153e-05, |
| "loss": 0.0475, |
| "step": 1570 |
| }, |
| { |
| "grad_norm": 0.5610742568969727, |
| "learning_rate": 9.97710408792061e-05, |
| "loss": 0.0469, |
| "step": 1580 |
| }, |
| { |
| "grad_norm": 0.5692776441574097, |
| "learning_rate": 9.976307011031542e-05, |
| "loss": 0.0449, |
| "step": 1590 |
| }, |
| { |
| "grad_norm": 0.5226185321807861, |
| "learning_rate": 9.975496329109126e-05, |
| "loss": 0.0476, |
| "step": 1600 |
| }, |
| { |
| "grad_norm": 0.7111744284629822, |
| "learning_rate": 9.974672044369732e-05, |
| "loss": 0.047, |
| "step": 1610 |
| }, |
| { |
| "grad_norm": 0.514858067035675, |
| "learning_rate": 9.97383415906693e-05, |
| "loss": 0.043, |
| "step": 1620 |
| }, |
| { |
| "grad_norm": 0.5856963396072388, |
| "learning_rate": 9.97298267549146e-05, |
| "loss": 0.0471, |
| "step": 1630 |
| }, |
| { |
| "grad_norm": 0.6191436052322388, |
| "learning_rate": 9.972117595971249e-05, |
| "loss": 0.0422, |
| "step": 1640 |
| }, |
| { |
| "grad_norm": 0.5670982599258423, |
| "learning_rate": 9.971238922871391e-05, |
| "loss": 0.0419, |
| "step": 1650 |
| }, |
| { |
| "grad_norm": 0.7190003991127014, |
| "learning_rate": 9.970346658594142e-05, |
| "loss": 0.0453, |
| "step": 1660 |
| }, |
| { |
| "grad_norm": 0.6552428007125854, |
| "learning_rate": 9.969440805578923e-05, |
| "loss": 0.046, |
| "step": 1670 |
| }, |
| { |
| "grad_norm": 0.578118622303009, |
| "learning_rate": 9.968521366302298e-05, |
| "loss": 0.0392, |
| "step": 1680 |
| }, |
| { |
| "grad_norm": 0.7054030895233154, |
| "learning_rate": 9.967588343277981e-05, |
| "loss": 0.0455, |
| "step": 1690 |
| }, |
| { |
| "grad_norm": 0.6531293392181396, |
| "learning_rate": 9.966641739056818e-05, |
| "loss": 0.0421, |
| "step": 1700 |
| }, |
| { |
| "grad_norm": 0.6111751198768616, |
| "learning_rate": 9.965681556226793e-05, |
| "loss": 0.0517, |
| "step": 1710 |
| }, |
| { |
| "grad_norm": 0.4928556978702545, |
| "learning_rate": 9.964707797413006e-05, |
| "loss": 0.044, |
| "step": 1720 |
| }, |
| { |
| "grad_norm": 0.6597058773040771, |
| "learning_rate": 9.963720465277679e-05, |
| "loss": 0.047, |
| "step": 1730 |
| }, |
| { |
| "grad_norm": 0.6202155351638794, |
| "learning_rate": 9.96271956252014e-05, |
| "loss": 0.0384, |
| "step": 1740 |
| }, |
| { |
| "grad_norm": 0.5262959599494934, |
| "learning_rate": 9.961705091876816e-05, |
| "loss": 0.0425, |
| "step": 1750 |
| }, |
| { |
| "grad_norm": 0.6935763955116272, |
| "learning_rate": 9.960677056121235e-05, |
| "loss": 0.0409, |
| "step": 1760 |
| }, |
| { |
| "grad_norm": 0.6149827837944031, |
| "learning_rate": 9.959635458064005e-05, |
| "loss": 0.0383, |
| "step": 1770 |
| }, |
| { |
| "grad_norm": 0.5901826024055481, |
| "learning_rate": 9.958580300552815e-05, |
| "loss": 0.0426, |
| "step": 1780 |
| }, |
| { |
| "grad_norm": 0.5597098469734192, |
| "learning_rate": 9.957511586472426e-05, |
| "loss": 0.0352, |
| "step": 1790 |
| }, |
| { |
| "grad_norm": 0.5581690073013306, |
| "learning_rate": 9.956429318744662e-05, |
| "loss": 0.0366, |
| "step": 1800 |
| }, |
| { |
| "grad_norm": 0.5969916582107544, |
| "learning_rate": 9.955333500328404e-05, |
| "loss": 0.0355, |
| "step": 1810 |
| }, |
| { |
| "grad_norm": 0.5474916696548462, |
| "learning_rate": 9.95422413421957e-05, |
| "loss": 0.0376, |
| "step": 1820 |
| }, |
| { |
| "grad_norm": 0.5651562809944153, |
| "learning_rate": 9.953101223451133e-05, |
| "loss": 0.0359, |
| "step": 1830 |
| }, |
| { |
| "grad_norm": 0.6243921518325806, |
| "learning_rate": 9.951964771093085e-05, |
| "loss": 0.0373, |
| "step": 1840 |
| }, |
| { |
| "grad_norm": 0.4624647796154022, |
| "learning_rate": 9.950814780252442e-05, |
| "loss": 0.0347, |
| "step": 1850 |
| }, |
| { |
| "grad_norm": 0.5893751382827759, |
| "learning_rate": 9.949651254073236e-05, |
| "loss": 0.0408, |
| "step": 1860 |
| }, |
| { |
| "grad_norm": 0.526287317276001, |
| "learning_rate": 9.948474195736504e-05, |
| "loss": 0.0388, |
| "step": 1870 |
| }, |
| { |
| "grad_norm": 0.6111840605735779, |
| "learning_rate": 9.947283608460277e-05, |
| "loss": 0.0346, |
| "step": 1880 |
| }, |
| { |
| "grad_norm": 0.46461328864097595, |
| "learning_rate": 9.946079495499577e-05, |
| "loss": 0.0411, |
| "step": 1890 |
| }, |
| { |
| "grad_norm": 0.610548734664917, |
| "learning_rate": 9.944861860146401e-05, |
| "loss": 0.0407, |
| "step": 1900 |
| }, |
| { |
| "grad_norm": 0.5339504480361938, |
| "learning_rate": 9.943630705729719e-05, |
| "loss": 0.0398, |
| "step": 1910 |
| }, |
| { |
| "grad_norm": 0.46559029817581177, |
| "learning_rate": 9.942386035615459e-05, |
| "loss": 0.039, |
| "step": 1920 |
| }, |
| { |
| "grad_norm": 0.7745798826217651, |
| "learning_rate": 9.941127853206503e-05, |
| "loss": 0.04, |
| "step": 1930 |
| }, |
| { |
| "grad_norm": 0.5811882019042969, |
| "learning_rate": 9.939856161942673e-05, |
| "loss": 0.0425, |
| "step": 1940 |
| }, |
| { |
| "grad_norm": 0.4856541156768799, |
| "learning_rate": 9.938570965300724e-05, |
| "loss": 0.0363, |
| "step": 1950 |
| }, |
| { |
| "grad_norm": 0.5952467918395996, |
| "learning_rate": 9.937272266794335e-05, |
| "loss": 0.0439, |
| "step": 1960 |
| }, |
| { |
| "grad_norm": 0.5669976472854614, |
| "learning_rate": 9.935960069974096e-05, |
| "loss": 0.05, |
| "step": 1970 |
| }, |
| { |
| "grad_norm": 0.5959198474884033, |
| "learning_rate": 9.934634378427506e-05, |
| "loss": 0.0382, |
| "step": 1980 |
| }, |
| { |
| "grad_norm": 0.520875096321106, |
| "learning_rate": 9.933295195778954e-05, |
| "loss": 0.0386, |
| "step": 1990 |
| }, |
| { |
| "grad_norm": 0.4351758360862732, |
| "learning_rate": 9.931942525689715e-05, |
| "loss": 0.0488, |
| "step": 2000 |
| }, |
| { |
| "grad_norm": 0.6345981359481812, |
| "learning_rate": 9.930576371857936e-05, |
| "loss": 0.0391, |
| "step": 2010 |
| }, |
| { |
| "grad_norm": 0.6230748295783997, |
| "learning_rate": 9.929196738018629e-05, |
| "loss": 0.0388, |
| "step": 2020 |
| }, |
| { |
| "grad_norm": 0.5425089001655579, |
| "learning_rate": 9.927803627943662e-05, |
| "loss": 0.0395, |
| "step": 2030 |
| }, |
| { |
| "grad_norm": 0.49332770705223083, |
| "learning_rate": 9.926397045441744e-05, |
| "loss": 0.039, |
| "step": 2040 |
| }, |
| { |
| "grad_norm": 0.6731558442115784, |
| "learning_rate": 9.924976994358417e-05, |
| "loss": 0.0427, |
| "step": 2050 |
| }, |
| { |
| "grad_norm": 0.5310463309288025, |
| "learning_rate": 9.923543478576048e-05, |
| "loss": 0.0474, |
| "step": 2060 |
| }, |
| { |
| "grad_norm": 0.548930823802948, |
| "learning_rate": 9.922096502013813e-05, |
| "loss": 0.0423, |
| "step": 2070 |
| }, |
| { |
| "grad_norm": 0.5744786262512207, |
| "learning_rate": 9.92063606862769e-05, |
| "loss": 0.0372, |
| "step": 2080 |
| }, |
| { |
| "grad_norm": 0.6390929222106934, |
| "learning_rate": 9.919162182410453e-05, |
| "loss": 0.0368, |
| "step": 2090 |
| }, |
| { |
| "grad_norm": 0.5252511501312256, |
| "learning_rate": 9.917674847391645e-05, |
| "loss": 0.038, |
| "step": 2100 |
| }, |
| { |
| "grad_norm": 0.5656434297561646, |
| "learning_rate": 9.916174067637584e-05, |
| "loss": 0.0333, |
| "step": 2110 |
| }, |
| { |
| "grad_norm": 0.5288258790969849, |
| "learning_rate": 9.914659847251348e-05, |
| "loss": 0.0406, |
| "step": 2120 |
| }, |
| { |
| "grad_norm": 0.5040147304534912, |
| "learning_rate": 9.913132190372753e-05, |
| "loss": 0.0369, |
| "step": 2130 |
| }, |
| { |
| "grad_norm": 0.5128138661384583, |
| "learning_rate": 9.911591101178359e-05, |
| "loss": 0.0368, |
| "step": 2140 |
| }, |
| { |
| "grad_norm": 0.4942684769630432, |
| "learning_rate": 9.910036583881443e-05, |
| "loss": 0.0334, |
| "step": 2150 |
| }, |
| { |
| "grad_norm": 0.5318565368652344, |
| "learning_rate": 9.908468642731995e-05, |
| "loss": 0.0325, |
| "step": 2160 |
| }, |
| { |
| "grad_norm": 0.5772367715835571, |
| "learning_rate": 9.906887282016707e-05, |
| "loss": 0.0344, |
| "step": 2170 |
| }, |
| { |
| "grad_norm": 0.5957911014556885, |
| "learning_rate": 9.90529250605896e-05, |
| "loss": 0.0368, |
| "step": 2180 |
| }, |
| { |
| "grad_norm": 0.6259480714797974, |
| "learning_rate": 9.903684319218809e-05, |
| "loss": 0.0375, |
| "step": 2190 |
| }, |
| { |
| "grad_norm": 0.691277801990509, |
| "learning_rate": 9.902062725892976e-05, |
| "loss": 0.0402, |
| "step": 2200 |
| }, |
| { |
| "grad_norm": 0.624859094619751, |
| "learning_rate": 9.900427730514834e-05, |
| "loss": 0.0316, |
| "step": 2210 |
| }, |
| { |
| "grad_norm": 0.46915674209594727, |
| "learning_rate": 9.8987793375544e-05, |
| "loss": 0.0352, |
| "step": 2220 |
| }, |
| { |
| "grad_norm": 0.5559591054916382, |
| "learning_rate": 9.897117551518318e-05, |
| "loss": 0.0353, |
| "step": 2230 |
| }, |
| { |
| "grad_norm": 0.47577548027038574, |
| "learning_rate": 9.895442376949844e-05, |
| "loss": 0.0395, |
| "step": 2240 |
| }, |
| { |
| "grad_norm": 0.7231595516204834, |
| "learning_rate": 9.893753818428845e-05, |
| "loss": 0.0442, |
| "step": 2250 |
| }, |
| { |
| "grad_norm": 0.4607575535774231, |
| "learning_rate": 9.892051880571773e-05, |
| "loss": 0.037, |
| "step": 2260 |
| }, |
| { |
| "grad_norm": 0.4901242256164551, |
| "learning_rate": 9.890336568031663e-05, |
| "loss": 0.0342, |
| "step": 2270 |
| }, |
| { |
| "grad_norm": 0.46413323283195496, |
| "learning_rate": 9.888607885498113e-05, |
| "loss": 0.0386, |
| "step": 2280 |
| }, |
| { |
| "grad_norm": 0.5028432607650757, |
| "learning_rate": 9.886865837697275e-05, |
| "loss": 0.0384, |
| "step": 2290 |
| }, |
| { |
| "grad_norm": 0.6079827547073364, |
| "learning_rate": 9.88511042939184e-05, |
| "loss": 0.0416, |
| "step": 2300 |
| }, |
| { |
| "grad_norm": 0.6189248561859131, |
| "learning_rate": 9.883341665381028e-05, |
| "loss": 0.0372, |
| "step": 2310 |
| }, |
| { |
| "grad_norm": 0.569456160068512, |
| "learning_rate": 9.881559550500575e-05, |
| "loss": 0.0317, |
| "step": 2320 |
| }, |
| { |
| "grad_norm": 0.5782006978988647, |
| "learning_rate": 9.879764089622712e-05, |
| "loss": 0.0363, |
| "step": 2330 |
| }, |
| { |
| "grad_norm": 0.6612024307250977, |
| "learning_rate": 9.87795528765616e-05, |
| "loss": 0.0386, |
| "step": 2340 |
| }, |
| { |
| "grad_norm": 0.45619797706604004, |
| "learning_rate": 9.876133149546118e-05, |
| "loss": 0.0385, |
| "step": 2350 |
| }, |
| { |
| "grad_norm": 0.4743977189064026, |
| "learning_rate": 9.874297680274238e-05, |
| "loss": 0.0384, |
| "step": 2360 |
| }, |
| { |
| "grad_norm": 0.5303918719291687, |
| "learning_rate": 9.872448884858624e-05, |
| "loss": 0.0364, |
| "step": 2370 |
| }, |
| { |
| "grad_norm": 0.5923212766647339, |
| "learning_rate": 9.870586768353815e-05, |
| "loss": 0.0366, |
| "step": 2380 |
| }, |
| { |
| "grad_norm": 0.5156052112579346, |
| "learning_rate": 9.868711335850764e-05, |
| "loss": 0.0412, |
| "step": 2390 |
| }, |
| { |
| "grad_norm": 0.4702778458595276, |
| "learning_rate": 9.866822592476833e-05, |
| "loss": 0.0353, |
| "step": 2400 |
| }, |
| { |
| "grad_norm": 0.4955006241798401, |
| "learning_rate": 9.86492054339577e-05, |
| "loss": 0.0356, |
| "step": 2410 |
| }, |
| { |
| "grad_norm": 0.4722374677658081, |
| "learning_rate": 9.863005193807711e-05, |
| "loss": 0.0328, |
| "step": 2420 |
| }, |
| { |
| "grad_norm": 0.5261074900627136, |
| "learning_rate": 9.861076548949143e-05, |
| "loss": 0.0314, |
| "step": 2430 |
| }, |
| { |
| "grad_norm": 0.43109720945358276, |
| "learning_rate": 9.859134614092912e-05, |
| "loss": 0.0306, |
| "step": 2440 |
| }, |
| { |
| "grad_norm": 0.5150691270828247, |
| "learning_rate": 9.857179394548191e-05, |
| "loss": 0.0331, |
| "step": 2450 |
| }, |
| { |
| "grad_norm": 0.413881778717041, |
| "learning_rate": 9.855210895660477e-05, |
| "loss": 0.0313, |
| "step": 2460 |
| }, |
| { |
| "grad_norm": 0.5778813362121582, |
| "learning_rate": 9.853229122811568e-05, |
| "loss": 0.0327, |
| "step": 2470 |
| }, |
| { |
| "grad_norm": 0.5499809980392456, |
| "learning_rate": 9.851234081419559e-05, |
| "loss": 0.0371, |
| "step": 2480 |
| }, |
| { |
| "grad_norm": 0.533755898475647, |
| "learning_rate": 9.849225776938814e-05, |
| "loss": 0.0347, |
| "step": 2490 |
| }, |
| { |
| "grad_norm": 0.5036794543266296, |
| "learning_rate": 9.847204214859964e-05, |
| "loss": 0.0365, |
| "step": 2500 |
| }, |
| { |
| "grad_norm": 0.4547636806964874, |
| "learning_rate": 9.845169400709879e-05, |
| "loss": 0.0284, |
| "step": 2510 |
| }, |
| { |
| "grad_norm": 0.4148177206516266, |
| "learning_rate": 9.843121340051664e-05, |
| "loss": 0.0338, |
| "step": 2520 |
| }, |
| { |
| "grad_norm": 0.4307814836502075, |
| "learning_rate": 9.841060038484641e-05, |
| "loss": 0.0401, |
| "step": 2530 |
| }, |
| { |
| "grad_norm": 0.5055217146873474, |
| "learning_rate": 9.838985501644328e-05, |
| "loss": 0.0413, |
| "step": 2540 |
| }, |
| { |
| "grad_norm": 0.5252987742424011, |
| "learning_rate": 9.83689773520243e-05, |
| "loss": 0.0334, |
| "step": 2550 |
| }, |
| { |
| "grad_norm": 0.5325053334236145, |
| "learning_rate": 9.834796744866819e-05, |
| "loss": 0.0339, |
| "step": 2560 |
| }, |
| { |
| "grad_norm": 0.5485632419586182, |
| "learning_rate": 9.832682536381525e-05, |
| "loss": 0.0354, |
| "step": 2570 |
| }, |
| { |
| "grad_norm": 0.5406777262687683, |
| "learning_rate": 9.830555115526711e-05, |
| "loss": 0.0368, |
| "step": 2580 |
| }, |
| { |
| "grad_norm": 0.37698280811309814, |
| "learning_rate": 9.828414488118667e-05, |
| "loss": 0.0336, |
| "step": 2590 |
| }, |
| { |
| "grad_norm": 0.5253736972808838, |
| "learning_rate": 9.826260660009785e-05, |
| "loss": 0.0337, |
| "step": 2600 |
| }, |
| { |
| "grad_norm": 0.482319176197052, |
| "learning_rate": 9.824093637088547e-05, |
| "loss": 0.0299, |
| "step": 2610 |
| }, |
| { |
| "grad_norm": 0.43845584988594055, |
| "learning_rate": 9.821913425279514e-05, |
| "loss": 0.032, |
| "step": 2620 |
| }, |
| { |
| "grad_norm": 0.4526597559452057, |
| "learning_rate": 9.8197200305433e-05, |
| "loss": 0.034, |
| "step": 2630 |
| }, |
| { |
| "grad_norm": 0.45589521527290344, |
| "learning_rate": 9.817513458876564e-05, |
| "loss": 0.0464, |
| "step": 2640 |
| }, |
| { |
| "grad_norm": 0.5381149649620056, |
| "learning_rate": 9.815293716311987e-05, |
| "loss": 0.0334, |
| "step": 2650 |
| }, |
| { |
| "grad_norm": 0.5279123187065125, |
| "learning_rate": 9.813060808918262e-05, |
| "loss": 0.0318, |
| "step": 2660 |
| }, |
| { |
| "grad_norm": 0.3532435894012451, |
| "learning_rate": 9.810814742800069e-05, |
| "loss": 0.0285, |
| "step": 2670 |
| }, |
| { |
| "grad_norm": 0.3765302896499634, |
| "learning_rate": 9.808555524098074e-05, |
| "loss": 0.0289, |
| "step": 2680 |
| }, |
| { |
| "grad_norm": 0.46037837862968445, |
| "learning_rate": 9.806283158988887e-05, |
| "loss": 0.0291, |
| "step": 2690 |
| }, |
| { |
| "grad_norm": 0.483735591173172, |
| "learning_rate": 9.803997653685072e-05, |
| "loss": 0.0392, |
| "step": 2700 |
| }, |
| { |
| "grad_norm": 0.45865148305892944, |
| "learning_rate": 9.801699014435112e-05, |
| "loss": 0.0393, |
| "step": 2710 |
| }, |
| { |
| "grad_norm": 0.4620376229286194, |
| "learning_rate": 9.799387247523398e-05, |
| "loss": 0.0352, |
| "step": 2720 |
| }, |
| { |
| "grad_norm": 0.41832435131073, |
| "learning_rate": 9.797062359270215e-05, |
| "loss": 0.0319, |
| "step": 2730 |
| }, |
| { |
| "grad_norm": 0.4439375400543213, |
| "learning_rate": 9.794724356031715e-05, |
| "loss": 0.0307, |
| "step": 2740 |
| }, |
| { |
| "grad_norm": 0.5037664771080017, |
| "learning_rate": 9.792373244199913e-05, |
| "loss": 0.0306, |
| "step": 2750 |
| }, |
| { |
| "grad_norm": 0.378164678812027, |
| "learning_rate": 9.790009030202658e-05, |
| "loss": 0.0313, |
| "step": 2760 |
| }, |
| { |
| "grad_norm": 0.5053073763847351, |
| "learning_rate": 9.78763172050362e-05, |
| "loss": 0.0295, |
| "step": 2770 |
| }, |
| { |
| "grad_norm": 0.4680381119251251, |
| "learning_rate": 9.785241321602274e-05, |
| "loss": 0.0277, |
| "step": 2780 |
| }, |
| { |
| "grad_norm": 0.4624013304710388, |
| "learning_rate": 9.782837840033879e-05, |
| "loss": 0.0288, |
| "step": 2790 |
| }, |
| { |
| "grad_norm": 0.5074241757392883, |
| "learning_rate": 9.780421282369461e-05, |
| "loss": 0.0292, |
| "step": 2800 |
| }, |
| { |
| "grad_norm": 0.4835506081581116, |
| "learning_rate": 9.777991655215797e-05, |
| "loss": 0.0294, |
| "step": 2810 |
| }, |
| { |
| "grad_norm": 0.5738292336463928, |
| "learning_rate": 9.775548965215394e-05, |
| "loss": 0.0295, |
| "step": 2820 |
| }, |
| { |
| "grad_norm": 0.5334445238113403, |
| "learning_rate": 9.773093219046474e-05, |
| "loss": 0.0293, |
| "step": 2830 |
| }, |
| { |
| "grad_norm": 0.4011390507221222, |
| "learning_rate": 9.770624423422954e-05, |
| "loss": 0.0291, |
| "step": 2840 |
| }, |
| { |
| "grad_norm": 0.41171419620513916, |
| "learning_rate": 9.768142585094426e-05, |
| "loss": 0.0302, |
| "step": 2850 |
| }, |
| { |
| "grad_norm": 0.46391263604164124, |
| "learning_rate": 9.765647710846142e-05, |
| "loss": 0.0405, |
| "step": 2860 |
| }, |
| { |
| "grad_norm": 0.5071845650672913, |
| "learning_rate": 9.763139807498991e-05, |
| "loss": 0.0285, |
| "step": 2870 |
| }, |
| { |
| "grad_norm": 0.4814237058162689, |
| "learning_rate": 9.760618881909487e-05, |
| "loss": 0.0317, |
| "step": 2880 |
| }, |
| { |
| "grad_norm": 0.5396919846534729, |
| "learning_rate": 9.758084940969744e-05, |
| "loss": 0.0316, |
| "step": 2890 |
| }, |
| { |
| "grad_norm": 0.5363779664039612, |
| "learning_rate": 9.755537991607459e-05, |
| "loss": 0.027, |
| "step": 2900 |
| }, |
| { |
| "grad_norm": 0.505138099193573, |
| "learning_rate": 9.752978040785895e-05, |
| "loss": 0.0354, |
| "step": 2910 |
| }, |
| { |
| "grad_norm": 0.5476271510124207, |
| "learning_rate": 9.750405095503859e-05, |
| "loss": 0.0299, |
| "step": 2920 |
| }, |
| { |
| "grad_norm": 0.5189036130905151, |
| "learning_rate": 9.747819162795686e-05, |
| "loss": 0.0331, |
| "step": 2930 |
| }, |
| { |
| "grad_norm": 0.45717042684555054, |
| "learning_rate": 9.745220249731217e-05, |
| "loss": 0.026, |
| "step": 2940 |
| }, |
| { |
| "grad_norm": 0.4337165355682373, |
| "learning_rate": 9.742608363415781e-05, |
| "loss": 0.0272, |
| "step": 2950 |
| }, |
| { |
| "grad_norm": 0.4811023771762848, |
| "learning_rate": 9.739983510990176e-05, |
| "loss": 0.0288, |
| "step": 2960 |
| }, |
| { |
| "grad_norm": 0.3455168902873993, |
| "learning_rate": 9.737345699630647e-05, |
| "loss": 0.0298, |
| "step": 2970 |
| }, |
| { |
| "grad_norm": 0.5057815313339233, |
| "learning_rate": 9.734694936548869e-05, |
| "loss": 0.0332, |
| "step": 2980 |
| }, |
| { |
| "grad_norm": 0.38619765639305115, |
| "learning_rate": 9.732031228991932e-05, |
| "loss": 0.0256, |
| "step": 2990 |
| }, |
| { |
| "grad_norm": 0.3297816514968872, |
| "learning_rate": 9.729354584242302e-05, |
| "loss": 0.0355, |
| "step": 3000 |
| }, |
| { |
| "grad_norm": 0.5174765586853027, |
| "learning_rate": 9.726665009617832e-05, |
| "loss": 0.0309, |
| "step": 3010 |
| }, |
| { |
| "grad_norm": 0.43245866894721985, |
| "learning_rate": 9.723962512471714e-05, |
| "loss": 0.033, |
| "step": 3020 |
| }, |
| { |
| "grad_norm": 0.516598105430603, |
| "learning_rate": 9.72124710019247e-05, |
| "loss": 0.03, |
| "step": 3030 |
| }, |
| { |
| "grad_norm": 0.48712822794914246, |
| "learning_rate": 9.718518780203934e-05, |
| "loss": 0.0322, |
| "step": 3040 |
| }, |
| { |
| "grad_norm": 0.3674415946006775, |
| "learning_rate": 9.715777559965228e-05, |
| "loss": 0.0319, |
| "step": 3050 |
| }, |
| { |
| "grad_norm": 0.4218079149723053, |
| "learning_rate": 9.713023446970746e-05, |
| "loss": 0.0255, |
| "step": 3060 |
| }, |
| { |
| "grad_norm": 0.4967867136001587, |
| "learning_rate": 9.710256448750126e-05, |
| "loss": 0.0311, |
| "step": 3070 |
| }, |
| { |
| "grad_norm": 0.497653067111969, |
| "learning_rate": 9.707476572868235e-05, |
| "loss": 0.0341, |
| "step": 3080 |
| }, |
| { |
| "grad_norm": 0.4222137928009033, |
| "learning_rate": 9.704683826925149e-05, |
| "loss": 0.0273, |
| "step": 3090 |
| }, |
| { |
| "grad_norm": 0.37705838680267334, |
| "learning_rate": 9.701878218556129e-05, |
| "loss": 0.036, |
| "step": 3100 |
| }, |
| { |
| "grad_norm": 0.5626199841499329, |
| "learning_rate": 9.699059755431598e-05, |
| "loss": 0.0331, |
| "step": 3110 |
| }, |
| { |
| "grad_norm": 0.46293774247169495, |
| "learning_rate": 9.696228445257132e-05, |
| "loss": 0.0277, |
| "step": 3120 |
| }, |
| { |
| "grad_norm": 0.42764750123023987, |
| "learning_rate": 9.693384295773419e-05, |
| "loss": 0.0327, |
| "step": 3130 |
| }, |
| { |
| "grad_norm": 0.4717363715171814, |
| "learning_rate": 9.690527314756259e-05, |
| "loss": 0.0339, |
| "step": 3140 |
| }, |
| { |
| "grad_norm": 0.458967387676239, |
| "learning_rate": 9.687657510016527e-05, |
| "loss": 0.0261, |
| "step": 3150 |
| }, |
| { |
| "grad_norm": 0.45871081948280334, |
| "learning_rate": 9.684774889400161e-05, |
| "loss": 0.0309, |
| "step": 3160 |
| }, |
| { |
| "grad_norm": 0.5132860541343689, |
| "learning_rate": 9.681879460788135e-05, |
| "loss": 0.0264, |
| "step": 3170 |
| }, |
| { |
| "grad_norm": 0.4729975461959839, |
| "learning_rate": 9.67897123209644e-05, |
| "loss": 0.0315, |
| "step": 3180 |
| }, |
| { |
| "grad_norm": 0.4921012818813324, |
| "learning_rate": 9.676050211276062e-05, |
| "loss": 0.035, |
| "step": 3190 |
| }, |
| { |
| "grad_norm": 0.4574073255062103, |
| "learning_rate": 9.673116406312962e-05, |
| "loss": 0.0284, |
| "step": 3200 |
| }, |
| { |
| "grad_norm": 0.48541590571403503, |
| "learning_rate": 9.67016982522805e-05, |
| "loss": 0.028, |
| "step": 3210 |
| }, |
| { |
| "grad_norm": 0.4924331307411194, |
| "learning_rate": 9.667210476077164e-05, |
| "loss": 0.028, |
| "step": 3220 |
| }, |
| { |
| "grad_norm": 0.5730510950088501, |
| "learning_rate": 9.664238366951055e-05, |
| "loss": 0.0288, |
| "step": 3230 |
| }, |
| { |
| "grad_norm": 0.5551027059555054, |
| "learning_rate": 9.661253505975355e-05, |
| "loss": 0.0269, |
| "step": 3240 |
| }, |
| { |
| "grad_norm": 0.4366356134414673, |
| "learning_rate": 9.658255901310557e-05, |
| "loss": 0.0301, |
| "step": 3250 |
| }, |
| { |
| "grad_norm": 0.5327138304710388, |
| "learning_rate": 9.655245561152e-05, |
| "loss": 0.0278, |
| "step": 3260 |
| }, |
| { |
| "grad_norm": 0.4516207277774811, |
| "learning_rate": 9.65222249372984e-05, |
| "loss": 0.0266, |
| "step": 3270 |
| }, |
| { |
| "grad_norm": 0.4709407687187195, |
| "learning_rate": 9.649186707309026e-05, |
| "loss": 0.0325, |
| "step": 3280 |
| }, |
| { |
| "grad_norm": 0.36673372983932495, |
| "learning_rate": 9.646138210189283e-05, |
| "loss": 0.0285, |
| "step": 3290 |
| }, |
| { |
| "grad_norm": 0.5308244824409485, |
| "learning_rate": 9.643077010705087e-05, |
| "loss": 0.0281, |
| "step": 3300 |
| }, |
| { |
| "grad_norm": 0.45568153262138367, |
| "learning_rate": 9.640003117225637e-05, |
| "loss": 0.0286, |
| "step": 3310 |
| }, |
| { |
| "grad_norm": 0.4082559049129486, |
| "learning_rate": 9.636916538154846e-05, |
| "loss": 0.0241, |
| "step": 3320 |
| }, |
| { |
| "grad_norm": 0.48012563586235046, |
| "learning_rate": 9.633817281931296e-05, |
| "loss": 0.0297, |
| "step": 3330 |
| }, |
| { |
| "grad_norm": 0.4177444875240326, |
| "learning_rate": 9.630705357028242e-05, |
| "loss": 0.032, |
| "step": 3340 |
| }, |
| { |
| "grad_norm": 0.48793429136276245, |
| "learning_rate": 9.627580771953563e-05, |
| "loss": 0.0285, |
| "step": 3350 |
| }, |
| { |
| "grad_norm": 0.4371464252471924, |
| "learning_rate": 9.624443535249759e-05, |
| "loss": 0.0275, |
| "step": 3360 |
| }, |
| { |
| "grad_norm": 0.4983312487602234, |
| "learning_rate": 9.621293655493913e-05, |
| "loss": 0.0254, |
| "step": 3370 |
| }, |
| { |
| "grad_norm": 0.5624396204948425, |
| "learning_rate": 9.618131141297675e-05, |
| "loss": 0.027, |
| "step": 3380 |
| }, |
| { |
| "grad_norm": 0.43570947647094727, |
| "learning_rate": 9.614956001307242e-05, |
| "loss": 0.0301, |
| "step": 3390 |
| }, |
| { |
| "grad_norm": 0.4448493719100952, |
| "learning_rate": 9.611768244203321e-05, |
| "loss": 0.0351, |
| "step": 3400 |
| }, |
| { |
| "grad_norm": 0.4213621914386749, |
| "learning_rate": 9.60856787870112e-05, |
| "loss": 0.0292, |
| "step": 3410 |
| }, |
| { |
| "grad_norm": 0.4154338836669922, |
| "learning_rate": 9.605354913550318e-05, |
| "loss": 0.0262, |
| "step": 3420 |
| }, |
| { |
| "grad_norm": 0.45102718472480774, |
| "learning_rate": 9.602129357535037e-05, |
| "loss": 0.0313, |
| "step": 3430 |
| }, |
| { |
| "grad_norm": 0.38145503401756287, |
| "learning_rate": 9.598891219473825e-05, |
| "loss": 0.027, |
| "step": 3440 |
| }, |
| { |
| "grad_norm": 0.41790488362312317, |
| "learning_rate": 9.595640508219625e-05, |
| "loss": 0.0291, |
| "step": 3450 |
| }, |
| { |
| "grad_norm": 0.4644753336906433, |
| "learning_rate": 9.592377232659761e-05, |
| "loss": 0.0249, |
| "step": 3460 |
| }, |
| { |
| "grad_norm": 0.4731713533401489, |
| "learning_rate": 9.589101401715904e-05, |
| "loss": 0.0263, |
| "step": 3470 |
| }, |
| { |
| "grad_norm": 0.42398542165756226, |
| "learning_rate": 9.585813024344045e-05, |
| "loss": 0.026, |
| "step": 3480 |
| }, |
| { |
| "grad_norm": 0.5419644117355347, |
| "learning_rate": 9.58251210953449e-05, |
| "loss": 0.0296, |
| "step": 3490 |
| }, |
| { |
| "grad_norm": 0.463670939207077, |
| "learning_rate": 9.579198666311809e-05, |
| "loss": 0.0238, |
| "step": 3500 |
| }, |
| { |
| "grad_norm": 0.39643239974975586, |
| "learning_rate": 9.575872703734832e-05, |
| "loss": 0.0292, |
| "step": 3510 |
| }, |
| { |
| "grad_norm": 0.3542700409889221, |
| "learning_rate": 9.572534230896611e-05, |
| "loss": 0.0231, |
| "step": 3520 |
| }, |
| { |
| "grad_norm": 0.43060752749443054, |
| "learning_rate": 9.569183256924403e-05, |
| "loss": 0.025, |
| "step": 3530 |
| }, |
| { |
| "grad_norm": 0.40233463048934937, |
| "learning_rate": 9.565819790979646e-05, |
| "loss": 0.0422, |
| "step": 3540 |
| }, |
| { |
| "grad_norm": 0.4497774839401245, |
| "learning_rate": 9.562443842257925e-05, |
| "loss": 0.029, |
| "step": 3550 |
| }, |
| { |
| "grad_norm": 0.5018470287322998, |
| "learning_rate": 9.559055419988956e-05, |
| "loss": 0.0283, |
| "step": 3560 |
| }, |
| { |
| "grad_norm": 0.47868454456329346, |
| "learning_rate": 9.555654533436557e-05, |
| "loss": 0.0349, |
| "step": 3570 |
| }, |
| { |
| "grad_norm": 0.4413691759109497, |
| "learning_rate": 9.552241191898621e-05, |
| "loss": 0.0238, |
| "step": 3580 |
| }, |
| { |
| "grad_norm": 0.40998080372810364, |
| "learning_rate": 9.548815404707092e-05, |
| "loss": 0.03, |
| "step": 3590 |
| }, |
| { |
| "grad_norm": 0.43824273347854614, |
| "learning_rate": 9.545377181227942e-05, |
| "loss": 0.0284, |
| "step": 3600 |
| }, |
| { |
| "grad_norm": 0.4570449888706207, |
| "learning_rate": 9.541926530861145e-05, |
| "loss": 0.0266, |
| "step": 3610 |
| }, |
| { |
| "grad_norm": 0.44766074419021606, |
| "learning_rate": 9.538463463040645e-05, |
| "loss": 0.0278, |
| "step": 3620 |
| }, |
| { |
| "grad_norm": 0.481611967086792, |
| "learning_rate": 9.534987987234337e-05, |
| "loss": 0.0277, |
| "step": 3630 |
| }, |
| { |
| "grad_norm": 0.4858357608318329, |
| "learning_rate": 9.53150011294404e-05, |
| "loss": 0.0265, |
| "step": 3640 |
| }, |
| { |
| "grad_norm": 0.40574368834495544, |
| "learning_rate": 9.527999849705471e-05, |
| "loss": 0.0297, |
| "step": 3650 |
| }, |
| { |
| "grad_norm": 0.4581122100353241, |
| "learning_rate": 9.524487207088213e-05, |
| "loss": 0.0224, |
| "step": 3660 |
| }, |
| { |
| "grad_norm": 0.4100882411003113, |
| "learning_rate": 9.520962194695698e-05, |
| "loss": 0.0239, |
| "step": 3670 |
| }, |
| { |
| "grad_norm": 0.40333643555641174, |
| "learning_rate": 9.517424822165175e-05, |
| "loss": 0.0238, |
| "step": 3680 |
| }, |
| { |
| "grad_norm": 0.5596145987510681, |
| "learning_rate": 9.513875099167685e-05, |
| "loss": 0.0245, |
| "step": 3690 |
| }, |
| { |
| "grad_norm": 0.5230712890625, |
| "learning_rate": 9.510313035408035e-05, |
| "loss": 0.0262, |
| "step": 3700 |
| }, |
| { |
| "grad_norm": 0.39155617356300354, |
| "learning_rate": 9.506738640624775e-05, |
| "loss": 0.0264, |
| "step": 3710 |
| }, |
| { |
| "grad_norm": 0.4129464328289032, |
| "learning_rate": 9.50315192459016e-05, |
| "loss": 0.0208, |
| "step": 3720 |
| }, |
| { |
| "grad_norm": 0.5159543752670288, |
| "learning_rate": 9.499552897110136e-05, |
| "loss": 0.0239, |
| "step": 3730 |
| }, |
| { |
| "grad_norm": 0.5178094506263733, |
| "learning_rate": 9.495941568024304e-05, |
| "loss": 0.0253, |
| "step": 3740 |
| }, |
| { |
| "grad_norm": 0.43580612540245056, |
| "learning_rate": 9.492317947205904e-05, |
| "loss": 0.0268, |
| "step": 3750 |
| }, |
| { |
| "grad_norm": 0.4596274495124817, |
| "learning_rate": 9.488682044561775e-05, |
| "loss": 0.0256, |
| "step": 3760 |
| }, |
| { |
| "grad_norm": 0.41573286056518555, |
| "learning_rate": 9.485033870032335e-05, |
| "loss": 0.0243, |
| "step": 3770 |
| }, |
| { |
| "grad_norm": 0.47876912355422974, |
| "learning_rate": 9.481373433591556e-05, |
| "loss": 0.0215, |
| "step": 3780 |
| }, |
| { |
| "grad_norm": 0.4741547703742981, |
| "learning_rate": 9.47770074524693e-05, |
| "loss": 0.027, |
| "step": 3790 |
| }, |
| { |
| "grad_norm": 0.4306631088256836, |
| "learning_rate": 9.474015815039446e-05, |
| "loss": 0.0277, |
| "step": 3800 |
| }, |
| { |
| "grad_norm": 0.46127429604530334, |
| "learning_rate": 9.470318653043565e-05, |
| "loss": 0.0273, |
| "step": 3810 |
| }, |
| { |
| "grad_norm": 0.5021414160728455, |
| "learning_rate": 9.466609269367185e-05, |
| "loss": 0.0263, |
| "step": 3820 |
| }, |
| { |
| "grad_norm": 0.5333779454231262, |
| "learning_rate": 9.46288767415162e-05, |
| "loss": 0.0234, |
| "step": 3830 |
| }, |
| { |
| "grad_norm": 0.4366990625858307, |
| "learning_rate": 9.459153877571567e-05, |
| "loss": 0.0225, |
| "step": 3840 |
| }, |
| { |
| "grad_norm": 0.4819251298904419, |
| "learning_rate": 9.455407889835087e-05, |
| "loss": 0.0238, |
| "step": 3850 |
| }, |
| { |
| "grad_norm": 0.3999616503715515, |
| "learning_rate": 9.451649721183564e-05, |
| "loss": 0.0234, |
| "step": 3860 |
| }, |
| { |
| "grad_norm": 0.37807697057724, |
| "learning_rate": 9.447879381891692e-05, |
| "loss": 0.0258, |
| "step": 3870 |
| }, |
| { |
| "grad_norm": 0.5266739130020142, |
| "learning_rate": 9.444096882267428e-05, |
| "loss": 0.0329, |
| "step": 3880 |
| }, |
| { |
| "grad_norm": 0.3961910903453827, |
| "learning_rate": 9.440302232651988e-05, |
| "loss": 0.0226, |
| "step": 3890 |
| }, |
| { |
| "grad_norm": 0.3786242604255676, |
| "learning_rate": 9.436495443419795e-05, |
| "loss": 0.024, |
| "step": 3900 |
| }, |
| { |
| "grad_norm": 0.4175941050052643, |
| "learning_rate": 9.432676524978466e-05, |
| "loss": 0.0219, |
| "step": 3910 |
| }, |
| { |
| "grad_norm": 0.44096827507019043, |
| "learning_rate": 9.42884548776878e-05, |
| "loss": 0.0253, |
| "step": 3920 |
| }, |
| { |
| "grad_norm": 0.41201087832450867, |
| "learning_rate": 9.425002342264646e-05, |
| "loss": 0.0223, |
| "step": 3930 |
| }, |
| { |
| "grad_norm": 0.5009353160858154, |
| "learning_rate": 9.421147098973077e-05, |
| "loss": 0.0266, |
| "step": 3940 |
| }, |
| { |
| "grad_norm": 0.5505723357200623, |
| "learning_rate": 9.41727976843416e-05, |
| "loss": 0.0258, |
| "step": 3950 |
| }, |
| { |
| "grad_norm": 0.45981982350349426, |
| "learning_rate": 9.413400361221029e-05, |
| "loss": 0.0279, |
| "step": 3960 |
| }, |
| { |
| "grad_norm": 0.4804719388484955, |
| "learning_rate": 9.409508887939835e-05, |
| "loss": 0.022, |
| "step": 3970 |
| }, |
| { |
| "grad_norm": 0.4238436222076416, |
| "learning_rate": 9.40560535922972e-05, |
| "loss": 0.0212, |
| "step": 3980 |
| }, |
| { |
| "grad_norm": 0.403974324464798, |
| "learning_rate": 9.40168978576278e-05, |
| "loss": 0.0189, |
| "step": 3990 |
| }, |
| { |
| "grad_norm": 0.48837044835090637, |
| "learning_rate": 9.397762178244043e-05, |
| "loss": 0.0244, |
| "step": 4000 |
| }, |
| { |
| "grad_norm": 0.48128196597099304, |
| "learning_rate": 9.393822547411439e-05, |
| "loss": 0.0217, |
| "step": 4010 |
| }, |
| { |
| "grad_norm": 0.3272818624973297, |
| "learning_rate": 9.389870904035769e-05, |
| "loss": 0.0242, |
| "step": 4020 |
| }, |
| { |
| "grad_norm": 0.36953118443489075, |
| "learning_rate": 9.385907258920672e-05, |
| "loss": 0.0246, |
| "step": 4030 |
| }, |
| { |
| "grad_norm": 0.41161492466926575, |
| "learning_rate": 9.381931622902607e-05, |
| "loss": 0.021, |
| "step": 4040 |
| }, |
| { |
| "grad_norm": 0.4544064998626709, |
| "learning_rate": 9.377944006850807e-05, |
| "loss": 0.0193, |
| "step": 4050 |
| }, |
| { |
| "grad_norm": 0.47396498918533325, |
| "learning_rate": 9.373944421667265e-05, |
| "loss": 0.0213, |
| "step": 4060 |
| }, |
| { |
| "grad_norm": 0.4621795117855072, |
| "learning_rate": 9.369932878286691e-05, |
| "loss": 0.0266, |
| "step": 4070 |
| }, |
| { |
| "grad_norm": 0.5184421539306641, |
| "learning_rate": 9.365909387676494e-05, |
| "loss": 0.0196, |
| "step": 4080 |
| }, |
| { |
| "grad_norm": 0.4004800319671631, |
| "learning_rate": 9.361873960836744e-05, |
| "loss": 0.0263, |
| "step": 4090 |
| }, |
| { |
| "grad_norm": 0.3737598657608032, |
| "learning_rate": 9.357826608800142e-05, |
| "loss": 0.0196, |
| "step": 4100 |
| }, |
| { |
| "grad_norm": 0.4000731110572815, |
| "learning_rate": 9.353767342631994e-05, |
| "loss": 0.0203, |
| "step": 4110 |
| }, |
| { |
| "grad_norm": 0.3826330006122589, |
| "learning_rate": 9.34969617343018e-05, |
| "loss": 0.0219, |
| "step": 4120 |
| }, |
| { |
| "grad_norm": 0.5988262891769409, |
| "learning_rate": 9.345613112325122e-05, |
| "loss": 0.0204, |
| "step": 4130 |
| }, |
| { |
| "grad_norm": 0.4280189275741577, |
| "learning_rate": 9.34151817047975e-05, |
| "loss": 0.0224, |
| "step": 4140 |
| }, |
| { |
| "grad_norm": 0.3716961145401001, |
| "learning_rate": 9.33741135908948e-05, |
| "loss": 0.0262, |
| "step": 4150 |
| }, |
| { |
| "grad_norm": 0.4295980930328369, |
| "learning_rate": 9.33329268938218e-05, |
| "loss": 0.0207, |
| "step": 4160 |
| }, |
| { |
| "grad_norm": 0.425942063331604, |
| "learning_rate": 9.329162172618132e-05, |
| "loss": 0.0238, |
| "step": 4170 |
| }, |
| { |
| "grad_norm": 0.416522741317749, |
| "learning_rate": 9.325019820090013e-05, |
| "loss": 0.0226, |
| "step": 4180 |
| }, |
| { |
| "grad_norm": 0.5610533952713013, |
| "learning_rate": 9.320865643122855e-05, |
| "loss": 0.0208, |
| "step": 4190 |
| }, |
| { |
| "grad_norm": 0.379802942276001, |
| "learning_rate": 9.316699653074023e-05, |
| "loss": 0.022, |
| "step": 4200 |
| }, |
| { |
| "grad_norm": 0.4576219618320465, |
| "learning_rate": 9.312521861333172e-05, |
| "loss": 0.0166, |
| "step": 4210 |
| }, |
| { |
| "grad_norm": 0.45310190320014954, |
| "learning_rate": 9.308332279322224e-05, |
| "loss": 0.0242, |
| "step": 4220 |
| }, |
| { |
| "grad_norm": 0.4080248177051544, |
| "learning_rate": 9.304130918495338e-05, |
| "loss": 0.0224, |
| "step": 4230 |
| }, |
| { |
| "grad_norm": 0.33399489521980286, |
| "learning_rate": 9.299917790338874e-05, |
| "loss": 0.0187, |
| "step": 4240 |
| }, |
| { |
| "grad_norm": 0.356057733297348, |
| "learning_rate": 9.295692906371363e-05, |
| "loss": 0.0173, |
| "step": 4250 |
| }, |
| { |
| "grad_norm": 0.42619287967681885, |
| "learning_rate": 9.291456278143476e-05, |
| "loss": 0.0264, |
| "step": 4260 |
| }, |
| { |
| "grad_norm": 0.3479536175727844, |
| "learning_rate": 9.287207917237994e-05, |
| "loss": 0.0213, |
| "step": 4270 |
| }, |
| { |
| "grad_norm": 0.3362795114517212, |
| "learning_rate": 9.282947835269773e-05, |
| "loss": 0.0206, |
| "step": 4280 |
| }, |
| { |
| "grad_norm": 0.43236204981803894, |
| "learning_rate": 9.278676043885715e-05, |
| "loss": 0.0191, |
| "step": 4290 |
| }, |
| { |
| "grad_norm": 0.32585880160331726, |
| "learning_rate": 9.274392554764733e-05, |
| "loss": 0.0194, |
| "step": 4300 |
| }, |
| { |
| "grad_norm": 0.4723697900772095, |
| "learning_rate": 9.270097379617723e-05, |
| "loss": 0.016, |
| "step": 4310 |
| }, |
| { |
| "grad_norm": 0.42713454365730286, |
| "learning_rate": 9.26579053018753e-05, |
| "loss": 0.0154, |
| "step": 4320 |
| }, |
| { |
| "grad_norm": 0.33830246329307556, |
| "learning_rate": 9.261472018248918e-05, |
| "loss": 0.0146, |
| "step": 4330 |
| }, |
| { |
| "grad_norm": 0.4066753387451172, |
| "learning_rate": 9.25714185560853e-05, |
| "loss": 0.0259, |
| "step": 4340 |
| }, |
| { |
| "grad_norm": 0.448772668838501, |
| "learning_rate": 9.252800054104868e-05, |
| "loss": 0.0187, |
| "step": 4350 |
| }, |
| { |
| "grad_norm": 0.4219300448894501, |
| "learning_rate": 9.248446625608252e-05, |
| "loss": 0.0208, |
| "step": 4360 |
| }, |
| { |
| "grad_norm": 0.39920371770858765, |
| "learning_rate": 9.244081582020789e-05, |
| "loss": 0.0175, |
| "step": 4370 |
| }, |
| { |
| "grad_norm": 0.42131638526916504, |
| "learning_rate": 9.239704935276339e-05, |
| "loss": 0.0182, |
| "step": 4380 |
| }, |
| { |
| "grad_norm": 0.45648935437202454, |
| "learning_rate": 9.235316697340489e-05, |
| "loss": 0.0158, |
| "step": 4390 |
| }, |
| { |
| "grad_norm": 0.42188429832458496, |
| "learning_rate": 9.230916880210512e-05, |
| "loss": 0.0183, |
| "step": 4400 |
| }, |
| { |
| "grad_norm": 0.36581969261169434, |
| "learning_rate": 9.226505495915342e-05, |
| "loss": 0.0147, |
| "step": 4410 |
| }, |
| { |
| "grad_norm": 0.42502549290657043, |
| "learning_rate": 9.222082556515536e-05, |
| "loss": 0.0198, |
| "step": 4420 |
| }, |
| { |
| "grad_norm": 0.35229989886283875, |
| "learning_rate": 9.217648074103242e-05, |
| "loss": 0.0153, |
| "step": 4430 |
| }, |
| { |
| "grad_norm": 0.4085313379764557, |
| "learning_rate": 9.213202060802161e-05, |
| "loss": 0.0192, |
| "step": 4440 |
| }, |
| { |
| "grad_norm": 0.4650028645992279, |
| "learning_rate": 9.208744528767528e-05, |
| "loss": 0.0173, |
| "step": 4450 |
| }, |
| { |
| "grad_norm": 0.4048616886138916, |
| "learning_rate": 9.204275490186064e-05, |
| "loss": 0.0204, |
| "step": 4460 |
| }, |
| { |
| "grad_norm": 0.4178619980812073, |
| "learning_rate": 9.199794957275949e-05, |
| "loss": 0.0204, |
| "step": 4470 |
| }, |
| { |
| "grad_norm": 0.46256691217422485, |
| "learning_rate": 9.19530294228679e-05, |
| "loss": 0.0177, |
| "step": 4480 |
| }, |
| { |
| "grad_norm": 0.35352519154548645, |
| "learning_rate": 9.190799457499583e-05, |
| "loss": 0.028, |
| "step": 4490 |
| }, |
| { |
| "grad_norm": 0.4470050632953644, |
| "learning_rate": 9.186284515226686e-05, |
| "loss": 0.0194, |
| "step": 4500 |
| }, |
| { |
| "grad_norm": 0.3508913815021515, |
| "learning_rate": 9.181758127811777e-05, |
| "loss": 0.0241, |
| "step": 4510 |
| }, |
| { |
| "grad_norm": 0.411702424287796, |
| "learning_rate": 9.177220307629825e-05, |
| "loss": 0.0204, |
| "step": 4520 |
| }, |
| { |
| "grad_norm": 0.4468960762023926, |
| "learning_rate": 9.172671067087059e-05, |
| "loss": 0.0194, |
| "step": 4530 |
| }, |
| { |
| "grad_norm": 0.4807928204536438, |
| "learning_rate": 9.16811041862093e-05, |
| "loss": 0.0256, |
| "step": 4540 |
| }, |
| { |
| "grad_norm": 0.39205247163772583, |
| "learning_rate": 9.163538374700076e-05, |
| "loss": 0.0185, |
| "step": 4550 |
| }, |
| { |
| "grad_norm": 0.44329723715782166, |
| "learning_rate": 9.158954947824287e-05, |
| "loss": 0.0178, |
| "step": 4560 |
| }, |
| { |
| "grad_norm": 0.47283023595809937, |
| "learning_rate": 9.154360150524482e-05, |
| "loss": 0.0174, |
| "step": 4570 |
| }, |
| { |
| "grad_norm": 0.38849857449531555, |
| "learning_rate": 9.14975399536266e-05, |
| "loss": 0.0143, |
| "step": 4580 |
| }, |
| { |
| "grad_norm": 0.3656264543533325, |
| "learning_rate": 9.14513649493187e-05, |
| "loss": 0.0212, |
| "step": 4590 |
| }, |
| { |
| "grad_norm": 0.4674840271472931, |
| "learning_rate": 9.140507661856187e-05, |
| "loss": 0.0153, |
| "step": 4600 |
| }, |
| { |
| "grad_norm": 0.4313472509384155, |
| "learning_rate": 9.135867508790661e-05, |
| "loss": 0.0214, |
| "step": 4610 |
| }, |
| { |
| "grad_norm": 0.3471619486808777, |
| "learning_rate": 9.131216048421291e-05, |
| "loss": 0.0165, |
| "step": 4620 |
| }, |
| { |
| "grad_norm": 0.4542539715766907, |
| "learning_rate": 9.126553293464998e-05, |
| "loss": 0.0189, |
| "step": 4630 |
| }, |
| { |
| "grad_norm": 0.47608688473701477, |
| "learning_rate": 9.121879256669572e-05, |
| "loss": 0.017, |
| "step": 4640 |
| }, |
| { |
| "grad_norm": 0.3959465026855469, |
| "learning_rate": 9.117193950813652e-05, |
| "loss": 0.0164, |
| "step": 4650 |
| }, |
| { |
| "grad_norm": 0.408431738615036, |
| "learning_rate": 9.112497388706685e-05, |
| "loss": 0.0255, |
| "step": 4660 |
| }, |
| { |
| "grad_norm": 0.4116475582122803, |
| "learning_rate": 9.10778958318889e-05, |
| "loss": 0.0174, |
| "step": 4670 |
| }, |
| { |
| "grad_norm": 0.3917919993400574, |
| "learning_rate": 9.103070547131232e-05, |
| "loss": 0.0199, |
| "step": 4680 |
| }, |
| { |
| "grad_norm": 0.3482106029987335, |
| "learning_rate": 9.098340293435375e-05, |
| "loss": 0.0179, |
| "step": 4690 |
| }, |
| { |
| "grad_norm": 0.34646838903427124, |
| "learning_rate": 9.093598835033649e-05, |
| "loss": 0.0174, |
| "step": 4700 |
| }, |
| { |
| "grad_norm": 0.39419376850128174, |
| "learning_rate": 9.088846184889021e-05, |
| "loss": 0.0191, |
| "step": 4710 |
| }, |
| { |
| "grad_norm": 0.4543268084526062, |
| "learning_rate": 9.084082355995057e-05, |
| "loss": 0.0213, |
| "step": 4720 |
| }, |
| { |
| "grad_norm": 0.4212946891784668, |
| "learning_rate": 9.079307361375882e-05, |
| "loss": 0.0181, |
| "step": 4730 |
| }, |
| { |
| "grad_norm": 0.3014923334121704, |
| "learning_rate": 9.074521214086149e-05, |
| "loss": 0.019, |
| "step": 4740 |
| }, |
| { |
| "grad_norm": 0.36527299880981445, |
| "learning_rate": 9.069723927211001e-05, |
| "loss": 0.0179, |
| "step": 4750 |
| }, |
| { |
| "grad_norm": 0.3752840757369995, |
| "learning_rate": 9.064915513866037e-05, |
| "loss": 0.0183, |
| "step": 4760 |
| }, |
| { |
| "grad_norm": 0.42201003432273865, |
| "learning_rate": 9.060095987197279e-05, |
| "loss": 0.0162, |
| "step": 4770 |
| }, |
| { |
| "grad_norm": 0.3307137191295624, |
| "learning_rate": 9.055265360381126e-05, |
| "loss": 0.0206, |
| "step": 4780 |
| }, |
| { |
| "grad_norm": 0.33322593569755554, |
| "learning_rate": 9.050423646624326e-05, |
| "loss": 0.016, |
| "step": 4790 |
| }, |
| { |
| "grad_norm": 0.35324618220329285, |
| "learning_rate": 9.045570859163943e-05, |
| "loss": 0.0194, |
| "step": 4800 |
| }, |
| { |
| "grad_norm": 0.427572637796402, |
| "learning_rate": 9.04070701126731e-05, |
| "loss": 0.015, |
| "step": 4810 |
| }, |
| { |
| "grad_norm": 0.3561609983444214, |
| "learning_rate": 9.035832116232001e-05, |
| "loss": 0.0145, |
| "step": 4820 |
| }, |
| { |
| "grad_norm": 0.37716561555862427, |
| "learning_rate": 9.030946187385796e-05, |
| "loss": 0.016, |
| "step": 4830 |
| }, |
| { |
| "grad_norm": 0.39859738945961, |
| "learning_rate": 9.026049238086635e-05, |
| "loss": 0.0178, |
| "step": 4840 |
| }, |
| { |
| "grad_norm": 0.4500395655632019, |
| "learning_rate": 9.021141281722591e-05, |
| "loss": 0.0202, |
| "step": 4850 |
| }, |
| { |
| "grad_norm": 0.34830138087272644, |
| "learning_rate": 9.01622233171183e-05, |
| "loss": 0.0169, |
| "step": 4860 |
| }, |
| { |
| "grad_norm": 0.3729107677936554, |
| "learning_rate": 9.011292401502574e-05, |
| "loss": 0.0212, |
| "step": 4870 |
| }, |
| { |
| "grad_norm": 0.3912448585033417, |
| "learning_rate": 9.006351504573063e-05, |
| "loss": 0.0146, |
| "step": 4880 |
| }, |
| { |
| "grad_norm": 0.4137353003025055, |
| "learning_rate": 9.001399654431519e-05, |
| "loss": 0.0171, |
| "step": 4890 |
| }, |
| { |
| "grad_norm": 0.4444160759449005, |
| "learning_rate": 8.996436864616116e-05, |
| "loss": 0.0162, |
| "step": 4900 |
| }, |
| { |
| "grad_norm": 0.3148241639137268, |
| "learning_rate": 8.991463148694925e-05, |
| "loss": 0.0191, |
| "step": 4910 |
| }, |
| { |
| "grad_norm": 0.4391416907310486, |
| "learning_rate": 8.986478520265902e-05, |
| "loss": 0.0187, |
| "step": 4920 |
| }, |
| { |
| "grad_norm": 0.4296688139438629, |
| "learning_rate": 8.981482992956827e-05, |
| "loss": 0.0143, |
| "step": 4930 |
| }, |
| { |
| "grad_norm": 0.29728299379348755, |
| "learning_rate": 8.976476580425282e-05, |
| "loss": 0.0148, |
| "step": 4940 |
| }, |
| { |
| "grad_norm": 0.4356195032596588, |
| "learning_rate": 8.971459296358606e-05, |
| "loss": 0.0287, |
| "step": 4950 |
| }, |
| { |
| "grad_norm": 0.4179481565952301, |
| "learning_rate": 8.966431154473864e-05, |
| "loss": 0.0157, |
| "step": 4960 |
| }, |
| { |
| "grad_norm": 0.3610477149486542, |
| "learning_rate": 8.961392168517803e-05, |
| "loss": 0.0159, |
| "step": 4970 |
| }, |
| { |
| "grad_norm": 0.34345686435699463, |
| "learning_rate": 8.956342352266821e-05, |
| "loss": 0.016, |
| "step": 4980 |
| }, |
| { |
| "grad_norm": 0.3698787987232208, |
| "learning_rate": 8.95128171952692e-05, |
| "loss": 0.0214, |
| "step": 4990 |
| }, |
| { |
| "grad_norm": 0.327648788690567, |
| "learning_rate": 8.946210284133676e-05, |
| "loss": 0.0173, |
| "step": 5000 |
| }, |
| { |
| "grad_norm": 0.2809329330921173, |
| "learning_rate": 8.941128059952201e-05, |
| "loss": 0.0132, |
| "step": 5010 |
| }, |
| { |
| "grad_norm": 0.31239569187164307, |
| "learning_rate": 8.936035060877102e-05, |
| "loss": 0.0244, |
| "step": 5020 |
| }, |
| { |
| "grad_norm": 0.40824398398399353, |
| "learning_rate": 8.930931300832443e-05, |
| "loss": 0.0181, |
| "step": 5030 |
| }, |
| { |
| "grad_norm": 0.36586353182792664, |
| "learning_rate": 8.925816793771711e-05, |
| "loss": 0.0148, |
| "step": 5040 |
| }, |
| { |
| "grad_norm": 0.2970711886882782, |
| "learning_rate": 8.92069155367777e-05, |
| "loss": 0.0139, |
| "step": 5050 |
| }, |
| { |
| "grad_norm": 0.3478129506111145, |
| "learning_rate": 8.915555594562834e-05, |
| "loss": 0.0172, |
| "step": 5060 |
| }, |
| { |
| "grad_norm": 0.3716062903404236, |
| "learning_rate": 8.910408930468416e-05, |
| "loss": 0.016, |
| "step": 5070 |
| }, |
| { |
| "grad_norm": 0.35524141788482666, |
| "learning_rate": 8.905251575465303e-05, |
| "loss": 0.0128, |
| "step": 5080 |
| }, |
| { |
| "grad_norm": 0.46534451842308044, |
| "learning_rate": 8.900083543653502e-05, |
| "loss": 0.0192, |
| "step": 5090 |
| }, |
| { |
| "grad_norm": 0.4329080283641815, |
| "learning_rate": 8.894904849162218e-05, |
| "loss": 0.0176, |
| "step": 5100 |
| }, |
| { |
| "grad_norm": 0.33941879868507385, |
| "learning_rate": 8.889715506149802e-05, |
| "loss": 0.0161, |
| "step": 5110 |
| }, |
| { |
| "grad_norm": 0.37832191586494446, |
| "learning_rate": 8.884515528803722e-05, |
| "loss": 0.0157, |
| "step": 5120 |
| }, |
| { |
| "grad_norm": 0.36182844638824463, |
| "learning_rate": 8.879304931340517e-05, |
| "loss": 0.0148, |
| "step": 5130 |
| }, |
| { |
| "grad_norm": 0.3270893096923828, |
| "learning_rate": 8.874083728005759e-05, |
| "loss": 0.014, |
| "step": 5140 |
| }, |
| { |
| "grad_norm": 0.3815666437149048, |
| "learning_rate": 8.868851933074021e-05, |
| "loss": 0.0207, |
| "step": 5150 |
| }, |
| { |
| "grad_norm": 0.4211690425872803, |
| "learning_rate": 8.863609560848829e-05, |
| "loss": 0.0147, |
| "step": 5160 |
| }, |
| { |
| "grad_norm": 0.38520973920822144, |
| "learning_rate": 8.85835662566263e-05, |
| "loss": 0.0167, |
| "step": 5170 |
| }, |
| { |
| "grad_norm": 0.4238542914390564, |
| "learning_rate": 8.853093141876747e-05, |
| "loss": 0.0123, |
| "step": 5180 |
| }, |
| { |
| "grad_norm": 0.4630364179611206, |
| "learning_rate": 8.847819123881343e-05, |
| "loss": 0.0157, |
| "step": 5190 |
| }, |
| { |
| "grad_norm": 0.3982260227203369, |
| "learning_rate": 8.842534586095383e-05, |
| "loss": 0.0156, |
| "step": 5200 |
| }, |
| { |
| "grad_norm": 0.418029248714447, |
| "learning_rate": 8.837239542966593e-05, |
| "loss": 0.0147, |
| "step": 5210 |
| }, |
| { |
| "grad_norm": 0.42096608877182007, |
| "learning_rate": 8.831934008971417e-05, |
| "loss": 0.0155, |
| "step": 5220 |
| }, |
| { |
| "grad_norm": 0.40651246905326843, |
| "learning_rate": 8.826617998614982e-05, |
| "loss": 0.0134, |
| "step": 5230 |
| }, |
| { |
| "grad_norm": 0.35442405939102173, |
| "learning_rate": 8.821291526431056e-05, |
| "loss": 0.0149, |
| "step": 5240 |
| }, |
| { |
| "grad_norm": 0.4477267563343048, |
| "learning_rate": 8.815954606982015e-05, |
| "loss": 0.017, |
| "step": 5250 |
| }, |
| { |
| "grad_norm": 0.42138704657554626, |
| "learning_rate": 8.810607254858789e-05, |
| "loss": 0.0161, |
| "step": 5260 |
| }, |
| { |
| "grad_norm": 0.3887590169906616, |
| "learning_rate": 8.805249484680838e-05, |
| "loss": 0.0177, |
| "step": 5270 |
| }, |
| { |
| "grad_norm": 0.3340283930301666, |
| "learning_rate": 8.799881311096096e-05, |
| "loss": 0.0132, |
| "step": 5280 |
| }, |
| { |
| "grad_norm": 0.37869396805763245, |
| "learning_rate": 8.794502748780949e-05, |
| "loss": 0.0158, |
| "step": 5290 |
| }, |
| { |
| "grad_norm": 0.38290390372276306, |
| "learning_rate": 8.78911381244018e-05, |
| "loss": 0.0142, |
| "step": 5300 |
| }, |
| { |
| "grad_norm": 0.3591060936450958, |
| "learning_rate": 8.783714516806933e-05, |
| "loss": 0.0151, |
| "step": 5310 |
| }, |
| { |
| "grad_norm": 0.4037642478942871, |
| "learning_rate": 8.77830487664268e-05, |
| "loss": 0.0128, |
| "step": 5320 |
| }, |
| { |
| "grad_norm": 0.25061705708503723, |
| "learning_rate": 8.772884906737167e-05, |
| "loss": 0.0162, |
| "step": 5330 |
| }, |
| { |
| "grad_norm": 0.3075624108314514, |
| "learning_rate": 8.767454621908387e-05, |
| "loss": 0.0136, |
| "step": 5340 |
| }, |
| { |
| "grad_norm": 0.38026294112205505, |
| "learning_rate": 8.76201403700253e-05, |
| "loss": 0.0136, |
| "step": 5350 |
| }, |
| { |
| "grad_norm": 0.48398804664611816, |
| "learning_rate": 8.756563166893949e-05, |
| "loss": 0.0132, |
| "step": 5360 |
| }, |
| { |
| "grad_norm": 0.3468417823314667, |
| "learning_rate": 8.751102026485113e-05, |
| "loss": 0.0151, |
| "step": 5370 |
| }, |
| { |
| "grad_norm": 0.3630031943321228, |
| "learning_rate": 8.745630630706571e-05, |
| "loss": 0.0158, |
| "step": 5380 |
| }, |
| { |
| "grad_norm": 0.5174519419670105, |
| "learning_rate": 8.740148994516912e-05, |
| "loss": 0.0165, |
| "step": 5390 |
| }, |
| { |
| "grad_norm": 0.3353045582771301, |
| "learning_rate": 8.73465713290272e-05, |
| "loss": 0.0133, |
| "step": 5400 |
| }, |
| { |
| "grad_norm": 0.3345337212085724, |
| "learning_rate": 8.729155060878533e-05, |
| "loss": 0.0186, |
| "step": 5410 |
| }, |
| { |
| "grad_norm": 0.3502795100212097, |
| "learning_rate": 8.723642793486809e-05, |
| "loss": 0.017, |
| "step": 5420 |
| }, |
| { |
| "grad_norm": 0.33117562532424927, |
| "learning_rate": 8.718120345797873e-05, |
| "loss": 0.0156, |
| "step": 5430 |
| }, |
| { |
| "grad_norm": 0.3348385989665985, |
| "learning_rate": 8.712587732909889e-05, |
| "loss": 0.0131, |
| "step": 5440 |
| }, |
| { |
| "grad_norm": 0.31218665838241577, |
| "learning_rate": 8.707044969948806e-05, |
| "loss": 0.0139, |
| "step": 5450 |
| }, |
| { |
| "grad_norm": 0.36143720149993896, |
| "learning_rate": 8.701492072068329e-05, |
| "loss": 0.0162, |
| "step": 5460 |
| }, |
| { |
| "grad_norm": 0.397625207901001, |
| "learning_rate": 8.695929054449869e-05, |
| "loss": 0.0162, |
| "step": 5470 |
| }, |
| { |
| "grad_norm": 0.35156044363975525, |
| "learning_rate": 8.690355932302501e-05, |
| "loss": 0.0149, |
| "step": 5480 |
| }, |
| { |
| "grad_norm": 0.3862064778804779, |
| "learning_rate": 8.684772720862931e-05, |
| "loss": 0.0134, |
| "step": 5490 |
| }, |
| { |
| "grad_norm": 0.33415740728378296, |
| "learning_rate": 8.679179435395446e-05, |
| "loss": 0.0156, |
| "step": 5500 |
| }, |
| { |
| "grad_norm": 0.33985161781311035, |
| "learning_rate": 8.673576091191874e-05, |
| "loss": 0.0152, |
| "step": 5510 |
| }, |
| { |
| "grad_norm": 0.43412765860557556, |
| "learning_rate": 8.667962703571541e-05, |
| "loss": 0.0153, |
| "step": 5520 |
| }, |
| { |
| "grad_norm": 0.29241663217544556, |
| "learning_rate": 8.662339287881238e-05, |
| "loss": 0.0145, |
| "step": 5530 |
| }, |
| { |
| "grad_norm": 0.26985955238342285, |
| "learning_rate": 8.656705859495169e-05, |
| "loss": 0.0095, |
| "step": 5540 |
| }, |
| { |
| "grad_norm": 0.3288934528827667, |
| "learning_rate": 8.651062433814912e-05, |
| "loss": 0.0148, |
| "step": 5550 |
| }, |
| { |
| "grad_norm": 0.32042691111564636, |
| "learning_rate": 8.645409026269375e-05, |
| "loss": 0.0178, |
| "step": 5560 |
| }, |
| { |
| "grad_norm": 0.29201775789260864, |
| "learning_rate": 8.639745652314759e-05, |
| "loss": 0.0136, |
| "step": 5570 |
| }, |
| { |
| "grad_norm": 0.33705347776412964, |
| "learning_rate": 8.634072327434515e-05, |
| "loss": 0.0199, |
| "step": 5580 |
| }, |
| { |
| "grad_norm": 0.43964189291000366, |
| "learning_rate": 8.628389067139294e-05, |
| "loss": 0.0153, |
| "step": 5590 |
| }, |
| { |
| "grad_norm": 0.3852575421333313, |
| "learning_rate": 8.622695886966911e-05, |
| "loss": 0.0124, |
| "step": 5600 |
| }, |
| { |
| "grad_norm": 0.3601333200931549, |
| "learning_rate": 8.616992802482308e-05, |
| "loss": 0.0115, |
| "step": 5610 |
| }, |
| { |
| "grad_norm": 0.3712993562221527, |
| "learning_rate": 8.611279829277496e-05, |
| "loss": 0.0129, |
| "step": 5620 |
| }, |
| { |
| "grad_norm": 0.3430801033973694, |
| "learning_rate": 8.605556982971528e-05, |
| "loss": 0.0119, |
| "step": 5630 |
| }, |
| { |
| "grad_norm": 0.2783951163291931, |
| "learning_rate": 8.599824279210447e-05, |
| "loss": 0.0113, |
| "step": 5640 |
| }, |
| { |
| "grad_norm": 0.3604603111743927, |
| "learning_rate": 8.594081733667243e-05, |
| "loss": 0.016, |
| "step": 5650 |
| }, |
| { |
| "grad_norm": 0.4052552282810211, |
| "learning_rate": 8.58832936204182e-05, |
| "loss": 0.0141, |
| "step": 5660 |
| }, |
| { |
| "grad_norm": 0.2946913242340088, |
| "learning_rate": 8.582567180060942e-05, |
| "loss": 0.0188, |
| "step": 5670 |
| }, |
| { |
| "grad_norm": 0.29554295539855957, |
| "learning_rate": 8.576795203478194e-05, |
| "loss": 0.0147, |
| "step": 5680 |
| }, |
| { |
| "grad_norm": 0.30913200974464417, |
| "learning_rate": 8.571013448073939e-05, |
| "loss": 0.0155, |
| "step": 5690 |
| }, |
| { |
| "grad_norm": 0.3153333067893982, |
| "learning_rate": 8.565221929655275e-05, |
| "loss": 0.0116, |
| "step": 5700 |
| }, |
| { |
| "grad_norm": 0.26914530992507935, |
| "learning_rate": 8.559420664055992e-05, |
| "loss": 0.0125, |
| "step": 5710 |
| }, |
| { |
| "grad_norm": 0.3266845941543579, |
| "learning_rate": 8.553609667136532e-05, |
| "loss": 0.0127, |
| "step": 5720 |
| }, |
| { |
| "grad_norm": 0.36770594120025635, |
| "learning_rate": 8.547788954783936e-05, |
| "loss": 0.0132, |
| "step": 5730 |
| }, |
| { |
| "grad_norm": 0.3868075907230377, |
| "learning_rate": 8.541958542911808e-05, |
| "loss": 0.0137, |
| "step": 5740 |
| }, |
| { |
| "grad_norm": 0.3873762786388397, |
| "learning_rate": 8.536118447460275e-05, |
| "loss": 0.016, |
| "step": 5750 |
| }, |
| { |
| "grad_norm": 0.34997740387916565, |
| "learning_rate": 8.530268684395932e-05, |
| "loss": 0.012, |
| "step": 5760 |
| }, |
| { |
| "grad_norm": 0.36314913630485535, |
| "learning_rate": 8.524409269711807e-05, |
| "loss": 0.014, |
| "step": 5770 |
| }, |
| { |
| "grad_norm": 0.2800992727279663, |
| "learning_rate": 8.51854021942732e-05, |
| "loss": 0.0111, |
| "step": 5780 |
| }, |
| { |
| "grad_norm": 0.3715326488018036, |
| "learning_rate": 8.512661549588227e-05, |
| "loss": 0.0128, |
| "step": 5790 |
| }, |
| { |
| "grad_norm": 0.3508760631084442, |
| "learning_rate": 8.506773276266588e-05, |
| "loss": 0.0123, |
| "step": 5800 |
| }, |
| { |
| "grad_norm": 0.31156125664711, |
| "learning_rate": 8.500875415560721e-05, |
| "loss": 0.0104, |
| "step": 5810 |
| }, |
| { |
| "grad_norm": 0.28672730922698975, |
| "learning_rate": 8.494967983595144e-05, |
| "loss": 0.0138, |
| "step": 5820 |
| }, |
| { |
| "grad_norm": 0.2949328124523163, |
| "learning_rate": 8.489050996520558e-05, |
| "loss": 0.0111, |
| "step": 5830 |
| }, |
| { |
| "grad_norm": 0.3339660167694092, |
| "learning_rate": 8.483124470513775e-05, |
| "loss": 0.0125, |
| "step": 5840 |
| }, |
| { |
| "grad_norm": 0.37675192952156067, |
| "learning_rate": 8.477188421777692e-05, |
| "loss": 0.013, |
| "step": 5850 |
| }, |
| { |
| "grad_norm": 0.4156615436077118, |
| "learning_rate": 8.47124286654124e-05, |
| "loss": 0.0188, |
| "step": 5860 |
| }, |
| { |
| "grad_norm": 0.3377411365509033, |
| "learning_rate": 8.465287821059341e-05, |
| "loss": 0.0197, |
| "step": 5870 |
| }, |
| { |
| "grad_norm": 0.31554165482521057, |
| "learning_rate": 8.45932330161286e-05, |
| "loss": 0.0125, |
| "step": 5880 |
| }, |
| { |
| "grad_norm": 0.3891998529434204, |
| "learning_rate": 8.453349324508567e-05, |
| "loss": 0.0169, |
| "step": 5890 |
| }, |
| { |
| "grad_norm": 0.2835284173488617, |
| "learning_rate": 8.447365906079088e-05, |
| "loss": 0.0172, |
| "step": 5900 |
| }, |
| { |
| "grad_norm": 0.3825901746749878, |
| "learning_rate": 8.441373062682856e-05, |
| "loss": 0.0146, |
| "step": 5910 |
| }, |
| { |
| "grad_norm": 0.3294428884983063, |
| "learning_rate": 8.43537081070408e-05, |
| "loss": 0.0218, |
| "step": 5920 |
| }, |
| { |
| "grad_norm": 0.3541003167629242, |
| "learning_rate": 8.429359166552689e-05, |
| "loss": 0.0132, |
| "step": 5930 |
| }, |
| { |
| "grad_norm": 0.3192877173423767, |
| "learning_rate": 8.423338146664284e-05, |
| "loss": 0.014, |
| "step": 5940 |
| }, |
| { |
| "grad_norm": 0.3500727713108063, |
| "learning_rate": 8.417307767500107e-05, |
| "loss": 0.0115, |
| "step": 5950 |
| }, |
| { |
| "grad_norm": 0.3229285478591919, |
| "learning_rate": 8.411268045546983e-05, |
| "loss": 0.0121, |
| "step": 5960 |
| }, |
| { |
| "grad_norm": 0.4392866790294647, |
| "learning_rate": 8.405218997317281e-05, |
| "loss": 0.0125, |
| "step": 5970 |
| }, |
| { |
| "grad_norm": 0.3409421145915985, |
| "learning_rate": 8.399160639348869e-05, |
| "loss": 0.0115, |
| "step": 5980 |
| }, |
| { |
| "grad_norm": 0.3397701680660248, |
| "learning_rate": 8.393092988205065e-05, |
| "loss": 0.0144, |
| "step": 5990 |
| }, |
| { |
| "grad_norm": 0.2932409346103668, |
| "learning_rate": 8.387016060474597e-05, |
| "loss": 0.0155, |
| "step": 6000 |
| }, |
| { |
| "grad_norm": 0.30588042736053467, |
| "learning_rate": 8.380929872771551e-05, |
| "loss": 0.018, |
| "step": 6010 |
| }, |
| { |
| "grad_norm": 0.2569223642349243, |
| "learning_rate": 8.374834441735335e-05, |
| "loss": 0.0139, |
| "step": 6020 |
| }, |
| { |
| "grad_norm": 0.37832796573638916, |
| "learning_rate": 8.368729784030622e-05, |
| "loss": 0.012, |
| "step": 6030 |
| }, |
| { |
| "grad_norm": 0.3126446604728699, |
| "learning_rate": 8.362615916347315e-05, |
| "loss": 0.0138, |
| "step": 6040 |
| }, |
| { |
| "grad_norm": 0.2546840310096741, |
| "learning_rate": 8.356492855400493e-05, |
| "loss": 0.0122, |
| "step": 6050 |
| }, |
| { |
| "grad_norm": 0.3276226818561554, |
| "learning_rate": 8.350360617930371e-05, |
| "loss": 0.0118, |
| "step": 6060 |
| }, |
| { |
| "grad_norm": 0.3676457405090332, |
| "learning_rate": 8.344219220702255e-05, |
| "loss": 0.0117, |
| "step": 6070 |
| }, |
| { |
| "grad_norm": 0.35417577624320984, |
| "learning_rate": 8.338068680506485e-05, |
| "loss": 0.0104, |
| "step": 6080 |
| }, |
| { |
| "grad_norm": 0.26148155331611633, |
| "learning_rate": 8.33190901415841e-05, |
| "loss": 0.0142, |
| "step": 6090 |
| }, |
| { |
| "grad_norm": 0.29308485984802246, |
| "learning_rate": 8.325740238498317e-05, |
| "loss": 0.0111, |
| "step": 6100 |
| }, |
| { |
| "grad_norm": 0.41223078966140747, |
| "learning_rate": 8.319562370391406e-05, |
| "loss": 0.0135, |
| "step": 6110 |
| }, |
| { |
| "grad_norm": 0.38607892394065857, |
| "learning_rate": 8.31337542672773e-05, |
| "loss": 0.0131, |
| "step": 6120 |
| }, |
| { |
| "grad_norm": 0.3013926148414612, |
| "learning_rate": 8.307179424422158e-05, |
| "loss": 0.0115, |
| "step": 6130 |
| }, |
| { |
| "grad_norm": 0.34792882204055786, |
| "learning_rate": 8.300974380414327e-05, |
| "loss": 0.0165, |
| "step": 6140 |
| }, |
| { |
| "grad_norm": 0.3160726726055145, |
| "learning_rate": 8.294760311668586e-05, |
| "loss": 0.0133, |
| "step": 6150 |
| }, |
| { |
| "grad_norm": 0.3607368469238281, |
| "learning_rate": 8.288537235173961e-05, |
| "loss": 0.0152, |
| "step": 6160 |
| }, |
| { |
| "grad_norm": 0.2962538003921509, |
| "learning_rate": 8.282305167944108e-05, |
| "loss": 0.0146, |
| "step": 6170 |
| }, |
| { |
| "grad_norm": 0.3576897978782654, |
| "learning_rate": 8.276064127017262e-05, |
| "loss": 0.0132, |
| "step": 6180 |
| }, |
| { |
| "grad_norm": 0.3158738911151886, |
| "learning_rate": 8.269814129456189e-05, |
| "loss": 0.0165, |
| "step": 6190 |
| }, |
| { |
| "grad_norm": 0.3886716663837433, |
| "learning_rate": 8.263555192348143e-05, |
| "loss": 0.0164, |
| "step": 6200 |
| }, |
| { |
| "grad_norm": 0.33075031638145447, |
| "learning_rate": 8.257287332804819e-05, |
| "loss": 0.024, |
| "step": 6210 |
| }, |
| { |
| "grad_norm": 0.40429654717445374, |
| "learning_rate": 8.251010567962307e-05, |
| "loss": 0.0142, |
| "step": 6220 |
| }, |
| { |
| "grad_norm": 0.26305797696113586, |
| "learning_rate": 8.244724914981041e-05, |
| "loss": 0.0127, |
| "step": 6230 |
| }, |
| { |
| "grad_norm": 0.26651525497436523, |
| "learning_rate": 8.238430391045757e-05, |
| "loss": 0.0099, |
| "step": 6240 |
| }, |
| { |
| "grad_norm": 0.22140610218048096, |
| "learning_rate": 8.232127013365445e-05, |
| "loss": 0.0162, |
| "step": 6250 |
| }, |
| { |
| "grad_norm": 0.3048286736011505, |
| "learning_rate": 8.225814799173295e-05, |
| "loss": 0.0219, |
| "step": 6260 |
| }, |
| { |
| "grad_norm": 0.27734512090682983, |
| "learning_rate": 8.219493765726663e-05, |
| "loss": 0.012, |
| "step": 6270 |
| }, |
| { |
| "grad_norm": 0.3474031090736389, |
| "learning_rate": 8.21316393030701e-05, |
| "loss": 0.0122, |
| "step": 6280 |
| }, |
| { |
| "grad_norm": 0.3461661636829376, |
| "learning_rate": 8.206825310219865e-05, |
| "loss": 0.0142, |
| "step": 6290 |
| }, |
| { |
| "grad_norm": 0.3480895161628723, |
| "learning_rate": 8.200477922794776e-05, |
| "loss": 0.0155, |
| "step": 6300 |
| }, |
| { |
| "grad_norm": 0.29770898818969727, |
| "learning_rate": 8.194121785385256e-05, |
| "loss": 0.0122, |
| "step": 6310 |
| }, |
| { |
| "grad_norm": 0.27077922224998474, |
| "learning_rate": 8.187756915368741e-05, |
| "loss": 0.0114, |
| "step": 6320 |
| }, |
| { |
| "grad_norm": 0.37504443526268005, |
| "learning_rate": 8.181383330146544e-05, |
| "loss": 0.0123, |
| "step": 6330 |
| }, |
| { |
| "grad_norm": 0.27733808755874634, |
| "learning_rate": 8.175001047143804e-05, |
| "loss": 0.0152, |
| "step": 6340 |
| }, |
| { |
| "grad_norm": 0.2905326187610626, |
| "learning_rate": 8.168610083809438e-05, |
| "loss": 0.014, |
| "step": 6350 |
| }, |
| { |
| "grad_norm": 0.3680465519428253, |
| "learning_rate": 8.162210457616095e-05, |
| "loss": 0.0128, |
| "step": 6360 |
| }, |
| { |
| "grad_norm": 0.41437506675720215, |
| "learning_rate": 8.155802186060109e-05, |
| "loss": 0.0168, |
| "step": 6370 |
| }, |
| { |
| "grad_norm": 0.2748274505138397, |
| "learning_rate": 8.149385286661453e-05, |
| "loss": 0.014, |
| "step": 6380 |
| }, |
| { |
| "grad_norm": 0.27940356731414795, |
| "learning_rate": 8.14295977696368e-05, |
| "loss": 0.0106, |
| "step": 6390 |
| }, |
| { |
| "grad_norm": 0.3001856505870819, |
| "learning_rate": 8.13652567453389e-05, |
| "loss": 0.0161, |
| "step": 6400 |
| }, |
| { |
| "grad_norm": 0.3228931725025177, |
| "learning_rate": 8.130082996962676e-05, |
| "loss": 0.0108, |
| "step": 6410 |
| }, |
| { |
| "grad_norm": 0.37547504901885986, |
| "learning_rate": 8.123631761864068e-05, |
| "loss": 0.0108, |
| "step": 6420 |
| }, |
| { |
| "grad_norm": 0.3306344449520111, |
| "learning_rate": 8.1171719868755e-05, |
| "loss": 0.012, |
| "step": 6430 |
| }, |
| { |
| "grad_norm": 0.30303868651390076, |
| "learning_rate": 8.110703689657748e-05, |
| "loss": 0.0132, |
| "step": 6440 |
| }, |
| { |
| "grad_norm": 0.3219710886478424, |
| "learning_rate": 8.104226887894892e-05, |
| "loss": 0.0247, |
| "step": 6450 |
| }, |
| { |
| "grad_norm": 0.25483033061027527, |
| "learning_rate": 8.097741599294257e-05, |
| "loss": 0.0137, |
| "step": 6460 |
| }, |
| { |
| "grad_norm": 0.34695756435394287, |
| "learning_rate": 8.091247841586378e-05, |
| "loss": 0.0118, |
| "step": 6470 |
| }, |
| { |
| "grad_norm": 0.31917816400527954, |
| "learning_rate": 8.084745632524939e-05, |
| "loss": 0.0153, |
| "step": 6480 |
| }, |
| { |
| "grad_norm": 0.298793762922287, |
| "learning_rate": 8.07823498988673e-05, |
| "loss": 0.013, |
| "step": 6490 |
| }, |
| { |
| "grad_norm": 0.29808494448661804, |
| "learning_rate": 8.071715931471602e-05, |
| "loss": 0.0141, |
| "step": 6500 |
| }, |
| { |
| "grad_norm": 0.2866530120372772, |
| "learning_rate": 8.06518847510241e-05, |
| "loss": 0.013, |
| "step": 6510 |
| }, |
| { |
| "grad_norm": 0.2647181451320648, |
| "learning_rate": 8.058652638624971e-05, |
| "loss": 0.013, |
| "step": 6520 |
| }, |
| { |
| "grad_norm": 0.2521056830883026, |
| "learning_rate": 8.052108439908013e-05, |
| "loss": 0.0117, |
| "step": 6530 |
| }, |
| { |
| "grad_norm": 0.30903002619743347, |
| "learning_rate": 8.045555896843125e-05, |
| "loss": 0.0127, |
| "step": 6540 |
| }, |
| { |
| "grad_norm": 0.3295891284942627, |
| "learning_rate": 8.03899502734471e-05, |
| "loss": 0.0116, |
| "step": 6550 |
| }, |
| { |
| "grad_norm": 0.3196203410625458, |
| "learning_rate": 8.032425849349931e-05, |
| "loss": 0.0096, |
| "step": 6560 |
| }, |
| { |
| "grad_norm": 0.2777307331562042, |
| "learning_rate": 8.025848380818674e-05, |
| "loss": 0.0137, |
| "step": 6570 |
| }, |
| { |
| "grad_norm": 0.2528461217880249, |
| "learning_rate": 8.019262639733487e-05, |
| "loss": 0.012, |
| "step": 6580 |
| }, |
| { |
| "grad_norm": 0.29351767897605896, |
| "learning_rate": 8.012668644099531e-05, |
| "loss": 0.0116, |
| "step": 6590 |
| }, |
| { |
| "grad_norm": 0.3499806821346283, |
| "learning_rate": 8.006066411944542e-05, |
| "loss": 0.0145, |
| "step": 6600 |
| }, |
| { |
| "grad_norm": 0.3001391589641571, |
| "learning_rate": 7.999455961318769e-05, |
| "loss": 0.0107, |
| "step": 6610 |
| }, |
| { |
| "grad_norm": 0.34684231877326965, |
| "learning_rate": 7.992837310294932e-05, |
| "loss": 0.0174, |
| "step": 6620 |
| }, |
| { |
| "grad_norm": 0.29393184185028076, |
| "learning_rate": 7.986210476968167e-05, |
| "loss": 0.0139, |
| "step": 6630 |
| }, |
| { |
| "grad_norm": 0.29339519143104553, |
| "learning_rate": 7.97957547945599e-05, |
| "loss": 0.0174, |
| "step": 6640 |
| }, |
| { |
| "grad_norm": 0.4209054112434387, |
| "learning_rate": 7.972932335898226e-05, |
| "loss": 0.0115, |
| "step": 6650 |
| }, |
| { |
| "grad_norm": 0.28844037652015686, |
| "learning_rate": 7.966281064456975e-05, |
| "loss": 0.0121, |
| "step": 6660 |
| }, |
| { |
| "grad_norm": 0.3526553511619568, |
| "learning_rate": 7.959621683316563e-05, |
| "loss": 0.0119, |
| "step": 6670 |
| }, |
| { |
| "grad_norm": 0.42336228489875793, |
| "learning_rate": 7.952954210683481e-05, |
| "loss": 0.016, |
| "step": 6680 |
| }, |
| { |
| "grad_norm": 0.3053176701068878, |
| "learning_rate": 7.946278664786345e-05, |
| "loss": 0.0143, |
| "step": 6690 |
| }, |
| { |
| "grad_norm": 0.25422704219818115, |
| "learning_rate": 7.939595063875842e-05, |
| "loss": 0.0129, |
| "step": 6700 |
| }, |
| { |
| "grad_norm": 0.2565918266773224, |
| "learning_rate": 7.932903426224683e-05, |
| "loss": 0.0118, |
| "step": 6710 |
| }, |
| { |
| "grad_norm": 0.29806122183799744, |
| "learning_rate": 7.926203770127552e-05, |
| "loss": 0.0112, |
| "step": 6720 |
| }, |
| { |
| "grad_norm": 0.29177016019821167, |
| "learning_rate": 7.919496113901046e-05, |
| "loss": 0.0138, |
| "step": 6730 |
| }, |
| { |
| "grad_norm": 0.24790076911449432, |
| "learning_rate": 7.912780475883649e-05, |
| "loss": 0.0104, |
| "step": 6740 |
| }, |
| { |
| "grad_norm": 0.3190149664878845, |
| "learning_rate": 7.906056874435652e-05, |
| "loss": 0.0127, |
| "step": 6750 |
| }, |
| { |
| "grad_norm": 0.28794440627098083, |
| "learning_rate": 7.899325327939131e-05, |
| "loss": 0.0094, |
| "step": 6760 |
| }, |
| { |
| "grad_norm": 0.24909764528274536, |
| "learning_rate": 7.892585854797872e-05, |
| "loss": 0.0135, |
| "step": 6770 |
| }, |
| { |
| "grad_norm": 0.358511745929718, |
| "learning_rate": 7.88583847343734e-05, |
| "loss": 0.0119, |
| "step": 6780 |
| }, |
| { |
| "grad_norm": 0.2664150893688202, |
| "learning_rate": 7.879083202304616e-05, |
| "loss": 0.0137, |
| "step": 6790 |
| }, |
| { |
| "grad_norm": 0.24875850975513458, |
| "learning_rate": 7.872320059868355e-05, |
| "loss": 0.0144, |
| "step": 6800 |
| }, |
| { |
| "grad_norm": 0.2646051347255707, |
| "learning_rate": 7.865549064618729e-05, |
| "loss": 0.0101, |
| "step": 6810 |
| }, |
| { |
| "grad_norm": 0.3005359172821045, |
| "learning_rate": 7.858770235067381e-05, |
| "loss": 0.0189, |
| "step": 6820 |
| }, |
| { |
| "grad_norm": 0.30753788352012634, |
| "learning_rate": 7.851983589747374e-05, |
| "loss": 0.0128, |
| "step": 6830 |
| }, |
| { |
| "grad_norm": 0.3018791675567627, |
| "learning_rate": 7.845189147213133e-05, |
| "loss": 0.0103, |
| "step": 6840 |
| }, |
| { |
| "grad_norm": 0.27601730823516846, |
| "learning_rate": 7.838386926040407e-05, |
| "loss": 0.012, |
| "step": 6850 |
| }, |
| { |
| "grad_norm": 0.18579219281673431, |
| "learning_rate": 7.83157694482621e-05, |
| "loss": 0.0088, |
| "step": 6860 |
| }, |
| { |
| "grad_norm": 0.3077498972415924, |
| "learning_rate": 7.824759222188768e-05, |
| "loss": 0.0135, |
| "step": 6870 |
| }, |
| { |
| "grad_norm": 0.3342336118221283, |
| "learning_rate": 7.817933776767478e-05, |
| "loss": 0.0107, |
| "step": 6880 |
| }, |
| { |
| "grad_norm": 0.2645319998264313, |
| "learning_rate": 7.811100627222842e-05, |
| "loss": 0.0079, |
| "step": 6890 |
| }, |
| { |
| "grad_norm": 0.34547173976898193, |
| "learning_rate": 7.804259792236435e-05, |
| "loss": 0.0106, |
| "step": 6900 |
| }, |
| { |
| "grad_norm": 0.3139336407184601, |
| "learning_rate": 7.797411290510835e-05, |
| "loss": 0.0143, |
| "step": 6910 |
| }, |
| { |
| "grad_norm": 0.32347217202186584, |
| "learning_rate": 7.790555140769586e-05, |
| "loss": 0.0116, |
| "step": 6920 |
| }, |
| { |
| "grad_norm": 0.2938658595085144, |
| "learning_rate": 7.78369136175714e-05, |
| "loss": 0.0151, |
| "step": 6930 |
| }, |
| { |
| "grad_norm": 0.3143678605556488, |
| "learning_rate": 7.776819972238806e-05, |
| "loss": 0.0157, |
| "step": 6940 |
| }, |
| { |
| "grad_norm": 0.3636862337589264, |
| "learning_rate": 7.7699409910007e-05, |
| "loss": 0.012, |
| "step": 6950 |
| }, |
| { |
| "grad_norm": 0.2957841157913208, |
| "learning_rate": 7.763054436849694e-05, |
| "loss": 0.0137, |
| "step": 6960 |
| }, |
| { |
| "grad_norm": 0.3176383078098297, |
| "learning_rate": 7.756160328613364e-05, |
| "loss": 0.0138, |
| "step": 6970 |
| }, |
| { |
| "grad_norm": 0.38332003355026245, |
| "learning_rate": 7.749258685139942e-05, |
| "loss": 0.0103, |
| "step": 6980 |
| }, |
| { |
| "grad_norm": 0.3329831659793854, |
| "learning_rate": 7.742349525298253e-05, |
| "loss": 0.0118, |
| "step": 6990 |
| }, |
| { |
| "grad_norm": 0.373206228017807, |
| "learning_rate": 7.735432867977679e-05, |
| "loss": 0.0096, |
| "step": 7000 |
| }, |
| { |
| "grad_norm": 0.22941261529922485, |
| "learning_rate": 7.728508732088096e-05, |
| "loss": 0.0141, |
| "step": 7010 |
| }, |
| { |
| "grad_norm": 0.3203655183315277, |
| "learning_rate": 7.721577136559825e-05, |
| "loss": 0.0142, |
| "step": 7020 |
| }, |
| { |
| "grad_norm": 0.2580220103263855, |
| "learning_rate": 7.714638100343588e-05, |
| "loss": 0.0119, |
| "step": 7030 |
| }, |
| { |
| "grad_norm": 0.23703204095363617, |
| "learning_rate": 7.707691642410444e-05, |
| "loss": 0.0114, |
| "step": 7040 |
| }, |
| { |
| "grad_norm": 0.2933865189552307, |
| "learning_rate": 7.70073778175174e-05, |
| "loss": 0.0125, |
| "step": 7050 |
| }, |
| { |
| "grad_norm": 0.3587990403175354, |
| "learning_rate": 7.69377653737907e-05, |
| "loss": 0.0103, |
| "step": 7060 |
| }, |
| { |
| "grad_norm": 0.3031073212623596, |
| "learning_rate": 7.686807928324209e-05, |
| "loss": 0.0119, |
| "step": 7070 |
| }, |
| { |
| "grad_norm": 0.24994587898254395, |
| "learning_rate": 7.679831973639065e-05, |
| "loss": 0.0108, |
| "step": 7080 |
| }, |
| { |
| "grad_norm": 0.2641933262348175, |
| "learning_rate": 7.672848692395637e-05, |
| "loss": 0.0124, |
| "step": 7090 |
| }, |
| { |
| "grad_norm": 0.3555925488471985, |
| "learning_rate": 7.665858103685944e-05, |
| "loss": 0.0106, |
| "step": 7100 |
| }, |
| { |
| "grad_norm": 0.32579174637794495, |
| "learning_rate": 7.658860226621991e-05, |
| "loss": 0.0129, |
| "step": 7110 |
| }, |
| { |
| "grad_norm": 0.2926287353038788, |
| "learning_rate": 7.651855080335708e-05, |
| "loss": 0.0127, |
| "step": 7120 |
| }, |
| { |
| "grad_norm": 0.32705703377723694, |
| "learning_rate": 7.644842683978896e-05, |
| "loss": 0.0103, |
| "step": 7130 |
| }, |
| { |
| "grad_norm": 0.3596729636192322, |
| "learning_rate": 7.63782305672318e-05, |
| "loss": 0.0153, |
| "step": 7140 |
| }, |
| { |
| "grad_norm": 0.3586880564689636, |
| "learning_rate": 7.63079621775995e-05, |
| "loss": 0.0132, |
| "step": 7150 |
| }, |
| { |
| "grad_norm": 0.23977535963058472, |
| "learning_rate": 7.623762186300319e-05, |
| "loss": 0.0125, |
| "step": 7160 |
| }, |
| { |
| "grad_norm": 0.3540763854980469, |
| "learning_rate": 7.616720981575057e-05, |
| "loss": 0.0133, |
| "step": 7170 |
| }, |
| { |
| "grad_norm": 0.33983471989631653, |
| "learning_rate": 7.609672622834552e-05, |
| "loss": 0.0102, |
| "step": 7180 |
| }, |
| { |
| "grad_norm": 0.30762892961502075, |
| "learning_rate": 7.602617129348747e-05, |
| "loss": 0.0108, |
| "step": 7190 |
| }, |
| { |
| "grad_norm": 0.3010900020599365, |
| "learning_rate": 7.595554520407088e-05, |
| "loss": 0.0111, |
| "step": 7200 |
| }, |
| { |
| "grad_norm": 0.2769547402858734, |
| "learning_rate": 7.588484815318484e-05, |
| "loss": 0.0126, |
| "step": 7210 |
| }, |
| { |
| "grad_norm": 0.28370919823646545, |
| "learning_rate": 7.581408033411234e-05, |
| "loss": 0.0101, |
| "step": 7220 |
| }, |
| { |
| "grad_norm": 0.32081568241119385, |
| "learning_rate": 7.574324194032995e-05, |
| "loss": 0.009, |
| "step": 7230 |
| }, |
| { |
| "grad_norm": 0.2977130711078644, |
| "learning_rate": 7.567233316550705e-05, |
| "loss": 0.012, |
| "step": 7240 |
| }, |
| { |
| "grad_norm": 0.22645479440689087, |
| "learning_rate": 7.560135420350562e-05, |
| "loss": 0.0087, |
| "step": 7250 |
| }, |
| { |
| "grad_norm": 0.3486950397491455, |
| "learning_rate": 7.553030524837935e-05, |
| "loss": 0.0184, |
| "step": 7260 |
| }, |
| { |
| "grad_norm": 0.3366019129753113, |
| "learning_rate": 7.545918649437341e-05, |
| "loss": 0.0109, |
| "step": 7270 |
| }, |
| { |
| "grad_norm": 0.3164430856704712, |
| "learning_rate": 7.538799813592377e-05, |
| "loss": 0.0121, |
| "step": 7280 |
| }, |
| { |
| "grad_norm": 0.30747735500335693, |
| "learning_rate": 7.531674036765662e-05, |
| "loss": 0.0127, |
| "step": 7290 |
| }, |
| { |
| "grad_norm": 0.2376401573419571, |
| "learning_rate": 7.524541338438807e-05, |
| "loss": 0.0085, |
| "step": 7300 |
| }, |
| { |
| "grad_norm": 0.263528436422348, |
| "learning_rate": 7.517401738112328e-05, |
| "loss": 0.0116, |
| "step": 7310 |
| }, |
| { |
| "grad_norm": 0.35192421078681946, |
| "learning_rate": 7.510255255305628e-05, |
| "loss": 0.0097, |
| "step": 7320 |
| }, |
| { |
| "grad_norm": 0.3433808386325836, |
| "learning_rate": 7.503101909556911e-05, |
| "loss": 0.0106, |
| "step": 7330 |
| }, |
| { |
| "grad_norm": 0.2994978129863739, |
| "learning_rate": 7.495941720423154e-05, |
| "loss": 0.0101, |
| "step": 7340 |
| }, |
| { |
| "grad_norm": 0.2600700557231903, |
| "learning_rate": 7.488774707480042e-05, |
| "loss": 0.0116, |
| "step": 7350 |
| }, |
| { |
| "grad_norm": 0.3085675537586212, |
| "learning_rate": 7.481600890321911e-05, |
| "loss": 0.0088, |
| "step": 7360 |
| }, |
| { |
| "grad_norm": 0.2523610591888428, |
| "learning_rate": 7.474420288561708e-05, |
| "loss": 0.0149, |
| "step": 7370 |
| }, |
| { |
| "grad_norm": 0.3608851432800293, |
| "learning_rate": 7.467232921830921e-05, |
| "loss": 0.015, |
| "step": 7380 |
| }, |
| { |
| "grad_norm": 0.27163684368133545, |
| "learning_rate": 7.460038809779537e-05, |
| "loss": 0.0092, |
| "step": 7390 |
| }, |
| { |
| "grad_norm": 0.3165534436702728, |
| "learning_rate": 7.452837972075983e-05, |
| "loss": 0.0094, |
| "step": 7400 |
| }, |
| { |
| "grad_norm": 0.2729543149471283, |
| "learning_rate": 7.445630428407074e-05, |
| "loss": 0.0093, |
| "step": 7410 |
| }, |
| { |
| "grad_norm": 0.35123759508132935, |
| "learning_rate": 7.43841619847796e-05, |
| "loss": 0.0106, |
| "step": 7420 |
| }, |
| { |
| "grad_norm": 0.2810732424259186, |
| "learning_rate": 7.431195302012072e-05, |
| "loss": 0.0097, |
| "step": 7430 |
| }, |
| { |
| "grad_norm": 0.3775671720504761, |
| "learning_rate": 7.423967758751061e-05, |
| "loss": 0.0119, |
| "step": 7440 |
| }, |
| { |
| "grad_norm": 0.2957786023616791, |
| "learning_rate": 7.416733588454758e-05, |
| "loss": 0.0129, |
| "step": 7450 |
| }, |
| { |
| "grad_norm": 0.3434288799762726, |
| "learning_rate": 7.409492810901106e-05, |
| "loss": 0.0132, |
| "step": 7460 |
| }, |
| { |
| "grad_norm": 0.35347649455070496, |
| "learning_rate": 7.402245445886116e-05, |
| "loss": 0.0147, |
| "step": 7470 |
| }, |
| { |
| "grad_norm": 0.3285079598426819, |
| "learning_rate": 7.394991513223806e-05, |
| "loss": 0.011, |
| "step": 7480 |
| }, |
| { |
| "grad_norm": 0.29947003722190857, |
| "learning_rate": 7.38773103274615e-05, |
| "loss": 0.0132, |
| "step": 7490 |
| }, |
| { |
| "grad_norm": 0.28526970744132996, |
| "learning_rate": 7.380464024303028e-05, |
| "loss": 0.0105, |
| "step": 7500 |
| }, |
| { |
| "grad_norm": 0.2930798828601837, |
| "learning_rate": 7.373190507762162e-05, |
| "loss": 0.0127, |
| "step": 7510 |
| }, |
| { |
| "grad_norm": 0.2921172082424164, |
| "learning_rate": 7.365910503009066e-05, |
| "loss": 0.0156, |
| "step": 7520 |
| }, |
| { |
| "grad_norm": 0.3323417007923126, |
| "learning_rate": 7.358624029946996e-05, |
| "loss": 0.0088, |
| "step": 7530 |
| }, |
| { |
| "grad_norm": 0.29670819640159607, |
| "learning_rate": 7.351331108496893e-05, |
| "loss": 0.0095, |
| "step": 7540 |
| }, |
| { |
| "grad_norm": 0.32244303822517395, |
| "learning_rate": 7.344031758597325e-05, |
| "loss": 0.0137, |
| "step": 7550 |
| }, |
| { |
| "grad_norm": 0.25546425580978394, |
| "learning_rate": 7.336726000204435e-05, |
| "loss": 0.0101, |
| "step": 7560 |
| }, |
| { |
| "grad_norm": 0.30756881833076477, |
| "learning_rate": 7.32941385329189e-05, |
| "loss": 0.0129, |
| "step": 7570 |
| }, |
| { |
| "grad_norm": 0.2749859690666199, |
| "learning_rate": 7.322095337850816e-05, |
| "loss": 0.0095, |
| "step": 7580 |
| }, |
| { |
| "grad_norm": 0.41934898495674133, |
| "learning_rate": 7.314770473889758e-05, |
| "loss": 0.0154, |
| "step": 7590 |
| }, |
| { |
| "grad_norm": 0.2750692665576935, |
| "learning_rate": 7.307439281434615e-05, |
| "loss": 0.0089, |
| "step": 7600 |
| }, |
| { |
| "grad_norm": 0.28263887763023376, |
| "learning_rate": 7.300101780528585e-05, |
| "loss": 0.0127, |
| "step": 7610 |
| }, |
| { |
| "grad_norm": 0.2647198438644409, |
| "learning_rate": 7.292757991232117e-05, |
| "loss": 0.0155, |
| "step": 7620 |
| }, |
| { |
| "grad_norm": 0.30357304215431213, |
| "learning_rate": 7.285407933622848e-05, |
| "loss": 0.0122, |
| "step": 7630 |
| }, |
| { |
| "grad_norm": 0.2601131796836853, |
| "learning_rate": 7.278051627795557e-05, |
| "loss": 0.0173, |
| "step": 7640 |
| }, |
| { |
| "grad_norm": 0.2693704664707184, |
| "learning_rate": 7.270689093862105e-05, |
| "loss": 0.0123, |
| "step": 7650 |
| }, |
| { |
| "grad_norm": 0.3310806453227997, |
| "learning_rate": 7.263320351951374e-05, |
| "loss": 0.009, |
| "step": 7660 |
| }, |
| { |
| "grad_norm": 0.2853841483592987, |
| "learning_rate": 7.255945422209227e-05, |
| "loss": 0.0104, |
| "step": 7670 |
| }, |
| { |
| "grad_norm": 0.19990304112434387, |
| "learning_rate": 7.248564324798437e-05, |
| "loss": 0.0105, |
| "step": 7680 |
| }, |
| { |
| "grad_norm": 0.20870745182037354, |
| "learning_rate": 7.241177079898644e-05, |
| "loss": 0.0126, |
| "step": 7690 |
| }, |
| { |
| "grad_norm": 0.29264724254608154, |
| "learning_rate": 7.233783707706295e-05, |
| "loss": 0.0108, |
| "step": 7700 |
| }, |
| { |
| "grad_norm": 0.26418036222457886, |
| "learning_rate": 7.226384228434586e-05, |
| "loss": 0.0121, |
| "step": 7710 |
| }, |
| { |
| "grad_norm": 0.19485041499137878, |
| "learning_rate": 7.21897866231341e-05, |
| "loss": 0.0136, |
| "step": 7720 |
| }, |
| { |
| "grad_norm": 0.21631906926631927, |
| "learning_rate": 7.211567029589303e-05, |
| "loss": 0.0103, |
| "step": 7730 |
| }, |
| { |
| "grad_norm": 0.2685507833957672, |
| "learning_rate": 7.204149350525387e-05, |
| "loss": 0.0088, |
| "step": 7740 |
| }, |
| { |
| "grad_norm": 0.21937017142772675, |
| "learning_rate": 7.196725645401309e-05, |
| "loss": 0.0118, |
| "step": 7750 |
| }, |
| { |
| "grad_norm": 0.40596023201942444, |
| "learning_rate": 7.1892959345132e-05, |
| "loss": 0.0106, |
| "step": 7760 |
| }, |
| { |
| "grad_norm": 0.3677843511104584, |
| "learning_rate": 7.181860238173605e-05, |
| "loss": 0.0155, |
| "step": 7770 |
| }, |
| { |
| "grad_norm": 0.3440069854259491, |
| "learning_rate": 7.174418576711432e-05, |
| "loss": 0.012, |
| "step": 7780 |
| }, |
| { |
| "grad_norm": 0.3115421533584595, |
| "learning_rate": 7.1669709704719e-05, |
| "loss": 0.0129, |
| "step": 7790 |
| }, |
| { |
| "grad_norm": 0.2567780315876007, |
| "learning_rate": 7.159517439816481e-05, |
| "loss": 0.0186, |
| "step": 7800 |
| }, |
| { |
| "grad_norm": 0.26565659046173096, |
| "learning_rate": 7.152058005122842e-05, |
| "loss": 0.0117, |
| "step": 7810 |
| }, |
| { |
| "grad_norm": 0.2598077654838562, |
| "learning_rate": 7.144592686784793e-05, |
| "loss": 0.0105, |
| "step": 7820 |
| }, |
| { |
| "grad_norm": 0.3031388819217682, |
| "learning_rate": 7.137121505212229e-05, |
| "loss": 0.0103, |
| "step": 7830 |
| }, |
| { |
| "grad_norm": 0.334942102432251, |
| "learning_rate": 7.129644480831077e-05, |
| "loss": 0.0198, |
| "step": 7840 |
| }, |
| { |
| "grad_norm": 0.28533506393432617, |
| "learning_rate": 7.122161634083234e-05, |
| "loss": 0.0103, |
| "step": 7850 |
| }, |
| { |
| "grad_norm": 0.27979883551597595, |
| "learning_rate": 7.114672985426516e-05, |
| "loss": 0.0097, |
| "step": 7860 |
| }, |
| { |
| "grad_norm": 0.21115346252918243, |
| "learning_rate": 7.107178555334606e-05, |
| "loss": 0.0151, |
| "step": 7870 |
| }, |
| { |
| "grad_norm": 0.36340436339378357, |
| "learning_rate": 7.099678364296989e-05, |
| "loss": 0.0115, |
| "step": 7880 |
| }, |
| { |
| "grad_norm": 0.21899573504924774, |
| "learning_rate": 7.0921724328189e-05, |
| "loss": 0.0102, |
| "step": 7890 |
| }, |
| { |
| "grad_norm": 0.3320227861404419, |
| "learning_rate": 7.084660781421268e-05, |
| "loss": 0.0132, |
| "step": 7900 |
| }, |
| { |
| "grad_norm": 0.27939140796661377, |
| "learning_rate": 7.077143430640662e-05, |
| "loss": 0.0119, |
| "step": 7910 |
| }, |
| { |
| "grad_norm": 0.36499500274658203, |
| "learning_rate": 7.069620401029232e-05, |
| "loss": 0.0099, |
| "step": 7920 |
| }, |
| { |
| "grad_norm": 0.3523150682449341, |
| "learning_rate": 7.062091713154655e-05, |
| "loss": 0.0093, |
| "step": 7930 |
| }, |
| { |
| "grad_norm": 0.2645350396633148, |
| "learning_rate": 7.054557387600075e-05, |
| "loss": 0.0097, |
| "step": 7940 |
| }, |
| { |
| "grad_norm": 0.20298346877098083, |
| "learning_rate": 7.04701744496405e-05, |
| "loss": 0.0086, |
| "step": 7950 |
| }, |
| { |
| "grad_norm": 0.20196221768856049, |
| "learning_rate": 7.039471905860495e-05, |
| "loss": 0.0096, |
| "step": 7960 |
| }, |
| { |
| "grad_norm": 0.25855553150177, |
| "learning_rate": 7.031920790918628e-05, |
| "loss": 0.0097, |
| "step": 7970 |
| }, |
| { |
| "grad_norm": 0.28350019454956055, |
| "learning_rate": 7.024364120782906e-05, |
| "loss": 0.0105, |
| "step": 7980 |
| }, |
| { |
| "grad_norm": 0.28542742133140564, |
| "learning_rate": 7.016801916112978e-05, |
| "loss": 0.0137, |
| "step": 7990 |
| }, |
| { |
| "grad_norm": 0.24646037817001343, |
| "learning_rate": 7.009234197583623e-05, |
| "loss": 0.0103, |
| "step": 8000 |
| }, |
| { |
| "grad_norm": 0.26659995317459106, |
| "learning_rate": 7.001660985884692e-05, |
| "loss": 0.0088, |
| "step": 8010 |
| }, |
| { |
| "grad_norm": 0.3342621624469757, |
| "learning_rate": 6.994082301721063e-05, |
| "loss": 0.0114, |
| "step": 8020 |
| }, |
| { |
| "grad_norm": 0.2519735097885132, |
| "learning_rate": 6.986498165812563e-05, |
| "loss": 0.009, |
| "step": 8030 |
| }, |
| { |
| "grad_norm": 0.25942492485046387, |
| "learning_rate": 6.978908598893932e-05, |
| "loss": 0.0089, |
| "step": 8040 |
| }, |
| { |
| "grad_norm": 0.2715606391429901, |
| "learning_rate": 6.971313621714756e-05, |
| "loss": 0.0129, |
| "step": 8050 |
| }, |
| { |
| "grad_norm": 0.28495460748672485, |
| "learning_rate": 6.96371325503941e-05, |
| "loss": 0.0086, |
| "step": 8060 |
| }, |
| { |
| "grad_norm": 0.2748803198337555, |
| "learning_rate": 6.956107519647014e-05, |
| "loss": 0.0079, |
| "step": 8070 |
| }, |
| { |
| "grad_norm": 0.19981186091899872, |
| "learning_rate": 6.94849643633135e-05, |
| "loss": 0.011, |
| "step": 8080 |
| }, |
| { |
| "grad_norm": 0.25709986686706543, |
| "learning_rate": 6.940880025900834e-05, |
| "loss": 0.0091, |
| "step": 8090 |
| }, |
| { |
| "grad_norm": 0.31318721175193787, |
| "learning_rate": 6.933258309178438e-05, |
| "loss": 0.0128, |
| "step": 8100 |
| }, |
| { |
| "grad_norm": 0.2338728904724121, |
| "learning_rate": 6.925631307001646e-05, |
| "loss": 0.01, |
| "step": 8110 |
| }, |
| { |
| "grad_norm": 0.2807973027229309, |
| "learning_rate": 6.91799904022239e-05, |
| "loss": 0.0117, |
| "step": 8120 |
| }, |
| { |
| "grad_norm": 0.16702820360660553, |
| "learning_rate": 6.910361529706997e-05, |
| "loss": 0.0081, |
| "step": 8130 |
| }, |
| { |
| "grad_norm": 0.2894277572631836, |
| "learning_rate": 6.902718796336131e-05, |
| "loss": 0.0086, |
| "step": 8140 |
| }, |
| { |
| "grad_norm": 0.3167431950569153, |
| "learning_rate": 6.895070861004729e-05, |
| "loss": 0.0099, |
| "step": 8150 |
| }, |
| { |
| "grad_norm": 0.35957372188568115, |
| "learning_rate": 6.887417744621956e-05, |
| "loss": 0.0103, |
| "step": 8160 |
| }, |
| { |
| "grad_norm": 0.2503855526447296, |
| "learning_rate": 6.87975946811114e-05, |
| "loss": 0.0088, |
| "step": 8170 |
| }, |
| { |
| "grad_norm": 0.24923115968704224, |
| "learning_rate": 6.872096052409718e-05, |
| "loss": 0.0101, |
| "step": 8180 |
| }, |
| { |
| "grad_norm": 0.27979594469070435, |
| "learning_rate": 6.864427518469174e-05, |
| "loss": 0.0088, |
| "step": 8190 |
| }, |
| { |
| "grad_norm": 0.22051957249641418, |
| "learning_rate": 6.856753887254986e-05, |
| "loss": 0.009, |
| "step": 8200 |
| }, |
| { |
| "grad_norm": 0.2485746592283249, |
| "learning_rate": 6.849075179746572e-05, |
| "loss": 0.0132, |
| "step": 8210 |
| }, |
| { |
| "grad_norm": 0.29662564396858215, |
| "learning_rate": 6.841391416937221e-05, |
| "loss": 0.0109, |
| "step": 8220 |
| }, |
| { |
| "grad_norm": 0.22791483998298645, |
| "learning_rate": 6.833702619834053e-05, |
| "loss": 0.0119, |
| "step": 8230 |
| }, |
| { |
| "grad_norm": 0.2398047000169754, |
| "learning_rate": 6.82600880945794e-05, |
| "loss": 0.0114, |
| "step": 8240 |
| }, |
| { |
| "grad_norm": 0.28467315435409546, |
| "learning_rate": 6.818310006843468e-05, |
| "loss": 0.0109, |
| "step": 8250 |
| }, |
| { |
| "grad_norm": 0.39719972014427185, |
| "learning_rate": 6.810606233038868e-05, |
| "loss": 0.0125, |
| "step": 8260 |
| }, |
| { |
| "grad_norm": 0.367841899394989, |
| "learning_rate": 6.802897509105966e-05, |
| "loss": 0.0093, |
| "step": 8270 |
| }, |
| { |
| "grad_norm": 0.28418073058128357, |
| "learning_rate": 6.79518385612012e-05, |
| "loss": 0.011, |
| "step": 8280 |
| }, |
| { |
| "grad_norm": 0.33649322390556335, |
| "learning_rate": 6.787465295170157e-05, |
| "loss": 0.0111, |
| "step": 8290 |
| }, |
| { |
| "grad_norm": 0.20563358068466187, |
| "learning_rate": 6.779741847358332e-05, |
| "loss": 0.0101, |
| "step": 8300 |
| }, |
| { |
| "grad_norm": 0.2554636299610138, |
| "learning_rate": 6.772013533800256e-05, |
| "loss": 0.0078, |
| "step": 8310 |
| }, |
| { |
| "grad_norm": 0.35546278953552246, |
| "learning_rate": 6.764280375624843e-05, |
| "loss": 0.0112, |
| "step": 8320 |
| }, |
| { |
| "grad_norm": 0.33014950156211853, |
| "learning_rate": 6.756542393974252e-05, |
| "loss": 0.0136, |
| "step": 8330 |
| }, |
| { |
| "grad_norm": 0.34563302993774414, |
| "learning_rate": 6.748799610003828e-05, |
| "loss": 0.0111, |
| "step": 8340 |
| }, |
| { |
| "grad_norm": 0.28476187586784363, |
| "learning_rate": 6.741052044882048e-05, |
| "loss": 0.0133, |
| "step": 8350 |
| }, |
| { |
| "grad_norm": 0.21680086851119995, |
| "learning_rate": 6.73329971979046e-05, |
| "loss": 0.01, |
| "step": 8360 |
| }, |
| { |
| "grad_norm": 0.24411574006080627, |
| "learning_rate": 6.725542655923625e-05, |
| "loss": 0.0101, |
| "step": 8370 |
| }, |
| { |
| "grad_norm": 0.2673936188220978, |
| "learning_rate": 6.717780874489057e-05, |
| "loss": 0.0142, |
| "step": 8380 |
| }, |
| { |
| "grad_norm": 0.22214150428771973, |
| "learning_rate": 6.710014396707172e-05, |
| "loss": 0.0081, |
| "step": 8390 |
| }, |
| { |
| "grad_norm": 0.21424426138401031, |
| "learning_rate": 6.702243243811221e-05, |
| "loss": 0.0094, |
| "step": 8400 |
| }, |
| { |
| "grad_norm": 0.326800137758255, |
| "learning_rate": 6.694467437047244e-05, |
| "loss": 0.0109, |
| "step": 8410 |
| }, |
| { |
| "grad_norm": 0.33343732357025146, |
| "learning_rate": 6.686686997673997e-05, |
| "loss": 0.0073, |
| "step": 8420 |
| }, |
| { |
| "grad_norm": 0.2529543936252594, |
| "learning_rate": 6.678901946962903e-05, |
| "loss": 0.0091, |
| "step": 8430 |
| }, |
| { |
| "grad_norm": 0.33415693044662476, |
| "learning_rate": 6.671112306197996e-05, |
| "loss": 0.0093, |
| "step": 8440 |
| }, |
| { |
| "grad_norm": 0.21618202328681946, |
| "learning_rate": 6.663318096675854e-05, |
| "loss": 0.0109, |
| "step": 8450 |
| }, |
| { |
| "grad_norm": 0.17679205536842346, |
| "learning_rate": 6.655519339705552e-05, |
| "loss": 0.0081, |
| "step": 8460 |
| }, |
| { |
| "grad_norm": 0.31180447340011597, |
| "learning_rate": 6.647716056608588e-05, |
| "loss": 0.0075, |
| "step": 8470 |
| }, |
| { |
| "grad_norm": 0.3059850335121155, |
| "learning_rate": 6.639908268718843e-05, |
| "loss": 0.0104, |
| "step": 8480 |
| }, |
| { |
| "grad_norm": 0.2953120768070221, |
| "learning_rate": 6.632095997382514e-05, |
| "loss": 0.0136, |
| "step": 8490 |
| }, |
| { |
| "grad_norm": 0.26256608963012695, |
| "learning_rate": 6.624279263958047e-05, |
| "loss": 0.0093, |
| "step": 8500 |
| }, |
| { |
| "grad_norm": 0.31494539976119995, |
| "learning_rate": 6.616458089816097e-05, |
| "loss": 0.0106, |
| "step": 8510 |
| }, |
| { |
| "grad_norm": 0.376949280500412, |
| "learning_rate": 6.608632496339454e-05, |
| "loss": 0.0099, |
| "step": 8520 |
| }, |
| { |
| "grad_norm": 0.2433367669582367, |
| "learning_rate": 6.600802504922988e-05, |
| "loss": 0.0102, |
| "step": 8530 |
| }, |
| { |
| "grad_norm": 0.37388041615486145, |
| "learning_rate": 6.592968136973604e-05, |
| "loss": 0.0105, |
| "step": 8540 |
| }, |
| { |
| "grad_norm": 0.28184112906455994, |
| "learning_rate": 6.585129413910159e-05, |
| "loss": 0.0097, |
| "step": 8550 |
| }, |
| { |
| "grad_norm": 0.2922300696372986, |
| "learning_rate": 6.577286357163424e-05, |
| "loss": 0.01, |
| "step": 8560 |
| }, |
| { |
| "grad_norm": 0.2782540023326874, |
| "learning_rate": 6.569438988176018e-05, |
| "loss": 0.0082, |
| "step": 8570 |
| }, |
| { |
| "grad_norm": 0.27248328924179077, |
| "learning_rate": 6.561587328402347e-05, |
| "loss": 0.0088, |
| "step": 8580 |
| }, |
| { |
| "grad_norm": 0.24880534410476685, |
| "learning_rate": 6.553731399308549e-05, |
| "loss": 0.0079, |
| "step": 8590 |
| }, |
| { |
| "grad_norm": 0.2515351176261902, |
| "learning_rate": 6.545871222372436e-05, |
| "loss": 0.0073, |
| "step": 8600 |
| }, |
| { |
| "grad_norm": 0.18304027616977692, |
| "learning_rate": 6.538006819083426e-05, |
| "loss": 0.0071, |
| "step": 8610 |
| }, |
| { |
| "grad_norm": 0.3344535231590271, |
| "learning_rate": 6.530138210942505e-05, |
| "loss": 0.0101, |
| "step": 8620 |
| }, |
| { |
| "grad_norm": 0.2146245390176773, |
| "learning_rate": 6.522265419462141e-05, |
| "loss": 0.0121, |
| "step": 8630 |
| }, |
| { |
| "grad_norm": 0.24218611419200897, |
| "learning_rate": 6.514388466166248e-05, |
| "loss": 0.0096, |
| "step": 8640 |
| }, |
| { |
| "grad_norm": 0.16334691643714905, |
| "learning_rate": 6.506507372590119e-05, |
| "loss": 0.0079, |
| "step": 8650 |
| }, |
| { |
| "grad_norm": 0.33363232016563416, |
| "learning_rate": 6.498622160280355e-05, |
| "loss": 0.0098, |
| "step": 8660 |
| }, |
| { |
| "grad_norm": 0.35225972533226013, |
| "learning_rate": 6.490732850794832e-05, |
| "loss": 0.0103, |
| "step": 8670 |
| }, |
| { |
| "grad_norm": 0.3227727711200714, |
| "learning_rate": 6.482839465702616e-05, |
| "loss": 0.0107, |
| "step": 8680 |
| }, |
| { |
| "grad_norm": 0.2620507776737213, |
| "learning_rate": 6.474942026583923e-05, |
| "loss": 0.0104, |
| "step": 8690 |
| }, |
| { |
| "grad_norm": 0.2854481041431427, |
| "learning_rate": 6.467040555030052e-05, |
| "loss": 0.0153, |
| "step": 8700 |
| }, |
| { |
| "grad_norm": 0.24487437307834625, |
| "learning_rate": 6.459135072643321e-05, |
| "loss": 0.0099, |
| "step": 8710 |
| }, |
| { |
| "grad_norm": 0.23461481928825378, |
| "learning_rate": 6.451225601037019e-05, |
| "loss": 0.0109, |
| "step": 8720 |
| }, |
| { |
| "grad_norm": 0.2263409048318863, |
| "learning_rate": 6.443312161835338e-05, |
| "loss": 0.0105, |
| "step": 8730 |
| }, |
| { |
| "grad_norm": 0.3112694025039673, |
| "learning_rate": 6.43539477667332e-05, |
| "loss": 0.0116, |
| "step": 8740 |
| }, |
| { |
| "grad_norm": 0.2899706959724426, |
| "learning_rate": 6.427473467196793e-05, |
| "loss": 0.0109, |
| "step": 8750 |
| }, |
| { |
| "grad_norm": 0.319865882396698, |
| "learning_rate": 6.419548255062315e-05, |
| "loss": 0.0142, |
| "step": 8760 |
| }, |
| { |
| "grad_norm": 0.2569391429424286, |
| "learning_rate": 6.411619161937112e-05, |
| "loss": 0.0185, |
| "step": 8770 |
| }, |
| { |
| "grad_norm": 0.2740200161933899, |
| "learning_rate": 6.403686209499022e-05, |
| "loss": 0.0101, |
| "step": 8780 |
| }, |
| { |
| "grad_norm": 0.29832252860069275, |
| "learning_rate": 6.395749419436437e-05, |
| "loss": 0.0094, |
| "step": 8790 |
| }, |
| { |
| "grad_norm": 0.275097519159317, |
| "learning_rate": 6.387808813448234e-05, |
| "loss": 0.0106, |
| "step": 8800 |
| }, |
| { |
| "grad_norm": 0.32685816287994385, |
| "learning_rate": 6.37986441324373e-05, |
| "loss": 0.0086, |
| "step": 8810 |
| }, |
| { |
| "grad_norm": 0.28648194670677185, |
| "learning_rate": 6.37191624054261e-05, |
| "loss": 0.0117, |
| "step": 8820 |
| }, |
| { |
| "grad_norm": 0.2401561737060547, |
| "learning_rate": 6.363964317074872e-05, |
| "loss": 0.0081, |
| "step": 8830 |
| }, |
| { |
| "grad_norm": 0.2832534909248352, |
| "learning_rate": 6.356008664580776e-05, |
| "loss": 0.0136, |
| "step": 8840 |
| }, |
| { |
| "grad_norm": 0.2052382379770279, |
| "learning_rate": 6.348049304810771e-05, |
| "loss": 0.0097, |
| "step": 8850 |
| }, |
| { |
| "grad_norm": 0.3278440833091736, |
| "learning_rate": 6.340086259525442e-05, |
| "loss": 0.0083, |
| "step": 8860 |
| }, |
| { |
| "grad_norm": 0.34554144740104675, |
| "learning_rate": 6.332119550495448e-05, |
| "loss": 0.0098, |
| "step": 8870 |
| }, |
| { |
| "grad_norm": 0.2610031068325043, |
| "learning_rate": 6.324149199501473e-05, |
| "loss": 0.01, |
| "step": 8880 |
| }, |
| { |
| "grad_norm": 0.22511707246303558, |
| "learning_rate": 6.316175228334146e-05, |
| "loss": 0.0092, |
| "step": 8890 |
| }, |
| { |
| "grad_norm": 0.2637081742286682, |
| "learning_rate": 6.308197658794003e-05, |
| "loss": 0.0128, |
| "step": 8900 |
| }, |
| { |
| "grad_norm": 0.31135818362236023, |
| "learning_rate": 6.300216512691417e-05, |
| "loss": 0.008, |
| "step": 8910 |
| }, |
| { |
| "grad_norm": 0.23880526423454285, |
| "learning_rate": 6.292231811846532e-05, |
| "loss": 0.0101, |
| "step": 8920 |
| }, |
| { |
| "grad_norm": 0.25867730379104614, |
| "learning_rate": 6.284243578089217e-05, |
| "loss": 0.0088, |
| "step": 8930 |
| }, |
| { |
| "grad_norm": 0.26295626163482666, |
| "learning_rate": 6.276251833258999e-05, |
| "loss": 0.0081, |
| "step": 8940 |
| }, |
| { |
| "grad_norm": 0.28615128993988037, |
| "learning_rate": 6.268256599205003e-05, |
| "loss": 0.0104, |
| "step": 8950 |
| }, |
| { |
| "grad_norm": 0.292758971452713, |
| "learning_rate": 6.260257897785892e-05, |
| "loss": 0.009, |
| "step": 8960 |
| }, |
| { |
| "grad_norm": 0.2305050492286682, |
| "learning_rate": 6.252255750869811e-05, |
| "loss": 0.01, |
| "step": 8970 |
| }, |
| { |
| "grad_norm": 0.2941057085990906, |
| "learning_rate": 6.244250180334325e-05, |
| "loss": 0.0136, |
| "step": 8980 |
| }, |
| { |
| "grad_norm": 0.3271690607070923, |
| "learning_rate": 6.236241208066356e-05, |
| "loss": 0.0111, |
| "step": 8990 |
| }, |
| { |
| "grad_norm": 0.2185642123222351, |
| "learning_rate": 6.228228855962133e-05, |
| "loss": 0.0074, |
| "step": 9000 |
| }, |
| { |
| "grad_norm": 0.26744088530540466, |
| "learning_rate": 6.220213145927115e-05, |
| "loss": 0.0067, |
| "step": 9010 |
| }, |
| { |
| "grad_norm": 0.20686663687229156, |
| "learning_rate": 6.212194099875951e-05, |
| "loss": 0.0094, |
| "step": 9020 |
| }, |
| { |
| "grad_norm": 0.2725589871406555, |
| "learning_rate": 6.204171739732405e-05, |
| "loss": 0.0083, |
| "step": 9030 |
| }, |
| { |
| "grad_norm": 0.3449211120605469, |
| "learning_rate": 6.196146087429303e-05, |
| "loss": 0.0085, |
| "step": 9040 |
| }, |
| { |
| "grad_norm": 0.24136734008789062, |
| "learning_rate": 6.188117164908474e-05, |
| "loss": 0.0094, |
| "step": 9050 |
| }, |
| { |
| "grad_norm": 0.2191270887851715, |
| "learning_rate": 6.180084994120684e-05, |
| "loss": 0.011, |
| "step": 9060 |
| }, |
| { |
| "grad_norm": 0.4127255082130432, |
| "learning_rate": 6.17204959702558e-05, |
| "loss": 0.0115, |
| "step": 9070 |
| }, |
| { |
| "grad_norm": 0.25778821110725403, |
| "learning_rate": 6.164010995591635e-05, |
| "loss": 0.0073, |
| "step": 9080 |
| }, |
| { |
| "grad_norm": 0.19585411250591278, |
| "learning_rate": 6.155969211796076e-05, |
| "loss": 0.0093, |
| "step": 9090 |
| }, |
| { |
| "grad_norm": 0.1971423476934433, |
| "learning_rate": 6.147924267624829e-05, |
| "loss": 0.0065, |
| "step": 9100 |
| }, |
| { |
| "grad_norm": 0.18513402342796326, |
| "learning_rate": 6.13987618507247e-05, |
| "loss": 0.0082, |
| "step": 9110 |
| }, |
| { |
| "grad_norm": 0.30468112230300903, |
| "learning_rate": 6.131824986142147e-05, |
| "loss": 0.0108, |
| "step": 9120 |
| }, |
| { |
| "grad_norm": 0.2643079161643982, |
| "learning_rate": 6.123770692845529e-05, |
| "loss": 0.0081, |
| "step": 9130 |
| }, |
| { |
| "grad_norm": 0.35840684175491333, |
| "learning_rate": 6.11571332720275e-05, |
| "loss": 0.0081, |
| "step": 9140 |
| }, |
| { |
| "grad_norm": 0.34239524602890015, |
| "learning_rate": 6.107652911242336e-05, |
| "loss": 0.0118, |
| "step": 9150 |
| }, |
| { |
| "grad_norm": 0.3136473596096039, |
| "learning_rate": 6.0995894670011586e-05, |
| "loss": 0.0128, |
| "step": 9160 |
| }, |
| { |
| "grad_norm": 0.3185141980648041, |
| "learning_rate": 6.091523016524368e-05, |
| "loss": 0.0133, |
| "step": 9170 |
| }, |
| { |
| "grad_norm": 0.2437521517276764, |
| "learning_rate": 6.083453581865328e-05, |
| "loss": 0.0116, |
| "step": 9180 |
| }, |
| { |
| "grad_norm": 0.24852575361728668, |
| "learning_rate": 6.075381185085568e-05, |
| "loss": 0.0101, |
| "step": 9190 |
| }, |
| { |
| "grad_norm": 0.21035079658031464, |
| "learning_rate": 6.067305848254709e-05, |
| "loss": 0.0089, |
| "step": 9200 |
| }, |
| { |
| "grad_norm": 0.19124074280261993, |
| "learning_rate": 6.059227593450418e-05, |
| "loss": 0.01, |
| "step": 9210 |
| }, |
| { |
| "grad_norm": 0.22028234601020813, |
| "learning_rate": 6.051146442758333e-05, |
| "loss": 0.0128, |
| "step": 9220 |
| }, |
| { |
| "grad_norm": 0.2855907082557678, |
| "learning_rate": 6.043062418272012e-05, |
| "loss": 0.0103, |
| "step": 9230 |
| }, |
| { |
| "grad_norm": 0.23253290355205536, |
| "learning_rate": 6.0349755420928666e-05, |
| "loss": 0.0075, |
| "step": 9240 |
| }, |
| { |
| "grad_norm": 0.22832125425338745, |
| "learning_rate": 6.0268858363301105e-05, |
| "loss": 0.0074, |
| "step": 9250 |
| }, |
| { |
| "grad_norm": 0.22071580588817596, |
| "learning_rate": 6.018793323100689e-05, |
| "loss": 0.0106, |
| "step": 9260 |
| }, |
| { |
| "grad_norm": 0.3454406261444092, |
| "learning_rate": 6.0106980245292255e-05, |
| "loss": 0.011, |
| "step": 9270 |
| }, |
| { |
| "grad_norm": 0.3467009663581848, |
| "learning_rate": 6.002599962747957e-05, |
| "loss": 0.0087, |
| "step": 9280 |
| }, |
| { |
| "grad_norm": 0.2289619743824005, |
| "learning_rate": 5.994499159896673e-05, |
| "loss": 0.0068, |
| "step": 9290 |
| }, |
| { |
| "grad_norm": 0.2502879202365875, |
| "learning_rate": 5.9863956381226607e-05, |
| "loss": 0.0138, |
| "step": 9300 |
| }, |
| { |
| "grad_norm": 0.23016954958438873, |
| "learning_rate": 5.9782894195806394e-05, |
| "loss": 0.0088, |
| "step": 9310 |
| }, |
| { |
| "grad_norm": 0.3265341520309448, |
| "learning_rate": 5.9701805264327004e-05, |
| "loss": 0.0122, |
| "step": 9320 |
| }, |
| { |
| "grad_norm": 0.2779223322868347, |
| "learning_rate": 5.96206898084825e-05, |
| "loss": 0.0073, |
| "step": 9330 |
| }, |
| { |
| "grad_norm": 0.20430560410022736, |
| "learning_rate": 5.953954805003942e-05, |
| "loss": 0.0106, |
| "step": 9340 |
| }, |
| { |
| "grad_norm": 0.22642415761947632, |
| "learning_rate": 5.945838021083623e-05, |
| "loss": 0.0083, |
| "step": 9350 |
| }, |
| { |
| "grad_norm": 0.22150662541389465, |
| "learning_rate": 5.9377186512782714e-05, |
| "loss": 0.0065, |
| "step": 9360 |
| }, |
| { |
| "grad_norm": 0.364218145608902, |
| "learning_rate": 5.929596717785935e-05, |
| "loss": 0.0089, |
| "step": 9370 |
| }, |
| { |
| "grad_norm": 0.24294275045394897, |
| "learning_rate": 5.921472242811668e-05, |
| "loss": 0.0079, |
| "step": 9380 |
| }, |
| { |
| "grad_norm": 0.23857471346855164, |
| "learning_rate": 5.913345248567475e-05, |
| "loss": 0.0125, |
| "step": 9390 |
| }, |
| { |
| "grad_norm": 0.17391999065876007, |
| "learning_rate": 5.905215757272248e-05, |
| "loss": 0.0162, |
| "step": 9400 |
| }, |
| { |
| "grad_norm": 0.2742446959018707, |
| "learning_rate": 5.897083791151706e-05, |
| "loss": 0.009, |
| "step": 9410 |
| }, |
| { |
| "grad_norm": 0.27407529950141907, |
| "learning_rate": 5.888949372438336e-05, |
| "loss": 0.0104, |
| "step": 9420 |
| }, |
| { |
| "grad_norm": 0.1971682459115982, |
| "learning_rate": 5.8808125233713255e-05, |
| "loss": 0.008, |
| "step": 9430 |
| }, |
| { |
| "grad_norm": 0.17385496199131012, |
| "learning_rate": 5.872673266196509e-05, |
| "loss": 0.007, |
| "step": 9440 |
| }, |
| { |
| "grad_norm": 0.2608735263347626, |
| "learning_rate": 5.864531623166305e-05, |
| "loss": 0.0083, |
| "step": 9450 |
| }, |
| { |
| "grad_norm": 0.2396305799484253, |
| "learning_rate": 5.856387616539656e-05, |
| "loss": 0.0082, |
| "step": 9460 |
| }, |
| { |
| "grad_norm": 0.26889148354530334, |
| "learning_rate": 5.848241268581967e-05, |
| "loss": 0.0085, |
| "step": 9470 |
| }, |
| { |
| "grad_norm": 0.26665395498275757, |
| "learning_rate": 5.840092601565037e-05, |
| "loss": 0.0094, |
| "step": 9480 |
| }, |
| { |
| "grad_norm": 0.23885580897331238, |
| "learning_rate": 5.8319416377670144e-05, |
| "loss": 0.008, |
| "step": 9490 |
| }, |
| { |
| "grad_norm": 0.2632520794868469, |
| "learning_rate": 5.82378839947232e-05, |
| "loss": 0.0098, |
| "step": 9500 |
| }, |
| { |
| "grad_norm": 0.3209339678287506, |
| "learning_rate": 5.815632908971599e-05, |
| "loss": 0.0106, |
| "step": 9510 |
| }, |
| { |
| "grad_norm": 0.282398521900177, |
| "learning_rate": 5.80747518856165e-05, |
| "loss": 0.0099, |
| "step": 9520 |
| }, |
| { |
| "grad_norm": 0.3100825250148773, |
| "learning_rate": 5.799315260545367e-05, |
| "loss": 0.0134, |
| "step": 9530 |
| }, |
| { |
| "grad_norm": 0.2550257444381714, |
| "learning_rate": 5.791153147231686e-05, |
| "loss": 0.0135, |
| "step": 9540 |
| }, |
| { |
| "grad_norm": 0.3137185275554657, |
| "learning_rate": 5.782988870935509e-05, |
| "loss": 0.008, |
| "step": 9550 |
| }, |
| { |
| "grad_norm": 0.23910042643547058, |
| "learning_rate": 5.774822453977657e-05, |
| "loss": 0.0087, |
| "step": 9560 |
| }, |
| { |
| "grad_norm": 0.21105986833572388, |
| "learning_rate": 5.7666539186848036e-05, |
| "loss": 0.009, |
| "step": 9570 |
| }, |
| { |
| "grad_norm": 0.2725152373313904, |
| "learning_rate": 5.758483287389411e-05, |
| "loss": 0.0143, |
| "step": 9580 |
| }, |
| { |
| "grad_norm": 0.2424250990152359, |
| "learning_rate": 5.7503105824296735e-05, |
| "loss": 0.0122, |
| "step": 9590 |
| }, |
| { |
| "grad_norm": 0.20699156820774078, |
| "learning_rate": 5.742135826149453e-05, |
| "loss": 0.0092, |
| "step": 9600 |
| }, |
| { |
| "grad_norm": 0.19423116743564606, |
| "learning_rate": 5.7339590408982223e-05, |
| "loss": 0.0065, |
| "step": 9610 |
| }, |
| { |
| "grad_norm": 0.2932196259498596, |
| "learning_rate": 5.725780249031e-05, |
| "loss": 0.0091, |
| "step": 9620 |
| }, |
| { |
| "grad_norm": 0.3803527057170868, |
| "learning_rate": 5.717599472908292e-05, |
| "loss": 0.0109, |
| "step": 9630 |
| }, |
| { |
| "grad_norm": 0.3079898953437805, |
| "learning_rate": 5.7094167348960237e-05, |
| "loss": 0.0084, |
| "step": 9640 |
| }, |
| { |
| "grad_norm": 0.2345152646303177, |
| "learning_rate": 5.7012320573654945e-05, |
| "loss": 0.0093, |
| "step": 9650 |
| }, |
| { |
| "grad_norm": 0.19556953012943268, |
| "learning_rate": 5.693045462693295e-05, |
| "loss": 0.008, |
| "step": 9660 |
| }, |
| { |
| "grad_norm": 0.22584684193134308, |
| "learning_rate": 5.684856973261266e-05, |
| "loss": 0.0073, |
| "step": 9670 |
| }, |
| { |
| "grad_norm": 0.2571251094341278, |
| "learning_rate": 5.6766666114564215e-05, |
| "loss": 0.0099, |
| "step": 9680 |
| }, |
| { |
| "grad_norm": 0.3253817856311798, |
| "learning_rate": 5.668474399670899e-05, |
| "loss": 0.0089, |
| "step": 9690 |
| }, |
| { |
| "grad_norm": 0.23601661622524261, |
| "learning_rate": 5.660280360301896e-05, |
| "loss": 0.0085, |
| "step": 9700 |
| }, |
| { |
| "grad_norm": 0.18850122392177582, |
| "learning_rate": 5.652084515751599e-05, |
| "loss": 0.0063, |
| "step": 9710 |
| }, |
| { |
| "grad_norm": 0.2345300316810608, |
| "learning_rate": 5.643886888427137e-05, |
| "loss": 0.009, |
| "step": 9720 |
| }, |
| { |
| "grad_norm": 0.21616721153259277, |
| "learning_rate": 5.6356875007405074e-05, |
| "loss": 0.0105, |
| "step": 9730 |
| }, |
| { |
| "grad_norm": 0.23095466196537018, |
| "learning_rate": 5.627486375108525e-05, |
| "loss": 0.0113, |
| "step": 9740 |
| }, |
| { |
| "grad_norm": 0.20307700335979462, |
| "learning_rate": 5.619283533952754e-05, |
| "loss": 0.0098, |
| "step": 9750 |
| }, |
| { |
| "grad_norm": 0.3022615313529968, |
| "learning_rate": 5.6110789996994474e-05, |
| "loss": 0.0087, |
| "step": 9760 |
| }, |
| { |
| "grad_norm": 0.2918921411037445, |
| "learning_rate": 5.602872794779491e-05, |
| "loss": 0.0115, |
| "step": 9770 |
| }, |
| { |
| "grad_norm": 0.3074280023574829, |
| "learning_rate": 5.594664941628334e-05, |
| "loss": 0.0087, |
| "step": 9780 |
| }, |
| { |
| "grad_norm": 0.2652987241744995, |
| "learning_rate": 5.5864554626859324e-05, |
| "loss": 0.0102, |
| "step": 9790 |
| }, |
| { |
| "grad_norm": 0.2364000827074051, |
| "learning_rate": 5.578244380396691e-05, |
| "loss": 0.0071, |
| "step": 9800 |
| }, |
| { |
| "grad_norm": 0.25912168622016907, |
| "learning_rate": 5.570031717209394e-05, |
| "loss": 0.0073, |
| "step": 9810 |
| }, |
| { |
| "grad_norm": 0.2132926732301712, |
| "learning_rate": 5.561817495577147e-05, |
| "loss": 0.0081, |
| "step": 9820 |
| }, |
| { |
| "grad_norm": 0.3217675983905792, |
| "learning_rate": 5.5536017379573215e-05, |
| "loss": 0.0095, |
| "step": 9830 |
| }, |
| { |
| "grad_norm": 0.21416251361370087, |
| "learning_rate": 5.545384466811483e-05, |
| "loss": 0.0088, |
| "step": 9840 |
| }, |
| { |
| "grad_norm": 0.30158206820487976, |
| "learning_rate": 5.5371657046053384e-05, |
| "loss": 0.0127, |
| "step": 9850 |
| }, |
| { |
| "grad_norm": 0.17979058623313904, |
| "learning_rate": 5.528945473808669e-05, |
| "loss": 0.0083, |
| "step": 9860 |
| }, |
| { |
| "grad_norm": 0.1995510756969452, |
| "learning_rate": 5.520723796895272e-05, |
| "loss": 0.0063, |
| "step": 9870 |
| }, |
| { |
| "grad_norm": 0.21220991015434265, |
| "learning_rate": 5.512500696342897e-05, |
| "loss": 0.0077, |
| "step": 9880 |
| }, |
| { |
| "grad_norm": 0.3290112316608429, |
| "learning_rate": 5.504276194633188e-05, |
| "loss": 0.0107, |
| "step": 9890 |
| }, |
| { |
| "grad_norm": 0.28488659858703613, |
| "learning_rate": 5.49605031425162e-05, |
| "loss": 0.0081, |
| "step": 9900 |
| }, |
| { |
| "grad_norm": 0.32763949036598206, |
| "learning_rate": 5.487823077687434e-05, |
| "loss": 0.0135, |
| "step": 9910 |
| }, |
| { |
| "grad_norm": 0.23580661416053772, |
| "learning_rate": 5.4795945074335806e-05, |
| "loss": 0.0094, |
| "step": 9920 |
| }, |
| { |
| "grad_norm": 0.22253672778606415, |
| "learning_rate": 5.471364625986657e-05, |
| "loss": 0.008, |
| "step": 9930 |
| }, |
| { |
| "grad_norm": 0.20649607479572296, |
| "learning_rate": 5.463133455846845e-05, |
| "loss": 0.0064, |
| "step": 9940 |
| }, |
| { |
| "grad_norm": 0.21485736966133118, |
| "learning_rate": 5.4549010195178505e-05, |
| "loss": 0.0142, |
| "step": 9950 |
| }, |
| { |
| "grad_norm": 0.22330300509929657, |
| "learning_rate": 5.446667339506838e-05, |
| "loss": 0.008, |
| "step": 9960 |
| }, |
| { |
| "grad_norm": 0.2618495523929596, |
| "learning_rate": 5.4384324383243756e-05, |
| "loss": 0.0099, |
| "step": 9970 |
| }, |
| { |
| "grad_norm": 0.28015658259391785, |
| "learning_rate": 5.430196338484368e-05, |
| "loss": 0.011, |
| "step": 9980 |
| }, |
| { |
| "grad_norm": 0.20648691058158875, |
| "learning_rate": 5.4219590625039975e-05, |
| "loss": 0.0083, |
| "step": 9990 |
| }, |
| { |
| "grad_norm": 0.22049671411514282, |
| "learning_rate": 5.413720632903664e-05, |
| "loss": 0.008, |
| "step": 10000 |
| }, |
| { |
| "grad_norm": 0.26092201471328735, |
| "learning_rate": 5.405481072206917e-05, |
| "loss": 0.0068, |
| "step": 10010 |
| }, |
| { |
| "grad_norm": 0.2266596406698227, |
| "learning_rate": 5.397240402940402e-05, |
| "loss": 0.0107, |
| "step": 10020 |
| }, |
| { |
| "grad_norm": 0.2024102360010147, |
| "learning_rate": 5.388998647633794e-05, |
| "loss": 0.0081, |
| "step": 10030 |
| }, |
| { |
| "grad_norm": 0.29881739616394043, |
| "learning_rate": 5.380755828819737e-05, |
| "loss": 0.0109, |
| "step": 10040 |
| }, |
| { |
| "grad_norm": 0.31106695532798767, |
| "learning_rate": 5.3725119690337846e-05, |
| "loss": 0.0079, |
| "step": 10050 |
| }, |
| { |
| "grad_norm": 0.22892136871814728, |
| "learning_rate": 5.3642670908143324e-05, |
| "loss": 0.007, |
| "step": 10060 |
| }, |
| { |
| "grad_norm": 0.30826979875564575, |
| "learning_rate": 5.356021216702562e-05, |
| "loss": 0.0073, |
| "step": 10070 |
| }, |
| { |
| "grad_norm": 0.27872055768966675, |
| "learning_rate": 5.347774369242381e-05, |
| "loss": 0.0075, |
| "step": 10080 |
| }, |
| { |
| "grad_norm": 0.3187792897224426, |
| "learning_rate": 5.3395265709803545e-05, |
| "loss": 0.0072, |
| "step": 10090 |
| }, |
| { |
| "grad_norm": 0.24945269525051117, |
| "learning_rate": 5.331277844465647e-05, |
| "loss": 0.0121, |
| "step": 10100 |
| }, |
| { |
| "grad_norm": 0.21182595193386078, |
| "learning_rate": 5.323028212249963e-05, |
| "loss": 0.012, |
| "step": 10110 |
| }, |
| { |
| "grad_norm": 0.28724876046180725, |
| "learning_rate": 5.314777696887481e-05, |
| "loss": 0.0088, |
| "step": 10120 |
| }, |
| { |
| "grad_norm": 0.2590867877006531, |
| "learning_rate": 5.306526320934796e-05, |
| "loss": 0.0099, |
| "step": 10130 |
| }, |
| { |
| "grad_norm": 0.2745634615421295, |
| "learning_rate": 5.298274106950854e-05, |
| "loss": 0.007, |
| "step": 10140 |
| }, |
| { |
| "grad_norm": 0.22778289020061493, |
| "learning_rate": 5.290021077496893e-05, |
| "loss": 0.0081, |
| "step": 10150 |
| }, |
| { |
| "grad_norm": 0.2155156284570694, |
| "learning_rate": 5.2817672551363816e-05, |
| "loss": 0.0073, |
| "step": 10160 |
| }, |
| { |
| "grad_norm": 0.2104647010564804, |
| "learning_rate": 5.273512662434952e-05, |
| "loss": 0.0085, |
| "step": 10170 |
| }, |
| { |
| "grad_norm": 0.2083871066570282, |
| "learning_rate": 5.265257321960349e-05, |
| "loss": 0.0093, |
| "step": 10180 |
| }, |
| { |
| "grad_norm": 0.25575950741767883, |
| "learning_rate": 5.257001256282357e-05, |
| "loss": 0.0086, |
| "step": 10190 |
| }, |
| { |
| "grad_norm": 0.2434515506029129, |
| "learning_rate": 5.248744487972742e-05, |
| "loss": 0.0094, |
| "step": 10200 |
| }, |
| { |
| "grad_norm": 0.28249382972717285, |
| "learning_rate": 5.240487039605196e-05, |
| "loss": 0.0087, |
| "step": 10210 |
| }, |
| { |
| "grad_norm": 0.20960280299186707, |
| "learning_rate": 5.232228933755267e-05, |
| "loss": 0.0097, |
| "step": 10220 |
| }, |
| { |
| "grad_norm": 0.17936058342456818, |
| "learning_rate": 5.2239701930003006e-05, |
| "loss": 0.0065, |
| "step": 10230 |
| }, |
| { |
| "grad_norm": 0.21344617009162903, |
| "learning_rate": 5.215710839919379e-05, |
| "loss": 0.0077, |
| "step": 10240 |
| }, |
| { |
| "grad_norm": 0.2035842388868332, |
| "learning_rate": 5.207450897093257e-05, |
| "loss": 0.0086, |
| "step": 10250 |
| }, |
| { |
| "grad_norm": 0.21530857682228088, |
| "learning_rate": 5.1991903871043046e-05, |
| "loss": 0.0071, |
| "step": 10260 |
| }, |
| { |
| "grad_norm": 0.21515516936779022, |
| "learning_rate": 5.190929332536439e-05, |
| "loss": 0.0084, |
| "step": 10270 |
| }, |
| { |
| "grad_norm": 0.30118030309677124, |
| "learning_rate": 5.182667755975071e-05, |
| "loss": 0.0092, |
| "step": 10280 |
| }, |
| { |
| "grad_norm": 0.2057802826166153, |
| "learning_rate": 5.1744056800070315e-05, |
| "loss": 0.0078, |
| "step": 10290 |
| }, |
| { |
| "grad_norm": 0.26955923438072205, |
| "learning_rate": 5.166143127220524e-05, |
| "loss": 0.0097, |
| "step": 10300 |
| }, |
| { |
| "grad_norm": 0.2716047763824463, |
| "learning_rate": 5.1578801202050485e-05, |
| "loss": 0.0088, |
| "step": 10310 |
| }, |
| { |
| "grad_norm": 0.2820281386375427, |
| "learning_rate": 5.149616681551355e-05, |
| "loss": 0.0088, |
| "step": 10320 |
| }, |
| { |
| "grad_norm": 0.25352105498313904, |
| "learning_rate": 5.141352833851367e-05, |
| "loss": 0.0073, |
| "step": 10330 |
| }, |
| { |
| "grad_norm": 0.15962743759155273, |
| "learning_rate": 5.1330885996981285e-05, |
| "loss": 0.0078, |
| "step": 10340 |
| }, |
| { |
| "grad_norm": 0.22716277837753296, |
| "learning_rate": 5.124824001685741e-05, |
| "loss": 0.0075, |
| "step": 10350 |
| }, |
| { |
| "grad_norm": 0.20729690790176392, |
| "learning_rate": 5.116559062409298e-05, |
| "loss": 0.0059, |
| "step": 10360 |
| }, |
| { |
| "grad_norm": 0.2034323364496231, |
| "learning_rate": 5.10829380446483e-05, |
| "loss": 0.0094, |
| "step": 10370 |
| }, |
| { |
| "grad_norm": 0.21604637801647186, |
| "learning_rate": 5.100028250449235e-05, |
| "loss": 0.008, |
| "step": 10380 |
| }, |
| { |
| "grad_norm": 0.2015807330608368, |
| "learning_rate": 5.0917624229602234e-05, |
| "loss": 0.0065, |
| "step": 10390 |
| }, |
| { |
| "grad_norm": 0.18266531825065613, |
| "learning_rate": 5.0834963445962524e-05, |
| "loss": 0.0089, |
| "step": 10400 |
| }, |
| { |
| "grad_norm": 0.252404123544693, |
| "learning_rate": 5.075230037956461e-05, |
| "loss": 0.0083, |
| "step": 10410 |
| }, |
| { |
| "grad_norm": 0.22738321125507355, |
| "learning_rate": 5.0669635256406213e-05, |
| "loss": 0.0145, |
| "step": 10420 |
| }, |
| { |
| "grad_norm": 0.3449110984802246, |
| "learning_rate": 5.058696830249058e-05, |
| "loss": 0.01, |
| "step": 10430 |
| }, |
| { |
| "grad_norm": 0.30087509751319885, |
| "learning_rate": 5.050429974382602e-05, |
| "loss": 0.0156, |
| "step": 10440 |
| }, |
| { |
| "grad_norm": 0.27621498703956604, |
| "learning_rate": 5.042162980642523e-05, |
| "loss": 0.0075, |
| "step": 10450 |
| }, |
| { |
| "grad_norm": 0.2740698754787445, |
| "learning_rate": 5.033895871630462e-05, |
| "loss": 0.011, |
| "step": 10460 |
| }, |
| { |
| "grad_norm": 0.23271113634109497, |
| "learning_rate": 5.025628669948386e-05, |
| "loss": 0.007, |
| "step": 10470 |
| }, |
| { |
| "grad_norm": 0.25937408208847046, |
| "learning_rate": 5.017361398198502e-05, |
| "loss": 0.0065, |
| "step": 10480 |
| }, |
| { |
| "grad_norm": 0.2318635731935501, |
| "learning_rate": 5.009094078983221e-05, |
| "loss": 0.0069, |
| "step": 10490 |
| }, |
| { |
| "grad_norm": 0.20766286551952362, |
| "learning_rate": 5.000826734905073e-05, |
| "loss": 0.0096, |
| "step": 10500 |
| }, |
| { |
| "grad_norm": 0.2584581971168518, |
| "learning_rate": 4.9925593885666645e-05, |
| "loss": 0.0086, |
| "step": 10510 |
| }, |
| { |
| "grad_norm": 0.2603772282600403, |
| "learning_rate": 4.984292062570602e-05, |
| "loss": 0.0063, |
| "step": 10520 |
| }, |
| { |
| "grad_norm": 0.23702503740787506, |
| "learning_rate": 4.976024779519442e-05, |
| "loss": 0.0079, |
| "step": 10530 |
| }, |
| { |
| "grad_norm": 0.2646651268005371, |
| "learning_rate": 4.9677575620156194e-05, |
| "loss": 0.0092, |
| "step": 10540 |
| }, |
| { |
| "grad_norm": 0.1895063817501068, |
| "learning_rate": 4.959490432661391e-05, |
| "loss": 0.0093, |
| "step": 10550 |
| }, |
| { |
| "grad_norm": 0.21291036903858185, |
| "learning_rate": 4.9512234140587726e-05, |
| "loss": 0.0075, |
| "step": 10560 |
| }, |
| { |
| "grad_norm": 0.20933032035827637, |
| "learning_rate": 4.942956528809477e-05, |
| "loss": 0.0077, |
| "step": 10570 |
| }, |
| { |
| "grad_norm": 0.20019420981407166, |
| "learning_rate": 4.934689799514854e-05, |
| "loss": 0.0097, |
| "step": 10580 |
| }, |
| { |
| "grad_norm": 0.26344871520996094, |
| "learning_rate": 4.926423248775827e-05, |
| "loss": 0.0096, |
| "step": 10590 |
| }, |
| { |
| "grad_norm": 0.20692236721515656, |
| "learning_rate": 4.918156899192826e-05, |
| "loss": 0.0131, |
| "step": 10600 |
| }, |
| { |
| "grad_norm": 0.23554596304893494, |
| "learning_rate": 4.909890773365738e-05, |
| "loss": 0.0137, |
| "step": 10610 |
| }, |
| { |
| "grad_norm": 0.20528851449489594, |
| "learning_rate": 4.9016248938938344e-05, |
| "loss": 0.0105, |
| "step": 10620 |
| }, |
| { |
| "grad_norm": 0.26955676078796387, |
| "learning_rate": 4.8933592833757156e-05, |
| "loss": 0.0132, |
| "step": 10630 |
| }, |
| { |
| "grad_norm": 0.24838532507419586, |
| "learning_rate": 4.8850939644092435e-05, |
| "loss": 0.0136, |
| "step": 10640 |
| }, |
| { |
| "grad_norm": 0.2874491214752197, |
| "learning_rate": 4.876828959591485e-05, |
| "loss": 0.0079, |
| "step": 10650 |
| }, |
| { |
| "grad_norm": 0.22860568761825562, |
| "learning_rate": 4.8685642915186474e-05, |
| "loss": 0.0089, |
| "step": 10660 |
| }, |
| { |
| "grad_norm": 0.23194189369678497, |
| "learning_rate": 4.860299982786018e-05, |
| "loss": 0.0108, |
| "step": 10670 |
| }, |
| { |
| "grad_norm": 0.23060914874076843, |
| "learning_rate": 4.852036055987901e-05, |
| "loss": 0.0097, |
| "step": 10680 |
| }, |
| { |
| "grad_norm": 0.385558545589447, |
| "learning_rate": 4.843772533717558e-05, |
| "loss": 0.0093, |
| "step": 10690 |
| }, |
| { |
| "grad_norm": 0.24155037105083466, |
| "learning_rate": 4.835509438567142e-05, |
| "loss": 0.009, |
| "step": 10700 |
| }, |
| { |
| "grad_norm": 0.25894564390182495, |
| "learning_rate": 4.827246793127639e-05, |
| "loss": 0.0079, |
| "step": 10710 |
| }, |
| { |
| "grad_norm": 0.22689871490001678, |
| "learning_rate": 4.818984619988807e-05, |
| "loss": 0.0059, |
| "step": 10720 |
| }, |
| { |
| "grad_norm": 0.17574647068977356, |
| "learning_rate": 4.810722941739115e-05, |
| "loss": 0.0056, |
| "step": 10730 |
| }, |
| { |
| "grad_norm": 0.2433113157749176, |
| "learning_rate": 4.8024617809656684e-05, |
| "loss": 0.0084, |
| "step": 10740 |
| }, |
| { |
| "grad_norm": 0.28003692626953125, |
| "learning_rate": 4.794201160254171e-05, |
| "loss": 0.0094, |
| "step": 10750 |
| }, |
| { |
| "grad_norm": 0.23488906025886536, |
| "learning_rate": 4.785941102188844e-05, |
| "loss": 0.006, |
| "step": 10760 |
| }, |
| { |
| "grad_norm": 0.2274412214756012, |
| "learning_rate": 4.7776816293523686e-05, |
| "loss": 0.0084, |
| "step": 10770 |
| }, |
| { |
| "grad_norm": 0.19838649034500122, |
| "learning_rate": 4.769422764325832e-05, |
| "loss": 0.0064, |
| "step": 10780 |
| }, |
| { |
| "grad_norm": 0.18760640919208527, |
| "learning_rate": 4.76116452968865e-05, |
| "loss": 0.0062, |
| "step": 10790 |
| }, |
| { |
| "grad_norm": 0.2381298542022705, |
| "learning_rate": 4.752906948018525e-05, |
| "loss": 0.0071, |
| "step": 10800 |
| }, |
| { |
| "grad_norm": 0.2588251829147339, |
| "learning_rate": 4.7446500418913684e-05, |
| "loss": 0.0095, |
| "step": 10810 |
| }, |
| { |
| "grad_norm": 0.2380070835351944, |
| "learning_rate": 4.736393833881247e-05, |
| "loss": 0.0068, |
| "step": 10820 |
| }, |
| { |
| "grad_norm": 0.23917271196842194, |
| "learning_rate": 4.7281383465603194e-05, |
| "loss": 0.0097, |
| "step": 10830 |
| }, |
| { |
| "grad_norm": 0.18200017511844635, |
| "learning_rate": 4.71988360249877e-05, |
| "loss": 0.006, |
| "step": 10840 |
| }, |
| { |
| "grad_norm": 0.23980382084846497, |
| "learning_rate": 4.7116296242647554e-05, |
| "loss": 0.0085, |
| "step": 10850 |
| }, |
| { |
| "grad_norm": 0.24523408710956573, |
| "learning_rate": 4.703376434424336e-05, |
| "loss": 0.0086, |
| "step": 10860 |
| }, |
| { |
| "grad_norm": 0.18079398572444916, |
| "learning_rate": 4.695124055541421e-05, |
| "loss": 0.0086, |
| "step": 10870 |
| }, |
| { |
| "grad_norm": 0.21983122825622559, |
| "learning_rate": 4.6868725101776934e-05, |
| "loss": 0.0076, |
| "step": 10880 |
| }, |
| { |
| "grad_norm": 0.22801896929740906, |
| "learning_rate": 4.678621820892567e-05, |
| "loss": 0.0077, |
| "step": 10890 |
| }, |
| { |
| "grad_norm": 0.23620279133319855, |
| "learning_rate": 4.670372010243111e-05, |
| "loss": 0.0085, |
| "step": 10900 |
| }, |
| { |
| "grad_norm": 0.1909436583518982, |
| "learning_rate": 4.662123100783992e-05, |
| "loss": 0.0094, |
| "step": 10910 |
| }, |
| { |
| "grad_norm": 0.22192321717739105, |
| "learning_rate": 4.653875115067415e-05, |
| "loss": 0.0081, |
| "step": 10920 |
| }, |
| { |
| "grad_norm": 0.17492982745170593, |
| "learning_rate": 4.6456280756430545e-05, |
| "loss": 0.0076, |
| "step": 10930 |
| }, |
| { |
| "grad_norm": 0.17999714612960815, |
| "learning_rate": 4.637382005058004e-05, |
| "loss": 0.0072, |
| "step": 10940 |
| }, |
| { |
| "grad_norm": 0.19913795590400696, |
| "learning_rate": 4.629136925856705e-05, |
| "loss": 0.0108, |
| "step": 10950 |
| }, |
| { |
| "grad_norm": 0.1705617606639862, |
| "learning_rate": 4.6208928605808895e-05, |
| "loss": 0.0086, |
| "step": 10960 |
| }, |
| { |
| "grad_norm": 0.2795408368110657, |
| "learning_rate": 4.612649831769519e-05, |
| "loss": 0.0093, |
| "step": 10970 |
| }, |
| { |
| "grad_norm": 0.2092956155538559, |
| "learning_rate": 4.604407861958715e-05, |
| "loss": 0.0077, |
| "step": 10980 |
| }, |
| { |
| "grad_norm": 0.2425389438867569, |
| "learning_rate": 4.5961669736817114e-05, |
| "loss": 0.0069, |
| "step": 10990 |
| }, |
| { |
| "grad_norm": 0.23790328204631805, |
| "learning_rate": 4.5879271894687814e-05, |
| "loss": 0.0064, |
| "step": 11000 |
| }, |
| { |
| "grad_norm": 0.20760825276374817, |
| "learning_rate": 4.5796885318471826e-05, |
| "loss": 0.008, |
| "step": 11010 |
| }, |
| { |
| "grad_norm": 0.24325376749038696, |
| "learning_rate": 4.571451023341086e-05, |
| "loss": 0.0097, |
| "step": 11020 |
| }, |
| { |
| "grad_norm": 0.24800147116184235, |
| "learning_rate": 4.563214686471527e-05, |
| "loss": 0.0113, |
| "step": 11030 |
| }, |
| { |
| "grad_norm": 0.31807875633239746, |
| "learning_rate": 4.5549795437563365e-05, |
| "loss": 0.0088, |
| "step": 11040 |
| }, |
| { |
| "grad_norm": 0.24229101836681366, |
| "learning_rate": 4.546745617710081e-05, |
| "loss": 0.009, |
| "step": 11050 |
| }, |
| { |
| "grad_norm": 0.22642278671264648, |
| "learning_rate": 4.5385129308440014e-05, |
| "loss": 0.0072, |
| "step": 11060 |
| }, |
| { |
| "grad_norm": 0.2451431155204773, |
| "learning_rate": 4.530281505665944e-05, |
| "loss": 0.0062, |
| "step": 11070 |
| }, |
| { |
| "grad_norm": 0.1859740912914276, |
| "learning_rate": 4.5220513646803134e-05, |
| "loss": 0.0119, |
| "step": 11080 |
| }, |
| { |
| "grad_norm": 0.24542033672332764, |
| "learning_rate": 4.513822530388003e-05, |
| "loss": 0.0085, |
| "step": 11090 |
| }, |
| { |
| "grad_norm": 0.28155162930488586, |
| "learning_rate": 4.5055950252863296e-05, |
| "loss": 0.0074, |
| "step": 11100 |
| }, |
| { |
| "grad_norm": 0.2138509750366211, |
| "learning_rate": 4.4973688718689803e-05, |
| "loss": 0.0079, |
| "step": 11110 |
| }, |
| { |
| "grad_norm": 0.2261638045310974, |
| "learning_rate": 4.4891440926259406e-05, |
| "loss": 0.0071, |
| "step": 11120 |
| }, |
| { |
| "grad_norm": 0.2542162835597992, |
| "learning_rate": 4.480920710043443e-05, |
| "loss": 0.0097, |
| "step": 11130 |
| }, |
| { |
| "grad_norm": 0.15774618089199066, |
| "learning_rate": 4.4726987466039044e-05, |
| "loss": 0.0062, |
| "step": 11140 |
| }, |
| { |
| "grad_norm": 0.23213805258274078, |
| "learning_rate": 4.46447822478586e-05, |
| "loss": 0.0063, |
| "step": 11150 |
| }, |
| { |
| "grad_norm": 0.25310662388801575, |
| "learning_rate": 4.4562591670638974e-05, |
| "loss": 0.0057, |
| "step": 11160 |
| }, |
| { |
| "grad_norm": 0.2297695130109787, |
| "learning_rate": 4.4480415959086105e-05, |
| "loss": 0.0093, |
| "step": 11170 |
| }, |
| { |
| "grad_norm": 0.2621958255767822, |
| "learning_rate": 4.439825533786522e-05, |
| "loss": 0.0071, |
| "step": 11180 |
| }, |
| { |
| "grad_norm": 0.22085212171077728, |
| "learning_rate": 4.431611003160035e-05, |
| "loss": 0.0098, |
| "step": 11190 |
| }, |
| { |
| "grad_norm": 0.18988831341266632, |
| "learning_rate": 4.4233980264873636e-05, |
| "loss": 0.0051, |
| "step": 11200 |
| }, |
| { |
| "grad_norm": 0.2241099625825882, |
| "learning_rate": 4.4151866262224684e-05, |
| "loss": 0.0067, |
| "step": 11210 |
| }, |
| { |
| "grad_norm": 0.27083107829093933, |
| "learning_rate": 4.406976824815006e-05, |
| "loss": 0.0068, |
| "step": 11220 |
| }, |
| { |
| "grad_norm": 0.26719561219215393, |
| "learning_rate": 4.3987686447102595e-05, |
| "loss": 0.0074, |
| "step": 11230 |
| }, |
| { |
| "grad_norm": 0.20571677386760712, |
| "learning_rate": 4.3905621083490804e-05, |
| "loss": 0.0068, |
| "step": 11240 |
| }, |
| { |
| "grad_norm": 0.15599504113197327, |
| "learning_rate": 4.3823572381678286e-05, |
| "loss": 0.006, |
| "step": 11250 |
| }, |
| { |
| "grad_norm": 0.21513454616069794, |
| "learning_rate": 4.374154056598301e-05, |
| "loss": 0.0073, |
| "step": 11260 |
| }, |
| { |
| "grad_norm": 0.22201840579509735, |
| "learning_rate": 4.3659525860676845e-05, |
| "loss": 0.0064, |
| "step": 11270 |
| }, |
| { |
| "grad_norm": 0.22313819825649261, |
| "learning_rate": 4.3577528489984854e-05, |
| "loss": 0.006, |
| "step": 11280 |
| }, |
| { |
| "grad_norm": 0.23398296535015106, |
| "learning_rate": 4.349554867808476e-05, |
| "loss": 0.0061, |
| "step": 11290 |
| }, |
| { |
| "grad_norm": 0.21648810803890228, |
| "learning_rate": 4.34135866491062e-05, |
| "loss": 0.0068, |
| "step": 11300 |
| }, |
| { |
| "grad_norm": 0.2085336595773697, |
| "learning_rate": 4.333164262713022e-05, |
| "loss": 0.0142, |
| "step": 11310 |
| }, |
| { |
| "grad_norm": 0.27589038014411926, |
| "learning_rate": 4.324971683618868e-05, |
| "loss": 0.0101, |
| "step": 11320 |
| }, |
| { |
| "grad_norm": 0.21786828339099884, |
| "learning_rate": 4.316780950026354e-05, |
| "loss": 0.0053, |
| "step": 11330 |
| }, |
| { |
| "grad_norm": 0.2073872834444046, |
| "learning_rate": 4.308592084328637e-05, |
| "loss": 0.009, |
| "step": 11340 |
| }, |
| { |
| "grad_norm": 0.22114667296409607, |
| "learning_rate": 4.3004051089137576e-05, |
| "loss": 0.0079, |
| "step": 11350 |
| }, |
| { |
| "grad_norm": 0.22930344939231873, |
| "learning_rate": 4.292220046164597e-05, |
| "loss": 0.0069, |
| "step": 11360 |
| }, |
| { |
| "grad_norm": 0.22985005378723145, |
| "learning_rate": 4.2840369184588035e-05, |
| "loss": 0.0063, |
| "step": 11370 |
| }, |
| { |
| "grad_norm": 0.18045322597026825, |
| "learning_rate": 4.2758557481687345e-05, |
| "loss": 0.0055, |
| "step": 11380 |
| }, |
| { |
| "grad_norm": 0.2663944661617279, |
| "learning_rate": 4.267676557661403e-05, |
| "loss": 0.0079, |
| "step": 11390 |
| }, |
| { |
| "grad_norm": 0.24303773045539856, |
| "learning_rate": 4.2594993692983955e-05, |
| "loss": 0.0065, |
| "step": 11400 |
| }, |
| { |
| "grad_norm": 0.15300486981868744, |
| "learning_rate": 4.251324205435837e-05, |
| "loss": 0.0067, |
| "step": 11410 |
| }, |
| { |
| "grad_norm": 0.22211404144763947, |
| "learning_rate": 4.243151088424312e-05, |
| "loss": 0.0069, |
| "step": 11420 |
| }, |
| { |
| "grad_norm": 0.21946389973163605, |
| "learning_rate": 4.234980040608813e-05, |
| "loss": 0.0084, |
| "step": 11430 |
| }, |
| { |
| "grad_norm": 0.21228325366973877, |
| "learning_rate": 4.22681108432867e-05, |
| "loss": 0.0063, |
| "step": 11440 |
| }, |
| { |
| "grad_norm": 0.23013344407081604, |
| "learning_rate": 4.2186442419174984e-05, |
| "loss": 0.0063, |
| "step": 11450 |
| }, |
| { |
| "grad_norm": 0.20971117913722992, |
| "learning_rate": 4.210479535703133e-05, |
| "loss": 0.0083, |
| "step": 11460 |
| }, |
| { |
| "grad_norm": 0.22944243252277374, |
| "learning_rate": 4.202316988007567e-05, |
| "loss": 0.0081, |
| "step": 11470 |
| }, |
| { |
| "grad_norm": 0.2118869572877884, |
| "learning_rate": 4.194156621146901e-05, |
| "loss": 0.0087, |
| "step": 11480 |
| }, |
| { |
| "grad_norm": 0.26815682649612427, |
| "learning_rate": 4.1859984574312596e-05, |
| "loss": 0.0093, |
| "step": 11490 |
| }, |
| { |
| "grad_norm": 0.21731431782245636, |
| "learning_rate": 4.177842519164752e-05, |
| "loss": 0.0065, |
| "step": 11500 |
| }, |
| { |
| "grad_norm": 0.2921445965766907, |
| "learning_rate": 4.169688828645404e-05, |
| "loss": 0.0073, |
| "step": 11510 |
| }, |
| { |
| "grad_norm": 0.1848747879266739, |
| "learning_rate": 4.161537408165092e-05, |
| "loss": 0.0056, |
| "step": 11520 |
| }, |
| { |
| "grad_norm": 0.18249960243701935, |
| "learning_rate": 4.1533882800094924e-05, |
| "loss": 0.0082, |
| "step": 11530 |
| }, |
| { |
| "grad_norm": 0.15499596297740936, |
| "learning_rate": 4.145241466458005e-05, |
| "loss": 0.0094, |
| "step": 11540 |
| }, |
| { |
| "grad_norm": 0.19080030918121338, |
| "learning_rate": 4.13709698978371e-05, |
| "loss": 0.0077, |
| "step": 11550 |
| }, |
| { |
| "grad_norm": 0.1779751032590866, |
| "learning_rate": 4.1289548722532944e-05, |
| "loss": 0.007, |
| "step": 11560 |
| }, |
| { |
| "grad_norm": 0.22897697985172272, |
| "learning_rate": 4.120815136126999e-05, |
| "loss": 0.0075, |
| "step": 11570 |
| }, |
| { |
| "grad_norm": 0.2294849157333374, |
| "learning_rate": 4.112677803658548e-05, |
| "loss": 0.0095, |
| "step": 11580 |
| }, |
| { |
| "grad_norm": 0.20816902816295624, |
| "learning_rate": 4.1045428970951e-05, |
| "loss": 0.0066, |
| "step": 11590 |
| }, |
| { |
| "grad_norm": 0.22296547889709473, |
| "learning_rate": 4.0964104386771785e-05, |
| "loss": 0.0088, |
| "step": 11600 |
| }, |
| { |
| "grad_norm": 0.15643584728240967, |
| "learning_rate": 4.0882804506386144e-05, |
| "loss": 0.0053, |
| "step": 11610 |
| }, |
| { |
| "grad_norm": 0.28590163588523865, |
| "learning_rate": 4.080152955206485e-05, |
| "loss": 0.0062, |
| "step": 11620 |
| }, |
| { |
| "grad_norm": 0.18030160665512085, |
| "learning_rate": 4.0720279746010505e-05, |
| "loss": 0.0082, |
| "step": 11630 |
| }, |
| { |
| "grad_norm": 0.19158947467803955, |
| "learning_rate": 4.063905531035699e-05, |
| "loss": 0.0068, |
| "step": 11640 |
| }, |
| { |
| "grad_norm": 0.2148633450269699, |
| "learning_rate": 4.055785646716882e-05, |
| "loss": 0.0067, |
| "step": 11650 |
| }, |
| { |
| "grad_norm": 0.2393154799938202, |
| "learning_rate": 4.047668343844051e-05, |
| "loss": 0.0101, |
| "step": 11660 |
| }, |
| { |
| "grad_norm": 0.31594404578208923, |
| "learning_rate": 4.039553644609604e-05, |
| "loss": 0.0075, |
| "step": 11670 |
| }, |
| { |
| "grad_norm": 0.3099832236766815, |
| "learning_rate": 4.0314415711988176e-05, |
| "loss": 0.0084, |
| "step": 11680 |
| }, |
| { |
| "grad_norm": 0.2928639054298401, |
| "learning_rate": 4.023332145789792e-05, |
| "loss": 0.0064, |
| "step": 11690 |
| }, |
| { |
| "grad_norm": 0.2324325442314148, |
| "learning_rate": 4.015225390553385e-05, |
| "loss": 0.0101, |
| "step": 11700 |
| }, |
| { |
| "grad_norm": 0.1799289435148239, |
| "learning_rate": 4.007121327653158e-05, |
| "loss": 0.0067, |
| "step": 11710 |
| }, |
| { |
| "grad_norm": 0.2345605045557022, |
| "learning_rate": 3.9990199792453064e-05, |
| "loss": 0.0104, |
| "step": 11720 |
| }, |
| { |
| "grad_norm": 0.22300177812576294, |
| "learning_rate": 3.9909213674786103e-05, |
| "loss": 0.0071, |
| "step": 11730 |
| }, |
| { |
| "grad_norm": 0.18316997587680817, |
| "learning_rate": 3.982825514494363e-05, |
| "loss": 0.0116, |
| "step": 11740 |
| }, |
| { |
| "grad_norm": 0.1820177286863327, |
| "learning_rate": 3.974732442426319e-05, |
| "loss": 0.0065, |
| "step": 11750 |
| }, |
| { |
| "grad_norm": 0.2372979074716568, |
| "learning_rate": 3.966642173400629e-05, |
| "loss": 0.0092, |
| "step": 11760 |
| }, |
| { |
| "grad_norm": 0.22073568403720856, |
| "learning_rate": 3.9585547295357764e-05, |
| "loss": 0.0073, |
| "step": 11770 |
| }, |
| { |
| "grad_norm": 0.18935738503932953, |
| "learning_rate": 3.950470132942526e-05, |
| "loss": 0.0062, |
| "step": 11780 |
| }, |
| { |
| "grad_norm": 0.1814570277929306, |
| "learning_rate": 3.942388405723856e-05, |
| "loss": 0.006, |
| "step": 11790 |
| }, |
| { |
| "grad_norm": 0.2537878751754761, |
| "learning_rate": 3.9343095699749e-05, |
| "loss": 0.0092, |
| "step": 11800 |
| }, |
| { |
| "grad_norm": 0.20250701904296875, |
| "learning_rate": 3.9262336477828874e-05, |
| "loss": 0.0086, |
| "step": 11810 |
| }, |
| { |
| "grad_norm": 0.20579147338867188, |
| "learning_rate": 3.9181606612270794e-05, |
| "loss": 0.0068, |
| "step": 11820 |
| }, |
| { |
| "grad_norm": 0.15340419113636017, |
| "learning_rate": 3.910090632378713e-05, |
| "loss": 0.0047, |
| "step": 11830 |
| }, |
| { |
| "grad_norm": 0.21056094765663147, |
| "learning_rate": 3.90202358330094e-05, |
| "loss": 0.0087, |
| "step": 11840 |
| }, |
| { |
| "grad_norm": 0.1647689789533615, |
| "learning_rate": 3.8939595360487656e-05, |
| "loss": 0.0061, |
| "step": 11850 |
| }, |
| { |
| "grad_norm": 0.17378567159175873, |
| "learning_rate": 3.885898512668984e-05, |
| "loss": 0.0055, |
| "step": 11860 |
| }, |
| { |
| "grad_norm": 0.179255411028862, |
| "learning_rate": 3.877840535200127e-05, |
| "loss": 0.0112, |
| "step": 11870 |
| }, |
| { |
| "grad_norm": 0.2528724670410156, |
| "learning_rate": 3.869785625672397e-05, |
| "loss": 0.0057, |
| "step": 11880 |
| }, |
| { |
| "grad_norm": 0.30834850668907166, |
| "learning_rate": 3.8617338061076094e-05, |
| "loss": 0.0065, |
| "step": 11890 |
| }, |
| { |
| "grad_norm": 0.21379484236240387, |
| "learning_rate": 3.853685098519132e-05, |
| "loss": 0.0061, |
| "step": 11900 |
| }, |
| { |
| "grad_norm": 0.21671782433986664, |
| "learning_rate": 3.845639524911823e-05, |
| "loss": 0.0058, |
| "step": 11910 |
| }, |
| { |
| "grad_norm": 0.1984700709581375, |
| "learning_rate": 3.837597107281974e-05, |
| "loss": 0.0061, |
| "step": 11920 |
| }, |
| { |
| "grad_norm": 0.13126368820667267, |
| "learning_rate": 3.829557867617247e-05, |
| "loss": 0.0076, |
| "step": 11930 |
| }, |
| { |
| "grad_norm": 0.22941109538078308, |
| "learning_rate": 3.821521827896618e-05, |
| "loss": 0.0089, |
| "step": 11940 |
| }, |
| { |
| "grad_norm": 0.20077037811279297, |
| "learning_rate": 3.81348901009031e-05, |
| "loss": 0.0079, |
| "step": 11950 |
| }, |
| { |
| "grad_norm": 0.27187028527259827, |
| "learning_rate": 3.805459436159741e-05, |
| "loss": 0.0054, |
| "step": 11960 |
| }, |
| { |
| "grad_norm": 0.250042200088501, |
| "learning_rate": 3.797433128057461e-05, |
| "loss": 0.0098, |
| "step": 11970 |
| }, |
| { |
| "grad_norm": 0.1824905276298523, |
| "learning_rate": 3.789410107727089e-05, |
| "loss": 0.008, |
| "step": 11980 |
| }, |
| { |
| "grad_norm": 0.1979016214609146, |
| "learning_rate": 3.781390397103257e-05, |
| "loss": 0.0078, |
| "step": 11990 |
| }, |
| { |
| "grad_norm": 0.18383477628231049, |
| "learning_rate": 3.7733740181115455e-05, |
| "loss": 0.0097, |
| "step": 12000 |
| }, |
| { |
| "grad_norm": 0.14712467789649963, |
| "learning_rate": 3.7653609926684306e-05, |
| "loss": 0.0064, |
| "step": 12010 |
| }, |
| { |
| "grad_norm": 0.2039942592382431, |
| "learning_rate": 3.757351342681217e-05, |
| "loss": 0.0064, |
| "step": 12020 |
| }, |
| { |
| "grad_norm": 0.19466397166252136, |
| "learning_rate": 3.749345090047982e-05, |
| "loss": 0.0064, |
| "step": 12030 |
| }, |
| { |
| "grad_norm": 0.24284909665584564, |
| "learning_rate": 3.741342256657515e-05, |
| "loss": 0.0065, |
| "step": 12040 |
| }, |
| { |
| "grad_norm": 0.21631167829036713, |
| "learning_rate": 3.7333428643892567e-05, |
| "loss": 0.0053, |
| "step": 12050 |
| }, |
| { |
| "grad_norm": 0.17816664278507233, |
| "learning_rate": 3.725346935113239e-05, |
| "loss": 0.0058, |
| "step": 12060 |
| }, |
| { |
| "grad_norm": 0.17582198977470398, |
| "learning_rate": 3.717354490690029e-05, |
| "loss": 0.0087, |
| "step": 12070 |
| }, |
| { |
| "grad_norm": 0.17063601315021515, |
| "learning_rate": 3.709365552970664e-05, |
| "loss": 0.0066, |
| "step": 12080 |
| }, |
| { |
| "grad_norm": 0.19023405015468597, |
| "learning_rate": 3.7013801437965945e-05, |
| "loss": 0.0062, |
| "step": 12090 |
| }, |
| { |
| "grad_norm": 0.17706483602523804, |
| "learning_rate": 3.693398284999623e-05, |
| "loss": 0.0084, |
| "step": 12100 |
| }, |
| { |
| "grad_norm": 0.20245179533958435, |
| "learning_rate": 3.6854199984018484e-05, |
| "loss": 0.0058, |
| "step": 12110 |
| }, |
| { |
| "grad_norm": 0.20159313082695007, |
| "learning_rate": 3.677445305815601e-05, |
| "loss": 0.0064, |
| "step": 12120 |
| }, |
| { |
| "grad_norm": 0.21612194180488586, |
| "learning_rate": 3.669474229043387e-05, |
| "loss": 0.0077, |
| "step": 12130 |
| }, |
| { |
| "grad_norm": 0.18706907331943512, |
| "learning_rate": 3.6615067898778235e-05, |
| "loss": 0.0077, |
| "step": 12140 |
| }, |
| { |
| "grad_norm": 0.24379310011863708, |
| "learning_rate": 3.6535430101015866e-05, |
| "loss": 0.0088, |
| "step": 12150 |
| }, |
| { |
| "grad_norm": 0.3636125922203064, |
| "learning_rate": 3.645582911487345e-05, |
| "loss": 0.0069, |
| "step": 12160 |
| }, |
| { |
| "grad_norm": 0.20372240245342255, |
| "learning_rate": 3.637626515797706e-05, |
| "loss": 0.0049, |
| "step": 12170 |
| }, |
| { |
| "grad_norm": 0.2063855528831482, |
| "learning_rate": 3.629673844785152e-05, |
| "loss": 0.0058, |
| "step": 12180 |
| }, |
| { |
| "grad_norm": 0.1806401014328003, |
| "learning_rate": 3.621724920191979e-05, |
| "loss": 0.0067, |
| "step": 12190 |
| }, |
| { |
| "grad_norm": 0.21033532917499542, |
| "learning_rate": 3.6137797637502444e-05, |
| "loss": 0.0111, |
| "step": 12200 |
| }, |
| { |
| "grad_norm": 0.24749749898910522, |
| "learning_rate": 3.6058383971817035e-05, |
| "loss": 0.0071, |
| "step": 12210 |
| }, |
| { |
| "grad_norm": 0.22450430691242218, |
| "learning_rate": 3.59790084219775e-05, |
| "loss": 0.0068, |
| "step": 12220 |
| }, |
| { |
| "grad_norm": 0.36298665404319763, |
| "learning_rate": 3.589967120499353e-05, |
| "loss": 0.0114, |
| "step": 12230 |
| }, |
| { |
| "grad_norm": 0.32570943236351013, |
| "learning_rate": 3.5820372537770075e-05, |
| "loss": 0.0074, |
| "step": 12240 |
| }, |
| { |
| "grad_norm": 0.2093043178319931, |
| "learning_rate": 3.5741112637106655e-05, |
| "loss": 0.0057, |
| "step": 12250 |
| }, |
| { |
| "grad_norm": 0.19811396300792694, |
| "learning_rate": 3.5661891719696804e-05, |
| "loss": 0.0094, |
| "step": 12260 |
| }, |
| { |
| "grad_norm": 0.15150699019432068, |
| "learning_rate": 3.5582710002127504e-05, |
| "loss": 0.0057, |
| "step": 12270 |
| }, |
| { |
| "grad_norm": 0.18276816606521606, |
| "learning_rate": 3.550356770087853e-05, |
| "loss": 0.0069, |
| "step": 12280 |
| }, |
| { |
| "grad_norm": 0.1798425316810608, |
| "learning_rate": 3.5424465032321914e-05, |
| "loss": 0.0088, |
| "step": 12290 |
| }, |
| { |
| "grad_norm": 0.22988200187683105, |
| "learning_rate": 3.5345402212721335e-05, |
| "loss": 0.0098, |
| "step": 12300 |
| }, |
| { |
| "grad_norm": 0.1798902451992035, |
| "learning_rate": 3.526637945823152e-05, |
| "loss": 0.0072, |
| "step": 12310 |
| }, |
| { |
| "grad_norm": 0.18393878638744354, |
| "learning_rate": 3.518739698489767e-05, |
| "loss": 0.0066, |
| "step": 12320 |
| }, |
| { |
| "grad_norm": 0.2207607924938202, |
| "learning_rate": 3.510845500865485e-05, |
| "loss": 0.0097, |
| "step": 12330 |
| }, |
| { |
| "grad_norm": 0.24832548201084137, |
| "learning_rate": 3.502955374532739e-05, |
| "loss": 0.0071, |
| "step": 12340 |
| }, |
| { |
| "grad_norm": 0.25870540738105774, |
| "learning_rate": 3.495069341062836e-05, |
| "loss": 0.0075, |
| "step": 12350 |
| }, |
| { |
| "grad_norm": 0.18368877470493317, |
| "learning_rate": 3.4871874220158896e-05, |
| "loss": 0.0068, |
| "step": 12360 |
| }, |
| { |
| "grad_norm": 0.18090079724788666, |
| "learning_rate": 3.479309638940762e-05, |
| "loss": 0.0052, |
| "step": 12370 |
| }, |
| { |
| "grad_norm": 0.16779513657093048, |
| "learning_rate": 3.4714360133750146e-05, |
| "loss": 0.0073, |
| "step": 12380 |
| }, |
| { |
| "grad_norm": 0.15940751135349274, |
| "learning_rate": 3.463566566844839e-05, |
| "loss": 0.0051, |
| "step": 12390 |
| }, |
| { |
| "grad_norm": 0.24262839555740356, |
| "learning_rate": 3.4557013208650016e-05, |
| "loss": 0.0094, |
| "step": 12400 |
| }, |
| { |
| "grad_norm": 0.24909386038780212, |
| "learning_rate": 3.4478402969387857e-05, |
| "loss": 0.0079, |
| "step": 12410 |
| }, |
| { |
| "grad_norm": 0.19093437492847443, |
| "learning_rate": 3.4399835165579266e-05, |
| "loss": 0.0071, |
| "step": 12420 |
| }, |
| { |
| "grad_norm": 0.25968360900878906, |
| "learning_rate": 3.4321310012025645e-05, |
| "loss": 0.0081, |
| "step": 12430 |
| }, |
| { |
| "grad_norm": 0.2364204376935959, |
| "learning_rate": 3.424282772341176e-05, |
| "loss": 0.0084, |
| "step": 12440 |
| }, |
| { |
| "grad_norm": 0.23931537568569183, |
| "learning_rate": 3.416438851430519e-05, |
| "loss": 0.0071, |
| "step": 12450 |
| }, |
| { |
| "grad_norm": 0.21466895937919617, |
| "learning_rate": 3.408599259915577e-05, |
| "loss": 0.0067, |
| "step": 12460 |
| }, |
| { |
| "grad_norm": 0.2476363629102707, |
| "learning_rate": 3.400764019229487e-05, |
| "loss": 0.0056, |
| "step": 12470 |
| }, |
| { |
| "grad_norm": 0.2308126986026764, |
| "learning_rate": 3.3929331507935035e-05, |
| "loss": 0.0091, |
| "step": 12480 |
| }, |
| { |
| "grad_norm": 0.222612202167511, |
| "learning_rate": 3.3851066760169196e-05, |
| "loss": 0.012, |
| "step": 12490 |
| }, |
| { |
| "grad_norm": 0.18202485144138336, |
| "learning_rate": 3.377284616297021e-05, |
| "loss": 0.0065, |
| "step": 12500 |
| }, |
| { |
| "grad_norm": 0.18596436083316803, |
| "learning_rate": 3.3694669930190166e-05, |
| "loss": 0.009, |
| "step": 12510 |
| }, |
| { |
| "grad_norm": 0.21673136949539185, |
| "learning_rate": 3.36165382755599e-05, |
| "loss": 0.0085, |
| "step": 12520 |
| }, |
| { |
| "grad_norm": 0.18619108200073242, |
| "learning_rate": 3.35384514126884e-05, |
| "loss": 0.0049, |
| "step": 12530 |
| }, |
| { |
| "grad_norm": 0.14304818212985992, |
| "learning_rate": 3.3460409555062154e-05, |
| "loss": 0.0058, |
| "step": 12540 |
| }, |
| { |
| "grad_norm": 0.188069149851799, |
| "learning_rate": 3.3382412916044645e-05, |
| "loss": 0.0062, |
| "step": 12550 |
| }, |
| { |
| "grad_norm": 0.2641826570034027, |
| "learning_rate": 3.330446170887566e-05, |
| "loss": 0.0083, |
| "step": 12560 |
| }, |
| { |
| "grad_norm": 0.20169927179813385, |
| "learning_rate": 3.3226556146670834e-05, |
| "loss": 0.0054, |
| "step": 12570 |
| }, |
| { |
| "grad_norm": 0.2031773030757904, |
| "learning_rate": 3.314869644242102e-05, |
| "loss": 0.0058, |
| "step": 12580 |
| }, |
| { |
| "grad_norm": 0.2010979801416397, |
| "learning_rate": 3.3070882808991674e-05, |
| "loss": 0.0126, |
| "step": 12590 |
| }, |
| { |
| "grad_norm": 0.2006341814994812, |
| "learning_rate": 3.2993115459122305e-05, |
| "loss": 0.0056, |
| "step": 12600 |
| }, |
| { |
| "grad_norm": 0.21729214489459991, |
| "learning_rate": 3.2915394605425835e-05, |
| "loss": 0.0081, |
| "step": 12610 |
| }, |
| { |
| "grad_norm": 0.18181125819683075, |
| "learning_rate": 3.283772046038816e-05, |
| "loss": 0.0066, |
| "step": 12620 |
| }, |
| { |
| "grad_norm": 0.1822119951248169, |
| "learning_rate": 3.276009323636739e-05, |
| "loss": 0.0072, |
| "step": 12630 |
| }, |
| { |
| "grad_norm": 0.2782512605190277, |
| "learning_rate": 3.268251314559344e-05, |
| "loss": 0.0063, |
| "step": 12640 |
| }, |
| { |
| "grad_norm": 0.3128264844417572, |
| "learning_rate": 3.2604980400167254e-05, |
| "loss": 0.0083, |
| "step": 12650 |
| }, |
| { |
| "grad_norm": 0.18374745547771454, |
| "learning_rate": 3.252749521206042e-05, |
| "loss": 0.0064, |
| "step": 12660 |
| }, |
| { |
| "grad_norm": 0.2119012176990509, |
| "learning_rate": 3.2450057793114494e-05, |
| "loss": 0.0134, |
| "step": 12670 |
| }, |
| { |
| "grad_norm": 0.1813078075647354, |
| "learning_rate": 3.2372668355040435e-05, |
| "loss": 0.0055, |
| "step": 12680 |
| }, |
| { |
| "grad_norm": 0.3598603308200836, |
| "learning_rate": 3.2295327109418005e-05, |
| "loss": 0.0101, |
| "step": 12690 |
| }, |
| { |
| "grad_norm": 0.2781860828399658, |
| "learning_rate": 3.221803426769518e-05, |
| "loss": 0.0087, |
| "step": 12700 |
| }, |
| { |
| "grad_norm": 0.19660301506519318, |
| "learning_rate": 3.214079004118768e-05, |
| "loss": 0.0088, |
| "step": 12710 |
| }, |
| { |
| "grad_norm": 0.18734294176101685, |
| "learning_rate": 3.2063594641078234e-05, |
| "loss": 0.0093, |
| "step": 12720 |
| }, |
| { |
| "grad_norm": 0.19372877478599548, |
| "learning_rate": 3.198644827841616e-05, |
| "loss": 0.0097, |
| "step": 12730 |
| }, |
| { |
| "grad_norm": 0.1916368454694748, |
| "learning_rate": 3.1909351164116654e-05, |
| "loss": 0.0062, |
| "step": 12740 |
| }, |
| { |
| "grad_norm": 0.1832991987466812, |
| "learning_rate": 3.183230350896026e-05, |
| "loss": 0.0084, |
| "step": 12750 |
| }, |
| { |
| "grad_norm": 0.2088966965675354, |
| "learning_rate": 3.1755305523592337e-05, |
| "loss": 0.0081, |
| "step": 12760 |
| }, |
| { |
| "grad_norm": 0.25852876901626587, |
| "learning_rate": 3.167835741852245e-05, |
| "loss": 0.0076, |
| "step": 12770 |
| }, |
| { |
| "grad_norm": 0.21350952982902527, |
| "learning_rate": 3.160145940412378e-05, |
| "loss": 0.0081, |
| "step": 12780 |
| }, |
| { |
| "grad_norm": 0.22510845959186554, |
| "learning_rate": 3.1524611690632545e-05, |
| "loss": 0.0044, |
| "step": 12790 |
| }, |
| { |
| "grad_norm": 0.1736249029636383, |
| "learning_rate": 3.144781448814746e-05, |
| "loss": 0.0083, |
| "step": 12800 |
| }, |
| { |
| "grad_norm": 0.21229802072048187, |
| "learning_rate": 3.1371068006629145e-05, |
| "loss": 0.0095, |
| "step": 12810 |
| }, |
| { |
| "grad_norm": 0.194345623254776, |
| "learning_rate": 3.129437245589956e-05, |
| "loss": 0.0084, |
| "step": 12820 |
| }, |
| { |
| "grad_norm": 0.2444475144147873, |
| "learning_rate": 3.121772804564143e-05, |
| "loss": 0.0103, |
| "step": 12830 |
| }, |
| { |
| "grad_norm": 0.23777633905410767, |
| "learning_rate": 3.11411349853976e-05, |
| "loss": 0.0071, |
| "step": 12840 |
| }, |
| { |
| "grad_norm": 0.19102996587753296, |
| "learning_rate": 3.10645934845706e-05, |
| "loss": 0.006, |
| "step": 12850 |
| }, |
| { |
| "grad_norm": 0.16968311369419098, |
| "learning_rate": 3.098810375242196e-05, |
| "loss": 0.0068, |
| "step": 12860 |
| }, |
| { |
| "grad_norm": 0.21856744587421417, |
| "learning_rate": 3.0911665998071704e-05, |
| "loss": 0.0082, |
| "step": 12870 |
| }, |
| { |
| "grad_norm": 0.14362917840480804, |
| "learning_rate": 3.083528043049774e-05, |
| "loss": 0.0067, |
| "step": 12880 |
| }, |
| { |
| "grad_norm": 0.18467217683792114, |
| "learning_rate": 3.0758947258535255e-05, |
| "loss": 0.0061, |
| "step": 12890 |
| }, |
| { |
| "grad_norm": 0.18513968586921692, |
| "learning_rate": 3.068266669087625e-05, |
| "loss": 0.0078, |
| "step": 12900 |
| }, |
| { |
| "grad_norm": 0.20277082920074463, |
| "learning_rate": 3.060643893606887e-05, |
| "loss": 0.0074, |
| "step": 12910 |
| }, |
| { |
| "grad_norm": 0.1835511475801468, |
| "learning_rate": 3.053026420251693e-05, |
| "loss": 0.0053, |
| "step": 12920 |
| }, |
| { |
| "grad_norm": 0.16996018588542938, |
| "learning_rate": 3.0454142698479183e-05, |
| "loss": 0.006, |
| "step": 12930 |
| }, |
| { |
| "grad_norm": 0.24543605744838715, |
| "learning_rate": 3.0378074632068954e-05, |
| "loss": 0.0066, |
| "step": 12940 |
| }, |
| { |
| "grad_norm": 0.1390630453824997, |
| "learning_rate": 3.0302060211253408e-05, |
| "loss": 0.0106, |
| "step": 12950 |
| }, |
| { |
| "grad_norm": 0.23156525194644928, |
| "learning_rate": 3.0226099643853073e-05, |
| "loss": 0.0066, |
| "step": 12960 |
| }, |
| { |
| "grad_norm": 0.150277242064476, |
| "learning_rate": 3.0150193137541283e-05, |
| "loss": 0.0075, |
| "step": 12970 |
| }, |
| { |
| "grad_norm": 0.1806921511888504, |
| "learning_rate": 3.0074340899843467e-05, |
| "loss": 0.0078, |
| "step": 12980 |
| }, |
| { |
| "grad_norm": 0.1816614270210266, |
| "learning_rate": 2.999854313813677e-05, |
| "loss": 0.0049, |
| "step": 12990 |
| }, |
| { |
| "grad_norm": 0.18437013030052185, |
| "learning_rate": 2.9922800059649382e-05, |
| "loss": 0.0071, |
| "step": 13000 |
| }, |
| { |
| "grad_norm": 0.20052628219127655, |
| "learning_rate": 2.9847111871459976e-05, |
| "loss": 0.0053, |
| "step": 13010 |
| }, |
| { |
| "grad_norm": 0.1503404974937439, |
| "learning_rate": 2.977147878049721e-05, |
| "loss": 0.0061, |
| "step": 13020 |
| }, |
| { |
| "grad_norm": 0.17255957424640656, |
| "learning_rate": 2.9695900993539006e-05, |
| "loss": 0.0054, |
| "step": 13030 |
| }, |
| { |
| "grad_norm": 0.17958895862102509, |
| "learning_rate": 2.9620378717212183e-05, |
| "loss": 0.0062, |
| "step": 13040 |
| }, |
| { |
| "grad_norm": 0.1710553616285324, |
| "learning_rate": 2.9544912157991745e-05, |
| "loss": 0.0088, |
| "step": 13050 |
| }, |
| { |
| "grad_norm": 0.2009718269109726, |
| "learning_rate": 2.9469501522200405e-05, |
| "loss": 0.0049, |
| "step": 13060 |
| }, |
| { |
| "grad_norm": 0.23603618144989014, |
| "learning_rate": 2.9394147016007946e-05, |
| "loss": 0.0062, |
| "step": 13070 |
| }, |
| { |
| "grad_norm": 0.18876326084136963, |
| "learning_rate": 2.9318848845430702e-05, |
| "loss": 0.0067, |
| "step": 13080 |
| }, |
| { |
| "grad_norm": 0.1263684630393982, |
| "learning_rate": 2.9243607216331013e-05, |
| "loss": 0.004, |
| "step": 13090 |
| }, |
| { |
| "grad_norm": 0.18033087253570557, |
| "learning_rate": 2.916842233441661e-05, |
| "loss": 0.0103, |
| "step": 13100 |
| }, |
| { |
| "grad_norm": 0.17048488557338715, |
| "learning_rate": 2.90932944052401e-05, |
| "loss": 0.0065, |
| "step": 13110 |
| }, |
| { |
| "grad_norm": 0.20619574189186096, |
| "learning_rate": 2.9018223634198354e-05, |
| "loss": 0.0088, |
| "step": 13120 |
| }, |
| { |
| "grad_norm": 0.32705435156822205, |
| "learning_rate": 2.8943210226532025e-05, |
| "loss": 0.0072, |
| "step": 13130 |
| }, |
| { |
| "grad_norm": 0.25954321026802063, |
| "learning_rate": 2.8868254387324857e-05, |
| "loss": 0.0123, |
| "step": 13140 |
| }, |
| { |
| "grad_norm": 0.12593084573745728, |
| "learning_rate": 2.8793356321503306e-05, |
| "loss": 0.0061, |
| "step": 13150 |
| }, |
| { |
| "grad_norm": 0.15029622614383698, |
| "learning_rate": 2.87185162338358e-05, |
| "loss": 0.0061, |
| "step": 13160 |
| }, |
| { |
| "grad_norm": 0.1333160251379013, |
| "learning_rate": 2.8643734328932253e-05, |
| "loss": 0.0081, |
| "step": 13170 |
| }, |
| { |
| "grad_norm": 0.14292830228805542, |
| "learning_rate": 2.856901081124359e-05, |
| "loss": 0.0063, |
| "step": 13180 |
| }, |
| { |
| "grad_norm": 0.17720255255699158, |
| "learning_rate": 2.8494345885061002e-05, |
| "loss": 0.0099, |
| "step": 13190 |
| }, |
| { |
| "grad_norm": 0.2270614057779312, |
| "learning_rate": 2.8419739754515616e-05, |
| "loss": 0.0094, |
| "step": 13200 |
| }, |
| { |
| "grad_norm": 0.20722974836826324, |
| "learning_rate": 2.8345192623577666e-05, |
| "loss": 0.0063, |
| "step": 13210 |
| }, |
| { |
| "grad_norm": 0.19621099531650543, |
| "learning_rate": 2.8270704696056193e-05, |
| "loss": 0.0136, |
| "step": 13220 |
| }, |
| { |
| "grad_norm": 0.16113749146461487, |
| "learning_rate": 2.8196276175598367e-05, |
| "loss": 0.005, |
| "step": 13230 |
| }, |
| { |
| "grad_norm": 0.1010737493634224, |
| "learning_rate": 2.8121907265688884e-05, |
| "loss": 0.0041, |
| "step": 13240 |
| }, |
| { |
| "grad_norm": 0.2032041996717453, |
| "learning_rate": 2.804759816964957e-05, |
| "loss": 0.0093, |
| "step": 13250 |
| }, |
| { |
| "grad_norm": 0.24636270105838776, |
| "learning_rate": 2.797334909063857e-05, |
| "loss": 0.008, |
| "step": 13260 |
| }, |
| { |
| "grad_norm": 0.23668859899044037, |
| "learning_rate": 2.7899160231650056e-05, |
| "loss": 0.0081, |
| "step": 13270 |
| }, |
| { |
| "grad_norm": 0.18992413580417633, |
| "learning_rate": 2.7825031795513585e-05, |
| "loss": 0.0067, |
| "step": 13280 |
| }, |
| { |
| "grad_norm": 0.23678800463676453, |
| "learning_rate": 2.775096398489341e-05, |
| "loss": 0.0054, |
| "step": 13290 |
| }, |
| { |
| "grad_norm": 0.18498824536800385, |
| "learning_rate": 2.7676957002288163e-05, |
| "loss": 0.0066, |
| "step": 13300 |
| }, |
| { |
| "grad_norm": 0.16668982803821564, |
| "learning_rate": 2.760301105003003e-05, |
| "loss": 0.0097, |
| "step": 13310 |
| }, |
| { |
| "grad_norm": 0.2797868847846985, |
| "learning_rate": 2.752912633028446e-05, |
| "loss": 0.0065, |
| "step": 13320 |
| }, |
| { |
| "grad_norm": 0.1960904598236084, |
| "learning_rate": 2.7455303045049474e-05, |
| "loss": 0.0087, |
| "step": 13330 |
| }, |
| { |
| "grad_norm": 0.17691780626773834, |
| "learning_rate": 2.7381541396155098e-05, |
| "loss": 0.008, |
| "step": 13340 |
| }, |
| { |
| "grad_norm": 0.18473778665065765, |
| "learning_rate": 2.730784158526286e-05, |
| "loss": 0.0078, |
| "step": 13350 |
| }, |
| { |
| "grad_norm": 0.20810966193675995, |
| "learning_rate": 2.723420381386521e-05, |
| "loss": 0.0069, |
| "step": 13360 |
| }, |
| { |
| "grad_norm": 0.15738220512866974, |
| "learning_rate": 2.7160628283285018e-05, |
| "loss": 0.0073, |
| "step": 13370 |
| }, |
| { |
| "grad_norm": 0.2037407010793686, |
| "learning_rate": 2.7087115194675007e-05, |
| "loss": 0.0079, |
| "step": 13380 |
| }, |
| { |
| "grad_norm": 0.2184944599866867, |
| "learning_rate": 2.701366474901712e-05, |
| "loss": 0.0082, |
| "step": 13390 |
| }, |
| { |
| "grad_norm": 0.15587963163852692, |
| "learning_rate": 2.6940277147122085e-05, |
| "loss": 0.0059, |
| "step": 13400 |
| }, |
| { |
| "grad_norm": 0.16783714294433594, |
| "learning_rate": 2.686695258962878e-05, |
| "loss": 0.007, |
| "step": 13410 |
| }, |
| { |
| "grad_norm": 0.17781268060207367, |
| "learning_rate": 2.679369127700375e-05, |
| "loss": 0.007, |
| "step": 13420 |
| }, |
| { |
| "grad_norm": 0.18915528059005737, |
| "learning_rate": 2.672049340954067e-05, |
| "loss": 0.0087, |
| "step": 13430 |
| }, |
| { |
| "grad_norm": 0.12304963916540146, |
| "learning_rate": 2.6647359187359676e-05, |
| "loss": 0.0064, |
| "step": 13440 |
| }, |
| { |
| "grad_norm": 0.14121095836162567, |
| "learning_rate": 2.6574288810406946e-05, |
| "loss": 0.0057, |
| "step": 13450 |
| }, |
| { |
| "grad_norm": 0.16274137794971466, |
| "learning_rate": 2.6501282478454083e-05, |
| "loss": 0.0051, |
| "step": 13460 |
| }, |
| { |
| "grad_norm": 0.10483109205961227, |
| "learning_rate": 2.6428340391097618e-05, |
| "loss": 0.0062, |
| "step": 13470 |
| }, |
| { |
| "grad_norm": 0.2279292345046997, |
| "learning_rate": 2.6355462747758485e-05, |
| "loss": 0.0073, |
| "step": 13480 |
| }, |
| { |
| "grad_norm": 0.18314263224601746, |
| "learning_rate": 2.6282649747681304e-05, |
| "loss": 0.0051, |
| "step": 13490 |
| }, |
| { |
| "grad_norm": 0.1461828052997589, |
| "learning_rate": 2.620990158993406e-05, |
| "loss": 0.0058, |
| "step": 13500 |
| }, |
| { |
| "grad_norm": 0.20838026702404022, |
| "learning_rate": 2.6137218473407477e-05, |
| "loss": 0.0061, |
| "step": 13510 |
| }, |
| { |
| "grad_norm": 0.19161008298397064, |
| "learning_rate": 2.606460059681436e-05, |
| "loss": 0.0057, |
| "step": 13520 |
| }, |
| { |
| "grad_norm": 0.1648252159357071, |
| "learning_rate": 2.599204815868928e-05, |
| "loss": 0.0057, |
| "step": 13530 |
| }, |
| { |
| "grad_norm": 0.13666298985481262, |
| "learning_rate": 2.5919561357387756e-05, |
| "loss": 0.0054, |
| "step": 13540 |
| }, |
| { |
| "grad_norm": 0.13384267687797546, |
| "learning_rate": 2.5847140391085972e-05, |
| "loss": 0.0048, |
| "step": 13550 |
| }, |
| { |
| "grad_norm": 0.11047980189323425, |
| "learning_rate": 2.5774785457780103e-05, |
| "loss": 0.0077, |
| "step": 13560 |
| }, |
| { |
| "grad_norm": 0.13502903282642365, |
| "learning_rate": 2.5702496755285753e-05, |
| "loss": 0.0051, |
| "step": 13570 |
| }, |
| { |
| "grad_norm": 0.1964835673570633, |
| "learning_rate": 2.5630274481237483e-05, |
| "loss": 0.0066, |
| "step": 13580 |
| }, |
| { |
| "grad_norm": 0.1709243208169937, |
| "learning_rate": 2.5558118833088197e-05, |
| "loss": 0.006, |
| "step": 13590 |
| }, |
| { |
| "grad_norm": 0.19570232927799225, |
| "learning_rate": 2.548603000810872e-05, |
| "loss": 0.0072, |
| "step": 13600 |
| }, |
| { |
| "grad_norm": 0.1558714509010315, |
| "learning_rate": 2.5414008203387152e-05, |
| "loss": 0.0076, |
| "step": 13610 |
| }, |
| { |
| "grad_norm": 0.18019653856754303, |
| "learning_rate": 2.534205361582834e-05, |
| "loss": 0.0053, |
| "step": 13620 |
| }, |
| { |
| "grad_norm": 0.16142991185188293, |
| "learning_rate": 2.527016644215338e-05, |
| "loss": 0.0088, |
| "step": 13630 |
| }, |
| { |
| "grad_norm": 0.13523423671722412, |
| "learning_rate": 2.519834687889905e-05, |
| "loss": 0.0062, |
| "step": 13640 |
| }, |
| { |
| "grad_norm": 0.15555405616760254, |
| "learning_rate": 2.5126595122417295e-05, |
| "loss": 0.0051, |
| "step": 13650 |
| }, |
| { |
| "grad_norm": 0.1433010846376419, |
| "learning_rate": 2.5054911368874713e-05, |
| "loss": 0.0054, |
| "step": 13660 |
| }, |
| { |
| "grad_norm": 0.1764252781867981, |
| "learning_rate": 2.4983295814251916e-05, |
| "loss": 0.0052, |
| "step": 13670 |
| }, |
| { |
| "grad_norm": 0.2680763006210327, |
| "learning_rate": 2.4911748654343105e-05, |
| "loss": 0.0092, |
| "step": 13680 |
| }, |
| { |
| "grad_norm": 0.2182915061712265, |
| "learning_rate": 2.4840270084755463e-05, |
| "loss": 0.0066, |
| "step": 13690 |
| }, |
| { |
| "grad_norm": 0.21932873129844666, |
| "learning_rate": 2.4768860300908685e-05, |
| "loss": 0.0081, |
| "step": 13700 |
| }, |
| { |
| "grad_norm": 0.16487343609333038, |
| "learning_rate": 2.469751949803443e-05, |
| "loss": 0.0094, |
| "step": 13710 |
| }, |
| { |
| "grad_norm": 0.1933409422636032, |
| "learning_rate": 2.4626247871175666e-05, |
| "loss": 0.0084, |
| "step": 13720 |
| }, |
| { |
| "grad_norm": 0.22529985010623932, |
| "learning_rate": 2.4555045615186346e-05, |
| "loss": 0.006, |
| "step": 13730 |
| }, |
| { |
| "grad_norm": 0.14443622529506683, |
| "learning_rate": 2.4483912924730677e-05, |
| "loss": 0.0048, |
| "step": 13740 |
| }, |
| { |
| "grad_norm": 0.29311496019363403, |
| "learning_rate": 2.4412849994282742e-05, |
| "loss": 0.0077, |
| "step": 13750 |
| }, |
| { |
| "grad_norm": 0.1520247906446457, |
| "learning_rate": 2.434185701812592e-05, |
| "loss": 0.0091, |
| "step": 13760 |
| }, |
| { |
| "grad_norm": 0.2153899073600769, |
| "learning_rate": 2.4270934190352218e-05, |
| "loss": 0.0052, |
| "step": 13770 |
| }, |
| { |
| "grad_norm": 0.18413366377353668, |
| "learning_rate": 2.4200081704861998e-05, |
| "loss": 0.0046, |
| "step": 13780 |
| }, |
| { |
| "grad_norm": 0.15907230973243713, |
| "learning_rate": 2.412929975536321e-05, |
| "loss": 0.0067, |
| "step": 13790 |
| }, |
| { |
| "grad_norm": 0.1581483632326126, |
| "learning_rate": 2.4058588535371017e-05, |
| "loss": 0.0048, |
| "step": 13800 |
| }, |
| { |
| "grad_norm": 0.11827662587165833, |
| "learning_rate": 2.3987948238207243e-05, |
| "loss": 0.0044, |
| "step": 13810 |
| }, |
| { |
| "grad_norm": 0.147860586643219, |
| "learning_rate": 2.3917379056999678e-05, |
| "loss": 0.0054, |
| "step": 13820 |
| }, |
| { |
| "grad_norm": 0.13074715435504913, |
| "learning_rate": 2.3846881184681824e-05, |
| "loss": 0.0064, |
| "step": 13830 |
| }, |
| { |
| "grad_norm": 0.23691308498382568, |
| "learning_rate": 2.377645481399214e-05, |
| "loss": 0.0063, |
| "step": 13840 |
| }, |
| { |
| "grad_norm": 0.1883758157491684, |
| "learning_rate": 2.3706100137473667e-05, |
| "loss": 0.005, |
| "step": 13850 |
| }, |
| { |
| "grad_norm": 0.1975412368774414, |
| "learning_rate": 2.3635817347473394e-05, |
| "loss": 0.0094, |
| "step": 13860 |
| }, |
| { |
| "grad_norm": 0.1934109926223755, |
| "learning_rate": 2.3565606636141757e-05, |
| "loss": 0.0054, |
| "step": 13870 |
| }, |
| { |
| "grad_norm": 0.21237199008464813, |
| "learning_rate": 2.3495468195432203e-05, |
| "loss": 0.0067, |
| "step": 13880 |
| }, |
| { |
| "grad_norm": 0.1800747960805893, |
| "learning_rate": 2.3425402217100507e-05, |
| "loss": 0.0072, |
| "step": 13890 |
| }, |
| { |
| "grad_norm": 0.16725149750709534, |
| "learning_rate": 2.3355408892704424e-05, |
| "loss": 0.0049, |
| "step": 13900 |
| }, |
| { |
| "grad_norm": 0.1488545536994934, |
| "learning_rate": 2.3285488413603003e-05, |
| "loss": 0.0084, |
| "step": 13910 |
| }, |
| { |
| "grad_norm": 0.18190857768058777, |
| "learning_rate": 2.321564097095615e-05, |
| "loss": 0.0054, |
| "step": 13920 |
| }, |
| { |
| "grad_norm": 0.18374863266944885, |
| "learning_rate": 2.3145866755724142e-05, |
| "loss": 0.006, |
| "step": 13930 |
| }, |
| { |
| "grad_norm": 0.11474452167749405, |
| "learning_rate": 2.307616595866699e-05, |
| "loss": 0.0044, |
| "step": 13940 |
| }, |
| { |
| "grad_norm": 0.16919605433940887, |
| "learning_rate": 2.3006538770344032e-05, |
| "loss": 0.0062, |
| "step": 13950 |
| }, |
| { |
| "grad_norm": 0.14991624653339386, |
| "learning_rate": 2.293698538111334e-05, |
| "loss": 0.0081, |
| "step": 13960 |
| }, |
| { |
| "grad_norm": 0.20199425518512726, |
| "learning_rate": 2.28675059811312e-05, |
| "loss": 0.0068, |
| "step": 13970 |
| }, |
| { |
| "grad_norm": 0.15437351167201996, |
| "learning_rate": 2.279810076035167e-05, |
| "loss": 0.0049, |
| "step": 13980 |
| }, |
| { |
| "grad_norm": 0.16406899690628052, |
| "learning_rate": 2.272876990852596e-05, |
| "loss": 0.0073, |
| "step": 13990 |
| }, |
| { |
| "grad_norm": 0.13990004360675812, |
| "learning_rate": 2.265951361520195e-05, |
| "loss": 0.0058, |
| "step": 14000 |
| }, |
| { |
| "grad_norm": 0.1541512906551361, |
| "learning_rate": 2.2590332069723748e-05, |
| "loss": 0.0059, |
| "step": 14010 |
| }, |
| { |
| "grad_norm": 0.15612509846687317, |
| "learning_rate": 2.2521225461231004e-05, |
| "loss": 0.0118, |
| "step": 14020 |
| }, |
| { |
| "grad_norm": 0.10496876388788223, |
| "learning_rate": 2.2452193978658597e-05, |
| "loss": 0.0068, |
| "step": 14030 |
| }, |
| { |
| "grad_norm": 0.2270757555961609, |
| "learning_rate": 2.238323781073594e-05, |
| "loss": 0.0076, |
| "step": 14040 |
| }, |
| { |
| "grad_norm": 0.18619896471500397, |
| "learning_rate": 2.2314357145986552e-05, |
| "loss": 0.0056, |
| "step": 14050 |
| }, |
| { |
| "grad_norm": 0.1321837157011032, |
| "learning_rate": 2.224555217272757e-05, |
| "loss": 0.0044, |
| "step": 14060 |
| }, |
| { |
| "grad_norm": 0.15203258395195007, |
| "learning_rate": 2.2176823079069127e-05, |
| "loss": 0.0055, |
| "step": 14070 |
| }, |
| { |
| "grad_norm": 0.20918844640254974, |
| "learning_rate": 2.210817005291398e-05, |
| "loss": 0.006, |
| "step": 14080 |
| }, |
| { |
| "grad_norm": 0.22175315022468567, |
| "learning_rate": 2.203959328195686e-05, |
| "loss": 0.0061, |
| "step": 14090 |
| }, |
| { |
| "grad_norm": 0.09628665447235107, |
| "learning_rate": 2.1971092953684026e-05, |
| "loss": 0.0048, |
| "step": 14100 |
| }, |
| { |
| "grad_norm": 0.20158182084560394, |
| "learning_rate": 2.1902669255372788e-05, |
| "loss": 0.0045, |
| "step": 14110 |
| }, |
| { |
| "grad_norm": 0.1197918951511383, |
| "learning_rate": 2.1834322374090897e-05, |
| "loss": 0.0077, |
| "step": 14120 |
| }, |
| { |
| "grad_norm": 0.16859853267669678, |
| "learning_rate": 2.1766052496696153e-05, |
| "loss": 0.0061, |
| "step": 14130 |
| }, |
| { |
| "grad_norm": 0.1983485370874405, |
| "learning_rate": 2.169785980983577e-05, |
| "loss": 0.0046, |
| "step": 14140 |
| }, |
| { |
| "grad_norm": 0.18621300160884857, |
| "learning_rate": 2.162974449994593e-05, |
| "loss": 0.0063, |
| "step": 14150 |
| }, |
| { |
| "grad_norm": 0.1051415279507637, |
| "learning_rate": 2.1561706753251337e-05, |
| "loss": 0.0048, |
| "step": 14160 |
| }, |
| { |
| "grad_norm": 0.18732018768787384, |
| "learning_rate": 2.1493746755764544e-05, |
| "loss": 0.0068, |
| "step": 14170 |
| }, |
| { |
| "grad_norm": 0.13659246265888214, |
| "learning_rate": 2.1425864693285635e-05, |
| "loss": 0.0051, |
| "step": 14180 |
| }, |
| { |
| "grad_norm": 0.14370456337928772, |
| "learning_rate": 2.1358060751401547e-05, |
| "loss": 0.0044, |
| "step": 14190 |
| }, |
| { |
| "grad_norm": 0.14926820993423462, |
| "learning_rate": 2.129033511548566e-05, |
| "loss": 0.0052, |
| "step": 14200 |
| }, |
| { |
| "grad_norm": 0.15225178003311157, |
| "learning_rate": 2.1222687970697315e-05, |
| "loss": 0.0071, |
| "step": 14210 |
| }, |
| { |
| "grad_norm": 0.1812531054019928, |
| "learning_rate": 2.1155119501981173e-05, |
| "loss": 0.0063, |
| "step": 14220 |
| }, |
| { |
| "grad_norm": 0.21825918555259705, |
| "learning_rate": 2.1087629894066895e-05, |
| "loss": 0.0065, |
| "step": 14230 |
| }, |
| { |
| "grad_norm": 0.2558552026748657, |
| "learning_rate": 2.1020219331468473e-05, |
| "loss": 0.0077, |
| "step": 14240 |
| }, |
| { |
| "grad_norm": 0.16796202957630157, |
| "learning_rate": 2.095288799848379e-05, |
| "loss": 0.0045, |
| "step": 14250 |
| }, |
| { |
| "grad_norm": 0.15485888719558716, |
| "learning_rate": 2.088563607919417e-05, |
| "loss": 0.005, |
| "step": 14260 |
| }, |
| { |
| "grad_norm": 0.16367082297801971, |
| "learning_rate": 2.0818463757463786e-05, |
| "loss": 0.005, |
| "step": 14270 |
| }, |
| { |
| "grad_norm": 0.23867768049240112, |
| "learning_rate": 2.0751371216939175e-05, |
| "loss": 0.0092, |
| "step": 14280 |
| }, |
| { |
| "grad_norm": 0.2102659046649933, |
| "learning_rate": 2.068435864104882e-05, |
| "loss": 0.0043, |
| "step": 14290 |
| }, |
| { |
| "grad_norm": 0.18887297809123993, |
| "learning_rate": 2.0617426213002506e-05, |
| "loss": 0.0056, |
| "step": 14300 |
| }, |
| { |
| "grad_norm": 0.18655873835086823, |
| "learning_rate": 2.055057411579097e-05, |
| "loss": 0.0061, |
| "step": 14310 |
| }, |
| { |
| "grad_norm": 0.20691068470478058, |
| "learning_rate": 2.0483802532185286e-05, |
| "loss": 0.0061, |
| "step": 14320 |
| }, |
| { |
| "grad_norm": 0.15043525397777557, |
| "learning_rate": 2.041711164473638e-05, |
| "loss": 0.0055, |
| "step": 14330 |
| }, |
| { |
| "grad_norm": 0.14773303270339966, |
| "learning_rate": 2.0350501635774637e-05, |
| "loss": 0.0043, |
| "step": 14340 |
| }, |
| { |
| "grad_norm": 0.16621702909469604, |
| "learning_rate": 2.0283972687409247e-05, |
| "loss": 0.0057, |
| "step": 14350 |
| }, |
| { |
| "grad_norm": 0.14620442688465118, |
| "learning_rate": 2.021752498152784e-05, |
| "loss": 0.0062, |
| "step": 14360 |
| }, |
| { |
| "grad_norm": 0.17155922949314117, |
| "learning_rate": 2.015115869979589e-05, |
| "loss": 0.0054, |
| "step": 14370 |
| }, |
| { |
| "grad_norm": 0.16646042466163635, |
| "learning_rate": 2.0084874023656265e-05, |
| "loss": 0.0045, |
| "step": 14380 |
| }, |
| { |
| "grad_norm": 0.15475299954414368, |
| "learning_rate": 2.001867113432877e-05, |
| "loss": 0.0092, |
| "step": 14390 |
| }, |
| { |
| "grad_norm": 0.1650644838809967, |
| "learning_rate": 1.995255021280954e-05, |
| "loss": 0.006, |
| "step": 14400 |
| }, |
| { |
| "grad_norm": 0.21478888392448425, |
| "learning_rate": 1.9886511439870688e-05, |
| "loss": 0.0055, |
| "step": 14410 |
| }, |
| { |
| "grad_norm": 0.1912056803703308, |
| "learning_rate": 1.9820554996059675e-05, |
| "loss": 0.008, |
| "step": 14420 |
| }, |
| { |
| "grad_norm": 0.16896963119506836, |
| "learning_rate": 1.9754681061698893e-05, |
| "loss": 0.0064, |
| "step": 14430 |
| }, |
| { |
| "grad_norm": 0.17615389823913574, |
| "learning_rate": 1.9688889816885185e-05, |
| "loss": 0.0059, |
| "step": 14440 |
| }, |
| { |
| "grad_norm": 0.17325952649116516, |
| "learning_rate": 1.962318144148928e-05, |
| "loss": 0.0061, |
| "step": 14450 |
| }, |
| { |
| "grad_norm": 0.16517771780490875, |
| "learning_rate": 1.955755611515539e-05, |
| "loss": 0.007, |
| "step": 14460 |
| }, |
| { |
| "grad_norm": 0.11359621584415436, |
| "learning_rate": 1.9492014017300642e-05, |
| "loss": 0.0038, |
| "step": 14470 |
| }, |
| { |
| "grad_norm": 0.1052914410829544, |
| "learning_rate": 1.942655532711461e-05, |
| "loss": 0.0066, |
| "step": 14480 |
| }, |
| { |
| "grad_norm": 0.119536854326725, |
| "learning_rate": 1.9361180223558882e-05, |
| "loss": 0.0055, |
| "step": 14490 |
| }, |
| { |
| "grad_norm": 0.11046026647090912, |
| "learning_rate": 1.929588888536647e-05, |
| "loss": 0.0047, |
| "step": 14500 |
| }, |
| { |
| "grad_norm": 0.13877028226852417, |
| "learning_rate": 1.9230681491041425e-05, |
| "loss": 0.006, |
| "step": 14510 |
| }, |
| { |
| "grad_norm": 0.18733717501163483, |
| "learning_rate": 1.9165558218858264e-05, |
| "loss": 0.005, |
| "step": 14520 |
| }, |
| { |
| "grad_norm": 0.12102721631526947, |
| "learning_rate": 1.9100519246861505e-05, |
| "loss": 0.0052, |
| "step": 14530 |
| }, |
| { |
| "grad_norm": 0.14464715123176575, |
| "learning_rate": 1.9035564752865248e-05, |
| "loss": 0.0048, |
| "step": 14540 |
| }, |
| { |
| "grad_norm": 0.15074194967746735, |
| "learning_rate": 1.897069491445258e-05, |
| "loss": 0.0045, |
| "step": 14550 |
| }, |
| { |
| "grad_norm": 0.15628309547901154, |
| "learning_rate": 1.890590990897515e-05, |
| "loss": 0.0041, |
| "step": 14560 |
| }, |
| { |
| "grad_norm": 0.1759437769651413, |
| "learning_rate": 1.884120991355272e-05, |
| "loss": 0.008, |
| "step": 14570 |
| }, |
| { |
| "grad_norm": 0.14988595247268677, |
| "learning_rate": 1.8776595105072576e-05, |
| "loss": 0.0044, |
| "step": 14580 |
| }, |
| { |
| "grad_norm": 0.15233299136161804, |
| "learning_rate": 1.8712065660189166e-05, |
| "loss": 0.0058, |
| "step": 14590 |
| }, |
| { |
| "grad_norm": 0.118324413895607, |
| "learning_rate": 1.8647621755323513e-05, |
| "loss": 0.0049, |
| "step": 14600 |
| }, |
| { |
| "grad_norm": 0.11879850178956985, |
| "learning_rate": 1.858326356666278e-05, |
| "loss": 0.0049, |
| "step": 14610 |
| }, |
| { |
| "grad_norm": 0.14314360916614532, |
| "learning_rate": 1.851899127015983e-05, |
| "loss": 0.0048, |
| "step": 14620 |
| }, |
| { |
| "grad_norm": 0.12158121913671494, |
| "learning_rate": 1.8454805041532626e-05, |
| "loss": 0.0048, |
| "step": 14630 |
| }, |
| { |
| "grad_norm": 0.1919155865907669, |
| "learning_rate": 1.8390705056263906e-05, |
| "loss": 0.0064, |
| "step": 14640 |
| }, |
| { |
| "grad_norm": 0.2054741382598877, |
| "learning_rate": 1.832669148960057e-05, |
| "loss": 0.0072, |
| "step": 14650 |
| }, |
| { |
| "grad_norm": 0.16221170127391815, |
| "learning_rate": 1.8262764516553233e-05, |
| "loss": 0.0078, |
| "step": 14660 |
| }, |
| { |
| "grad_norm": 0.17063555121421814, |
| "learning_rate": 1.8198924311895843e-05, |
| "loss": 0.0066, |
| "step": 14670 |
| }, |
| { |
| "grad_norm": 0.14749523997306824, |
| "learning_rate": 1.813517105016505e-05, |
| "loss": 0.0044, |
| "step": 14680 |
| }, |
| { |
| "grad_norm": 0.15508879721164703, |
| "learning_rate": 1.8071504905659888e-05, |
| "loss": 0.0049, |
| "step": 14690 |
| }, |
| { |
| "grad_norm": 0.1870722770690918, |
| "learning_rate": 1.800792605244109e-05, |
| "loss": 0.0075, |
| "step": 14700 |
| }, |
| { |
| "grad_norm": 0.11199451237916946, |
| "learning_rate": 1.7944434664330844e-05, |
| "loss": 0.0047, |
| "step": 14710 |
| }, |
| { |
| "grad_norm": 0.15122726559638977, |
| "learning_rate": 1.7881030914912212e-05, |
| "loss": 0.0053, |
| "step": 14720 |
| }, |
| { |
| "grad_norm": 0.18994122743606567, |
| "learning_rate": 1.7817714977528577e-05, |
| "loss": 0.0063, |
| "step": 14730 |
| }, |
| { |
| "grad_norm": 0.14027075469493866, |
| "learning_rate": 1.7754487025283332e-05, |
| "loss": 0.0047, |
| "step": 14740 |
| }, |
| { |
| "grad_norm": 0.15433025360107422, |
| "learning_rate": 1.7691347231039275e-05, |
| "loss": 0.0045, |
| "step": 14750 |
| }, |
| { |
| "grad_norm": 0.12539328634738922, |
| "learning_rate": 1.7628295767418164e-05, |
| "loss": 0.0066, |
| "step": 14760 |
| }, |
| { |
| "grad_norm": 0.16263306140899658, |
| "learning_rate": 1.7565332806800333e-05, |
| "loss": 0.0057, |
| "step": 14770 |
| }, |
| { |
| "grad_norm": 0.20516878366470337, |
| "learning_rate": 1.750245852132408e-05, |
| "loss": 0.0069, |
| "step": 14780 |
| }, |
| { |
| "grad_norm": 0.155752494931221, |
| "learning_rate": 1.7439673082885323e-05, |
| "loss": 0.0042, |
| "step": 14790 |
| }, |
| { |
| "grad_norm": 0.21590086817741394, |
| "learning_rate": 1.7376976663137047e-05, |
| "loss": 0.0068, |
| "step": 14800 |
| }, |
| { |
| "grad_norm": 0.1473667174577713, |
| "learning_rate": 1.7314369433488853e-05, |
| "loss": 0.0086, |
| "step": 14810 |
| }, |
| { |
| "grad_norm": 0.18347211182117462, |
| "learning_rate": 1.7251851565106548e-05, |
| "loss": 0.0054, |
| "step": 14820 |
| }, |
| { |
| "grad_norm": 0.1619826704263687, |
| "learning_rate": 1.7189423228911574e-05, |
| "loss": 0.0052, |
| "step": 14830 |
| }, |
| { |
| "grad_norm": 0.10061666369438171, |
| "learning_rate": 1.7127084595580606e-05, |
| "loss": 0.0056, |
| "step": 14840 |
| }, |
| { |
| "grad_norm": 0.16300608217716217, |
| "learning_rate": 1.706483583554513e-05, |
| "loss": 0.009, |
| "step": 14850 |
| }, |
| { |
| "grad_norm": 0.16306783258914948, |
| "learning_rate": 1.700267711899083e-05, |
| "loss": 0.0121, |
| "step": 14860 |
| }, |
| { |
| "grad_norm": 0.15848280489444733, |
| "learning_rate": 1.69406086158573e-05, |
| "loss": 0.0065, |
| "step": 14870 |
| }, |
| { |
| "grad_norm": 0.1942242980003357, |
| "learning_rate": 1.6878630495837455e-05, |
| "loss": 0.0087, |
| "step": 14880 |
| }, |
| { |
| "grad_norm": 0.262200266122818, |
| "learning_rate": 1.681674292837707e-05, |
| "loss": 0.0063, |
| "step": 14890 |
| }, |
| { |
| "grad_norm": 0.21699029207229614, |
| "learning_rate": 1.6754946082674444e-05, |
| "loss": 0.0072, |
| "step": 14900 |
| }, |
| { |
| "grad_norm": 0.22985686361789703, |
| "learning_rate": 1.6693240127679748e-05, |
| "loss": 0.0044, |
| "step": 14910 |
| }, |
| { |
| "grad_norm": 0.15517984330654144, |
| "learning_rate": 1.663162523209475e-05, |
| "loss": 0.0104, |
| "step": 14920 |
| }, |
| { |
| "grad_norm": 0.1608007550239563, |
| "learning_rate": 1.6570101564372193e-05, |
| "loss": 0.0105, |
| "step": 14930 |
| }, |
| { |
| "grad_norm": 0.23195034265518188, |
| "learning_rate": 1.650866929271543e-05, |
| "loss": 0.0086, |
| "step": 14940 |
| }, |
| { |
| "grad_norm": 0.1510450690984726, |
| "learning_rate": 1.644732858507797e-05, |
| "loss": 0.0067, |
| "step": 14950 |
| }, |
| { |
| "grad_norm": 0.16938410699367523, |
| "learning_rate": 1.6386079609162943e-05, |
| "loss": 0.0054, |
| "step": 14960 |
| }, |
| { |
| "grad_norm": 0.17797565460205078, |
| "learning_rate": 1.6324922532422742e-05, |
| "loss": 0.0052, |
| "step": 14970 |
| }, |
| { |
| "grad_norm": 0.1584869772195816, |
| "learning_rate": 1.6263857522058434e-05, |
| "loss": 0.0079, |
| "step": 14980 |
| }, |
| { |
| "grad_norm": 0.17295604944229126, |
| "learning_rate": 1.6202884745019443e-05, |
| "loss": 0.0069, |
| "step": 14990 |
| }, |
| { |
| "grad_norm": 0.15914912521839142, |
| "learning_rate": 1.614200436800304e-05, |
| "loss": 0.0075, |
| "step": 15000 |
| }, |
| { |
| "grad_norm": 0.1304100751876831, |
| "learning_rate": 1.6081216557453814e-05, |
| "loss": 0.0056, |
| "step": 15010 |
| }, |
| { |
| "grad_norm": 0.1199740543961525, |
| "learning_rate": 1.6020521479563367e-05, |
| "loss": 0.0043, |
| "step": 15020 |
| }, |
| { |
| "grad_norm": 0.14026935398578644, |
| "learning_rate": 1.5959919300269654e-05, |
| "loss": 0.0041, |
| "step": 15030 |
| }, |
| { |
| "grad_norm": 0.12699109315872192, |
| "learning_rate": 1.5899410185256764e-05, |
| "loss": 0.005, |
| "step": 15040 |
| }, |
| { |
| "grad_norm": 0.13951744139194489, |
| "learning_rate": 1.583899429995431e-05, |
| "loss": 0.0044, |
| "step": 15050 |
| }, |
| { |
| "grad_norm": 0.1004301905632019, |
| "learning_rate": 1.5778671809536993e-05, |
| "loss": 0.0072, |
| "step": 15060 |
| }, |
| { |
| "grad_norm": 0.1335887461900711, |
| "learning_rate": 1.5718442878924246e-05, |
| "loss": 0.005, |
| "step": 15070 |
| }, |
| { |
| "grad_norm": 0.17112179100513458, |
| "learning_rate": 1.5658307672779593e-05, |
| "loss": 0.0067, |
| "step": 15080 |
| }, |
| { |
| "grad_norm": 0.16543389856815338, |
| "learning_rate": 1.5598266355510427e-05, |
| "loss": 0.0046, |
| "step": 15090 |
| }, |
| { |
| "grad_norm": 0.13843472301959991, |
| "learning_rate": 1.553831909126744e-05, |
| "loss": 0.0074, |
| "step": 15100 |
| }, |
| { |
| "grad_norm": 0.15110492706298828, |
| "learning_rate": 1.5478466043944135e-05, |
| "loss": 0.0054, |
| "step": 15110 |
| }, |
| { |
| "grad_norm": 0.22796016931533813, |
| "learning_rate": 1.5418707377176468e-05, |
| "loss": 0.0078, |
| "step": 15120 |
| }, |
| { |
| "grad_norm": 0.14223039150238037, |
| "learning_rate": 1.535904325434233e-05, |
| "loss": 0.0054, |
| "step": 15130 |
| }, |
| { |
| "grad_norm": 0.09044605493545532, |
| "learning_rate": 1.529947383856118e-05, |
| "loss": 0.005, |
| "step": 15140 |
| }, |
| { |
| "grad_norm": 0.18607757985591888, |
| "learning_rate": 1.5239999292693524e-05, |
| "loss": 0.0067, |
| "step": 15150 |
| }, |
| { |
| "grad_norm": 0.12722477316856384, |
| "learning_rate": 1.5180619779340505e-05, |
| "loss": 0.0077, |
| "step": 15160 |
| }, |
| { |
| "grad_norm": 0.134136363863945, |
| "learning_rate": 1.5121335460843428e-05, |
| "loss": 0.0045, |
| "step": 15170 |
| }, |
| { |
| "grad_norm": 0.09909097850322723, |
| "learning_rate": 1.5062146499283347e-05, |
| "loss": 0.0036, |
| "step": 15180 |
| }, |
| { |
| "grad_norm": 0.1600300371646881, |
| "learning_rate": 1.5003053056480643e-05, |
| "loss": 0.005, |
| "step": 15190 |
| }, |
| { |
| "grad_norm": 0.0979742482304573, |
| "learning_rate": 1.4944055293994551e-05, |
| "loss": 0.0045, |
| "step": 15200 |
| }, |
| { |
| "grad_norm": 0.1393214762210846, |
| "learning_rate": 1.4885153373122656e-05, |
| "loss": 0.0059, |
| "step": 15210 |
| }, |
| { |
| "grad_norm": 0.16196110844612122, |
| "learning_rate": 1.482634745490059e-05, |
| "loss": 0.0047, |
| "step": 15220 |
| }, |
| { |
| "grad_norm": 0.1884429156780243, |
| "learning_rate": 1.4767637700101466e-05, |
| "loss": 0.0072, |
| "step": 15230 |
| }, |
| { |
| "grad_norm": 0.20021089911460876, |
| "learning_rate": 1.4709024269235528e-05, |
| "loss": 0.0057, |
| "step": 15240 |
| }, |
| { |
| "grad_norm": 0.12900827825069427, |
| "learning_rate": 1.4650507322549684e-05, |
| "loss": 0.0047, |
| "step": 15250 |
| }, |
| { |
| "grad_norm": 0.14503392577171326, |
| "learning_rate": 1.4592087020026972e-05, |
| "loss": 0.0055, |
| "step": 15260 |
| }, |
| { |
| "grad_norm": 0.14478757977485657, |
| "learning_rate": 1.4533763521386318e-05, |
| "loss": 0.0079, |
| "step": 15270 |
| }, |
| { |
| "grad_norm": 0.1581815630197525, |
| "learning_rate": 1.44755369860819e-05, |
| "loss": 0.0098, |
| "step": 15280 |
| }, |
| { |
| "grad_norm": 0.16933442652225494, |
| "learning_rate": 1.441740757330287e-05, |
| "loss": 0.0071, |
| "step": 15290 |
| }, |
| { |
| "grad_norm": 0.1398744434118271, |
| "learning_rate": 1.4359375441972844e-05, |
| "loss": 0.0077, |
| "step": 15300 |
| }, |
| { |
| "grad_norm": 0.11288022249937057, |
| "learning_rate": 1.4301440750749395e-05, |
| "loss": 0.0038, |
| "step": 15310 |
| }, |
| { |
| "grad_norm": 0.12249251455068588, |
| "learning_rate": 1.4243603658023808e-05, |
| "loss": 0.0039, |
| "step": 15320 |
| }, |
| { |
| "grad_norm": 0.14226596057415009, |
| "learning_rate": 1.4185864321920444e-05, |
| "loss": 0.0075, |
| "step": 15330 |
| }, |
| { |
| "grad_norm": 0.16929765045642853, |
| "learning_rate": 1.4128222900296485e-05, |
| "loss": 0.0054, |
| "step": 15340 |
| }, |
| { |
| "grad_norm": 0.12506860494613647, |
| "learning_rate": 1.407067955074135e-05, |
| "loss": 0.0052, |
| "step": 15350 |
| }, |
| { |
| "grad_norm": 0.13736629486083984, |
| "learning_rate": 1.4013234430576356e-05, |
| "loss": 0.0074, |
| "step": 15360 |
| }, |
| { |
| "grad_norm": 0.1849488615989685, |
| "learning_rate": 1.3955887696854286e-05, |
| "loss": 0.0081, |
| "step": 15370 |
| }, |
| { |
| "grad_norm": 0.10855989158153534, |
| "learning_rate": 1.38986395063589e-05, |
| "loss": 0.0045, |
| "step": 15380 |
| }, |
| { |
| "grad_norm": 0.1332855224609375, |
| "learning_rate": 1.3841490015604597e-05, |
| "loss": 0.0051, |
| "step": 15390 |
| }, |
| { |
| "grad_norm": 0.13144734501838684, |
| "learning_rate": 1.3784439380835879e-05, |
| "loss": 0.0057, |
| "step": 15400 |
| }, |
| { |
| "grad_norm": 0.1505713164806366, |
| "learning_rate": 1.3727487758026986e-05, |
| "loss": 0.0046, |
| "step": 15410 |
| }, |
| { |
| "grad_norm": 0.18800579011440277, |
| "learning_rate": 1.3670635302881525e-05, |
| "loss": 0.0073, |
| "step": 15420 |
| }, |
| { |
| "grad_norm": 0.09939403086900711, |
| "learning_rate": 1.3613882170831888e-05, |
| "loss": 0.0035, |
| "step": 15430 |
| }, |
| { |
| "grad_norm": 0.14586606621742249, |
| "learning_rate": 1.355722851703901e-05, |
| "loss": 0.0041, |
| "step": 15440 |
| }, |
| { |
| "grad_norm": 0.15744535624980927, |
| "learning_rate": 1.3500674496391814e-05, |
| "loss": 0.0076, |
| "step": 15450 |
| }, |
| { |
| "grad_norm": 0.11027566343545914, |
| "learning_rate": 1.3444220263506795e-05, |
| "loss": 0.0044, |
| "step": 15460 |
| }, |
| { |
| "grad_norm": 0.16130055487155914, |
| "learning_rate": 1.3387865972727714e-05, |
| "loss": 0.005, |
| "step": 15470 |
| }, |
| { |
| "grad_norm": 0.11999835073947906, |
| "learning_rate": 1.3331611778125036e-05, |
| "loss": 0.0055, |
| "step": 15480 |
| }, |
| { |
| "grad_norm": 0.11079049110412598, |
| "learning_rate": 1.3275457833495564e-05, |
| "loss": 0.0063, |
| "step": 15490 |
| }, |
| { |
| "grad_norm": 0.11384254693984985, |
| "learning_rate": 1.3219404292362065e-05, |
| "loss": 0.005, |
| "step": 15500 |
| }, |
| { |
| "grad_norm": 0.13722357153892517, |
| "learning_rate": 1.3163451307972751e-05, |
| "loss": 0.0099, |
| "step": 15510 |
| }, |
| { |
| "grad_norm": 0.11740435659885406, |
| "learning_rate": 1.3107599033300977e-05, |
| "loss": 0.0037, |
| "step": 15520 |
| }, |
| { |
| "grad_norm": 0.09946759045124054, |
| "learning_rate": 1.305184762104471e-05, |
| "loss": 0.0047, |
| "step": 15530 |
| }, |
| { |
| "grad_norm": 0.09446703642606735, |
| "learning_rate": 1.2996197223626178e-05, |
| "loss": 0.0042, |
| "step": 15540 |
| }, |
| { |
| "grad_norm": 0.14761124551296234, |
| "learning_rate": 1.2940647993191457e-05, |
| "loss": 0.0045, |
| "step": 15550 |
| }, |
| { |
| "grad_norm": 0.15722501277923584, |
| "learning_rate": 1.2885200081610005e-05, |
| "loss": 0.0068, |
| "step": 15560 |
| }, |
| { |
| "grad_norm": 0.15671776235103607, |
| "learning_rate": 1.2829853640474316e-05, |
| "loss": 0.0043, |
| "step": 15570 |
| }, |
| { |
| "grad_norm": 0.08559279143810272, |
| "learning_rate": 1.2774608821099438e-05, |
| "loss": 0.0047, |
| "step": 15580 |
| }, |
| { |
| "grad_norm": 0.13336020708084106, |
| "learning_rate": 1.2719465774522577e-05, |
| "loss": 0.0066, |
| "step": 15590 |
| }, |
| { |
| "grad_norm": 0.17194277048110962, |
| "learning_rate": 1.2664424651502755e-05, |
| "loss": 0.0066, |
| "step": 15600 |
| }, |
| { |
| "grad_norm": 0.17069080471992493, |
| "learning_rate": 1.260948560252026e-05, |
| "loss": 0.0097, |
| "step": 15610 |
| }, |
| { |
| "grad_norm": 0.15968455374240875, |
| "learning_rate": 1.2554648777776396e-05, |
| "loss": 0.0039, |
| "step": 15620 |
| }, |
| { |
| "grad_norm": 0.13884195685386658, |
| "learning_rate": 1.2499914327192919e-05, |
| "loss": 0.004, |
| "step": 15630 |
| }, |
| { |
| "grad_norm": 0.1465432047843933, |
| "learning_rate": 1.2445282400411722e-05, |
| "loss": 0.0055, |
| "step": 15640 |
| }, |
| { |
| "grad_norm": 0.13197597861289978, |
| "learning_rate": 1.2390753146794437e-05, |
| "loss": 0.0052, |
| "step": 15650 |
| }, |
| { |
| "grad_norm": 0.15008197724819183, |
| "learning_rate": 1.2336326715421925e-05, |
| "loss": 0.0046, |
| "step": 15660 |
| }, |
| { |
| "grad_norm": 0.13557665050029755, |
| "learning_rate": 1.2282003255094005e-05, |
| "loss": 0.0045, |
| "step": 15670 |
| }, |
| { |
| "grad_norm": 0.1551276594400406, |
| "learning_rate": 1.2227782914328928e-05, |
| "loss": 0.0065, |
| "step": 15680 |
| }, |
| { |
| "grad_norm": 0.14345529675483704, |
| "learning_rate": 1.2173665841363018e-05, |
| "loss": 0.0033, |
| "step": 15690 |
| }, |
| { |
| "grad_norm": 0.12947356700897217, |
| "learning_rate": 1.211965218415032e-05, |
| "loss": 0.0042, |
| "step": 15700 |
| }, |
| { |
| "grad_norm": 0.08433026820421219, |
| "learning_rate": 1.2065742090362082e-05, |
| "loss": 0.0041, |
| "step": 15710 |
| }, |
| { |
| "grad_norm": 0.16045640408992767, |
| "learning_rate": 1.2011935707386457e-05, |
| "loss": 0.0058, |
| "step": 15720 |
| }, |
| { |
| "grad_norm": 0.10582105070352554, |
| "learning_rate": 1.1958233182328044e-05, |
| "loss": 0.0053, |
| "step": 15730 |
| }, |
| { |
| "grad_norm": 0.0928897112607956, |
| "learning_rate": 1.1904634662007474e-05, |
| "loss": 0.0041, |
| "step": 15740 |
| }, |
| { |
| "grad_norm": 0.13550110161304474, |
| "learning_rate": 1.1851140292961088e-05, |
| "loss": 0.0058, |
| "step": 15750 |
| }, |
| { |
| "grad_norm": 0.1278456449508667, |
| "learning_rate": 1.1797750221440424e-05, |
| "loss": 0.0074, |
| "step": 15760 |
| }, |
| { |
| "grad_norm": 0.194366917014122, |
| "learning_rate": 1.1744464593411897e-05, |
| "loss": 0.0055, |
| "step": 15770 |
| }, |
| { |
| "grad_norm": 0.1663769781589508, |
| "learning_rate": 1.1691283554556399e-05, |
| "loss": 0.0037, |
| "step": 15780 |
| }, |
| { |
| "grad_norm": 0.13579991459846497, |
| "learning_rate": 1.1638207250268834e-05, |
| "loss": 0.0087, |
| "step": 15790 |
| }, |
| { |
| "grad_norm": 0.12101404368877411, |
| "learning_rate": 1.158523582565782e-05, |
| "loss": 0.0059, |
| "step": 15800 |
| }, |
| { |
| "grad_norm": 0.1226835548877716, |
| "learning_rate": 1.1532369425545192e-05, |
| "loss": 0.0057, |
| "step": 15810 |
| }, |
| { |
| "grad_norm": 0.10354090481996536, |
| "learning_rate": 1.1479608194465662e-05, |
| "loss": 0.0097, |
| "step": 15820 |
| }, |
| { |
| "grad_norm": 0.10664427280426025, |
| "learning_rate": 1.1426952276666442e-05, |
| "loss": 0.0054, |
| "step": 15830 |
| }, |
| { |
| "grad_norm": 0.15973882377147675, |
| "learning_rate": 1.1374401816106778e-05, |
| "loss": 0.005, |
| "step": 15840 |
| }, |
| { |
| "grad_norm": 0.12123812735080719, |
| "learning_rate": 1.1321956956457646e-05, |
| "loss": 0.0052, |
| "step": 15850 |
| }, |
| { |
| "grad_norm": 0.15946902334690094, |
| "learning_rate": 1.1269617841101277e-05, |
| "loss": 0.0069, |
| "step": 15860 |
| }, |
| { |
| "grad_norm": 0.10081616789102554, |
| "learning_rate": 1.1217384613130804e-05, |
| "loss": 0.0091, |
| "step": 15870 |
| }, |
| { |
| "grad_norm": 0.11432350426912308, |
| "learning_rate": 1.11652574153499e-05, |
| "loss": 0.0065, |
| "step": 15880 |
| }, |
| { |
| "grad_norm": 0.08289428800344467, |
| "learning_rate": 1.1113236390272303e-05, |
| "loss": 0.0035, |
| "step": 15890 |
| }, |
| { |
| "grad_norm": 0.08323133736848831, |
| "learning_rate": 1.106132168012155e-05, |
| "loss": 0.0047, |
| "step": 15900 |
| }, |
| { |
| "grad_norm": 0.12318079173564911, |
| "learning_rate": 1.1009513426830448e-05, |
| "loss": 0.0044, |
| "step": 15910 |
| }, |
| { |
| "grad_norm": 0.16217155754566193, |
| "learning_rate": 1.0957811772040777e-05, |
| "loss": 0.0064, |
| "step": 15920 |
| }, |
| { |
| "grad_norm": 0.1420840173959732, |
| "learning_rate": 1.0906216857102913e-05, |
| "loss": 0.0059, |
| "step": 15930 |
| }, |
| { |
| "grad_norm": 0.16004914045333862, |
| "learning_rate": 1.0854728823075355e-05, |
| "loss": 0.0054, |
| "step": 15940 |
| }, |
| { |
| "grad_norm": 0.15781958401203156, |
| "learning_rate": 1.0803347810724452e-05, |
| "loss": 0.0096, |
| "step": 15950 |
| }, |
| { |
| "grad_norm": 0.12208251655101776, |
| "learning_rate": 1.0752073960523911e-05, |
| "loss": 0.0034, |
| "step": 15960 |
| }, |
| { |
| "grad_norm": 0.13120897114276886, |
| "learning_rate": 1.070090741265447e-05, |
| "loss": 0.0044, |
| "step": 15970 |
| }, |
| { |
| "grad_norm": 0.10382191836833954, |
| "learning_rate": 1.0649848307003547e-05, |
| "loss": 0.0039, |
| "step": 15980 |
| }, |
| { |
| "grad_norm": 0.09419625997543335, |
| "learning_rate": 1.0598896783164757e-05, |
| "loss": 0.0051, |
| "step": 15990 |
| }, |
| { |
| "grad_norm": 0.10848691314458847, |
| "learning_rate": 1.0548052980437645e-05, |
| "loss": 0.0042, |
| "step": 16000 |
| }, |
| { |
| "grad_norm": 0.13647383451461792, |
| "learning_rate": 1.049731703782722e-05, |
| "loss": 0.0061, |
| "step": 16010 |
| }, |
| { |
| "grad_norm": 0.1732291877269745, |
| "learning_rate": 1.0446689094043587e-05, |
| "loss": 0.0047, |
| "step": 16020 |
| }, |
| { |
| "grad_norm": 0.14692017436027527, |
| "learning_rate": 1.039616928750165e-05, |
| "loss": 0.0096, |
| "step": 16030 |
| }, |
| { |
| "grad_norm": 0.14247895777225494, |
| "learning_rate": 1.0345757756320612e-05, |
| "loss": 0.0047, |
| "step": 16040 |
| }, |
| { |
| "grad_norm": 0.10627839714288712, |
| "learning_rate": 1.0295454638323666e-05, |
| "loss": 0.0036, |
| "step": 16050 |
| }, |
| { |
| "grad_norm": 0.124620720744133, |
| "learning_rate": 1.0245260071037632e-05, |
| "loss": 0.0058, |
| "step": 16060 |
| }, |
| { |
| "grad_norm": 0.1555354744195938, |
| "learning_rate": 1.0195174191692518e-05, |
| "loss": 0.0046, |
| "step": 16070 |
| }, |
| { |
| "grad_norm": 0.16408278048038483, |
| "learning_rate": 1.014519713722124e-05, |
| "loss": 0.0085, |
| "step": 16080 |
| }, |
| { |
| "grad_norm": 0.09864310175180435, |
| "learning_rate": 1.0095329044259132e-05, |
| "loss": 0.0055, |
| "step": 16090 |
| }, |
| { |
| "grad_norm": 0.08889558166265488, |
| "learning_rate": 1.004557004914365e-05, |
| "loss": 0.004, |
| "step": 16100 |
| }, |
| { |
| "grad_norm": 0.10330936312675476, |
| "learning_rate": 9.995920287914007e-06, |
| "loss": 0.0037, |
| "step": 16110 |
| }, |
| { |
| "grad_norm": 0.11861933022737503, |
| "learning_rate": 9.946379896310737e-06, |
| "loss": 0.0048, |
| "step": 16120 |
| }, |
| { |
| "grad_norm": 0.12653601169586182, |
| "learning_rate": 9.896949009775396e-06, |
| "loss": 0.0075, |
| "step": 16130 |
| }, |
| { |
| "grad_norm": 0.09070475399494171, |
| "learning_rate": 9.847627763450134e-06, |
| "loss": 0.0036, |
| "step": 16140 |
| }, |
| { |
| "grad_norm": 0.12315920740365982, |
| "learning_rate": 9.798416292177337e-06, |
| "loss": 0.0055, |
| "step": 16150 |
| }, |
| { |
| "grad_norm": 0.111320361495018, |
| "learning_rate": 9.74931473049932e-06, |
| "loss": 0.0061, |
| "step": 16160 |
| }, |
| { |
| "grad_norm": 0.15200473368167877, |
| "learning_rate": 9.700323212657847e-06, |
| "loss": 0.0047, |
| "step": 16170 |
| }, |
| { |
| "grad_norm": 0.13636194169521332, |
| "learning_rate": 9.65144187259388e-06, |
| "loss": 0.0046, |
| "step": 16180 |
| }, |
| { |
| "grad_norm": 0.1087312325835228, |
| "learning_rate": 9.602670843947132e-06, |
| "loss": 0.007, |
| "step": 16190 |
| }, |
| { |
| "grad_norm": 0.13450048863887787, |
| "learning_rate": 9.554010260055713e-06, |
| "loss": 0.0049, |
| "step": 16200 |
| }, |
| { |
| "grad_norm": 0.09377999603748322, |
| "learning_rate": 9.505460253955834e-06, |
| "loss": 0.0127, |
| "step": 16210 |
| }, |
| { |
| "grad_norm": 0.1321907937526703, |
| "learning_rate": 9.457020958381324e-06, |
| "loss": 0.004, |
| "step": 16220 |
| }, |
| { |
| "grad_norm": 0.1684373915195465, |
| "learning_rate": 9.408692505763395e-06, |
| "loss": 0.0062, |
| "step": 16230 |
| }, |
| { |
| "grad_norm": 0.1340956687927246, |
| "learning_rate": 9.360475028230181e-06, |
| "loss": 0.0069, |
| "step": 16240 |
| }, |
| { |
| "grad_norm": 0.11114493757486343, |
| "learning_rate": 9.312368657606412e-06, |
| "loss": 0.0066, |
| "step": 16250 |
| }, |
| { |
| "grad_norm": 0.08800835907459259, |
| "learning_rate": 9.264373525413096e-06, |
| "loss": 0.0041, |
| "step": 16260 |
| }, |
| { |
| "grad_norm": 0.1255185306072235, |
| "learning_rate": 9.216489762867058e-06, |
| "loss": 0.0051, |
| "step": 16270 |
| }, |
| { |
| "grad_norm": 0.13304315507411957, |
| "learning_rate": 9.168717500880708e-06, |
| "loss": 0.0069, |
| "step": 16280 |
| }, |
| { |
| "grad_norm": 0.13777510821819305, |
| "learning_rate": 9.121056870061574e-06, |
| "loss": 0.0048, |
| "step": 16290 |
| }, |
| { |
| "grad_norm": 0.1482883095741272, |
| "learning_rate": 9.073508000711983e-06, |
| "loss": 0.0063, |
| "step": 16300 |
| }, |
| { |
| "grad_norm": 0.12836310267448425, |
| "learning_rate": 9.026071022828758e-06, |
| "loss": 0.0068, |
| "step": 16310 |
| }, |
| { |
| "grad_norm": 0.13698478043079376, |
| "learning_rate": 8.978746066102771e-06, |
| "loss": 0.0049, |
| "step": 16320 |
| }, |
| { |
| "grad_norm": 0.139384463429451, |
| "learning_rate": 8.931533259918634e-06, |
| "loss": 0.005, |
| "step": 16330 |
| }, |
| { |
| "grad_norm": 0.08790681511163712, |
| "learning_rate": 8.884432733354382e-06, |
| "loss": 0.0062, |
| "step": 16340 |
| }, |
| { |
| "grad_norm": 0.1132570132613182, |
| "learning_rate": 8.837444615181029e-06, |
| "loss": 0.0055, |
| "step": 16350 |
| }, |
| { |
| "grad_norm": 0.15096323192119598, |
| "learning_rate": 8.790569033862323e-06, |
| "loss": 0.0061, |
| "step": 16360 |
| }, |
| { |
| "grad_norm": 0.12137813121080399, |
| "learning_rate": 8.7438061175543e-06, |
| "loss": 0.0058, |
| "step": 16370 |
| }, |
| { |
| "grad_norm": 0.15718196332454681, |
| "learning_rate": 8.697155994104978e-06, |
| "loss": 0.0059, |
| "step": 16380 |
| }, |
| { |
| "grad_norm": 0.12548978626728058, |
| "learning_rate": 8.650618791054033e-06, |
| "loss": 0.0041, |
| "step": 16390 |
| }, |
| { |
| "grad_norm": 0.09397879242897034, |
| "learning_rate": 8.604194635632373e-06, |
| "loss": 0.0035, |
| "step": 16400 |
| }, |
| { |
| "grad_norm": 0.12385103106498718, |
| "learning_rate": 8.557883654761906e-06, |
| "loss": 0.0046, |
| "step": 16410 |
| }, |
| { |
| "grad_norm": 0.12034054100513458, |
| "learning_rate": 8.511685975055061e-06, |
| "loss": 0.0049, |
| "step": 16420 |
| }, |
| { |
| "grad_norm": 0.07118933647871017, |
| "learning_rate": 8.46560172281452e-06, |
| "loss": 0.0037, |
| "step": 16430 |
| }, |
| { |
| "grad_norm": 0.09793756902217865, |
| "learning_rate": 8.419631024032893e-06, |
| "loss": 0.0037, |
| "step": 16440 |
| }, |
| { |
| "grad_norm": 0.08896943926811218, |
| "learning_rate": 8.373774004392293e-06, |
| "loss": 0.0042, |
| "step": 16450 |
| }, |
| { |
| "grad_norm": 0.09761831164360046, |
| "learning_rate": 8.32803078926409e-06, |
| "loss": 0.0042, |
| "step": 16460 |
| }, |
| { |
| "grad_norm": 0.1202096939086914, |
| "learning_rate": 8.282401503708454e-06, |
| "loss": 0.0047, |
| "step": 16470 |
| }, |
| { |
| "grad_norm": 0.08817831426858902, |
| "learning_rate": 8.23688627247412e-06, |
| "loss": 0.0038, |
| "step": 16480 |
| }, |
| { |
| "grad_norm": 0.05380531772971153, |
| "learning_rate": 8.191485219998007e-06, |
| "loss": 0.0031, |
| "step": 16490 |
| }, |
| { |
| "grad_norm": 0.11282724887132645, |
| "learning_rate": 8.146198470404843e-06, |
| "loss": 0.0063, |
| "step": 16500 |
| }, |
| { |
| "grad_norm": 0.13265129923820496, |
| "learning_rate": 8.101026147506897e-06, |
| "loss": 0.0062, |
| "step": 16510 |
| }, |
| { |
| "grad_norm": 0.09475216269493103, |
| "learning_rate": 8.05596837480353e-06, |
| "loss": 0.0048, |
| "step": 16520 |
| }, |
| { |
| "grad_norm": 0.11130519211292267, |
| "learning_rate": 8.011025275480998e-06, |
| "loss": 0.0063, |
| "step": 16530 |
| }, |
| { |
| "grad_norm": 0.09376892447471619, |
| "learning_rate": 7.966196972412027e-06, |
| "loss": 0.0057, |
| "step": 16540 |
| }, |
| { |
| "grad_norm": 0.12341774255037308, |
| "learning_rate": 7.92148358815547e-06, |
| "loss": 0.005, |
| "step": 16550 |
| }, |
| { |
| "grad_norm": 0.1459421068429947, |
| "learning_rate": 7.87688524495604e-06, |
| "loss": 0.006, |
| "step": 16560 |
| }, |
| { |
| "grad_norm": 0.12143165618181229, |
| "learning_rate": 7.83240206474386e-06, |
| "loss": 0.0045, |
| "step": 16570 |
| }, |
| { |
| "grad_norm": 0.0978287011384964, |
| "learning_rate": 7.788034169134272e-06, |
| "loss": 0.0059, |
| "step": 16580 |
| }, |
| { |
| "grad_norm": 0.10168706625699997, |
| "learning_rate": 7.743781679427414e-06, |
| "loss": 0.0044, |
| "step": 16590 |
| }, |
| { |
| "grad_norm": 0.08256179839372635, |
| "learning_rate": 7.699644716607895e-06, |
| "loss": 0.0036, |
| "step": 16600 |
| }, |
| { |
| "grad_norm": 0.059778597205877304, |
| "learning_rate": 7.655623401344486e-06, |
| "loss": 0.0056, |
| "step": 16610 |
| }, |
| { |
| "grad_norm": 0.11977694928646088, |
| "learning_rate": 7.611717853989775e-06, |
| "loss": 0.0059, |
| "step": 16620 |
| }, |
| { |
| "grad_norm": 0.13426047563552856, |
| "learning_rate": 7.567928194579854e-06, |
| "loss": 0.0067, |
| "step": 16630 |
| }, |
| { |
| "grad_norm": 0.12195713818073273, |
| "learning_rate": 7.524254542833997e-06, |
| "loss": 0.0042, |
| "step": 16640 |
| }, |
| { |
| "grad_norm": 0.1642487794160843, |
| "learning_rate": 7.480697018154286e-06, |
| "loss": 0.0061, |
| "step": 16650 |
| }, |
| { |
| "grad_norm": 0.08334190398454666, |
| "learning_rate": 7.437255739625332e-06, |
| "loss": 0.0034, |
| "step": 16660 |
| }, |
| { |
| "grad_norm": 0.09278412163257599, |
| "learning_rate": 7.393930826013923e-06, |
| "loss": 0.0049, |
| "step": 16670 |
| }, |
| { |
| "grad_norm": 0.08380815386772156, |
| "learning_rate": 7.350722395768722e-06, |
| "loss": 0.004, |
| "step": 16680 |
| }, |
| { |
| "grad_norm": 0.08418401330709457, |
| "learning_rate": 7.307630567019963e-06, |
| "loss": 0.0041, |
| "step": 16690 |
| }, |
| { |
| "grad_norm": 0.10579981654882431, |
| "learning_rate": 7.264655457579e-06, |
| "loss": 0.0047, |
| "step": 16700 |
| }, |
| { |
| "grad_norm": 0.09246329963207245, |
| "learning_rate": 7.221797184938184e-06, |
| "loss": 0.0043, |
| "step": 16710 |
| }, |
| { |
| "grad_norm": 0.10854664444923401, |
| "learning_rate": 7.179055866270373e-06, |
| "loss": 0.0037, |
| "step": 16720 |
| }, |
| { |
| "grad_norm": 0.09156115353107452, |
| "learning_rate": 7.136431618428707e-06, |
| "loss": 0.0038, |
| "step": 16730 |
| }, |
| { |
| "grad_norm": 0.056634001433849335, |
| "learning_rate": 7.09392455794628e-06, |
| "loss": 0.0059, |
| "step": 16740 |
| }, |
| { |
| "grad_norm": 0.06214802712202072, |
| "learning_rate": 7.051534801035725e-06, |
| "loss": 0.003, |
| "step": 16750 |
| }, |
| { |
| "grad_norm": 0.11566188186407089, |
| "learning_rate": 7.00926246358905e-06, |
| "loss": 0.0078, |
| "step": 16760 |
| }, |
| { |
| "grad_norm": 0.10004517436027527, |
| "learning_rate": 6.967107661177191e-06, |
| "loss": 0.0043, |
| "step": 16770 |
| }, |
| { |
| "grad_norm": 0.14847685396671295, |
| "learning_rate": 6.925070509049786e-06, |
| "loss": 0.0049, |
| "step": 16780 |
| }, |
| { |
| "grad_norm": 0.08145119249820709, |
| "learning_rate": 6.883151122134812e-06, |
| "loss": 0.0035, |
| "step": 16790 |
| }, |
| { |
| "grad_norm": 0.07774826884269714, |
| "learning_rate": 6.8413496150382394e-06, |
| "loss": 0.0051, |
| "step": 16800 |
| }, |
| { |
| "grad_norm": 0.09139399975538254, |
| "learning_rate": 6.7996661020438165e-06, |
| "loss": 0.0048, |
| "step": 16810 |
| }, |
| { |
| "grad_norm": 0.11714893579483032, |
| "learning_rate": 6.758100697112662e-06, |
| "loss": 0.0049, |
| "step": 16820 |
| }, |
| { |
| "grad_norm": 0.10748549550771713, |
| "learning_rate": 6.716653513883026e-06, |
| "loss": 0.0034, |
| "step": 16830 |
| }, |
| { |
| "grad_norm": 0.13849052786827087, |
| "learning_rate": 6.675324665669913e-06, |
| "loss": 0.0074, |
| "step": 16840 |
| }, |
| { |
| "grad_norm": 0.10507303476333618, |
| "learning_rate": 6.634114265464803e-06, |
| "loss": 0.0037, |
| "step": 16850 |
| }, |
| { |
| "grad_norm": 0.11181323230266571, |
| "learning_rate": 6.59302242593538e-06, |
| "loss": 0.0046, |
| "step": 16860 |
| }, |
| { |
| "grad_norm": 0.0852261483669281, |
| "learning_rate": 6.552049259425141e-06, |
| "loss": 0.0035, |
| "step": 16870 |
| }, |
| { |
| "grad_norm": 0.12089522182941437, |
| "learning_rate": 6.511194877953181e-06, |
| "loss": 0.0071, |
| "step": 16880 |
| }, |
| { |
| "grad_norm": 0.10575058311223984, |
| "learning_rate": 6.470459393213813e-06, |
| "loss": 0.0047, |
| "step": 16890 |
| }, |
| { |
| "grad_norm": 0.1121765598654747, |
| "learning_rate": 6.429842916576279e-06, |
| "loss": 0.0064, |
| "step": 16900 |
| }, |
| { |
| "grad_norm": 0.11497557908296585, |
| "learning_rate": 6.389345559084503e-06, |
| "loss": 0.0038, |
| "step": 16910 |
| }, |
| { |
| "grad_norm": 0.10686445236206055, |
| "learning_rate": 6.348967431456682e-06, |
| "loss": 0.0078, |
| "step": 16920 |
| }, |
| { |
| "grad_norm": 0.11743766814470291, |
| "learning_rate": 6.30870864408511e-06, |
| "loss": 0.007, |
| "step": 16930 |
| }, |
| { |
| "grad_norm": 0.08018866926431656, |
| "learning_rate": 6.268569307035754e-06, |
| "loss": 0.0058, |
| "step": 16940 |
| }, |
| { |
| "grad_norm": 0.0866788849234581, |
| "learning_rate": 6.228549530048022e-06, |
| "loss": 0.0043, |
| "step": 16950 |
| }, |
| { |
| "grad_norm": 0.12864826619625092, |
| "learning_rate": 6.1886494225344814e-06, |
| "loss": 0.0053, |
| "step": 16960 |
| }, |
| { |
| "grad_norm": 0.08638875186443329, |
| "learning_rate": 6.148869093580479e-06, |
| "loss": 0.0041, |
| "step": 16970 |
| }, |
| { |
| "grad_norm": 0.11416413635015488, |
| "learning_rate": 6.109208651943921e-06, |
| "loss": 0.0041, |
| "step": 16980 |
| }, |
| { |
| "grad_norm": 0.08974229544401169, |
| "learning_rate": 6.069668206054946e-06, |
| "loss": 0.0059, |
| "step": 16990 |
| }, |
| { |
| "grad_norm": 0.08993878960609436, |
| "learning_rate": 6.0302478640156145e-06, |
| "loss": 0.0047, |
| "step": 17000 |
| }, |
| { |
| "grad_norm": 0.06425460427999496, |
| "learning_rate": 5.990947733599644e-06, |
| "loss": 0.0041, |
| "step": 17010 |
| }, |
| { |
| "grad_norm": 0.15895497798919678, |
| "learning_rate": 5.951767922252105e-06, |
| "loss": 0.0066, |
| "step": 17020 |
| }, |
| { |
| "grad_norm": 0.12571494281291962, |
| "learning_rate": 5.912708537089068e-06, |
| "loss": 0.004, |
| "step": 17030 |
| }, |
| { |
| "grad_norm": 0.13179023563861847, |
| "learning_rate": 5.873769684897434e-06, |
| "loss": 0.0088, |
| "step": 17040 |
| }, |
| { |
| "grad_norm": 0.12288176268339157, |
| "learning_rate": 5.834951472134514e-06, |
| "loss": 0.0062, |
| "step": 17050 |
| }, |
| { |
| "grad_norm": 0.12752611935138702, |
| "learning_rate": 5.796254004927832e-06, |
| "loss": 0.0082, |
| "step": 17060 |
| }, |
| { |
| "grad_norm": 0.10792287439107895, |
| "learning_rate": 5.757677389074806e-06, |
| "loss": 0.0079, |
| "step": 17070 |
| }, |
| { |
| "grad_norm": 0.12195702642202377, |
| "learning_rate": 5.719221730042385e-06, |
| "loss": 0.0041, |
| "step": 17080 |
| }, |
| { |
| "grad_norm": 0.10768016427755356, |
| "learning_rate": 5.680887132966911e-06, |
| "loss": 0.006, |
| "step": 17090 |
| }, |
| { |
| "grad_norm": 0.09891042858362198, |
| "learning_rate": 5.642673702653683e-06, |
| "loss": 0.004, |
| "step": 17100 |
| }, |
| { |
| "grad_norm": 0.0941566675901413, |
| "learning_rate": 5.604581543576781e-06, |
| "loss": 0.0056, |
| "step": 17110 |
| }, |
| { |
| "grad_norm": 0.10946481674909592, |
| "learning_rate": 5.566610759878704e-06, |
| "loss": 0.0054, |
| "step": 17120 |
| }, |
| { |
| "grad_norm": 0.07776937633752823, |
| "learning_rate": 5.528761455370119e-06, |
| "loss": 0.0032, |
| "step": 17130 |
| }, |
| { |
| "grad_norm": 0.07869040220975876, |
| "learning_rate": 5.491033733529594e-06, |
| "loss": 0.0036, |
| "step": 17140 |
| }, |
| { |
| "grad_norm": 0.08763402700424194, |
| "learning_rate": 5.453427697503255e-06, |
| "loss": 0.0044, |
| "step": 17150 |
| }, |
| { |
| "grad_norm": 0.12128516286611557, |
| "learning_rate": 5.415943450104599e-06, |
| "loss": 0.0061, |
| "step": 17160 |
| }, |
| { |
| "grad_norm": 0.1140231043100357, |
| "learning_rate": 5.378581093814111e-06, |
| "loss": 0.0048, |
| "step": 17170 |
| }, |
| { |
| "grad_norm": 0.07997678965330124, |
| "learning_rate": 5.3413407307790375e-06, |
| "loss": 0.005, |
| "step": 17180 |
| }, |
| { |
| "grad_norm": 0.13127312064170837, |
| "learning_rate": 5.30422246281313e-06, |
| "loss": 0.0057, |
| "step": 17190 |
| }, |
| { |
| "grad_norm": 0.058916617184877396, |
| "learning_rate": 5.267226391396296e-06, |
| "loss": 0.0029, |
| "step": 17200 |
| }, |
| { |
| "grad_norm": 0.1319904774427414, |
| "learning_rate": 5.2303526176744e-06, |
| "loss": 0.0044, |
| "step": 17210 |
| }, |
| { |
| "grad_norm": 0.11650247871875763, |
| "learning_rate": 5.193601242458929e-06, |
| "loss": 0.0051, |
| "step": 17220 |
| }, |
| { |
| "grad_norm": 0.07709518074989319, |
| "learning_rate": 5.156972366226714e-06, |
| "loss": 0.0055, |
| "step": 17230 |
| }, |
| { |
| "grad_norm": 0.08256576210260391, |
| "learning_rate": 5.120466089119735e-06, |
| "loss": 0.0043, |
| "step": 17240 |
| }, |
| { |
| "grad_norm": 0.06434403359889984, |
| "learning_rate": 5.084082510944749e-06, |
| "loss": 0.0027, |
| "step": 17250 |
| }, |
| { |
| "grad_norm": 0.06728126853704453, |
| "learning_rate": 5.047821731173058e-06, |
| "loss": 0.0038, |
| "step": 17260 |
| }, |
| { |
| "grad_norm": 0.090157650411129, |
| "learning_rate": 5.011683848940274e-06, |
| "loss": 0.0051, |
| "step": 17270 |
| }, |
| { |
| "grad_norm": 0.07788987457752228, |
| "learning_rate": 4.975668963045954e-06, |
| "loss": 0.0045, |
| "step": 17280 |
| }, |
| { |
| "grad_norm": 0.1306411623954773, |
| "learning_rate": 4.9397771719534525e-06, |
| "loss": 0.0064, |
| "step": 17290 |
| }, |
| { |
| "grad_norm": 0.10654694586992264, |
| "learning_rate": 4.904008573789548e-06, |
| "loss": 0.0054, |
| "step": 17300 |
| }, |
| { |
| "grad_norm": 0.09149842709302902, |
| "learning_rate": 4.8683632663442005e-06, |
| "loss": 0.0065, |
| "step": 17310 |
| }, |
| { |
| "grad_norm": 0.1496347337961197, |
| "learning_rate": 4.832841347070343e-06, |
| "loss": 0.0057, |
| "step": 17320 |
| }, |
| { |
| "grad_norm": 0.08486293256282806, |
| "learning_rate": 4.797442913083539e-06, |
| "loss": 0.004, |
| "step": 17330 |
| }, |
| { |
| "grad_norm": 0.06311454623937607, |
| "learning_rate": 4.7621680611617596e-06, |
| "loss": 0.0079, |
| "step": 17340 |
| }, |
| { |
| "grad_norm": 0.0772392749786377, |
| "learning_rate": 4.727016887745095e-06, |
| "loss": 0.0059, |
| "step": 17350 |
| }, |
| { |
| "grad_norm": 0.08348176628351212, |
| "learning_rate": 4.691989488935511e-06, |
| "loss": 0.0056, |
| "step": 17360 |
| }, |
| { |
| "grad_norm": 0.08476629853248596, |
| "learning_rate": 4.657085960496588e-06, |
| "loss": 0.0053, |
| "step": 17370 |
| }, |
| { |
| "grad_norm": 0.08960162848234177, |
| "learning_rate": 4.6223063978532265e-06, |
| "loss": 0.0054, |
| "step": 17380 |
| }, |
| { |
| "grad_norm": 0.06759439408779144, |
| "learning_rate": 4.587650896091439e-06, |
| "loss": 0.0028, |
| "step": 17390 |
| }, |
| { |
| "grad_norm": 0.08276814222335815, |
| "learning_rate": 4.553119549958035e-06, |
| "loss": 0.0057, |
| "step": 17400 |
| }, |
| { |
| "grad_norm": 0.10904081165790558, |
| "learning_rate": 4.518712453860385e-06, |
| "loss": 0.0059, |
| "step": 17410 |
| }, |
| { |
| "grad_norm": 0.06815115362405777, |
| "learning_rate": 4.484429701866205e-06, |
| "loss": 0.0032, |
| "step": 17420 |
| }, |
| { |
| "grad_norm": 0.058988507837057114, |
| "learning_rate": 4.4502713877031975e-06, |
| "loss": 0.003, |
| "step": 17430 |
| }, |
| { |
| "grad_norm": 0.07423502206802368, |
| "learning_rate": 4.416237604758911e-06, |
| "loss": 0.0035, |
| "step": 17440 |
| }, |
| { |
| "grad_norm": 0.08950015157461166, |
| "learning_rate": 4.3823284460804025e-06, |
| "loss": 0.0067, |
| "step": 17450 |
| }, |
| { |
| "grad_norm": 0.12428207695484161, |
| "learning_rate": 4.348544004374011e-06, |
| "loss": 0.0047, |
| "step": 17460 |
| }, |
| { |
| "grad_norm": 0.101078100502491, |
| "learning_rate": 4.314884372005123e-06, |
| "loss": 0.0044, |
| "step": 17470 |
| }, |
| { |
| "grad_norm": 0.09248964488506317, |
| "learning_rate": 4.281349640997867e-06, |
| "loss": 0.0046, |
| "step": 17480 |
| }, |
| { |
| "grad_norm": 0.05857797712087631, |
| "learning_rate": 4.247939903034942e-06, |
| "loss": 0.0056, |
| "step": 17490 |
| }, |
| { |
| "grad_norm": 0.08585874736309052, |
| "learning_rate": 4.214655249457284e-06, |
| "loss": 0.0084, |
| "step": 17500 |
| }, |
| { |
| "grad_norm": 0.09325850009918213, |
| "learning_rate": 4.181495771263855e-06, |
| "loss": 0.0038, |
| "step": 17510 |
| }, |
| { |
| "grad_norm": 0.08757645636796951, |
| "learning_rate": 4.148461559111427e-06, |
| "loss": 0.0047, |
| "step": 17520 |
| }, |
| { |
| "grad_norm": 0.0964057445526123, |
| "learning_rate": 4.115552703314252e-06, |
| "loss": 0.0045, |
| "step": 17530 |
| }, |
| { |
| "grad_norm": 0.06798765808343887, |
| "learning_rate": 4.082769293843886e-06, |
| "loss": 0.0033, |
| "step": 17540 |
| }, |
| { |
| "grad_norm": 0.05117792263627052, |
| "learning_rate": 4.050111420328939e-06, |
| "loss": 0.0038, |
| "step": 17550 |
| }, |
| { |
| "grad_norm": 0.1059410348534584, |
| "learning_rate": 4.017579172054764e-06, |
| "loss": 0.0057, |
| "step": 17560 |
| }, |
| { |
| "grad_norm": 0.08445511013269424, |
| "learning_rate": 3.985172637963308e-06, |
| "loss": 0.0041, |
| "step": 17570 |
| }, |
| { |
| "grad_norm": 0.06766512989997864, |
| "learning_rate": 3.952891906652784e-06, |
| "loss": 0.0032, |
| "step": 17580 |
| }, |
| { |
| "grad_norm": 0.10279787331819534, |
| "learning_rate": 3.920737066377478e-06, |
| "loss": 0.005, |
| "step": 17590 |
| }, |
| { |
| "grad_norm": 0.10965480655431747, |
| "learning_rate": 3.888708205047509e-06, |
| "loss": 0.0077, |
| "step": 17600 |
| }, |
| { |
| "grad_norm": 0.09529787302017212, |
| "learning_rate": 3.856805410228542e-06, |
| "loss": 0.005, |
| "step": 17610 |
| }, |
| { |
| "grad_norm": 0.07802815735340118, |
| "learning_rate": 3.82502876914162e-06, |
| "loss": 0.0044, |
| "step": 17620 |
| }, |
| { |
| "grad_norm": 0.063714899122715, |
| "learning_rate": 3.7933783686628586e-06, |
| "loss": 0.0064, |
| "step": 17630 |
| }, |
| { |
| "grad_norm": 0.0940946713089943, |
| "learning_rate": 3.7618542953232306e-06, |
| "loss": 0.0048, |
| "step": 17640 |
| }, |
| { |
| "grad_norm": 0.10446714609861374, |
| "learning_rate": 3.7304566353083658e-06, |
| "loss": 0.0095, |
| "step": 17650 |
| }, |
| { |
| "grad_norm": 0.07436424493789673, |
| "learning_rate": 3.6991854744582555e-06, |
| "loss": 0.0061, |
| "step": 17660 |
| }, |
| { |
| "grad_norm": 0.0746212750673294, |
| "learning_rate": 3.6680408982670777e-06, |
| "loss": 0.0039, |
| "step": 17670 |
| }, |
| { |
| "grad_norm": 0.1222284585237503, |
| "learning_rate": 3.637022991882899e-06, |
| "loss": 0.0084, |
| "step": 17680 |
| }, |
| { |
| "grad_norm": 0.09644754230976105, |
| "learning_rate": 3.606131840107485e-06, |
| "loss": 0.0046, |
| "step": 17690 |
| }, |
| { |
| "grad_norm": 0.15303337574005127, |
| "learning_rate": 3.575367527396084e-06, |
| "loss": 0.0084, |
| "step": 17700 |
| }, |
| { |
| "grad_norm": 0.07879474014043808, |
| "learning_rate": 3.5447301378571386e-06, |
| "loss": 0.0037, |
| "step": 17710 |
| }, |
| { |
| "grad_norm": 0.07187385112047195, |
| "learning_rate": 3.514219755252113e-06, |
| "loss": 0.0065, |
| "step": 17720 |
| }, |
| { |
| "grad_norm": 0.09277717024087906, |
| "learning_rate": 3.4838364629952213e-06, |
| "loss": 0.0074, |
| "step": 17730 |
| }, |
| { |
| "grad_norm": 0.09220196306705475, |
| "learning_rate": 3.4535803441532123e-06, |
| "loss": 0.0038, |
| "step": 17740 |
| }, |
| { |
| "grad_norm": 0.08987642079591751, |
| "learning_rate": 3.4234514814451836e-06, |
| "loss": 0.0043, |
| "step": 17750 |
| }, |
| { |
| "grad_norm": 0.08693262934684753, |
| "learning_rate": 3.393449957242273e-06, |
| "loss": 0.0049, |
| "step": 17760 |
| }, |
| { |
| "grad_norm": 0.12766656279563904, |
| "learning_rate": 3.363575853567524e-06, |
| "loss": 0.0059, |
| "step": 17770 |
| }, |
| { |
| "grad_norm": 0.1169864758849144, |
| "learning_rate": 3.3338292520955826e-06, |
| "loss": 0.0055, |
| "step": 17780 |
| }, |
| { |
| "grad_norm": 0.09048964083194733, |
| "learning_rate": 3.304210234152516e-06, |
| "loss": 0.0034, |
| "step": 17790 |
| }, |
| { |
| "grad_norm": 0.06841456145048141, |
| "learning_rate": 3.2747188807155993e-06, |
| "loss": 0.0031, |
| "step": 17800 |
| }, |
| { |
| "grad_norm": 0.051626693457365036, |
| "learning_rate": 3.2453552724130643e-06, |
| "loss": 0.0054, |
| "step": 17810 |
| }, |
| { |
| "grad_norm": 0.06671755760908127, |
| "learning_rate": 3.216119489523889e-06, |
| "loss": 0.0049, |
| "step": 17820 |
| }, |
| { |
| "grad_norm": 0.09001406282186508, |
| "learning_rate": 3.1870116119775917e-06, |
| "loss": 0.005, |
| "step": 17830 |
| }, |
| { |
| "grad_norm": 0.05401616171002388, |
| "learning_rate": 3.158031719353999e-06, |
| "loss": 0.0043, |
| "step": 17840 |
| }, |
| { |
| "grad_norm": 0.09218642860651016, |
| "learning_rate": 3.1291798908830273e-06, |
| "loss": 0.0066, |
| "step": 17850 |
| }, |
| { |
| "grad_norm": 0.05930738151073456, |
| "learning_rate": 3.1004562054444853e-06, |
| "loss": 0.0037, |
| "step": 17860 |
| }, |
| { |
| "grad_norm": 0.060345377773046494, |
| "learning_rate": 3.071860741567806e-06, |
| "loss": 0.0041, |
| "step": 17870 |
| }, |
| { |
| "grad_norm": 0.04504251852631569, |
| "learning_rate": 3.04339357743193e-06, |
| "loss": 0.0041, |
| "step": 17880 |
| }, |
| { |
| "grad_norm": 0.0834588035941124, |
| "learning_rate": 3.0150547908649628e-06, |
| "loss": 0.0049, |
| "step": 17890 |
| }, |
| { |
| "grad_norm": 0.06421680748462677, |
| "learning_rate": 2.9868444593440957e-06, |
| "loss": 0.0055, |
| "step": 17900 |
| }, |
| { |
| "grad_norm": 0.10178251564502716, |
| "learning_rate": 2.9587626599952846e-06, |
| "loss": 0.0068, |
| "step": 17910 |
| }, |
| { |
| "grad_norm": 0.06935402005910873, |
| "learning_rate": 2.930809469593082e-06, |
| "loss": 0.0067, |
| "step": 17920 |
| }, |
| { |
| "grad_norm": 0.09632204473018646, |
| "learning_rate": 2.9029849645604733e-06, |
| "loss": 0.0046, |
| "step": 17930 |
| }, |
| { |
| "grad_norm": 0.0971589982509613, |
| "learning_rate": 2.8752892209685632e-06, |
| "loss": 0.0053, |
| "step": 17940 |
| }, |
| { |
| "grad_norm": 0.06326834857463837, |
| "learning_rate": 2.847722314536483e-06, |
| "loss": 0.0066, |
| "step": 17950 |
| }, |
| { |
| "grad_norm": 0.08715000748634338, |
| "learning_rate": 2.820284320631078e-06, |
| "loss": 0.0057, |
| "step": 17960 |
| }, |
| { |
| "grad_norm": 0.11452065408229828, |
| "learning_rate": 2.792975314266788e-06, |
| "loss": 0.0046, |
| "step": 17970 |
| }, |
| { |
| "grad_norm": 0.10101866722106934, |
| "learning_rate": 2.7657953701054007e-06, |
| "loss": 0.0077, |
| "step": 17980 |
| }, |
| { |
| "grad_norm": 0.06383367627859116, |
| "learning_rate": 2.7387445624558306e-06, |
| "loss": 0.0084, |
| "step": 17990 |
| }, |
| { |
| "grad_norm": 0.0860927402973175, |
| "learning_rate": 2.7118229652739747e-06, |
| "loss": 0.0068, |
| "step": 18000 |
| }, |
| { |
| "grad_norm": 0.09062928706407547, |
| "learning_rate": 2.6850306521624236e-06, |
| "loss": 0.0077, |
| "step": 18010 |
| }, |
| { |
| "grad_norm": 0.0975007563829422, |
| "learning_rate": 2.6583676963703507e-06, |
| "loss": 0.0053, |
| "step": 18020 |
| }, |
| { |
| "grad_norm": 0.056053951382637024, |
| "learning_rate": 2.631834170793268e-06, |
| "loss": 0.0037, |
| "step": 18030 |
| }, |
| { |
| "grad_norm": 0.09389690309762955, |
| "learning_rate": 2.6054301479728036e-06, |
| "loss": 0.0039, |
| "step": 18040 |
| }, |
| { |
| "grad_norm": 0.08401983976364136, |
| "learning_rate": 2.579155700096575e-06, |
| "loss": 0.0057, |
| "step": 18050 |
| }, |
| { |
| "grad_norm": 0.06630973517894745, |
| "learning_rate": 2.5530108989978873e-06, |
| "loss": 0.007, |
| "step": 18060 |
| }, |
| { |
| "grad_norm": 0.08125243335962296, |
| "learning_rate": 2.5269958161556416e-06, |
| "loss": 0.0059, |
| "step": 18070 |
| }, |
| { |
| "grad_norm": 0.0948253944516182, |
| "learning_rate": 2.5011105226940888e-06, |
| "loss": 0.0065, |
| "step": 18080 |
| }, |
| { |
| "grad_norm": 0.06006864085793495, |
| "learning_rate": 2.4753550893826248e-06, |
| "loss": 0.0025, |
| "step": 18090 |
| }, |
| { |
| "grad_norm": 0.06576282531023026, |
| "learning_rate": 2.4497295866356296e-06, |
| "loss": 0.0082, |
| "step": 18100 |
| }, |
| { |
| "grad_norm": 0.0732223391532898, |
| "learning_rate": 2.424234084512228e-06, |
| "loss": 0.0062, |
| "step": 18110 |
| }, |
| { |
| "grad_norm": 0.0863489881157875, |
| "learning_rate": 2.3988686527161687e-06, |
| "loss": 0.0033, |
| "step": 18120 |
| }, |
| { |
| "grad_norm": 0.05276194587349892, |
| "learning_rate": 2.373633360595573e-06, |
| "loss": 0.0056, |
| "step": 18130 |
| }, |
| { |
| "grad_norm": 0.06433786451816559, |
| "learning_rate": 2.3485282771427585e-06, |
| "loss": 0.0048, |
| "step": 18140 |
| }, |
| { |
| "grad_norm": 0.04832305759191513, |
| "learning_rate": 2.3235534709940665e-06, |
| "loss": 0.0062, |
| "step": 18150 |
| }, |
| { |
| "grad_norm": 0.05605657771229744, |
| "learning_rate": 2.2987090104296617e-06, |
| "loss": 0.0047, |
| "step": 18160 |
| }, |
| { |
| "grad_norm": 0.06013290956616402, |
| "learning_rate": 2.273994963373355e-06, |
| "loss": 0.0055, |
| "step": 18170 |
| }, |
| { |
| "grad_norm": 0.054950859397649765, |
| "learning_rate": 2.249411397392409e-06, |
| "loss": 0.0049, |
| "step": 18180 |
| }, |
| { |
| "grad_norm": 0.04367905482649803, |
| "learning_rate": 2.2249583796973506e-06, |
| "loss": 0.0067, |
| "step": 18190 |
| }, |
| { |
| "grad_norm": 0.06013049557805061, |
| "learning_rate": 2.200635977141796e-06, |
| "loss": 0.004, |
| "step": 18200 |
| }, |
| { |
| "grad_norm": 0.05286560580134392, |
| "learning_rate": 2.17644425622226e-06, |
| "loss": 0.0072, |
| "step": 18210 |
| }, |
| { |
| "grad_norm": 0.04693790152668953, |
| "learning_rate": 2.152383283077991e-06, |
| "loss": 0.0033, |
| "step": 18220 |
| }, |
| { |
| "grad_norm": 0.08133646100759506, |
| "learning_rate": 2.128453123490781e-06, |
| "loss": 0.0049, |
| "step": 18230 |
| }, |
| { |
| "grad_norm": 0.11808016151189804, |
| "learning_rate": 2.1046538428847462e-06, |
| "loss": 0.0075, |
| "step": 18240 |
| }, |
| { |
| "grad_norm": 0.13297493755817413, |
| "learning_rate": 2.0809855063262273e-06, |
| "loss": 0.0039, |
| "step": 18250 |
| }, |
| { |
| "grad_norm": 0.06416473537683487, |
| "learning_rate": 2.057448178523558e-06, |
| "loss": 0.0042, |
| "step": 18260 |
| }, |
| { |
| "grad_norm": 0.08213549107313156, |
| "learning_rate": 2.034041923826885e-06, |
| "loss": 0.004, |
| "step": 18270 |
| }, |
| { |
| "grad_norm": 0.08681398630142212, |
| "learning_rate": 2.0107668062280204e-06, |
| "loss": 0.0062, |
| "step": 18280 |
| }, |
| { |
| "grad_norm": 0.04406105354428291, |
| "learning_rate": 1.9876228893602357e-06, |
| "loss": 0.0026, |
| "step": 18290 |
| }, |
| { |
| "grad_norm": 0.09231877326965332, |
| "learning_rate": 1.9646102364981266e-06, |
| "loss": 0.0048, |
| "step": 18300 |
| }, |
| { |
| "grad_norm": 0.09094957262277603, |
| "learning_rate": 1.9417289105574053e-06, |
| "loss": 0.0045, |
| "step": 18310 |
| }, |
| { |
| "grad_norm": 0.07368715852499008, |
| "learning_rate": 1.9189789740947427e-06, |
| "loss": 0.0048, |
| "step": 18320 |
| }, |
| { |
| "grad_norm": 0.05653895437717438, |
| "learning_rate": 1.896360489307597e-06, |
| "loss": 0.0071, |
| "step": 18330 |
| }, |
| { |
| "grad_norm": 0.061923108994960785, |
| "learning_rate": 1.8738735180340362e-06, |
| "loss": 0.0049, |
| "step": 18340 |
| }, |
| { |
| "grad_norm": 0.06822076439857483, |
| "learning_rate": 1.8515181217525824e-06, |
| "loss": 0.0044, |
| "step": 18350 |
| }, |
| { |
| "grad_norm": 0.06153898686170578, |
| "learning_rate": 1.8292943615820457e-06, |
| "loss": 0.0038, |
| "step": 18360 |
| }, |
| { |
| "grad_norm": 0.0566900372505188, |
| "learning_rate": 1.8072022982813296e-06, |
| "loss": 0.0047, |
| "step": 18370 |
| }, |
| { |
| "grad_norm": 0.10939788818359375, |
| "learning_rate": 1.7852419922492925e-06, |
| "loss": 0.0052, |
| "step": 18380 |
| }, |
| { |
| "grad_norm": 0.06535179167985916, |
| "learning_rate": 1.763413503524569e-06, |
| "loss": 0.0033, |
| "step": 18390 |
| }, |
| { |
| "grad_norm": 0.11079724133014679, |
| "learning_rate": 1.7417168917854165e-06, |
| "loss": 0.0075, |
| "step": 18400 |
| }, |
| { |
| "grad_norm": 0.08610212802886963, |
| "learning_rate": 1.720152216349552e-06, |
| "loss": 0.0078, |
| "step": 18410 |
| }, |
| { |
| "grad_norm": 0.05728074163198471, |
| "learning_rate": 1.6987195361739595e-06, |
| "loss": 0.003, |
| "step": 18420 |
| }, |
| { |
| "grad_norm": 0.08600207418203354, |
| "learning_rate": 1.6774189098547832e-06, |
| "loss": 0.0081, |
| "step": 18430 |
| }, |
| { |
| "grad_norm": 0.08694013953208923, |
| "learning_rate": 1.6562503956271069e-06, |
| "loss": 0.0043, |
| "step": 18440 |
| }, |
| { |
| "grad_norm": 0.06410788744688034, |
| "learning_rate": 1.6352140513648417e-06, |
| "loss": 0.0041, |
| "step": 18450 |
| }, |
| { |
| "grad_norm": 0.035524092614650726, |
| "learning_rate": 1.6143099345805712e-06, |
| "loss": 0.0046, |
| "step": 18460 |
| }, |
| { |
| "grad_norm": 0.06390909850597382, |
| "learning_rate": 1.5935381024253293e-06, |
| "loss": 0.0055, |
| "step": 18470 |
| }, |
| { |
| "grad_norm": 0.05754880607128143, |
| "learning_rate": 1.572898611688517e-06, |
| "loss": 0.0066, |
| "step": 18480 |
| }, |
| { |
| "grad_norm": 0.048745110630989075, |
| "learning_rate": 1.5523915187977133e-06, |
| "loss": 0.0034, |
| "step": 18490 |
| }, |
| { |
| "grad_norm": 0.051464542746543884, |
| "learning_rate": 1.532016879818532e-06, |
| "loss": 0.0052, |
| "step": 18500 |
| }, |
| { |
| "grad_norm": 0.04558296129107475, |
| "learning_rate": 1.51177475045447e-06, |
| "loss": 0.0039, |
| "step": 18510 |
| }, |
| { |
| "grad_norm": 0.050785310566425323, |
| "learning_rate": 1.4916651860467035e-06, |
| "loss": 0.0044, |
| "step": 18520 |
| }, |
| { |
| "grad_norm": 0.07850246131420135, |
| "learning_rate": 1.471688241574043e-06, |
| "loss": 0.0051, |
| "step": 18530 |
| }, |
| { |
| "grad_norm": 0.043779075145721436, |
| "learning_rate": 1.451843971652672e-06, |
| "loss": 0.0056, |
| "step": 18540 |
| }, |
| { |
| "grad_norm": 0.09892037510871887, |
| "learning_rate": 1.432132430536076e-06, |
| "loss": 0.005, |
| "step": 18550 |
| }, |
| { |
| "grad_norm": 0.0711660087108612, |
| "learning_rate": 1.412553672114869e-06, |
| "loss": 0.0049, |
| "step": 18560 |
| }, |
| { |
| "grad_norm": 0.040360577404499054, |
| "learning_rate": 1.3931077499166056e-06, |
| "loss": 0.0041, |
| "step": 18570 |
| }, |
| { |
| "grad_norm": 0.10091473162174225, |
| "learning_rate": 1.3737947171057085e-06, |
| "loss": 0.0053, |
| "step": 18580 |
| }, |
| { |
| "grad_norm": 0.06029286980628967, |
| "learning_rate": 1.3546146264832582e-06, |
| "loss": 0.0051, |
| "step": 18590 |
| }, |
| { |
| "grad_norm": 0.06340325623750687, |
| "learning_rate": 1.3355675304869086e-06, |
| "loss": 0.0054, |
| "step": 18600 |
| }, |
| { |
| "grad_norm": 0.059224601835012436, |
| "learning_rate": 1.3166534811906827e-06, |
| "loss": 0.0037, |
| "step": 18610 |
| }, |
| { |
| "grad_norm": 0.06791342049837112, |
| "learning_rate": 1.2978725303048666e-06, |
| "loss": 0.0042, |
| "step": 18620 |
| }, |
| { |
| "grad_norm": 0.0800284743309021, |
| "learning_rate": 1.2792247291758762e-06, |
| "loss": 0.0041, |
| "step": 18630 |
| }, |
| { |
| "grad_norm": 0.08240664750337601, |
| "learning_rate": 1.2607101287860635e-06, |
| "loss": 0.0038, |
| "step": 18640 |
| }, |
| { |
| "grad_norm": 0.047691259533166885, |
| "learning_rate": 1.2423287797536654e-06, |
| "loss": 0.0047, |
| "step": 18650 |
| }, |
| { |
| "grad_norm": 0.06712372601032257, |
| "learning_rate": 1.2240807323325776e-06, |
| "loss": 0.004, |
| "step": 18660 |
| }, |
| { |
| "grad_norm": 0.049060530960559845, |
| "learning_rate": 1.205966036412254e-06, |
| "loss": 0.0031, |
| "step": 18670 |
| }, |
| { |
| "grad_norm": 0.03923507779836655, |
| "learning_rate": 1.1879847415175949e-06, |
| "loss": 0.0057, |
| "step": 18680 |
| }, |
| { |
| "grad_norm": 0.11399254202842712, |
| "learning_rate": 1.1701368968087712e-06, |
| "loss": 0.0053, |
| "step": 18690 |
| }, |
| { |
| "grad_norm": 0.0680961012840271, |
| "learning_rate": 1.1524225510811116e-06, |
| "loss": 0.0046, |
| "step": 18700 |
| }, |
| { |
| "grad_norm": 0.04934504255652428, |
| "learning_rate": 1.1348417527649535e-06, |
| "loss": 0.0041, |
| "step": 18710 |
| }, |
| { |
| "grad_norm": 0.0424002967774868, |
| "learning_rate": 1.1173945499255268e-06, |
| "loss": 0.0048, |
| "step": 18720 |
| }, |
| { |
| "grad_norm": 0.037728987634181976, |
| "learning_rate": 1.1000809902628307e-06, |
| "loss": 0.003, |
| "step": 18730 |
| }, |
| { |
| "grad_norm": 0.03390166163444519, |
| "learning_rate": 1.082901121111468e-06, |
| "loss": 0.0041, |
| "step": 18740 |
| }, |
| { |
| "grad_norm": 0.044707078486680984, |
| "learning_rate": 1.0658549894405456e-06, |
| "loss": 0.0039, |
| "step": 18750 |
| }, |
| { |
| "grad_norm": 0.0984845906496048, |
| "learning_rate": 1.0489426418535342e-06, |
| "loss": 0.0056, |
| "step": 18760 |
| }, |
| { |
| "grad_norm": 0.03741137310862541, |
| "learning_rate": 1.0321641245881474e-06, |
| "loss": 0.0042, |
| "step": 18770 |
| }, |
| { |
| "grad_norm": 0.03353625163435936, |
| "learning_rate": 1.015519483516214e-06, |
| "loss": 0.0029, |
| "step": 18780 |
| }, |
| { |
| "grad_norm": 0.03772426396608353, |
| "learning_rate": 9.990087641435443e-07, |
| "loss": 0.0035, |
| "step": 18790 |
| }, |
| { |
| "grad_norm": 0.10662414133548737, |
| "learning_rate": 9.826320116098132e-07, |
| "loss": 0.0068, |
| "step": 18800 |
| }, |
| { |
| "grad_norm": 0.03115723840892315, |
| "learning_rate": 9.663892706884447e-07, |
| "loss": 0.0024, |
| "step": 18810 |
| }, |
| { |
| "grad_norm": 0.042881883680820465, |
| "learning_rate": 9.502805857864616e-07, |
| "loss": 0.004, |
| "step": 18820 |
| }, |
| { |
| "grad_norm": 0.0726175531744957, |
| "learning_rate": 9.34306000944396e-07, |
| "loss": 0.0039, |
| "step": 18830 |
| }, |
| { |
| "grad_norm": 0.10121230781078339, |
| "learning_rate": 9.184655598361624e-07, |
| "loss": 0.0064, |
| "step": 18840 |
| }, |
| { |
| "grad_norm": 0.07452309876680374, |
| "learning_rate": 9.027593057689076e-07, |
| "loss": 0.0077, |
| "step": 18850 |
| }, |
| { |
| "grad_norm": 0.047714851796627045, |
| "learning_rate": 8.871872816829441e-07, |
| "loss": 0.0043, |
| "step": 18860 |
| }, |
| { |
| "grad_norm": 0.05202336609363556, |
| "learning_rate": 8.717495301515777e-07, |
| "loss": 0.0063, |
| "step": 18870 |
| }, |
| { |
| "grad_norm": 0.026929423213005066, |
| "learning_rate": 8.564460933810415e-07, |
| "loss": 0.0032, |
| "step": 18880 |
| }, |
| { |
| "grad_norm": 0.1266394555568695, |
| "learning_rate": 8.412770132103453e-07, |
| "loss": 0.0078, |
| "step": 18890 |
| }, |
| { |
| "grad_norm": 0.04397597163915634, |
| "learning_rate": 8.262423311111711e-07, |
| "loss": 0.0046, |
| "step": 18900 |
| }, |
| { |
| "grad_norm": 0.04359564557671547, |
| "learning_rate": 8.113420881877665e-07, |
| "loss": 0.0032, |
| "step": 18910 |
| }, |
| { |
| "grad_norm": 0.05606939271092415, |
| "learning_rate": 7.965763251768288e-07, |
| "loss": 0.0047, |
| "step": 18920 |
| }, |
| { |
| "grad_norm": 0.032073911279439926, |
| "learning_rate": 7.819450824473995e-07, |
| "loss": 0.0044, |
| "step": 18930 |
| }, |
| { |
| "grad_norm": 0.052406635135412216, |
| "learning_rate": 7.674484000007198e-07, |
| "loss": 0.0043, |
| "step": 18940 |
| }, |
| { |
| "grad_norm": 0.06158546730875969, |
| "learning_rate": 7.530863174701752e-07, |
| "loss": 0.0052, |
| "step": 18950 |
| }, |
| { |
| "grad_norm": 0.06597442924976349, |
| "learning_rate": 7.38858874121151e-07, |
| "loss": 0.0061, |
| "step": 18960 |
| }, |
| { |
| "grad_norm": 0.04494604840874672, |
| "learning_rate": 7.247661088509328e-07, |
| "loss": 0.0054, |
| "step": 18970 |
| }, |
| { |
| "grad_norm": 0.03968639299273491, |
| "learning_rate": 7.108080601886002e-07, |
| "loss": 0.0034, |
| "step": 18980 |
| }, |
| { |
| "grad_norm": 0.04516436532139778, |
| "learning_rate": 6.969847662949336e-07, |
| "loss": 0.0046, |
| "step": 18990 |
| }, |
| { |
| "grad_norm": 0.03492464870214462, |
| "learning_rate": 6.832962649622798e-07, |
| "loss": 0.0041, |
| "step": 19000 |
| }, |
| { |
| "grad_norm": 0.03456168621778488, |
| "learning_rate": 6.697425936144863e-07, |
| "loss": 0.007, |
| "step": 19010 |
| }, |
| { |
| "grad_norm": 0.059465594589710236, |
| "learning_rate": 6.563237893067731e-07, |
| "loss": 0.0047, |
| "step": 19020 |
| }, |
| { |
| "grad_norm": 0.08866643160581589, |
| "learning_rate": 6.430398887256328e-07, |
| "loss": 0.0032, |
| "step": 19030 |
| }, |
| { |
| "grad_norm": 0.037074267864227295, |
| "learning_rate": 6.298909281887478e-07, |
| "loss": 0.0043, |
| "step": 19040 |
| }, |
| { |
| "grad_norm": 0.03368697687983513, |
| "learning_rate": 6.168769436448673e-07, |
| "loss": 0.0033, |
| "step": 19050 |
| }, |
| { |
| "grad_norm": 0.058988627046346664, |
| "learning_rate": 6.03997970673742e-07, |
| "loss": 0.003, |
| "step": 19060 |
| }, |
| { |
| "grad_norm": 0.04054597020149231, |
| "learning_rate": 5.912540444859782e-07, |
| "loss": 0.0054, |
| "step": 19070 |
| }, |
| { |
| "grad_norm": 0.04888832941651344, |
| "learning_rate": 5.786451999229837e-07, |
| "loss": 0.0043, |
| "step": 19080 |
| }, |
| { |
| "grad_norm": 0.09000938385725021, |
| "learning_rate": 5.661714714568722e-07, |
| "loss": 0.0075, |
| "step": 19090 |
| }, |
| { |
| "grad_norm": 0.08032537251710892, |
| "learning_rate": 5.538328931903259e-07, |
| "loss": 0.0065, |
| "step": 19100 |
| }, |
| { |
| "grad_norm": 0.03778580576181412, |
| "learning_rate": 5.416294988565551e-07, |
| "loss": 0.0038, |
| "step": 19110 |
| }, |
| { |
| "grad_norm": 0.0301471296697855, |
| "learning_rate": 5.29561321819172e-07, |
| "loss": 0.0064, |
| "step": 19120 |
| }, |
| { |
| "grad_norm": 0.04304756596684456, |
| "learning_rate": 5.176283950721061e-07, |
| "loss": 0.0037, |
| "step": 19130 |
| }, |
| { |
| "grad_norm": 0.08168292790651321, |
| "learning_rate": 5.058307512395332e-07, |
| "loss": 0.006, |
| "step": 19140 |
| }, |
| { |
| "grad_norm": 0.03314193710684776, |
| "learning_rate": 4.941684225757526e-07, |
| "loss": 0.0072, |
| "step": 19150 |
| }, |
| { |
| "grad_norm": 0.03319231793284416, |
| "learning_rate": 4.826414409651314e-07, |
| "loss": 0.0051, |
| "step": 19160 |
| }, |
| { |
| "grad_norm": 0.04385793209075928, |
| "learning_rate": 4.712498379219943e-07, |
| "loss": 0.0063, |
| "step": 19170 |
| }, |
| { |
| "grad_norm": 0.08488308638334274, |
| "learning_rate": 4.599936445905506e-07, |
| "loss": 0.0073, |
| "step": 19180 |
| }, |
| { |
| "grad_norm": 0.026116928085684776, |
| "learning_rate": 4.4887289174480594e-07, |
| "loss": 0.0029, |
| "step": 19190 |
| }, |
| { |
| "grad_norm": 0.038966987282037735, |
| "learning_rate": 4.378876097884621e-07, |
| "loss": 0.005, |
| "step": 19200 |
| }, |
| { |
| "grad_norm": 0.055600617080926895, |
| "learning_rate": 4.2703782875487264e-07, |
| "loss": 0.0062, |
| "step": 19210 |
| }, |
| { |
| "grad_norm": 0.029152419418096542, |
| "learning_rate": 4.163235783069208e-07, |
| "loss": 0.0047, |
| "step": 19220 |
| }, |
| { |
| "grad_norm": 0.02960650622844696, |
| "learning_rate": 4.057448877369585e-07, |
| "loss": 0.0037, |
| "step": 19230 |
| }, |
| { |
| "grad_norm": 0.05459560081362724, |
| "learning_rate": 3.9530178596672295e-07, |
| "loss": 0.0035, |
| "step": 19240 |
| }, |
| { |
| "grad_norm": 0.0414460264146328, |
| "learning_rate": 3.849943015472479e-07, |
| "loss": 0.0036, |
| "step": 19250 |
| }, |
| { |
| "grad_norm": 0.05241796001791954, |
| "learning_rate": 3.748224626588137e-07, |
| "loss": 0.0052, |
| "step": 19260 |
| }, |
| { |
| "grad_norm": 0.06404601037502289, |
| "learning_rate": 3.647862971108307e-07, |
| "loss": 0.0092, |
| "step": 19270 |
| }, |
| { |
| "grad_norm": 0.03816540539264679, |
| "learning_rate": 3.5488583234179473e-07, |
| "loss": 0.0034, |
| "step": 19280 |
| }, |
| { |
| "grad_norm": 0.042802464216947556, |
| "learning_rate": 3.4512109541920413e-07, |
| "loss": 0.0067, |
| "step": 19290 |
| }, |
| { |
| "grad_norm": 0.0605001300573349, |
| "learning_rate": 3.354921130394706e-07, |
| "loss": 0.0053, |
| "step": 19300 |
| }, |
| { |
| "grad_norm": 0.08837027847766876, |
| "learning_rate": 3.259989115278639e-07, |
| "loss": 0.0087, |
| "step": 19310 |
| }, |
| { |
| "grad_norm": 0.10174474865198135, |
| "learning_rate": 3.1664151683843403e-07, |
| "loss": 0.0043, |
| "step": 19320 |
| }, |
| { |
| "grad_norm": 0.03807970881462097, |
| "learning_rate": 3.074199545539447e-07, |
| "loss": 0.0027, |
| "step": 19330 |
| }, |
| { |
| "grad_norm": 0.02359367161989212, |
| "learning_rate": 2.983342498857955e-07, |
| "loss": 0.0043, |
| "step": 19340 |
| }, |
| { |
| "grad_norm": 0.034910764545202255, |
| "learning_rate": 2.893844276739499e-07, |
| "loss": 0.0035, |
| "step": 19350 |
| }, |
| { |
| "grad_norm": 0.08157561719417572, |
| "learning_rate": 2.8057051238688514e-07, |
| "loss": 0.0036, |
| "step": 19360 |
| }, |
| { |
| "grad_norm": 0.03558242693543434, |
| "learning_rate": 2.71892528121509e-07, |
| "loss": 0.0056, |
| "step": 19370 |
| }, |
| { |
| "grad_norm": 0.04014763608574867, |
| "learning_rate": 2.633504986030988e-07, |
| "loss": 0.006, |
| "step": 19380 |
| }, |
| { |
| "grad_norm": 0.049834150820970535, |
| "learning_rate": 2.549444471852347e-07, |
| "loss": 0.0035, |
| "step": 19390 |
| }, |
| { |
| "grad_norm": 0.04006095975637436, |
| "learning_rate": 2.4667439684974423e-07, |
| "loss": 0.0067, |
| "step": 19400 |
| }, |
| { |
| "grad_norm": 0.034812238067388535, |
| "learning_rate": 2.3854037020662467e-07, |
| "loss": 0.0041, |
| "step": 19410 |
| }, |
| { |
| "grad_norm": 0.04246693477034569, |
| "learning_rate": 2.3054238949399288e-07, |
| "loss": 0.0056, |
| "step": 19420 |
| }, |
| { |
| "grad_norm": 0.061797671020030975, |
| "learning_rate": 2.2268047657802993e-07, |
| "loss": 0.005, |
| "step": 19430 |
| }, |
| { |
| "grad_norm": 0.029228825122117996, |
| "learning_rate": 2.149546529529034e-07, |
| "loss": 0.005, |
| "step": 19440 |
| }, |
| { |
| "grad_norm": 0.030314629897475243, |
| "learning_rate": 2.0736493974071736e-07, |
| "loss": 0.0042, |
| "step": 19450 |
| }, |
| { |
| "grad_norm": 0.054420553147792816, |
| "learning_rate": 1.9991135769145686e-07, |
| "loss": 0.0044, |
| "step": 19460 |
| }, |
| { |
| "grad_norm": 0.026409875601530075, |
| "learning_rate": 1.9259392718293245e-07, |
| "loss": 0.004, |
| "step": 19470 |
| }, |
| { |
| "grad_norm": 0.026196150109171867, |
| "learning_rate": 1.8541266822072467e-07, |
| "loss": 0.0058, |
| "step": 19480 |
| }, |
| { |
| "grad_norm": 0.07092944532632828, |
| "learning_rate": 1.7836760043811184e-07, |
| "loss": 0.0037, |
| "step": 19490 |
| }, |
| { |
| "grad_norm": 0.10308902710676193, |
| "learning_rate": 1.7145874309604792e-07, |
| "loss": 0.008, |
| "step": 19500 |
| }, |
| { |
| "grad_norm": 0.06316528469324112, |
| "learning_rate": 1.6468611508308474e-07, |
| "loss": 0.0056, |
| "step": 19510 |
| }, |
| { |
| "grad_norm": 0.04925818368792534, |
| "learning_rate": 1.5804973491532204e-07, |
| "loss": 0.0041, |
| "step": 19520 |
| }, |
| { |
| "grad_norm": 0.06427190452814102, |
| "learning_rate": 1.5154962073637424e-07, |
| "loss": 0.0044, |
| "step": 19530 |
| }, |
| { |
| "grad_norm": 0.027820497751235962, |
| "learning_rate": 1.4518579031730372e-07, |
| "loss": 0.0035, |
| "step": 19540 |
| }, |
| { |
| "grad_norm": 0.08228770643472672, |
| "learning_rate": 1.389582610565876e-07, |
| "loss": 0.0038, |
| "step": 19550 |
| }, |
| { |
| "grad_norm": 0.035630207508802414, |
| "learning_rate": 1.3286704998003995e-07, |
| "loss": 0.0029, |
| "step": 19560 |
| }, |
| { |
| "grad_norm": 0.06858760118484497, |
| "learning_rate": 1.2691217374080632e-07, |
| "loss": 0.0059, |
| "step": 19570 |
| }, |
| { |
| "grad_norm": 0.030942440032958984, |
| "learning_rate": 1.2109364861929705e-07, |
| "loss": 0.0028, |
| "step": 19580 |
| }, |
| { |
| "grad_norm": 0.03261679783463478, |
| "learning_rate": 1.1541149052312628e-07, |
| "loss": 0.0048, |
| "step": 19590 |
| }, |
| { |
| "grad_norm": 0.02540130726993084, |
| "learning_rate": 1.0986571498710074e-07, |
| "loss": 0.0046, |
| "step": 19600 |
| }, |
| { |
| "grad_norm": 0.023928454145789146, |
| "learning_rate": 1.0445633717316438e-07, |
| "loss": 0.0136, |
| "step": 19610 |
| }, |
| { |
| "grad_norm": 0.03369027003645897, |
| "learning_rate": 9.918337187034277e-08, |
| "loss": 0.0058, |
| "step": 19620 |
| }, |
| { |
| "grad_norm": 0.03948524594306946, |
| "learning_rate": 9.404683349472643e-08, |
| "loss": 0.0055, |
| "step": 19630 |
| }, |
| { |
| "grad_norm": 0.03470523655414581, |
| "learning_rate": 8.904673608940983e-08, |
| "loss": 0.0056, |
| "step": 19640 |
| }, |
| { |
| "grad_norm": 0.024040725082159042, |
| "learning_rate": 8.418309332447471e-08, |
| "loss": 0.0092, |
| "step": 19650 |
| }, |
| { |
| "grad_norm": 0.023585395887494087, |
| "learning_rate": 7.945591849692902e-08, |
| "loss": 0.0042, |
| "step": 19660 |
| }, |
| { |
| "grad_norm": 0.029787050560116768, |
| "learning_rate": 7.486522453069578e-08, |
| "loss": 0.0058, |
| "step": 19670 |
| }, |
| { |
| "grad_norm": 0.016384869813919067, |
| "learning_rate": 7.041102397655208e-08, |
| "loss": 0.0067, |
| "step": 19680 |
| }, |
| { |
| "grad_norm": 0.054265912622213364, |
| "learning_rate": 6.609332901210685e-08, |
| "loss": 0.0058, |
| "step": 19690 |
| }, |
| { |
| "grad_norm": 0.04399024322628975, |
| "learning_rate": 6.191215144178419e-08, |
| "loss": 0.0055, |
| "step": 19700 |
| }, |
| { |
| "grad_norm": 0.04143797978758812, |
| "learning_rate": 5.786750269675678e-08, |
| "loss": 0.0045, |
| "step": 19710 |
| }, |
| { |
| "grad_norm": 0.03326734900474548, |
| "learning_rate": 5.395939383494031e-08, |
| "loss": 0.0058, |
| "step": 19720 |
| }, |
| { |
| "grad_norm": 0.015909343957901, |
| "learning_rate": 5.018783554095463e-08, |
| "loss": 0.003, |
| "step": 19730 |
| }, |
| { |
| "grad_norm": 0.08597715944051743, |
| "learning_rate": 4.655283812610156e-08, |
| "loss": 0.0037, |
| "step": 19740 |
| }, |
| { |
| "grad_norm": 0.031575653702020645, |
| "learning_rate": 4.305441152831491e-08, |
| "loss": 0.0038, |
| "step": 19750 |
| }, |
| { |
| "grad_norm": 0.0561877079308033, |
| "learning_rate": 3.9692565312171584e-08, |
| "loss": 0.004, |
| "step": 19760 |
| }, |
| { |
| "grad_norm": 0.08059482276439667, |
| "learning_rate": 3.6467308668824975e-08, |
| "loss": 0.0045, |
| "step": 19770 |
| }, |
| { |
| "grad_norm": 0.02128739282488823, |
| "learning_rate": 3.3378650416004964e-08, |
| "loss": 0.0034, |
| "step": 19780 |
| }, |
| { |
| "grad_norm": 0.030546288937330246, |
| "learning_rate": 3.042659899797906e-08, |
| "loss": 0.0049, |
| "step": 19790 |
| }, |
| { |
| "grad_norm": 0.023038653656840324, |
| "learning_rate": 2.76111624855524e-08, |
| "loss": 0.0035, |
| "step": 19800 |
| }, |
| { |
| "grad_norm": 0.03481736406683922, |
| "learning_rate": 2.4932348576017784e-08, |
| "loss": 0.0049, |
| "step": 19810 |
| }, |
| { |
| "grad_norm": 0.028808515518903732, |
| "learning_rate": 2.239016459314458e-08, |
| "loss": 0.0041, |
| "step": 19820 |
| }, |
| { |
| "grad_norm": 0.06456957757472992, |
| "learning_rate": 1.9984617487173174e-08, |
| "loss": 0.004, |
| "step": 19830 |
| }, |
| { |
| "grad_norm": 0.027450889348983765, |
| "learning_rate": 1.7715713834776105e-08, |
| "loss": 0.0039, |
| "step": 19840 |
| }, |
| { |
| "grad_norm": 0.07811158150434494, |
| "learning_rate": 1.5583459839046964e-08, |
| "loss": 0.0059, |
| "step": 19850 |
| }, |
| { |
| "grad_norm": 0.053056903183460236, |
| "learning_rate": 1.3587861329489304e-08, |
| "loss": 0.0042, |
| "step": 19860 |
| }, |
| { |
| "grad_norm": 0.02446102909743786, |
| "learning_rate": 1.1728923761994415e-08, |
| "loss": 0.0023, |
| "step": 19870 |
| }, |
| { |
| "grad_norm": 0.047315411269664764, |
| "learning_rate": 1.0006652218819135e-08, |
| "loss": 0.0042, |
| "step": 19880 |
| }, |
| { |
| "grad_norm": 0.035163022577762604, |
| "learning_rate": 8.421051408596947e-09, |
| "loss": 0.0077, |
| "step": 19890 |
| }, |
| { |
| "grad_norm": 0.020078569650650024, |
| "learning_rate": 6.972125666299123e-09, |
| "loss": 0.0069, |
| "step": 19900 |
| }, |
| { |
| "grad_norm": 0.045009635388851166, |
| "learning_rate": 5.659878953229169e-09, |
| "loss": 0.0045, |
| "step": 19910 |
| }, |
| { |
| "grad_norm": 0.036355242133140564, |
| "learning_rate": 4.48431485701728e-09, |
| "loss": 0.0043, |
| "step": 19920 |
| }, |
| { |
| "grad_norm": 0.044731684029102325, |
| "learning_rate": 3.4454365916203322e-09, |
| "loss": 0.0055, |
| "step": 19930 |
| }, |
| { |
| "grad_norm": 0.02352859079837799, |
| "learning_rate": 2.5432469972830332e-09, |
| "loss": 0.0029, |
| "step": 19940 |
| }, |
| { |
| "grad_norm": 0.04830089956521988, |
| "learning_rate": 1.7777485405601203e-09, |
| "loss": 0.0031, |
| "step": 19950 |
| }, |
| { |
| "grad_norm": 0.01771678775548935, |
| "learning_rate": 1.1489433142941597e-09, |
| "loss": 0.0034, |
| "step": 19960 |
| }, |
| { |
| "grad_norm": 0.021128326654434204, |
| "learning_rate": 6.568330376210963e-10, |
| "loss": 0.0041, |
| "step": 19970 |
| }, |
| { |
| "grad_norm": 0.04460067301988602, |
| "learning_rate": 3.0141905594249787e-10, |
| "loss": 0.0051, |
| "step": 19980 |
| }, |
| { |
| "grad_norm": 0.021370982751250267, |
| "learning_rate": 8.270234094776008e-11, |
| "loss": 0.0049, |
| "step": 19990 |
| }, |
| { |
| "grad_norm": 0.022540045902132988, |
| "learning_rate": 6.834906085551041e-13, |
| "loss": 0.0044, |
| "step": 20000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|