| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.25, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "grad_norm": 1.3594739437103271, |
| "learning_rate": 9e-07, |
| "loss": 1.1913, |
| "step": 10 |
| }, |
| { |
| "grad_norm": 1.0572824478149414, |
| "learning_rate": 1.9e-06, |
| "loss": 1.1841, |
| "step": 20 |
| }, |
| { |
| "grad_norm": 0.5717663764953613, |
| "learning_rate": 2.9e-06, |
| "loss": 1.1508, |
| "step": 30 |
| }, |
| { |
| "grad_norm": 0.3898443877696991, |
| "learning_rate": 3.9e-06, |
| "loss": 1.1205, |
| "step": 40 |
| }, |
| { |
| "grad_norm": 0.28664326667785645, |
| "learning_rate": 4.9000000000000005e-06, |
| "loss": 1.0888, |
| "step": 50 |
| }, |
| { |
| "grad_norm": 0.1729290783405304, |
| "learning_rate": 5.9e-06, |
| "loss": 1.0782, |
| "step": 60 |
| }, |
| { |
| "grad_norm": 0.17002208530902863, |
| "learning_rate": 6.900000000000001e-06, |
| "loss": 1.0691, |
| "step": 70 |
| }, |
| { |
| "grad_norm": 0.2152942717075348, |
| "learning_rate": 7.9e-06, |
| "loss": 1.0562, |
| "step": 80 |
| }, |
| { |
| "grad_norm": 0.19103780388832092, |
| "learning_rate": 8.9e-06, |
| "loss": 1.0479, |
| "step": 90 |
| }, |
| { |
| "grad_norm": 0.3243984878063202, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 1.0372, |
| "step": 100 |
| }, |
| { |
| "grad_norm": 0.1820673942565918, |
| "learning_rate": 1.09e-05, |
| "loss": 1.0272, |
| "step": 110 |
| }, |
| { |
| "grad_norm": 0.21819084882736206, |
| "learning_rate": 1.19e-05, |
| "loss": 1.0236, |
| "step": 120 |
| }, |
| { |
| "grad_norm": 0.20377595722675323, |
| "learning_rate": 1.29e-05, |
| "loss": 1.0237, |
| "step": 130 |
| }, |
| { |
| "grad_norm": 0.20572194457054138, |
| "learning_rate": 1.3900000000000002e-05, |
| "loss": 1.0228, |
| "step": 140 |
| }, |
| { |
| "grad_norm": 0.20157840847969055, |
| "learning_rate": 1.49e-05, |
| "loss": 1.0217, |
| "step": 150 |
| }, |
| { |
| "grad_norm": 0.23459017276763916, |
| "learning_rate": 1.59e-05, |
| "loss": 1.0192, |
| "step": 160 |
| }, |
| { |
| "grad_norm": 0.32469043135643005, |
| "learning_rate": 1.69e-05, |
| "loss": 1.0063, |
| "step": 170 |
| }, |
| { |
| "grad_norm": 0.36008527874946594, |
| "learning_rate": 1.79e-05, |
| "loss": 0.9873, |
| "step": 180 |
| }, |
| { |
| "grad_norm": 0.5633573532104492, |
| "learning_rate": 1.8900000000000002e-05, |
| "loss": 0.9672, |
| "step": 190 |
| }, |
| { |
| "grad_norm": 0.7019369006156921, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 0.9315, |
| "step": 200 |
| }, |
| { |
| "grad_norm": 0.5538105964660645, |
| "learning_rate": 2.09e-05, |
| "loss": 0.8958, |
| "step": 210 |
| }, |
| { |
| "grad_norm": 0.5306029319763184, |
| "learning_rate": 2.19e-05, |
| "loss": 0.8707, |
| "step": 220 |
| }, |
| { |
| "grad_norm": 0.6606974005699158, |
| "learning_rate": 2.29e-05, |
| "loss": 0.8479, |
| "step": 230 |
| }, |
| { |
| "grad_norm": 0.8058410882949829, |
| "learning_rate": 2.39e-05, |
| "loss": 0.8169, |
| "step": 240 |
| }, |
| { |
| "grad_norm": 0.7277475595474243, |
| "learning_rate": 2.4900000000000002e-05, |
| "loss": 0.77, |
| "step": 250 |
| }, |
| { |
| "grad_norm": 0.6617355942726135, |
| "learning_rate": 2.5900000000000003e-05, |
| "loss": 0.7456, |
| "step": 260 |
| }, |
| { |
| "grad_norm": 0.8156651258468628, |
| "learning_rate": 2.6900000000000003e-05, |
| "loss": 0.6984, |
| "step": 270 |
| }, |
| { |
| "grad_norm": 0.7090954780578613, |
| "learning_rate": 2.7900000000000004e-05, |
| "loss": 0.6774, |
| "step": 280 |
| }, |
| { |
| "grad_norm": 0.8667084574699402, |
| "learning_rate": 2.8899999999999998e-05, |
| "loss": 0.6429, |
| "step": 290 |
| }, |
| { |
| "grad_norm": 0.946596622467041, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 0.6052, |
| "step": 300 |
| }, |
| { |
| "grad_norm": 0.8120863437652588, |
| "learning_rate": 3.09e-05, |
| "loss": 0.5681, |
| "step": 310 |
| }, |
| { |
| "grad_norm": 0.9630921483039856, |
| "learning_rate": 3.19e-05, |
| "loss": 0.5267, |
| "step": 320 |
| }, |
| { |
| "grad_norm": 0.9185823798179626, |
| "learning_rate": 3.29e-05, |
| "loss": 0.497, |
| "step": 330 |
| }, |
| { |
| "grad_norm": 0.9909350872039795, |
| "learning_rate": 3.3900000000000004e-05, |
| "loss": 0.4704, |
| "step": 340 |
| }, |
| { |
| "grad_norm": 0.7408623695373535, |
| "learning_rate": 3.49e-05, |
| "loss": 0.4463, |
| "step": 350 |
| }, |
| { |
| "grad_norm": 0.8417967557907104, |
| "learning_rate": 3.59e-05, |
| "loss": 0.4515, |
| "step": 360 |
| }, |
| { |
| "grad_norm": 0.9200495481491089, |
| "learning_rate": 3.69e-05, |
| "loss": 0.417, |
| "step": 370 |
| }, |
| { |
| "grad_norm": 1.146302342414856, |
| "learning_rate": 3.79e-05, |
| "loss": 0.3937, |
| "step": 380 |
| }, |
| { |
| "grad_norm": 1.0057293176651, |
| "learning_rate": 3.8900000000000004e-05, |
| "loss": 0.3773, |
| "step": 390 |
| }, |
| { |
| "grad_norm": 1.112216591835022, |
| "learning_rate": 3.99e-05, |
| "loss": 0.348, |
| "step": 400 |
| }, |
| { |
| "grad_norm": 1.0176512002944946, |
| "learning_rate": 4.09e-05, |
| "loss": 0.3392, |
| "step": 410 |
| }, |
| { |
| "grad_norm": 1.0310163497924805, |
| "learning_rate": 4.19e-05, |
| "loss": 0.3065, |
| "step": 420 |
| }, |
| { |
| "grad_norm": 1.022374153137207, |
| "learning_rate": 4.29e-05, |
| "loss": 0.2808, |
| "step": 430 |
| }, |
| { |
| "grad_norm": 1.368080735206604, |
| "learning_rate": 4.39e-05, |
| "loss": 0.2624, |
| "step": 440 |
| }, |
| { |
| "grad_norm": 1.1092591285705566, |
| "learning_rate": 4.49e-05, |
| "loss": 0.2405, |
| "step": 450 |
| }, |
| { |
| "grad_norm": 0.9738430380821228, |
| "learning_rate": 4.5900000000000004e-05, |
| "loss": 0.2254, |
| "step": 460 |
| }, |
| { |
| "grad_norm": 1.033246636390686, |
| "learning_rate": 4.69e-05, |
| "loss": 0.2162, |
| "step": 470 |
| }, |
| { |
| "grad_norm": 0.9855560064315796, |
| "learning_rate": 4.79e-05, |
| "loss": 0.2088, |
| "step": 480 |
| }, |
| { |
| "grad_norm": 1.0313360691070557, |
| "learning_rate": 4.89e-05, |
| "loss": 0.2188, |
| "step": 490 |
| }, |
| { |
| "grad_norm": 1.100176215171814, |
| "learning_rate": 4.99e-05, |
| "loss": 0.2007, |
| "step": 500 |
| }, |
| { |
| "grad_norm": 1.0784265995025635, |
| "learning_rate": 5.0900000000000004e-05, |
| "loss": 0.2016, |
| "step": 510 |
| }, |
| { |
| "grad_norm": 1.0822303295135498, |
| "learning_rate": 5.19e-05, |
| "loss": 0.1961, |
| "step": 520 |
| }, |
| { |
| "grad_norm": 1.067589282989502, |
| "learning_rate": 5.2900000000000005e-05, |
| "loss": 0.1801, |
| "step": 530 |
| }, |
| { |
| "grad_norm": 1.1917147636413574, |
| "learning_rate": 5.390000000000001e-05, |
| "loss": 0.1705, |
| "step": 540 |
| }, |
| { |
| "grad_norm": 1.3141072988510132, |
| "learning_rate": 5.4900000000000006e-05, |
| "loss": 0.1851, |
| "step": 550 |
| }, |
| { |
| "grad_norm": 1.002855658531189, |
| "learning_rate": 5.590000000000001e-05, |
| "loss": 0.1663, |
| "step": 560 |
| }, |
| { |
| "grad_norm": 1.167011022567749, |
| "learning_rate": 5.69e-05, |
| "loss": 0.1741, |
| "step": 570 |
| }, |
| { |
| "grad_norm": 1.0936863422393799, |
| "learning_rate": 5.79e-05, |
| "loss": 0.1661, |
| "step": 580 |
| }, |
| { |
| "grad_norm": 0.9669778347015381, |
| "learning_rate": 5.89e-05, |
| "loss": 0.1648, |
| "step": 590 |
| }, |
| { |
| "grad_norm": 0.9405611753463745, |
| "learning_rate": 5.99e-05, |
| "loss": 0.1627, |
| "step": 600 |
| }, |
| { |
| "grad_norm": 1.0284767150878906, |
| "learning_rate": 6.09e-05, |
| "loss": 0.1496, |
| "step": 610 |
| }, |
| { |
| "grad_norm": 1.1097605228424072, |
| "learning_rate": 6.19e-05, |
| "loss": 0.1628, |
| "step": 620 |
| }, |
| { |
| "grad_norm": 0.9104214310646057, |
| "learning_rate": 6.29e-05, |
| "loss": 0.1302, |
| "step": 630 |
| }, |
| { |
| "grad_norm": 0.8578998446464539, |
| "learning_rate": 6.390000000000001e-05, |
| "loss": 0.1326, |
| "step": 640 |
| }, |
| { |
| "grad_norm": 1.1287304162979126, |
| "learning_rate": 6.49e-05, |
| "loss": 0.1127, |
| "step": 650 |
| }, |
| { |
| "grad_norm": 0.8655268549919128, |
| "learning_rate": 6.59e-05, |
| "loss": 0.1202, |
| "step": 660 |
| }, |
| { |
| "grad_norm": 0.9937160015106201, |
| "learning_rate": 6.690000000000001e-05, |
| "loss": 0.1198, |
| "step": 670 |
| }, |
| { |
| "grad_norm": 0.9691420197486877, |
| "learning_rate": 6.790000000000001e-05, |
| "loss": 0.1096, |
| "step": 680 |
| }, |
| { |
| "grad_norm": 1.0945252180099487, |
| "learning_rate": 6.89e-05, |
| "loss": 0.105, |
| "step": 690 |
| }, |
| { |
| "grad_norm": 1.0388752222061157, |
| "learning_rate": 6.99e-05, |
| "loss": 0.1027, |
| "step": 700 |
| }, |
| { |
| "grad_norm": 0.881949245929718, |
| "learning_rate": 7.09e-05, |
| "loss": 0.1044, |
| "step": 710 |
| }, |
| { |
| "grad_norm": 0.8678519129753113, |
| "learning_rate": 7.19e-05, |
| "loss": 0.0842, |
| "step": 720 |
| }, |
| { |
| "grad_norm": 1.2314260005950928, |
| "learning_rate": 7.29e-05, |
| "loss": 0.0841, |
| "step": 730 |
| }, |
| { |
| "grad_norm": 0.7337191700935364, |
| "learning_rate": 7.390000000000001e-05, |
| "loss": 0.0771, |
| "step": 740 |
| }, |
| { |
| "grad_norm": 1.194354772567749, |
| "learning_rate": 7.49e-05, |
| "loss": 0.0791, |
| "step": 750 |
| }, |
| { |
| "grad_norm": 1.0703870058059692, |
| "learning_rate": 7.59e-05, |
| "loss": 0.0697, |
| "step": 760 |
| }, |
| { |
| "grad_norm": 0.9820927977561951, |
| "learning_rate": 7.69e-05, |
| "loss": 0.0798, |
| "step": 770 |
| }, |
| { |
| "grad_norm": 1.099042534828186, |
| "learning_rate": 7.790000000000001e-05, |
| "loss": 0.0736, |
| "step": 780 |
| }, |
| { |
| "grad_norm": 0.9056155681610107, |
| "learning_rate": 7.890000000000001e-05, |
| "loss": 0.0756, |
| "step": 790 |
| }, |
| { |
| "grad_norm": 0.8292648792266846, |
| "learning_rate": 7.99e-05, |
| "loss": 0.0796, |
| "step": 800 |
| }, |
| { |
| "grad_norm": 0.9507290720939636, |
| "learning_rate": 8.090000000000001e-05, |
| "loss": 0.0829, |
| "step": 810 |
| }, |
| { |
| "grad_norm": 0.9466397762298584, |
| "learning_rate": 8.19e-05, |
| "loss": 0.0688, |
| "step": 820 |
| }, |
| { |
| "grad_norm": 0.7956731915473938, |
| "learning_rate": 8.29e-05, |
| "loss": 0.0747, |
| "step": 830 |
| }, |
| { |
| "grad_norm": 0.7995853424072266, |
| "learning_rate": 8.39e-05, |
| "loss": 0.0634, |
| "step": 840 |
| }, |
| { |
| "grad_norm": 0.7665478587150574, |
| "learning_rate": 8.49e-05, |
| "loss": 0.0661, |
| "step": 850 |
| }, |
| { |
| "grad_norm": 0.9283880591392517, |
| "learning_rate": 8.59e-05, |
| "loss": 0.0702, |
| "step": 860 |
| }, |
| { |
| "grad_norm": 1.126967191696167, |
| "learning_rate": 8.69e-05, |
| "loss": 0.0716, |
| "step": 870 |
| }, |
| { |
| "grad_norm": 0.8662194609642029, |
| "learning_rate": 8.790000000000001e-05, |
| "loss": 0.0667, |
| "step": 880 |
| }, |
| { |
| "grad_norm": 0.9572857022285461, |
| "learning_rate": 8.89e-05, |
| "loss": 0.0791, |
| "step": 890 |
| }, |
| { |
| "grad_norm": 0.9036967158317566, |
| "learning_rate": 8.99e-05, |
| "loss": 0.0745, |
| "step": 900 |
| }, |
| { |
| "grad_norm": 0.7550048828125, |
| "learning_rate": 9.090000000000001e-05, |
| "loss": 0.0746, |
| "step": 910 |
| }, |
| { |
| "grad_norm": 0.9990408420562744, |
| "learning_rate": 9.190000000000001e-05, |
| "loss": 0.0648, |
| "step": 920 |
| }, |
| { |
| "grad_norm": 0.8286410570144653, |
| "learning_rate": 9.290000000000001e-05, |
| "loss": 0.0697, |
| "step": 930 |
| }, |
| { |
| "grad_norm": 0.9783310890197754, |
| "learning_rate": 9.39e-05, |
| "loss": 0.0749, |
| "step": 940 |
| }, |
| { |
| "grad_norm": 0.9899768233299255, |
| "learning_rate": 9.49e-05, |
| "loss": 0.0722, |
| "step": 950 |
| }, |
| { |
| "grad_norm": 0.7450554370880127, |
| "learning_rate": 9.59e-05, |
| "loss": 0.0599, |
| "step": 960 |
| }, |
| { |
| "grad_norm": 0.7791635394096375, |
| "learning_rate": 9.69e-05, |
| "loss": 0.0654, |
| "step": 970 |
| }, |
| { |
| "grad_norm": 0.7614015340805054, |
| "learning_rate": 9.790000000000001e-05, |
| "loss": 0.0558, |
| "step": 980 |
| }, |
| { |
| "grad_norm": 0.9096309542655945, |
| "learning_rate": 9.89e-05, |
| "loss": 0.0581, |
| "step": 990 |
| }, |
| { |
| "grad_norm": 0.668950080871582, |
| "learning_rate": 9.99e-05, |
| "loss": 0.0652, |
| "step": 1000 |
| }, |
| { |
| "grad_norm": 0.8658283948898315, |
| "learning_rate": 9.999994463727085e-05, |
| "loss": 0.0529, |
| "step": 1010 |
| }, |
| { |
| "grad_norm": 0.7495288848876953, |
| "learning_rate": 9.999975326009292e-05, |
| "loss": 0.059, |
| "step": 1020 |
| }, |
| { |
| "grad_norm": 0.9980189204216003, |
| "learning_rate": 9.999942518549879e-05, |
| "loss": 0.0638, |
| "step": 1030 |
| }, |
| { |
| "grad_norm": 0.7826606035232544, |
| "learning_rate": 9.999896041438544e-05, |
| "loss": 0.0546, |
| "step": 1040 |
| }, |
| { |
| "grad_norm": 0.6360778212547302, |
| "learning_rate": 9.999835894802353e-05, |
| "loss": 0.054, |
| "step": 1050 |
| }, |
| { |
| "grad_norm": 0.7757160067558289, |
| "learning_rate": 9.999762078805743e-05, |
| "loss": 0.0591, |
| "step": 1060 |
| }, |
| { |
| "grad_norm": 0.7390689849853516, |
| "learning_rate": 9.999674593650526e-05, |
| "loss": 0.0595, |
| "step": 1070 |
| }, |
| { |
| "grad_norm": 0.6460424065589905, |
| "learning_rate": 9.99957343957588e-05, |
| "loss": 0.0658, |
| "step": 1080 |
| }, |
| { |
| "grad_norm": 0.8082983493804932, |
| "learning_rate": 9.99945861685836e-05, |
| "loss": 0.0596, |
| "step": 1090 |
| }, |
| { |
| "grad_norm": 0.7415626645088196, |
| "learning_rate": 9.999330125811884e-05, |
| "loss": 0.0483, |
| "step": 1100 |
| }, |
| { |
| "grad_norm": 0.8829818367958069, |
| "learning_rate": 9.999187966787744e-05, |
| "loss": 0.0619, |
| "step": 1110 |
| }, |
| { |
| "grad_norm": 0.8239393830299377, |
| "learning_rate": 9.999032140174595e-05, |
| "loss": 0.0528, |
| "step": 1120 |
| }, |
| { |
| "grad_norm": 0.8529507517814636, |
| "learning_rate": 9.998862646398464e-05, |
| "loss": 0.0654, |
| "step": 1130 |
| }, |
| { |
| "grad_norm": 0.7502208948135376, |
| "learning_rate": 9.998679485922739e-05, |
| "loss": 0.0526, |
| "step": 1140 |
| }, |
| { |
| "grad_norm": 0.6970030069351196, |
| "learning_rate": 9.998482659248174e-05, |
| "loss": 0.0547, |
| "step": 1150 |
| }, |
| { |
| "grad_norm": 0.9376399517059326, |
| "learning_rate": 9.998272166912883e-05, |
| "loss": 0.0557, |
| "step": 1160 |
| }, |
| { |
| "grad_norm": 0.7249330282211304, |
| "learning_rate": 9.998048009492347e-05, |
| "loss": 0.0504, |
| "step": 1170 |
| }, |
| { |
| "grad_norm": 0.8968970775604248, |
| "learning_rate": 9.997810187599403e-05, |
| "loss": 0.0526, |
| "step": 1180 |
| }, |
| { |
| "grad_norm": 0.7676458358764648, |
| "learning_rate": 9.997558701884249e-05, |
| "loss": 0.0506, |
| "step": 1190 |
| }, |
| { |
| "grad_norm": 0.6501711010932922, |
| "learning_rate": 9.997293553034433e-05, |
| "loss": 0.061, |
| "step": 1200 |
| }, |
| { |
| "grad_norm": 0.677116870880127, |
| "learning_rate": 9.997014741774866e-05, |
| "loss": 0.0462, |
| "step": 1210 |
| }, |
| { |
| "grad_norm": 0.8147766590118408, |
| "learning_rate": 9.996722268867803e-05, |
| "loss": 0.0486, |
| "step": 1220 |
| }, |
| { |
| "grad_norm": 0.706069827079773, |
| "learning_rate": 9.996416135112858e-05, |
| "loss": 0.0511, |
| "step": 1230 |
| }, |
| { |
| "grad_norm": 0.6159539818763733, |
| "learning_rate": 9.996096341346988e-05, |
| "loss": 0.0492, |
| "step": 1240 |
| }, |
| { |
| "grad_norm": 0.6369336843490601, |
| "learning_rate": 9.995762888444495e-05, |
| "loss": 0.0479, |
| "step": 1250 |
| }, |
| { |
| "grad_norm": 0.7543830275535583, |
| "learning_rate": 9.995415777317027e-05, |
| "loss": 0.0493, |
| "step": 1260 |
| }, |
| { |
| "grad_norm": 0.7505154609680176, |
| "learning_rate": 9.995055008913574e-05, |
| "loss": 0.053, |
| "step": 1270 |
| }, |
| { |
| "grad_norm": 0.5397493243217468, |
| "learning_rate": 9.994680584220463e-05, |
| "loss": 0.0432, |
| "step": 1280 |
| }, |
| { |
| "grad_norm": 0.6707198619842529, |
| "learning_rate": 9.994292504261355e-05, |
| "loss": 0.0472, |
| "step": 1290 |
| }, |
| { |
| "grad_norm": 0.8792182803153992, |
| "learning_rate": 9.993890770097247e-05, |
| "loss": 0.0453, |
| "step": 1300 |
| }, |
| { |
| "grad_norm": 0.7324561476707458, |
| "learning_rate": 9.993475382826467e-05, |
| "loss": 0.0479, |
| "step": 1310 |
| }, |
| { |
| "grad_norm": 0.8385289907455444, |
| "learning_rate": 9.993046343584664e-05, |
| "loss": 0.0549, |
| "step": 1320 |
| }, |
| { |
| "grad_norm": 0.5908923745155334, |
| "learning_rate": 9.992603653544816e-05, |
| "loss": 0.0483, |
| "step": 1330 |
| }, |
| { |
| "grad_norm": 0.63700932264328, |
| "learning_rate": 9.992147313917222e-05, |
| "loss": 0.0485, |
| "step": 1340 |
| }, |
| { |
| "grad_norm": 0.7525864839553833, |
| "learning_rate": 9.991677325949497e-05, |
| "loss": 0.0469, |
| "step": 1350 |
| }, |
| { |
| "grad_norm": 0.5628486275672913, |
| "learning_rate": 9.991193690926568e-05, |
| "loss": 0.0459, |
| "step": 1360 |
| }, |
| { |
| "grad_norm": 0.795554518699646, |
| "learning_rate": 9.990696410170678e-05, |
| "loss": 0.0467, |
| "step": 1370 |
| }, |
| { |
| "grad_norm": 0.7957155704498291, |
| "learning_rate": 9.990185485041371e-05, |
| "loss": 0.0481, |
| "step": 1380 |
| }, |
| { |
| "grad_norm": 0.5773254632949829, |
| "learning_rate": 9.989660916935498e-05, |
| "loss": 0.0471, |
| "step": 1390 |
| }, |
| { |
| "grad_norm": 0.6150880455970764, |
| "learning_rate": 9.989122707287208e-05, |
| "loss": 0.0426, |
| "step": 1400 |
| }, |
| { |
| "grad_norm": 0.7106145620346069, |
| "learning_rate": 9.988570857567945e-05, |
| "loss": 0.0537, |
| "step": 1410 |
| }, |
| { |
| "grad_norm": 0.9491516947746277, |
| "learning_rate": 9.988005369286446e-05, |
| "loss": 0.0525, |
| "step": 1420 |
| }, |
| { |
| "grad_norm": 0.6860232353210449, |
| "learning_rate": 9.987426243988734e-05, |
| "loss": 0.0429, |
| "step": 1430 |
| }, |
| { |
| "grad_norm": 0.7841853499412537, |
| "learning_rate": 9.986833483258114e-05, |
| "loss": 0.0524, |
| "step": 1440 |
| }, |
| { |
| "grad_norm": 0.6175568103790283, |
| "learning_rate": 9.986227088715173e-05, |
| "loss": 0.0385, |
| "step": 1450 |
| }, |
| { |
| "grad_norm": 0.5932314991950989, |
| "learning_rate": 9.98560706201777e-05, |
| "loss": 0.0408, |
| "step": 1460 |
| }, |
| { |
| "grad_norm": 0.7410153150558472, |
| "learning_rate": 9.984973404861036e-05, |
| "loss": 0.043, |
| "step": 1470 |
| }, |
| { |
| "grad_norm": 0.8330276608467102, |
| "learning_rate": 9.984326118977361e-05, |
| "loss": 0.051, |
| "step": 1480 |
| }, |
| { |
| "grad_norm": 0.7202706933021545, |
| "learning_rate": 9.983665206136406e-05, |
| "loss": 0.0493, |
| "step": 1490 |
| }, |
| { |
| "grad_norm": 0.574433445930481, |
| "learning_rate": 9.982990668145075e-05, |
| "loss": 0.0466, |
| "step": 1500 |
| }, |
| { |
| "grad_norm": 0.7351802587509155, |
| "learning_rate": 9.982302506847534e-05, |
| "loss": 0.057, |
| "step": 1510 |
| }, |
| { |
| "grad_norm": 0.819564163684845, |
| "learning_rate": 9.981600724125189e-05, |
| "loss": 0.0555, |
| "step": 1520 |
| }, |
| { |
| "grad_norm": 0.6065496206283569, |
| "learning_rate": 9.980885321896685e-05, |
| "loss": 0.0509, |
| "step": 1530 |
| }, |
| { |
| "grad_norm": 0.6572223901748657, |
| "learning_rate": 9.980156302117905e-05, |
| "loss": 0.044, |
| "step": 1540 |
| }, |
| { |
| "grad_norm": 0.6978927254676819, |
| "learning_rate": 9.979413666781963e-05, |
| "loss": 0.0465, |
| "step": 1550 |
| }, |
| { |
| "grad_norm": 0.5508580803871155, |
| "learning_rate": 9.978657417919193e-05, |
| "loss": 0.0452, |
| "step": 1560 |
| }, |
| { |
| "grad_norm": 0.5769541263580322, |
| "learning_rate": 9.977887557597153e-05, |
| "loss": 0.0475, |
| "step": 1570 |
| }, |
| { |
| "grad_norm": 0.5610742568969727, |
| "learning_rate": 9.97710408792061e-05, |
| "loss": 0.0469, |
| "step": 1580 |
| }, |
| { |
| "grad_norm": 0.5692776441574097, |
| "learning_rate": 9.976307011031542e-05, |
| "loss": 0.0449, |
| "step": 1590 |
| }, |
| { |
| "grad_norm": 0.5226185321807861, |
| "learning_rate": 9.975496329109126e-05, |
| "loss": 0.0476, |
| "step": 1600 |
| }, |
| { |
| "grad_norm": 0.7111744284629822, |
| "learning_rate": 9.974672044369732e-05, |
| "loss": 0.047, |
| "step": 1610 |
| }, |
| { |
| "grad_norm": 0.514858067035675, |
| "learning_rate": 9.97383415906693e-05, |
| "loss": 0.043, |
| "step": 1620 |
| }, |
| { |
| "grad_norm": 0.5856963396072388, |
| "learning_rate": 9.97298267549146e-05, |
| "loss": 0.0471, |
| "step": 1630 |
| }, |
| { |
| "grad_norm": 0.6191436052322388, |
| "learning_rate": 9.972117595971249e-05, |
| "loss": 0.0422, |
| "step": 1640 |
| }, |
| { |
| "grad_norm": 0.5670982599258423, |
| "learning_rate": 9.971238922871391e-05, |
| "loss": 0.0419, |
| "step": 1650 |
| }, |
| { |
| "grad_norm": 0.7190003991127014, |
| "learning_rate": 9.970346658594142e-05, |
| "loss": 0.0453, |
| "step": 1660 |
| }, |
| { |
| "grad_norm": 0.6552428007125854, |
| "learning_rate": 9.969440805578923e-05, |
| "loss": 0.046, |
| "step": 1670 |
| }, |
| { |
| "grad_norm": 0.578118622303009, |
| "learning_rate": 9.968521366302298e-05, |
| "loss": 0.0392, |
| "step": 1680 |
| }, |
| { |
| "grad_norm": 0.7054030895233154, |
| "learning_rate": 9.967588343277981e-05, |
| "loss": 0.0455, |
| "step": 1690 |
| }, |
| { |
| "grad_norm": 0.6531293392181396, |
| "learning_rate": 9.966641739056818e-05, |
| "loss": 0.0421, |
| "step": 1700 |
| }, |
| { |
| "grad_norm": 0.6111751198768616, |
| "learning_rate": 9.965681556226793e-05, |
| "loss": 0.0517, |
| "step": 1710 |
| }, |
| { |
| "grad_norm": 0.4928556978702545, |
| "learning_rate": 9.964707797413006e-05, |
| "loss": 0.044, |
| "step": 1720 |
| }, |
| { |
| "grad_norm": 0.6597058773040771, |
| "learning_rate": 9.963720465277679e-05, |
| "loss": 0.047, |
| "step": 1730 |
| }, |
| { |
| "grad_norm": 0.6202155351638794, |
| "learning_rate": 9.96271956252014e-05, |
| "loss": 0.0384, |
| "step": 1740 |
| }, |
| { |
| "grad_norm": 0.5262959599494934, |
| "learning_rate": 9.961705091876816e-05, |
| "loss": 0.0425, |
| "step": 1750 |
| }, |
| { |
| "grad_norm": 0.6935763955116272, |
| "learning_rate": 9.960677056121235e-05, |
| "loss": 0.0409, |
| "step": 1760 |
| }, |
| { |
| "grad_norm": 0.6149827837944031, |
| "learning_rate": 9.959635458064005e-05, |
| "loss": 0.0383, |
| "step": 1770 |
| }, |
| { |
| "grad_norm": 0.5901826024055481, |
| "learning_rate": 9.958580300552815e-05, |
| "loss": 0.0426, |
| "step": 1780 |
| }, |
| { |
| "grad_norm": 0.5597098469734192, |
| "learning_rate": 9.957511586472426e-05, |
| "loss": 0.0352, |
| "step": 1790 |
| }, |
| { |
| "grad_norm": 0.5581690073013306, |
| "learning_rate": 9.956429318744662e-05, |
| "loss": 0.0366, |
| "step": 1800 |
| }, |
| { |
| "grad_norm": 0.5969916582107544, |
| "learning_rate": 9.955333500328404e-05, |
| "loss": 0.0355, |
| "step": 1810 |
| }, |
| { |
| "grad_norm": 0.5474916696548462, |
| "learning_rate": 9.95422413421957e-05, |
| "loss": 0.0376, |
| "step": 1820 |
| }, |
| { |
| "grad_norm": 0.5651562809944153, |
| "learning_rate": 9.953101223451133e-05, |
| "loss": 0.0359, |
| "step": 1830 |
| }, |
| { |
| "grad_norm": 0.6243921518325806, |
| "learning_rate": 9.951964771093085e-05, |
| "loss": 0.0373, |
| "step": 1840 |
| }, |
| { |
| "grad_norm": 0.4624647796154022, |
| "learning_rate": 9.950814780252442e-05, |
| "loss": 0.0347, |
| "step": 1850 |
| }, |
| { |
| "grad_norm": 0.5893751382827759, |
| "learning_rate": 9.949651254073236e-05, |
| "loss": 0.0408, |
| "step": 1860 |
| }, |
| { |
| "grad_norm": 0.526287317276001, |
| "learning_rate": 9.948474195736504e-05, |
| "loss": 0.0388, |
| "step": 1870 |
| }, |
| { |
| "grad_norm": 0.6111840605735779, |
| "learning_rate": 9.947283608460277e-05, |
| "loss": 0.0346, |
| "step": 1880 |
| }, |
| { |
| "grad_norm": 0.46461328864097595, |
| "learning_rate": 9.946079495499577e-05, |
| "loss": 0.0411, |
| "step": 1890 |
| }, |
| { |
| "grad_norm": 0.610548734664917, |
| "learning_rate": 9.944861860146401e-05, |
| "loss": 0.0407, |
| "step": 1900 |
| }, |
| { |
| "grad_norm": 0.5339504480361938, |
| "learning_rate": 9.943630705729719e-05, |
| "loss": 0.0398, |
| "step": 1910 |
| }, |
| { |
| "grad_norm": 0.46559029817581177, |
| "learning_rate": 9.942386035615459e-05, |
| "loss": 0.039, |
| "step": 1920 |
| }, |
| { |
| "grad_norm": 0.7745798826217651, |
| "learning_rate": 9.941127853206503e-05, |
| "loss": 0.04, |
| "step": 1930 |
| }, |
| { |
| "grad_norm": 0.5811882019042969, |
| "learning_rate": 9.939856161942673e-05, |
| "loss": 0.0425, |
| "step": 1940 |
| }, |
| { |
| "grad_norm": 0.4856541156768799, |
| "learning_rate": 9.938570965300724e-05, |
| "loss": 0.0363, |
| "step": 1950 |
| }, |
| { |
| "grad_norm": 0.5952467918395996, |
| "learning_rate": 9.937272266794335e-05, |
| "loss": 0.0439, |
| "step": 1960 |
| }, |
| { |
| "grad_norm": 0.5669976472854614, |
| "learning_rate": 9.935960069974096e-05, |
| "loss": 0.05, |
| "step": 1970 |
| }, |
| { |
| "grad_norm": 0.5959198474884033, |
| "learning_rate": 9.934634378427506e-05, |
| "loss": 0.0382, |
| "step": 1980 |
| }, |
| { |
| "grad_norm": 0.520875096321106, |
| "learning_rate": 9.933295195778954e-05, |
| "loss": 0.0386, |
| "step": 1990 |
| }, |
| { |
| "grad_norm": 0.4351758360862732, |
| "learning_rate": 9.931942525689715e-05, |
| "loss": 0.0488, |
| "step": 2000 |
| }, |
| { |
| "grad_norm": 0.6345981359481812, |
| "learning_rate": 9.930576371857936e-05, |
| "loss": 0.0391, |
| "step": 2010 |
| }, |
| { |
| "grad_norm": 0.6230748295783997, |
| "learning_rate": 9.929196738018629e-05, |
| "loss": 0.0388, |
| "step": 2020 |
| }, |
| { |
| "grad_norm": 0.5425089001655579, |
| "learning_rate": 9.927803627943662e-05, |
| "loss": 0.0395, |
| "step": 2030 |
| }, |
| { |
| "grad_norm": 0.49332770705223083, |
| "learning_rate": 9.926397045441744e-05, |
| "loss": 0.039, |
| "step": 2040 |
| }, |
| { |
| "grad_norm": 0.6731558442115784, |
| "learning_rate": 9.924976994358417e-05, |
| "loss": 0.0427, |
| "step": 2050 |
| }, |
| { |
| "grad_norm": 0.5310463309288025, |
| "learning_rate": 9.923543478576048e-05, |
| "loss": 0.0474, |
| "step": 2060 |
| }, |
| { |
| "grad_norm": 0.548930823802948, |
| "learning_rate": 9.922096502013813e-05, |
| "loss": 0.0423, |
| "step": 2070 |
| }, |
| { |
| "grad_norm": 0.5744786262512207, |
| "learning_rate": 9.92063606862769e-05, |
| "loss": 0.0372, |
| "step": 2080 |
| }, |
| { |
| "grad_norm": 0.6390929222106934, |
| "learning_rate": 9.919162182410453e-05, |
| "loss": 0.0368, |
| "step": 2090 |
| }, |
| { |
| "grad_norm": 0.5252511501312256, |
| "learning_rate": 9.917674847391645e-05, |
| "loss": 0.038, |
| "step": 2100 |
| }, |
| { |
| "grad_norm": 0.5656434297561646, |
| "learning_rate": 9.916174067637584e-05, |
| "loss": 0.0333, |
| "step": 2110 |
| }, |
| { |
| "grad_norm": 0.5288258790969849, |
| "learning_rate": 9.914659847251348e-05, |
| "loss": 0.0406, |
| "step": 2120 |
| }, |
| { |
| "grad_norm": 0.5040147304534912, |
| "learning_rate": 9.913132190372753e-05, |
| "loss": 0.0369, |
| "step": 2130 |
| }, |
| { |
| "grad_norm": 0.5128138661384583, |
| "learning_rate": 9.911591101178359e-05, |
| "loss": 0.0368, |
| "step": 2140 |
| }, |
| { |
| "grad_norm": 0.4942684769630432, |
| "learning_rate": 9.910036583881443e-05, |
| "loss": 0.0334, |
| "step": 2150 |
| }, |
| { |
| "grad_norm": 0.5318565368652344, |
| "learning_rate": 9.908468642731995e-05, |
| "loss": 0.0325, |
| "step": 2160 |
| }, |
| { |
| "grad_norm": 0.5772367715835571, |
| "learning_rate": 9.906887282016707e-05, |
| "loss": 0.0344, |
| "step": 2170 |
| }, |
| { |
| "grad_norm": 0.5957911014556885, |
| "learning_rate": 9.90529250605896e-05, |
| "loss": 0.0368, |
| "step": 2180 |
| }, |
| { |
| "grad_norm": 0.6259480714797974, |
| "learning_rate": 9.903684319218809e-05, |
| "loss": 0.0375, |
| "step": 2190 |
| }, |
| { |
| "grad_norm": 0.691277801990509, |
| "learning_rate": 9.902062725892976e-05, |
| "loss": 0.0402, |
| "step": 2200 |
| }, |
| { |
| "grad_norm": 0.624859094619751, |
| "learning_rate": 9.900427730514834e-05, |
| "loss": 0.0316, |
| "step": 2210 |
| }, |
| { |
| "grad_norm": 0.46915674209594727, |
| "learning_rate": 9.8987793375544e-05, |
| "loss": 0.0352, |
| "step": 2220 |
| }, |
| { |
| "grad_norm": 0.5559591054916382, |
| "learning_rate": 9.897117551518318e-05, |
| "loss": 0.0353, |
| "step": 2230 |
| }, |
| { |
| "grad_norm": 0.47577548027038574, |
| "learning_rate": 9.895442376949844e-05, |
| "loss": 0.0395, |
| "step": 2240 |
| }, |
| { |
| "grad_norm": 0.7231595516204834, |
| "learning_rate": 9.893753818428845e-05, |
| "loss": 0.0442, |
| "step": 2250 |
| }, |
| { |
| "grad_norm": 0.4607575535774231, |
| "learning_rate": 9.892051880571773e-05, |
| "loss": 0.037, |
| "step": 2260 |
| }, |
| { |
| "grad_norm": 0.4901242256164551, |
| "learning_rate": 9.890336568031663e-05, |
| "loss": 0.0342, |
| "step": 2270 |
| }, |
| { |
| "grad_norm": 0.46413323283195496, |
| "learning_rate": 9.888607885498113e-05, |
| "loss": 0.0386, |
| "step": 2280 |
| }, |
| { |
| "grad_norm": 0.5028432607650757, |
| "learning_rate": 9.886865837697275e-05, |
| "loss": 0.0384, |
| "step": 2290 |
| }, |
| { |
| "grad_norm": 0.6079827547073364, |
| "learning_rate": 9.88511042939184e-05, |
| "loss": 0.0416, |
| "step": 2300 |
| }, |
| { |
| "grad_norm": 0.6189248561859131, |
| "learning_rate": 9.883341665381028e-05, |
| "loss": 0.0372, |
| "step": 2310 |
| }, |
| { |
| "grad_norm": 0.569456160068512, |
| "learning_rate": 9.881559550500575e-05, |
| "loss": 0.0317, |
| "step": 2320 |
| }, |
| { |
| "grad_norm": 0.5782006978988647, |
| "learning_rate": 9.879764089622712e-05, |
| "loss": 0.0363, |
| "step": 2330 |
| }, |
| { |
| "grad_norm": 0.6612024307250977, |
| "learning_rate": 9.87795528765616e-05, |
| "loss": 0.0386, |
| "step": 2340 |
| }, |
| { |
| "grad_norm": 0.45619797706604004, |
| "learning_rate": 9.876133149546118e-05, |
| "loss": 0.0385, |
| "step": 2350 |
| }, |
| { |
| "grad_norm": 0.4743977189064026, |
| "learning_rate": 9.874297680274238e-05, |
| "loss": 0.0384, |
| "step": 2360 |
| }, |
| { |
| "grad_norm": 0.5303918719291687, |
| "learning_rate": 9.872448884858624e-05, |
| "loss": 0.0364, |
| "step": 2370 |
| }, |
| { |
| "grad_norm": 0.5923212766647339, |
| "learning_rate": 9.870586768353815e-05, |
| "loss": 0.0366, |
| "step": 2380 |
| }, |
| { |
| "grad_norm": 0.5156052112579346, |
| "learning_rate": 9.868711335850764e-05, |
| "loss": 0.0412, |
| "step": 2390 |
| }, |
| { |
| "grad_norm": 0.4702778458595276, |
| "learning_rate": 9.866822592476833e-05, |
| "loss": 0.0353, |
| "step": 2400 |
| }, |
| { |
| "grad_norm": 0.4955006241798401, |
| "learning_rate": 9.86492054339577e-05, |
| "loss": 0.0356, |
| "step": 2410 |
| }, |
| { |
| "grad_norm": 0.4722374677658081, |
| "learning_rate": 9.863005193807711e-05, |
| "loss": 0.0328, |
| "step": 2420 |
| }, |
| { |
| "grad_norm": 0.5261074900627136, |
| "learning_rate": 9.861076548949143e-05, |
| "loss": 0.0314, |
| "step": 2430 |
| }, |
| { |
| "grad_norm": 0.43109720945358276, |
| "learning_rate": 9.859134614092912e-05, |
| "loss": 0.0306, |
| "step": 2440 |
| }, |
| { |
| "grad_norm": 0.5150691270828247, |
| "learning_rate": 9.857179394548191e-05, |
| "loss": 0.0331, |
| "step": 2450 |
| }, |
| { |
| "grad_norm": 0.413881778717041, |
| "learning_rate": 9.855210895660477e-05, |
| "loss": 0.0313, |
| "step": 2460 |
| }, |
| { |
| "grad_norm": 0.5778813362121582, |
| "learning_rate": 9.853229122811568e-05, |
| "loss": 0.0327, |
| "step": 2470 |
| }, |
| { |
| "grad_norm": 0.5499809980392456, |
| "learning_rate": 9.851234081419559e-05, |
| "loss": 0.0371, |
| "step": 2480 |
| }, |
| { |
| "grad_norm": 0.533755898475647, |
| "learning_rate": 9.849225776938814e-05, |
| "loss": 0.0347, |
| "step": 2490 |
| }, |
| { |
| "grad_norm": 0.5036794543266296, |
| "learning_rate": 9.847204214859964e-05, |
| "loss": 0.0365, |
| "step": 2500 |
| }, |
| { |
| "grad_norm": 0.4547636806964874, |
| "learning_rate": 9.845169400709879e-05, |
| "loss": 0.0284, |
| "step": 2510 |
| }, |
| { |
| "grad_norm": 0.4148177206516266, |
| "learning_rate": 9.843121340051664e-05, |
| "loss": 0.0338, |
| "step": 2520 |
| }, |
| { |
| "grad_norm": 0.4307814836502075, |
| "learning_rate": 9.841060038484641e-05, |
| "loss": 0.0401, |
| "step": 2530 |
| }, |
| { |
| "grad_norm": 0.5055217146873474, |
| "learning_rate": 9.838985501644328e-05, |
| "loss": 0.0413, |
| "step": 2540 |
| }, |
| { |
| "grad_norm": 0.5252987742424011, |
| "learning_rate": 9.83689773520243e-05, |
| "loss": 0.0334, |
| "step": 2550 |
| }, |
| { |
| "grad_norm": 0.5325053334236145, |
| "learning_rate": 9.834796744866819e-05, |
| "loss": 0.0339, |
| "step": 2560 |
| }, |
| { |
| "grad_norm": 0.5485632419586182, |
| "learning_rate": 9.832682536381525e-05, |
| "loss": 0.0354, |
| "step": 2570 |
| }, |
| { |
| "grad_norm": 0.5406777262687683, |
| "learning_rate": 9.830555115526711e-05, |
| "loss": 0.0368, |
| "step": 2580 |
| }, |
| { |
| "grad_norm": 0.37698280811309814, |
| "learning_rate": 9.828414488118667e-05, |
| "loss": 0.0336, |
| "step": 2590 |
| }, |
| { |
| "grad_norm": 0.5253736972808838, |
| "learning_rate": 9.826260660009785e-05, |
| "loss": 0.0337, |
| "step": 2600 |
| }, |
| { |
| "grad_norm": 0.482319176197052, |
| "learning_rate": 9.824093637088547e-05, |
| "loss": 0.0299, |
| "step": 2610 |
| }, |
| { |
| "grad_norm": 0.43845584988594055, |
| "learning_rate": 9.821913425279514e-05, |
| "loss": 0.032, |
| "step": 2620 |
| }, |
| { |
| "grad_norm": 0.4526597559452057, |
| "learning_rate": 9.8197200305433e-05, |
| "loss": 0.034, |
| "step": 2630 |
| }, |
| { |
| "grad_norm": 0.45589521527290344, |
| "learning_rate": 9.817513458876564e-05, |
| "loss": 0.0464, |
| "step": 2640 |
| }, |
| { |
| "grad_norm": 0.5381149649620056, |
| "learning_rate": 9.815293716311987e-05, |
| "loss": 0.0334, |
| "step": 2650 |
| }, |
| { |
| "grad_norm": 0.5279123187065125, |
| "learning_rate": 9.813060808918262e-05, |
| "loss": 0.0318, |
| "step": 2660 |
| }, |
| { |
| "grad_norm": 0.3532435894012451, |
| "learning_rate": 9.810814742800069e-05, |
| "loss": 0.0285, |
| "step": 2670 |
| }, |
| { |
| "grad_norm": 0.3765302896499634, |
| "learning_rate": 9.808555524098074e-05, |
| "loss": 0.0289, |
| "step": 2680 |
| }, |
| { |
| "grad_norm": 0.46037837862968445, |
| "learning_rate": 9.806283158988887e-05, |
| "loss": 0.0291, |
| "step": 2690 |
| }, |
| { |
| "grad_norm": 0.483735591173172, |
| "learning_rate": 9.803997653685072e-05, |
| "loss": 0.0392, |
| "step": 2700 |
| }, |
| { |
| "grad_norm": 0.45865148305892944, |
| "learning_rate": 9.801699014435112e-05, |
| "loss": 0.0393, |
| "step": 2710 |
| }, |
| { |
| "grad_norm": 0.4620376229286194, |
| "learning_rate": 9.799387247523398e-05, |
| "loss": 0.0352, |
| "step": 2720 |
| }, |
| { |
| "grad_norm": 0.41832435131073, |
| "learning_rate": 9.797062359270215e-05, |
| "loss": 0.0319, |
| "step": 2730 |
| }, |
| { |
| "grad_norm": 0.4439375400543213, |
| "learning_rate": 9.794724356031715e-05, |
| "loss": 0.0307, |
| "step": 2740 |
| }, |
| { |
| "grad_norm": 0.5037664771080017, |
| "learning_rate": 9.792373244199913e-05, |
| "loss": 0.0306, |
| "step": 2750 |
| }, |
| { |
| "grad_norm": 0.378164678812027, |
| "learning_rate": 9.790009030202658e-05, |
| "loss": 0.0313, |
| "step": 2760 |
| }, |
| { |
| "grad_norm": 0.5053073763847351, |
| "learning_rate": 9.78763172050362e-05, |
| "loss": 0.0295, |
| "step": 2770 |
| }, |
| { |
| "grad_norm": 0.4680381119251251, |
| "learning_rate": 9.785241321602274e-05, |
| "loss": 0.0277, |
| "step": 2780 |
| }, |
| { |
| "grad_norm": 0.4624013304710388, |
| "learning_rate": 9.782837840033879e-05, |
| "loss": 0.0288, |
| "step": 2790 |
| }, |
| { |
| "grad_norm": 0.5074241757392883, |
| "learning_rate": 9.780421282369461e-05, |
| "loss": 0.0292, |
| "step": 2800 |
| }, |
| { |
| "grad_norm": 0.4835506081581116, |
| "learning_rate": 9.777991655215797e-05, |
| "loss": 0.0294, |
| "step": 2810 |
| }, |
| { |
| "grad_norm": 0.5738292336463928, |
| "learning_rate": 9.775548965215394e-05, |
| "loss": 0.0295, |
| "step": 2820 |
| }, |
| { |
| "grad_norm": 0.5334445238113403, |
| "learning_rate": 9.773093219046474e-05, |
| "loss": 0.0293, |
| "step": 2830 |
| }, |
| { |
| "grad_norm": 0.4011390507221222, |
| "learning_rate": 9.770624423422954e-05, |
| "loss": 0.0291, |
| "step": 2840 |
| }, |
| { |
| "grad_norm": 0.41171419620513916, |
| "learning_rate": 9.768142585094426e-05, |
| "loss": 0.0302, |
| "step": 2850 |
| }, |
| { |
| "grad_norm": 0.46391263604164124, |
| "learning_rate": 9.765647710846142e-05, |
| "loss": 0.0405, |
| "step": 2860 |
| }, |
| { |
| "grad_norm": 0.5071845650672913, |
| "learning_rate": 9.763139807498991e-05, |
| "loss": 0.0285, |
| "step": 2870 |
| }, |
| { |
| "grad_norm": 0.4814237058162689, |
| "learning_rate": 9.760618881909487e-05, |
| "loss": 0.0317, |
| "step": 2880 |
| }, |
| { |
| "grad_norm": 0.5396919846534729, |
| "learning_rate": 9.758084940969744e-05, |
| "loss": 0.0316, |
| "step": 2890 |
| }, |
| { |
| "grad_norm": 0.5363779664039612, |
| "learning_rate": 9.755537991607459e-05, |
| "loss": 0.027, |
| "step": 2900 |
| }, |
| { |
| "grad_norm": 0.505138099193573, |
| "learning_rate": 9.752978040785895e-05, |
| "loss": 0.0354, |
| "step": 2910 |
| }, |
| { |
| "grad_norm": 0.5476271510124207, |
| "learning_rate": 9.750405095503859e-05, |
| "loss": 0.0299, |
| "step": 2920 |
| }, |
| { |
| "grad_norm": 0.5189036130905151, |
| "learning_rate": 9.747819162795686e-05, |
| "loss": 0.0331, |
| "step": 2930 |
| }, |
| { |
| "grad_norm": 0.45717042684555054, |
| "learning_rate": 9.745220249731217e-05, |
| "loss": 0.026, |
| "step": 2940 |
| }, |
| { |
| "grad_norm": 0.4337165355682373, |
| "learning_rate": 9.742608363415781e-05, |
| "loss": 0.0272, |
| "step": 2950 |
| }, |
| { |
| "grad_norm": 0.4811023771762848, |
| "learning_rate": 9.739983510990176e-05, |
| "loss": 0.0288, |
| "step": 2960 |
| }, |
| { |
| "grad_norm": 0.3455168902873993, |
| "learning_rate": 9.737345699630647e-05, |
| "loss": 0.0298, |
| "step": 2970 |
| }, |
| { |
| "grad_norm": 0.5057815313339233, |
| "learning_rate": 9.734694936548869e-05, |
| "loss": 0.0332, |
| "step": 2980 |
| }, |
| { |
| "grad_norm": 0.38619765639305115, |
| "learning_rate": 9.732031228991932e-05, |
| "loss": 0.0256, |
| "step": 2990 |
| }, |
| { |
| "grad_norm": 0.3297816514968872, |
| "learning_rate": 9.729354584242302e-05, |
| "loss": 0.0355, |
| "step": 3000 |
| }, |
| { |
| "grad_norm": 0.5174765586853027, |
| "learning_rate": 9.726665009617832e-05, |
| "loss": 0.0309, |
| "step": 3010 |
| }, |
| { |
| "grad_norm": 0.43245866894721985, |
| "learning_rate": 9.723962512471714e-05, |
| "loss": 0.033, |
| "step": 3020 |
| }, |
| { |
| "grad_norm": 0.516598105430603, |
| "learning_rate": 9.72124710019247e-05, |
| "loss": 0.03, |
| "step": 3030 |
| }, |
| { |
| "grad_norm": 0.48712822794914246, |
| "learning_rate": 9.718518780203934e-05, |
| "loss": 0.0322, |
| "step": 3040 |
| }, |
| { |
| "grad_norm": 0.3674415946006775, |
| "learning_rate": 9.715777559965228e-05, |
| "loss": 0.0319, |
| "step": 3050 |
| }, |
| { |
| "grad_norm": 0.4218079149723053, |
| "learning_rate": 9.713023446970746e-05, |
| "loss": 0.0255, |
| "step": 3060 |
| }, |
| { |
| "grad_norm": 0.4967867136001587, |
| "learning_rate": 9.710256448750126e-05, |
| "loss": 0.0311, |
| "step": 3070 |
| }, |
| { |
| "grad_norm": 0.497653067111969, |
| "learning_rate": 9.707476572868235e-05, |
| "loss": 0.0341, |
| "step": 3080 |
| }, |
| { |
| "grad_norm": 0.4222137928009033, |
| "learning_rate": 9.704683826925149e-05, |
| "loss": 0.0273, |
| "step": 3090 |
| }, |
| { |
| "grad_norm": 0.37705838680267334, |
| "learning_rate": 9.701878218556129e-05, |
| "loss": 0.036, |
| "step": 3100 |
| }, |
| { |
| "grad_norm": 0.5626199841499329, |
| "learning_rate": 9.699059755431598e-05, |
| "loss": 0.0331, |
| "step": 3110 |
| }, |
| { |
| "grad_norm": 0.46293774247169495, |
| "learning_rate": 9.696228445257132e-05, |
| "loss": 0.0277, |
| "step": 3120 |
| }, |
| { |
| "grad_norm": 0.42764750123023987, |
| "learning_rate": 9.693384295773419e-05, |
| "loss": 0.0327, |
| "step": 3130 |
| }, |
| { |
| "grad_norm": 0.4717363715171814, |
| "learning_rate": 9.690527314756259e-05, |
| "loss": 0.0339, |
| "step": 3140 |
| }, |
| { |
| "grad_norm": 0.458967387676239, |
| "learning_rate": 9.687657510016527e-05, |
| "loss": 0.0261, |
| "step": 3150 |
| }, |
| { |
| "grad_norm": 0.45871081948280334, |
| "learning_rate": 9.684774889400161e-05, |
| "loss": 0.0309, |
| "step": 3160 |
| }, |
| { |
| "grad_norm": 0.5132860541343689, |
| "learning_rate": 9.681879460788135e-05, |
| "loss": 0.0264, |
| "step": 3170 |
| }, |
| { |
| "grad_norm": 0.4729975461959839, |
| "learning_rate": 9.67897123209644e-05, |
| "loss": 0.0315, |
| "step": 3180 |
| }, |
| { |
| "grad_norm": 0.4921012818813324, |
| "learning_rate": 9.676050211276062e-05, |
| "loss": 0.035, |
| "step": 3190 |
| }, |
| { |
| "grad_norm": 0.4574073255062103, |
| "learning_rate": 9.673116406312962e-05, |
| "loss": 0.0284, |
| "step": 3200 |
| }, |
| { |
| "grad_norm": 0.48541590571403503, |
| "learning_rate": 9.67016982522805e-05, |
| "loss": 0.028, |
| "step": 3210 |
| }, |
| { |
| "grad_norm": 0.4924331307411194, |
| "learning_rate": 9.667210476077164e-05, |
| "loss": 0.028, |
| "step": 3220 |
| }, |
| { |
| "grad_norm": 0.5730510950088501, |
| "learning_rate": 9.664238366951055e-05, |
| "loss": 0.0288, |
| "step": 3230 |
| }, |
| { |
| "grad_norm": 0.5551027059555054, |
| "learning_rate": 9.661253505975355e-05, |
| "loss": 0.0269, |
| "step": 3240 |
| }, |
| { |
| "grad_norm": 0.4366356134414673, |
| "learning_rate": 9.658255901310557e-05, |
| "loss": 0.0301, |
| "step": 3250 |
| }, |
| { |
| "grad_norm": 0.5327138304710388, |
| "learning_rate": 9.655245561152e-05, |
| "loss": 0.0278, |
| "step": 3260 |
| }, |
| { |
| "grad_norm": 0.4516207277774811, |
| "learning_rate": 9.65222249372984e-05, |
| "loss": 0.0266, |
| "step": 3270 |
| }, |
| { |
| "grad_norm": 0.4709407687187195, |
| "learning_rate": 9.649186707309026e-05, |
| "loss": 0.0325, |
| "step": 3280 |
| }, |
| { |
| "grad_norm": 0.36673372983932495, |
| "learning_rate": 9.646138210189283e-05, |
| "loss": 0.0285, |
| "step": 3290 |
| }, |
| { |
| "grad_norm": 0.5308244824409485, |
| "learning_rate": 9.643077010705087e-05, |
| "loss": 0.0281, |
| "step": 3300 |
| }, |
| { |
| "grad_norm": 0.45568153262138367, |
| "learning_rate": 9.640003117225637e-05, |
| "loss": 0.0286, |
| "step": 3310 |
| }, |
| { |
| "grad_norm": 0.4082559049129486, |
| "learning_rate": 9.636916538154846e-05, |
| "loss": 0.0241, |
| "step": 3320 |
| }, |
| { |
| "grad_norm": 0.48012563586235046, |
| "learning_rate": 9.633817281931296e-05, |
| "loss": 0.0297, |
| "step": 3330 |
| }, |
| { |
| "grad_norm": 0.4177444875240326, |
| "learning_rate": 9.630705357028242e-05, |
| "loss": 0.032, |
| "step": 3340 |
| }, |
| { |
| "grad_norm": 0.48793429136276245, |
| "learning_rate": 9.627580771953563e-05, |
| "loss": 0.0285, |
| "step": 3350 |
| }, |
| { |
| "grad_norm": 0.4371464252471924, |
| "learning_rate": 9.624443535249759e-05, |
| "loss": 0.0275, |
| "step": 3360 |
| }, |
| { |
| "grad_norm": 0.4983312487602234, |
| "learning_rate": 9.621293655493913e-05, |
| "loss": 0.0254, |
| "step": 3370 |
| }, |
| { |
| "grad_norm": 0.5624396204948425, |
| "learning_rate": 9.618131141297675e-05, |
| "loss": 0.027, |
| "step": 3380 |
| }, |
| { |
| "grad_norm": 0.43570947647094727, |
| "learning_rate": 9.614956001307242e-05, |
| "loss": 0.0301, |
| "step": 3390 |
| }, |
| { |
| "grad_norm": 0.4448493719100952, |
| "learning_rate": 9.611768244203321e-05, |
| "loss": 0.0351, |
| "step": 3400 |
| }, |
| { |
| "grad_norm": 0.4213621914386749, |
| "learning_rate": 9.60856787870112e-05, |
| "loss": 0.0292, |
| "step": 3410 |
| }, |
| { |
| "grad_norm": 0.4154338836669922, |
| "learning_rate": 9.605354913550318e-05, |
| "loss": 0.0262, |
| "step": 3420 |
| }, |
| { |
| "grad_norm": 0.45102718472480774, |
| "learning_rate": 9.602129357535037e-05, |
| "loss": 0.0313, |
| "step": 3430 |
| }, |
| { |
| "grad_norm": 0.38145503401756287, |
| "learning_rate": 9.598891219473825e-05, |
| "loss": 0.027, |
| "step": 3440 |
| }, |
| { |
| "grad_norm": 0.41790488362312317, |
| "learning_rate": 9.595640508219625e-05, |
| "loss": 0.0291, |
| "step": 3450 |
| }, |
| { |
| "grad_norm": 0.4644753336906433, |
| "learning_rate": 9.592377232659761e-05, |
| "loss": 0.0249, |
| "step": 3460 |
| }, |
| { |
| "grad_norm": 0.4731713533401489, |
| "learning_rate": 9.589101401715904e-05, |
| "loss": 0.0263, |
| "step": 3470 |
| }, |
| { |
| "grad_norm": 0.42398542165756226, |
| "learning_rate": 9.585813024344045e-05, |
| "loss": 0.026, |
| "step": 3480 |
| }, |
| { |
| "grad_norm": 0.5419644117355347, |
| "learning_rate": 9.58251210953449e-05, |
| "loss": 0.0296, |
| "step": 3490 |
| }, |
| { |
| "grad_norm": 0.463670939207077, |
| "learning_rate": 9.579198666311809e-05, |
| "loss": 0.0238, |
| "step": 3500 |
| }, |
| { |
| "grad_norm": 0.39643239974975586, |
| "learning_rate": 9.575872703734832e-05, |
| "loss": 0.0292, |
| "step": 3510 |
| }, |
| { |
| "grad_norm": 0.3542700409889221, |
| "learning_rate": 9.572534230896611e-05, |
| "loss": 0.0231, |
| "step": 3520 |
| }, |
| { |
| "grad_norm": 0.43060752749443054, |
| "learning_rate": 9.569183256924403e-05, |
| "loss": 0.025, |
| "step": 3530 |
| }, |
| { |
| "grad_norm": 0.40233463048934937, |
| "learning_rate": 9.565819790979646e-05, |
| "loss": 0.0422, |
| "step": 3540 |
| }, |
| { |
| "grad_norm": 0.4497774839401245, |
| "learning_rate": 9.562443842257925e-05, |
| "loss": 0.029, |
| "step": 3550 |
| }, |
| { |
| "grad_norm": 0.5018470287322998, |
| "learning_rate": 9.559055419988956e-05, |
| "loss": 0.0283, |
| "step": 3560 |
| }, |
| { |
| "grad_norm": 0.47868454456329346, |
| "learning_rate": 9.555654533436557e-05, |
| "loss": 0.0349, |
| "step": 3570 |
| }, |
| { |
| "grad_norm": 0.4413691759109497, |
| "learning_rate": 9.552241191898621e-05, |
| "loss": 0.0238, |
| "step": 3580 |
| }, |
| { |
| "grad_norm": 0.40998080372810364, |
| "learning_rate": 9.548815404707092e-05, |
| "loss": 0.03, |
| "step": 3590 |
| }, |
| { |
| "grad_norm": 0.43824273347854614, |
| "learning_rate": 9.545377181227942e-05, |
| "loss": 0.0284, |
| "step": 3600 |
| }, |
| { |
| "grad_norm": 0.4570449888706207, |
| "learning_rate": 9.541926530861145e-05, |
| "loss": 0.0266, |
| "step": 3610 |
| }, |
| { |
| "grad_norm": 0.44766074419021606, |
| "learning_rate": 9.538463463040645e-05, |
| "loss": 0.0278, |
| "step": 3620 |
| }, |
| { |
| "grad_norm": 0.481611967086792, |
| "learning_rate": 9.534987987234337e-05, |
| "loss": 0.0277, |
| "step": 3630 |
| }, |
| { |
| "grad_norm": 0.4858357608318329, |
| "learning_rate": 9.53150011294404e-05, |
| "loss": 0.0265, |
| "step": 3640 |
| }, |
| { |
| "grad_norm": 0.40574368834495544, |
| "learning_rate": 9.527999849705471e-05, |
| "loss": 0.0297, |
| "step": 3650 |
| }, |
| { |
| "grad_norm": 0.4581122100353241, |
| "learning_rate": 9.524487207088213e-05, |
| "loss": 0.0224, |
| "step": 3660 |
| }, |
| { |
| "grad_norm": 0.4100882411003113, |
| "learning_rate": 9.520962194695698e-05, |
| "loss": 0.0239, |
| "step": 3670 |
| }, |
| { |
| "grad_norm": 0.40333643555641174, |
| "learning_rate": 9.517424822165175e-05, |
| "loss": 0.0238, |
| "step": 3680 |
| }, |
| { |
| "grad_norm": 0.5596145987510681, |
| "learning_rate": 9.513875099167685e-05, |
| "loss": 0.0245, |
| "step": 3690 |
| }, |
| { |
| "grad_norm": 0.5230712890625, |
| "learning_rate": 9.510313035408035e-05, |
| "loss": 0.0262, |
| "step": 3700 |
| }, |
| { |
| "grad_norm": 0.39155617356300354, |
| "learning_rate": 9.506738640624775e-05, |
| "loss": 0.0264, |
| "step": 3710 |
| }, |
| { |
| "grad_norm": 0.4129464328289032, |
| "learning_rate": 9.50315192459016e-05, |
| "loss": 0.0208, |
| "step": 3720 |
| }, |
| { |
| "grad_norm": 0.5159543752670288, |
| "learning_rate": 9.499552897110136e-05, |
| "loss": 0.0239, |
| "step": 3730 |
| }, |
| { |
| "grad_norm": 0.5178094506263733, |
| "learning_rate": 9.495941568024304e-05, |
| "loss": 0.0253, |
| "step": 3740 |
| }, |
| { |
| "grad_norm": 0.43580612540245056, |
| "learning_rate": 9.492317947205904e-05, |
| "loss": 0.0268, |
| "step": 3750 |
| }, |
| { |
| "grad_norm": 0.4596274495124817, |
| "learning_rate": 9.488682044561775e-05, |
| "loss": 0.0256, |
| "step": 3760 |
| }, |
| { |
| "grad_norm": 0.41573286056518555, |
| "learning_rate": 9.485033870032335e-05, |
| "loss": 0.0243, |
| "step": 3770 |
| }, |
| { |
| "grad_norm": 0.47876912355422974, |
| "learning_rate": 9.481373433591556e-05, |
| "loss": 0.0215, |
| "step": 3780 |
| }, |
| { |
| "grad_norm": 0.4741547703742981, |
| "learning_rate": 9.47770074524693e-05, |
| "loss": 0.027, |
| "step": 3790 |
| }, |
| { |
| "grad_norm": 0.4306631088256836, |
| "learning_rate": 9.474015815039446e-05, |
| "loss": 0.0277, |
| "step": 3800 |
| }, |
| { |
| "grad_norm": 0.46127429604530334, |
| "learning_rate": 9.470318653043565e-05, |
| "loss": 0.0273, |
| "step": 3810 |
| }, |
| { |
| "grad_norm": 0.5021414160728455, |
| "learning_rate": 9.466609269367185e-05, |
| "loss": 0.0263, |
| "step": 3820 |
| }, |
| { |
| "grad_norm": 0.5333779454231262, |
| "learning_rate": 9.46288767415162e-05, |
| "loss": 0.0234, |
| "step": 3830 |
| }, |
| { |
| "grad_norm": 0.4366990625858307, |
| "learning_rate": 9.459153877571567e-05, |
| "loss": 0.0225, |
| "step": 3840 |
| }, |
| { |
| "grad_norm": 0.4819251298904419, |
| "learning_rate": 9.455407889835087e-05, |
| "loss": 0.0238, |
| "step": 3850 |
| }, |
| { |
| "grad_norm": 0.3999616503715515, |
| "learning_rate": 9.451649721183564e-05, |
| "loss": 0.0234, |
| "step": 3860 |
| }, |
| { |
| "grad_norm": 0.37807697057724, |
| "learning_rate": 9.447879381891692e-05, |
| "loss": 0.0258, |
| "step": 3870 |
| }, |
| { |
| "grad_norm": 0.5266739130020142, |
| "learning_rate": 9.444096882267428e-05, |
| "loss": 0.0329, |
| "step": 3880 |
| }, |
| { |
| "grad_norm": 0.3961910903453827, |
| "learning_rate": 9.440302232651988e-05, |
| "loss": 0.0226, |
| "step": 3890 |
| }, |
| { |
| "grad_norm": 0.3786242604255676, |
| "learning_rate": 9.436495443419795e-05, |
| "loss": 0.024, |
| "step": 3900 |
| }, |
| { |
| "grad_norm": 0.4175941050052643, |
| "learning_rate": 9.432676524978466e-05, |
| "loss": 0.0219, |
| "step": 3910 |
| }, |
| { |
| "grad_norm": 0.44096827507019043, |
| "learning_rate": 9.42884548776878e-05, |
| "loss": 0.0253, |
| "step": 3920 |
| }, |
| { |
| "grad_norm": 0.41201087832450867, |
| "learning_rate": 9.425002342264646e-05, |
| "loss": 0.0223, |
| "step": 3930 |
| }, |
| { |
| "grad_norm": 0.5009353160858154, |
| "learning_rate": 9.421147098973077e-05, |
| "loss": 0.0266, |
| "step": 3940 |
| }, |
| { |
| "grad_norm": 0.5505723357200623, |
| "learning_rate": 9.41727976843416e-05, |
| "loss": 0.0258, |
| "step": 3950 |
| }, |
| { |
| "grad_norm": 0.45981982350349426, |
| "learning_rate": 9.413400361221029e-05, |
| "loss": 0.0279, |
| "step": 3960 |
| }, |
| { |
| "grad_norm": 0.4804719388484955, |
| "learning_rate": 9.409508887939835e-05, |
| "loss": 0.022, |
| "step": 3970 |
| }, |
| { |
| "grad_norm": 0.4238436222076416, |
| "learning_rate": 9.40560535922972e-05, |
| "loss": 0.0212, |
| "step": 3980 |
| }, |
| { |
| "grad_norm": 0.403974324464798, |
| "learning_rate": 9.40168978576278e-05, |
| "loss": 0.0189, |
| "step": 3990 |
| }, |
| { |
| "grad_norm": 0.48837044835090637, |
| "learning_rate": 9.397762178244043e-05, |
| "loss": 0.0244, |
| "step": 4000 |
| }, |
| { |
| "grad_norm": 0.48128196597099304, |
| "learning_rate": 9.393822547411439e-05, |
| "loss": 0.0217, |
| "step": 4010 |
| }, |
| { |
| "grad_norm": 0.3272818624973297, |
| "learning_rate": 9.389870904035769e-05, |
| "loss": 0.0242, |
| "step": 4020 |
| }, |
| { |
| "grad_norm": 0.36953118443489075, |
| "learning_rate": 9.385907258920672e-05, |
| "loss": 0.0246, |
| "step": 4030 |
| }, |
| { |
| "grad_norm": 0.41161492466926575, |
| "learning_rate": 9.381931622902607e-05, |
| "loss": 0.021, |
| "step": 4040 |
| }, |
| { |
| "grad_norm": 0.4544064998626709, |
| "learning_rate": 9.377944006850807e-05, |
| "loss": 0.0193, |
| "step": 4050 |
| }, |
| { |
| "grad_norm": 0.47396498918533325, |
| "learning_rate": 9.373944421667265e-05, |
| "loss": 0.0213, |
| "step": 4060 |
| }, |
| { |
| "grad_norm": 0.4621795117855072, |
| "learning_rate": 9.369932878286691e-05, |
| "loss": 0.0266, |
| "step": 4070 |
| }, |
| { |
| "grad_norm": 0.5184421539306641, |
| "learning_rate": 9.365909387676494e-05, |
| "loss": 0.0196, |
| "step": 4080 |
| }, |
| { |
| "grad_norm": 0.4004800319671631, |
| "learning_rate": 9.361873960836744e-05, |
| "loss": 0.0263, |
| "step": 4090 |
| }, |
| { |
| "grad_norm": 0.3737598657608032, |
| "learning_rate": 9.357826608800142e-05, |
| "loss": 0.0196, |
| "step": 4100 |
| }, |
| { |
| "grad_norm": 0.4000731110572815, |
| "learning_rate": 9.353767342631994e-05, |
| "loss": 0.0203, |
| "step": 4110 |
| }, |
| { |
| "grad_norm": 0.3826330006122589, |
| "learning_rate": 9.34969617343018e-05, |
| "loss": 0.0219, |
| "step": 4120 |
| }, |
| { |
| "grad_norm": 0.5988262891769409, |
| "learning_rate": 9.345613112325122e-05, |
| "loss": 0.0204, |
| "step": 4130 |
| }, |
| { |
| "grad_norm": 0.4280189275741577, |
| "learning_rate": 9.34151817047975e-05, |
| "loss": 0.0224, |
| "step": 4140 |
| }, |
| { |
| "grad_norm": 0.3716961145401001, |
| "learning_rate": 9.33741135908948e-05, |
| "loss": 0.0262, |
| "step": 4150 |
| }, |
| { |
| "grad_norm": 0.4295980930328369, |
| "learning_rate": 9.33329268938218e-05, |
| "loss": 0.0207, |
| "step": 4160 |
| }, |
| { |
| "grad_norm": 0.425942063331604, |
| "learning_rate": 9.329162172618132e-05, |
| "loss": 0.0238, |
| "step": 4170 |
| }, |
| { |
| "grad_norm": 0.416522741317749, |
| "learning_rate": 9.325019820090013e-05, |
| "loss": 0.0226, |
| "step": 4180 |
| }, |
| { |
| "grad_norm": 0.5610533952713013, |
| "learning_rate": 9.320865643122855e-05, |
| "loss": 0.0208, |
| "step": 4190 |
| }, |
| { |
| "grad_norm": 0.379802942276001, |
| "learning_rate": 9.316699653074023e-05, |
| "loss": 0.022, |
| "step": 4200 |
| }, |
| { |
| "grad_norm": 0.4576219618320465, |
| "learning_rate": 9.312521861333172e-05, |
| "loss": 0.0166, |
| "step": 4210 |
| }, |
| { |
| "grad_norm": 0.45310190320014954, |
| "learning_rate": 9.308332279322224e-05, |
| "loss": 0.0242, |
| "step": 4220 |
| }, |
| { |
| "grad_norm": 0.4080248177051544, |
| "learning_rate": 9.304130918495338e-05, |
| "loss": 0.0224, |
| "step": 4230 |
| }, |
| { |
| "grad_norm": 0.33399489521980286, |
| "learning_rate": 9.299917790338874e-05, |
| "loss": 0.0187, |
| "step": 4240 |
| }, |
| { |
| "grad_norm": 0.356057733297348, |
| "learning_rate": 9.295692906371363e-05, |
| "loss": 0.0173, |
| "step": 4250 |
| }, |
| { |
| "grad_norm": 0.42619287967681885, |
| "learning_rate": 9.291456278143476e-05, |
| "loss": 0.0264, |
| "step": 4260 |
| }, |
| { |
| "grad_norm": 0.3479536175727844, |
| "learning_rate": 9.287207917237994e-05, |
| "loss": 0.0213, |
| "step": 4270 |
| }, |
| { |
| "grad_norm": 0.3362795114517212, |
| "learning_rate": 9.282947835269773e-05, |
| "loss": 0.0206, |
| "step": 4280 |
| }, |
| { |
| "grad_norm": 0.43236204981803894, |
| "learning_rate": 9.278676043885715e-05, |
| "loss": 0.0191, |
| "step": 4290 |
| }, |
| { |
| "grad_norm": 0.32585880160331726, |
| "learning_rate": 9.274392554764733e-05, |
| "loss": 0.0194, |
| "step": 4300 |
| }, |
| { |
| "grad_norm": 0.4723697900772095, |
| "learning_rate": 9.270097379617723e-05, |
| "loss": 0.016, |
| "step": 4310 |
| }, |
| { |
| "grad_norm": 0.42713454365730286, |
| "learning_rate": 9.26579053018753e-05, |
| "loss": 0.0154, |
| "step": 4320 |
| }, |
| { |
| "grad_norm": 0.33830246329307556, |
| "learning_rate": 9.261472018248918e-05, |
| "loss": 0.0146, |
| "step": 4330 |
| }, |
| { |
| "grad_norm": 0.4066753387451172, |
| "learning_rate": 9.25714185560853e-05, |
| "loss": 0.0259, |
| "step": 4340 |
| }, |
| { |
| "grad_norm": 0.448772668838501, |
| "learning_rate": 9.252800054104868e-05, |
| "loss": 0.0187, |
| "step": 4350 |
| }, |
| { |
| "grad_norm": 0.4219300448894501, |
| "learning_rate": 9.248446625608252e-05, |
| "loss": 0.0208, |
| "step": 4360 |
| }, |
| { |
| "grad_norm": 0.39920371770858765, |
| "learning_rate": 9.244081582020789e-05, |
| "loss": 0.0175, |
| "step": 4370 |
| }, |
| { |
| "grad_norm": 0.42131638526916504, |
| "learning_rate": 9.239704935276339e-05, |
| "loss": 0.0182, |
| "step": 4380 |
| }, |
| { |
| "grad_norm": 0.45648935437202454, |
| "learning_rate": 9.235316697340489e-05, |
| "loss": 0.0158, |
| "step": 4390 |
| }, |
| { |
| "grad_norm": 0.42188429832458496, |
| "learning_rate": 9.230916880210512e-05, |
| "loss": 0.0183, |
| "step": 4400 |
| }, |
| { |
| "grad_norm": 0.36581969261169434, |
| "learning_rate": 9.226505495915342e-05, |
| "loss": 0.0147, |
| "step": 4410 |
| }, |
| { |
| "grad_norm": 0.42502549290657043, |
| "learning_rate": 9.222082556515536e-05, |
| "loss": 0.0198, |
| "step": 4420 |
| }, |
| { |
| "grad_norm": 0.35229989886283875, |
| "learning_rate": 9.217648074103242e-05, |
| "loss": 0.0153, |
| "step": 4430 |
| }, |
| { |
| "grad_norm": 0.4085313379764557, |
| "learning_rate": 9.213202060802161e-05, |
| "loss": 0.0192, |
| "step": 4440 |
| }, |
| { |
| "grad_norm": 0.4650028645992279, |
| "learning_rate": 9.208744528767528e-05, |
| "loss": 0.0173, |
| "step": 4450 |
| }, |
| { |
| "grad_norm": 0.4048616886138916, |
| "learning_rate": 9.204275490186064e-05, |
| "loss": 0.0204, |
| "step": 4460 |
| }, |
| { |
| "grad_norm": 0.4178619980812073, |
| "learning_rate": 9.199794957275949e-05, |
| "loss": 0.0204, |
| "step": 4470 |
| }, |
| { |
| "grad_norm": 0.46256691217422485, |
| "learning_rate": 9.19530294228679e-05, |
| "loss": 0.0177, |
| "step": 4480 |
| }, |
| { |
| "grad_norm": 0.35352519154548645, |
| "learning_rate": 9.190799457499583e-05, |
| "loss": 0.028, |
| "step": 4490 |
| }, |
| { |
| "grad_norm": 0.4470050632953644, |
| "learning_rate": 9.186284515226686e-05, |
| "loss": 0.0194, |
| "step": 4500 |
| }, |
| { |
| "grad_norm": 0.3508913815021515, |
| "learning_rate": 9.181758127811777e-05, |
| "loss": 0.0241, |
| "step": 4510 |
| }, |
| { |
| "grad_norm": 0.411702424287796, |
| "learning_rate": 9.177220307629825e-05, |
| "loss": 0.0204, |
| "step": 4520 |
| }, |
| { |
| "grad_norm": 0.4468960762023926, |
| "learning_rate": 9.172671067087059e-05, |
| "loss": 0.0194, |
| "step": 4530 |
| }, |
| { |
| "grad_norm": 0.4807928204536438, |
| "learning_rate": 9.16811041862093e-05, |
| "loss": 0.0256, |
| "step": 4540 |
| }, |
| { |
| "grad_norm": 0.39205247163772583, |
| "learning_rate": 9.163538374700076e-05, |
| "loss": 0.0185, |
| "step": 4550 |
| }, |
| { |
| "grad_norm": 0.44329723715782166, |
| "learning_rate": 9.158954947824287e-05, |
| "loss": 0.0178, |
| "step": 4560 |
| }, |
| { |
| "grad_norm": 0.47283023595809937, |
| "learning_rate": 9.154360150524482e-05, |
| "loss": 0.0174, |
| "step": 4570 |
| }, |
| { |
| "grad_norm": 0.38849857449531555, |
| "learning_rate": 9.14975399536266e-05, |
| "loss": 0.0143, |
| "step": 4580 |
| }, |
| { |
| "grad_norm": 0.3656264543533325, |
| "learning_rate": 9.14513649493187e-05, |
| "loss": 0.0212, |
| "step": 4590 |
| }, |
| { |
| "grad_norm": 0.4674840271472931, |
| "learning_rate": 9.140507661856187e-05, |
| "loss": 0.0153, |
| "step": 4600 |
| }, |
| { |
| "grad_norm": 0.4313472509384155, |
| "learning_rate": 9.135867508790661e-05, |
| "loss": 0.0214, |
| "step": 4610 |
| }, |
| { |
| "grad_norm": 0.3471619486808777, |
| "learning_rate": 9.131216048421291e-05, |
| "loss": 0.0165, |
| "step": 4620 |
| }, |
| { |
| "grad_norm": 0.4542539715766907, |
| "learning_rate": 9.126553293464998e-05, |
| "loss": 0.0189, |
| "step": 4630 |
| }, |
| { |
| "grad_norm": 0.47608688473701477, |
| "learning_rate": 9.121879256669572e-05, |
| "loss": 0.017, |
| "step": 4640 |
| }, |
| { |
| "grad_norm": 0.3959465026855469, |
| "learning_rate": 9.117193950813652e-05, |
| "loss": 0.0164, |
| "step": 4650 |
| }, |
| { |
| "grad_norm": 0.408431738615036, |
| "learning_rate": 9.112497388706685e-05, |
| "loss": 0.0255, |
| "step": 4660 |
| }, |
| { |
| "grad_norm": 0.4116475582122803, |
| "learning_rate": 9.10778958318889e-05, |
| "loss": 0.0174, |
| "step": 4670 |
| }, |
| { |
| "grad_norm": 0.3917919993400574, |
| "learning_rate": 9.103070547131232e-05, |
| "loss": 0.0199, |
| "step": 4680 |
| }, |
| { |
| "grad_norm": 0.3482106029987335, |
| "learning_rate": 9.098340293435375e-05, |
| "loss": 0.0179, |
| "step": 4690 |
| }, |
| { |
| "grad_norm": 0.34646838903427124, |
| "learning_rate": 9.093598835033649e-05, |
| "loss": 0.0174, |
| "step": 4700 |
| }, |
| { |
| "grad_norm": 0.39419376850128174, |
| "learning_rate": 9.088846184889021e-05, |
| "loss": 0.0191, |
| "step": 4710 |
| }, |
| { |
| "grad_norm": 0.4543268084526062, |
| "learning_rate": 9.084082355995057e-05, |
| "loss": 0.0213, |
| "step": 4720 |
| }, |
| { |
| "grad_norm": 0.4212946891784668, |
| "learning_rate": 9.079307361375882e-05, |
| "loss": 0.0181, |
| "step": 4730 |
| }, |
| { |
| "grad_norm": 0.3014923334121704, |
| "learning_rate": 9.074521214086149e-05, |
| "loss": 0.019, |
| "step": 4740 |
| }, |
| { |
| "grad_norm": 0.36527299880981445, |
| "learning_rate": 9.069723927211001e-05, |
| "loss": 0.0179, |
| "step": 4750 |
| }, |
| { |
| "grad_norm": 0.3752840757369995, |
| "learning_rate": 9.064915513866037e-05, |
| "loss": 0.0183, |
| "step": 4760 |
| }, |
| { |
| "grad_norm": 0.42201003432273865, |
| "learning_rate": 9.060095987197279e-05, |
| "loss": 0.0162, |
| "step": 4770 |
| }, |
| { |
| "grad_norm": 0.3307137191295624, |
| "learning_rate": 9.055265360381126e-05, |
| "loss": 0.0206, |
| "step": 4780 |
| }, |
| { |
| "grad_norm": 0.33322593569755554, |
| "learning_rate": 9.050423646624326e-05, |
| "loss": 0.016, |
| "step": 4790 |
| }, |
| { |
| "grad_norm": 0.35324618220329285, |
| "learning_rate": 9.045570859163943e-05, |
| "loss": 0.0194, |
| "step": 4800 |
| }, |
| { |
| "grad_norm": 0.427572637796402, |
| "learning_rate": 9.04070701126731e-05, |
| "loss": 0.015, |
| "step": 4810 |
| }, |
| { |
| "grad_norm": 0.3561609983444214, |
| "learning_rate": 9.035832116232001e-05, |
| "loss": 0.0145, |
| "step": 4820 |
| }, |
| { |
| "grad_norm": 0.37716561555862427, |
| "learning_rate": 9.030946187385796e-05, |
| "loss": 0.016, |
| "step": 4830 |
| }, |
| { |
| "grad_norm": 0.39859738945961, |
| "learning_rate": 9.026049238086635e-05, |
| "loss": 0.0178, |
| "step": 4840 |
| }, |
| { |
| "grad_norm": 0.4500395655632019, |
| "learning_rate": 9.021141281722591e-05, |
| "loss": 0.0202, |
| "step": 4850 |
| }, |
| { |
| "grad_norm": 0.34830138087272644, |
| "learning_rate": 9.01622233171183e-05, |
| "loss": 0.0169, |
| "step": 4860 |
| }, |
| { |
| "grad_norm": 0.3729107677936554, |
| "learning_rate": 9.011292401502574e-05, |
| "loss": 0.0212, |
| "step": 4870 |
| }, |
| { |
| "grad_norm": 0.3912448585033417, |
| "learning_rate": 9.006351504573063e-05, |
| "loss": 0.0146, |
| "step": 4880 |
| }, |
| { |
| "grad_norm": 0.4137353003025055, |
| "learning_rate": 9.001399654431519e-05, |
| "loss": 0.0171, |
| "step": 4890 |
| }, |
| { |
| "grad_norm": 0.4444160759449005, |
| "learning_rate": 8.996436864616116e-05, |
| "loss": 0.0162, |
| "step": 4900 |
| }, |
| { |
| "grad_norm": 0.3148241639137268, |
| "learning_rate": 8.991463148694925e-05, |
| "loss": 0.0191, |
| "step": 4910 |
| }, |
| { |
| "grad_norm": 0.4391416907310486, |
| "learning_rate": 8.986478520265902e-05, |
| "loss": 0.0187, |
| "step": 4920 |
| }, |
| { |
| "grad_norm": 0.4296688139438629, |
| "learning_rate": 8.981482992956827e-05, |
| "loss": 0.0143, |
| "step": 4930 |
| }, |
| { |
| "grad_norm": 0.29728299379348755, |
| "learning_rate": 8.976476580425282e-05, |
| "loss": 0.0148, |
| "step": 4940 |
| }, |
| { |
| "grad_norm": 0.4356195032596588, |
| "learning_rate": 8.971459296358606e-05, |
| "loss": 0.0287, |
| "step": 4950 |
| }, |
| { |
| "grad_norm": 0.4179481565952301, |
| "learning_rate": 8.966431154473864e-05, |
| "loss": 0.0157, |
| "step": 4960 |
| }, |
| { |
| "grad_norm": 0.3610477149486542, |
| "learning_rate": 8.961392168517803e-05, |
| "loss": 0.0159, |
| "step": 4970 |
| }, |
| { |
| "grad_norm": 0.34345686435699463, |
| "learning_rate": 8.956342352266821e-05, |
| "loss": 0.016, |
| "step": 4980 |
| }, |
| { |
| "grad_norm": 0.3698787987232208, |
| "learning_rate": 8.95128171952692e-05, |
| "loss": 0.0214, |
| "step": 4990 |
| }, |
| { |
| "grad_norm": 0.327648788690567, |
| "learning_rate": 8.946210284133676e-05, |
| "loss": 0.0173, |
| "step": 5000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 24, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|