| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.3, |
| "eval_steps": 500, |
| "global_step": 30000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "grad_norm": 2.47322154045105, |
| "learning_rate": 1.8e-07, |
| "loss": 1.3541, |
| "step": 10 |
| }, |
| { |
| "grad_norm": 2.554671049118042, |
| "learning_rate": 3.8e-07, |
| "loss": 1.3667, |
| "step": 20 |
| }, |
| { |
| "grad_norm": 2.3967344760894775, |
| "learning_rate": 5.8e-07, |
| "loss": 1.351, |
| "step": 30 |
| }, |
| { |
| "grad_norm": 2.0986547470092773, |
| "learning_rate": 7.8e-07, |
| "loss": 1.3457, |
| "step": 40 |
| }, |
| { |
| "grad_norm": 2.090355634689331, |
| "learning_rate": 9.8e-07, |
| "loss": 1.3238, |
| "step": 50 |
| }, |
| { |
| "grad_norm": 1.3943339586257935, |
| "learning_rate": 1.18e-06, |
| "loss": 1.3015, |
| "step": 60 |
| }, |
| { |
| "grad_norm": 1.176537036895752, |
| "learning_rate": 1.3800000000000001e-06, |
| "loss": 1.254, |
| "step": 70 |
| }, |
| { |
| "grad_norm": 0.8108309507369995, |
| "learning_rate": 1.5800000000000003e-06, |
| "loss": 1.2257, |
| "step": 80 |
| }, |
| { |
| "grad_norm": 0.7492016553878784, |
| "learning_rate": 1.7800000000000001e-06, |
| "loss": 1.1949, |
| "step": 90 |
| }, |
| { |
| "grad_norm": 0.5984260439872742, |
| "learning_rate": 1.98e-06, |
| "loss": 1.1803, |
| "step": 100 |
| }, |
| { |
| "grad_norm": 0.45398518443107605, |
| "learning_rate": 2.1800000000000003e-06, |
| "loss": 1.1486, |
| "step": 110 |
| }, |
| { |
| "grad_norm": 0.3421183228492737, |
| "learning_rate": 2.38e-06, |
| "loss": 1.1417, |
| "step": 120 |
| }, |
| { |
| "grad_norm": 0.453471839427948, |
| "learning_rate": 2.5800000000000003e-06, |
| "loss": 1.1415, |
| "step": 130 |
| }, |
| { |
| "grad_norm": 0.3436436653137207, |
| "learning_rate": 2.78e-06, |
| "loss": 1.1536, |
| "step": 140 |
| }, |
| { |
| "grad_norm": 0.2761518359184265, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 1.151, |
| "step": 150 |
| }, |
| { |
| "grad_norm": 0.2847629189491272, |
| "learning_rate": 3.1800000000000005e-06, |
| "loss": 1.1371, |
| "step": 160 |
| }, |
| { |
| "grad_norm": 0.2499399036169052, |
| "learning_rate": 3.38e-06, |
| "loss": 1.1377, |
| "step": 170 |
| }, |
| { |
| "grad_norm": 0.3377789556980133, |
| "learning_rate": 3.58e-06, |
| "loss": 1.1361, |
| "step": 180 |
| }, |
| { |
| "grad_norm": 0.39922669529914856, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 1.1307, |
| "step": 190 |
| }, |
| { |
| "grad_norm": 0.38898763060569763, |
| "learning_rate": 3.98e-06, |
| "loss": 1.1293, |
| "step": 200 |
| }, |
| { |
| "grad_norm": 0.37199997901916504, |
| "learning_rate": 4.18e-06, |
| "loss": 1.132, |
| "step": 210 |
| }, |
| { |
| "grad_norm": 0.3737226724624634, |
| "learning_rate": 4.38e-06, |
| "loss": 1.1173, |
| "step": 220 |
| }, |
| { |
| "grad_norm": 0.41701287031173706, |
| "learning_rate": 4.58e-06, |
| "loss": 1.1092, |
| "step": 230 |
| }, |
| { |
| "grad_norm": 0.37127238512039185, |
| "learning_rate": 4.780000000000001e-06, |
| "loss": 1.0874, |
| "step": 240 |
| }, |
| { |
| "grad_norm": 0.377769410610199, |
| "learning_rate": 4.98e-06, |
| "loss": 1.0752, |
| "step": 250 |
| }, |
| { |
| "grad_norm": 0.40639528632164, |
| "learning_rate": 5.18e-06, |
| "loss": 1.0763, |
| "step": 260 |
| }, |
| { |
| "grad_norm": 0.45707887411117554, |
| "learning_rate": 5.38e-06, |
| "loss": 1.0654, |
| "step": 270 |
| }, |
| { |
| "grad_norm": 0.3577777147293091, |
| "learning_rate": 5.580000000000001e-06, |
| "loss": 1.0626, |
| "step": 280 |
| }, |
| { |
| "grad_norm": 0.44091716408729553, |
| "learning_rate": 5.78e-06, |
| "loss": 1.0506, |
| "step": 290 |
| }, |
| { |
| "grad_norm": 0.5065949559211731, |
| "learning_rate": 5.98e-06, |
| "loss": 1.0486, |
| "step": 300 |
| }, |
| { |
| "grad_norm": 0.40768519043922424, |
| "learning_rate": 6.18e-06, |
| "loss": 1.0475, |
| "step": 310 |
| }, |
| { |
| "grad_norm": 0.3999238908290863, |
| "learning_rate": 6.38e-06, |
| "loss": 1.0496, |
| "step": 320 |
| }, |
| { |
| "grad_norm": 0.4539977014064789, |
| "learning_rate": 6.58e-06, |
| "loss": 1.0382, |
| "step": 330 |
| }, |
| { |
| "grad_norm": 0.4279543459415436, |
| "learning_rate": 6.78e-06, |
| "loss": 1.0321, |
| "step": 340 |
| }, |
| { |
| "grad_norm": 0.572868824005127, |
| "learning_rate": 6.98e-06, |
| "loss": 1.0361, |
| "step": 350 |
| }, |
| { |
| "grad_norm": 0.7423160076141357, |
| "learning_rate": 7.180000000000001e-06, |
| "loss": 1.0229, |
| "step": 360 |
| }, |
| { |
| "grad_norm": 0.6083391904830933, |
| "learning_rate": 7.3800000000000005e-06, |
| "loss": 1.0066, |
| "step": 370 |
| }, |
| { |
| "grad_norm": 0.7845941781997681, |
| "learning_rate": 7.580000000000001e-06, |
| "loss": 0.9973, |
| "step": 380 |
| }, |
| { |
| "grad_norm": 0.8202695846557617, |
| "learning_rate": 7.78e-06, |
| "loss": 0.9842, |
| "step": 390 |
| }, |
| { |
| "grad_norm": 1.003799319267273, |
| "learning_rate": 7.98e-06, |
| "loss": 0.9593, |
| "step": 400 |
| }, |
| { |
| "grad_norm": 1.0290818214416504, |
| "learning_rate": 8.18e-06, |
| "loss": 0.9326, |
| "step": 410 |
| }, |
| { |
| "grad_norm": 0.9908057451248169, |
| "learning_rate": 8.380000000000001e-06, |
| "loss": 0.8944, |
| "step": 420 |
| }, |
| { |
| "grad_norm": 1.069297194480896, |
| "learning_rate": 8.580000000000001e-06, |
| "loss": 0.8615, |
| "step": 430 |
| }, |
| { |
| "grad_norm": 1.4509676694869995, |
| "learning_rate": 8.78e-06, |
| "loss": 0.829, |
| "step": 440 |
| }, |
| { |
| "grad_norm": 1.1007381677627563, |
| "learning_rate": 8.98e-06, |
| "loss": 0.7987, |
| "step": 450 |
| }, |
| { |
| "grad_norm": 1.1426440477371216, |
| "learning_rate": 9.180000000000002e-06, |
| "loss": 0.7687, |
| "step": 460 |
| }, |
| { |
| "grad_norm": 1.2663944959640503, |
| "learning_rate": 9.38e-06, |
| "loss": 0.7481, |
| "step": 470 |
| }, |
| { |
| "grad_norm": 1.2125422954559326, |
| "learning_rate": 9.58e-06, |
| "loss": 0.7177, |
| "step": 480 |
| }, |
| { |
| "grad_norm": 1.4529684782028198, |
| "learning_rate": 9.78e-06, |
| "loss": 0.688, |
| "step": 490 |
| }, |
| { |
| "grad_norm": 1.2782946825027466, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 0.6622, |
| "step": 500 |
| }, |
| { |
| "grad_norm": 1.3107942342758179, |
| "learning_rate": 1.018e-05, |
| "loss": 0.632, |
| "step": 510 |
| }, |
| { |
| "grad_norm": 1.3640458583831787, |
| "learning_rate": 1.038e-05, |
| "loss": 0.6102, |
| "step": 520 |
| }, |
| { |
| "grad_norm": 1.5290552377700806, |
| "learning_rate": 1.058e-05, |
| "loss": 0.587, |
| "step": 530 |
| }, |
| { |
| "grad_norm": 1.355726718902588, |
| "learning_rate": 1.0780000000000002e-05, |
| "loss": 0.563, |
| "step": 540 |
| }, |
| { |
| "grad_norm": 1.5167773962020874, |
| "learning_rate": 1.098e-05, |
| "loss": 0.5297, |
| "step": 550 |
| }, |
| { |
| "grad_norm": 1.3675698041915894, |
| "learning_rate": 1.118e-05, |
| "loss": 0.5078, |
| "step": 560 |
| }, |
| { |
| "grad_norm": 1.5590559244155884, |
| "learning_rate": 1.1380000000000001e-05, |
| "loss": 0.4944, |
| "step": 570 |
| }, |
| { |
| "grad_norm": 1.3931667804718018, |
| "learning_rate": 1.1580000000000001e-05, |
| "loss": 0.476, |
| "step": 580 |
| }, |
| { |
| "grad_norm": 1.481071949005127, |
| "learning_rate": 1.178e-05, |
| "loss": 0.4501, |
| "step": 590 |
| }, |
| { |
| "grad_norm": 1.8843756914138794, |
| "learning_rate": 1.198e-05, |
| "loss": 0.4445, |
| "step": 600 |
| }, |
| { |
| "grad_norm": 1.8522933721542358, |
| "learning_rate": 1.2180000000000002e-05, |
| "loss": 0.4157, |
| "step": 610 |
| }, |
| { |
| "grad_norm": 1.8875459432601929, |
| "learning_rate": 1.238e-05, |
| "loss": 0.413, |
| "step": 620 |
| }, |
| { |
| "grad_norm": 1.7943925857543945, |
| "learning_rate": 1.258e-05, |
| "loss": 0.3932, |
| "step": 630 |
| }, |
| { |
| "grad_norm": 1.6024821996688843, |
| "learning_rate": 1.278e-05, |
| "loss": 0.3799, |
| "step": 640 |
| }, |
| { |
| "grad_norm": 1.91981041431427, |
| "learning_rate": 1.2980000000000001e-05, |
| "loss": 0.3688, |
| "step": 650 |
| }, |
| { |
| "grad_norm": 1.5477911233901978, |
| "learning_rate": 1.3180000000000001e-05, |
| "loss": 0.359, |
| "step": 660 |
| }, |
| { |
| "grad_norm": 1.8666833639144897, |
| "learning_rate": 1.338e-05, |
| "loss": 0.3447, |
| "step": 670 |
| }, |
| { |
| "grad_norm": 1.873641848564148, |
| "learning_rate": 1.358e-05, |
| "loss": 0.3336, |
| "step": 680 |
| }, |
| { |
| "grad_norm": 1.6930667161941528, |
| "learning_rate": 1.3780000000000002e-05, |
| "loss": 0.3242, |
| "step": 690 |
| }, |
| { |
| "grad_norm": 1.8604819774627686, |
| "learning_rate": 1.3980000000000002e-05, |
| "loss": 0.3085, |
| "step": 700 |
| }, |
| { |
| "grad_norm": 2.4730148315429688, |
| "learning_rate": 1.4180000000000001e-05, |
| "loss": 0.3181, |
| "step": 710 |
| }, |
| { |
| "grad_norm": 1.7881016731262207, |
| "learning_rate": 1.4380000000000001e-05, |
| "loss": 0.2946, |
| "step": 720 |
| }, |
| { |
| "grad_norm": 1.8197489976882935, |
| "learning_rate": 1.4580000000000003e-05, |
| "loss": 0.2729, |
| "step": 730 |
| }, |
| { |
| "grad_norm": 1.5902010202407837, |
| "learning_rate": 1.4779999999999999e-05, |
| "loss": 0.2768, |
| "step": 740 |
| }, |
| { |
| "grad_norm": 2.1884756088256836, |
| "learning_rate": 1.4979999999999999e-05, |
| "loss": 0.2703, |
| "step": 750 |
| }, |
| { |
| "grad_norm": 1.566630482673645, |
| "learning_rate": 1.518e-05, |
| "loss": 0.2704, |
| "step": 760 |
| }, |
| { |
| "grad_norm": 2.237656593322754, |
| "learning_rate": 1.538e-05, |
| "loss": 0.2548, |
| "step": 770 |
| }, |
| { |
| "grad_norm": 2.109281539916992, |
| "learning_rate": 1.558e-05, |
| "loss": 0.2497, |
| "step": 780 |
| }, |
| { |
| "grad_norm": 2.126896858215332, |
| "learning_rate": 1.578e-05, |
| "loss": 0.2333, |
| "step": 790 |
| }, |
| { |
| "grad_norm": 1.431854486465454, |
| "learning_rate": 1.598e-05, |
| "loss": 0.2318, |
| "step": 800 |
| }, |
| { |
| "grad_norm": 1.5760784149169922, |
| "learning_rate": 1.618e-05, |
| "loss": 0.2188, |
| "step": 810 |
| }, |
| { |
| "grad_norm": 2.163811683654785, |
| "learning_rate": 1.6380000000000002e-05, |
| "loss": 0.2161, |
| "step": 820 |
| }, |
| { |
| "grad_norm": 1.6761753559112549, |
| "learning_rate": 1.658e-05, |
| "loss": 0.2113, |
| "step": 830 |
| }, |
| { |
| "grad_norm": 1.8158925771713257, |
| "learning_rate": 1.6780000000000002e-05, |
| "loss": 0.2033, |
| "step": 840 |
| }, |
| { |
| "grad_norm": 2.6011154651641846, |
| "learning_rate": 1.698e-05, |
| "loss": 0.2014, |
| "step": 850 |
| }, |
| { |
| "grad_norm": 1.9855470657348633, |
| "learning_rate": 1.718e-05, |
| "loss": 0.2005, |
| "step": 860 |
| }, |
| { |
| "grad_norm": 2.884546995162964, |
| "learning_rate": 1.7380000000000003e-05, |
| "loss": 0.1839, |
| "step": 870 |
| }, |
| { |
| "grad_norm": 1.792996883392334, |
| "learning_rate": 1.758e-05, |
| "loss": 0.1782, |
| "step": 880 |
| }, |
| { |
| "grad_norm": 2.064910411834717, |
| "learning_rate": 1.7780000000000003e-05, |
| "loss": 0.1651, |
| "step": 890 |
| }, |
| { |
| "grad_norm": 2.0864901542663574, |
| "learning_rate": 1.798e-05, |
| "loss": 0.1519, |
| "step": 900 |
| }, |
| { |
| "grad_norm": 1.6949527263641357, |
| "learning_rate": 1.818e-05, |
| "loss": 0.1458, |
| "step": 910 |
| }, |
| { |
| "grad_norm": 1.77072012424469, |
| "learning_rate": 1.838e-05, |
| "loss": 0.1477, |
| "step": 920 |
| }, |
| { |
| "grad_norm": 1.7796998023986816, |
| "learning_rate": 1.858e-05, |
| "loss": 0.1508, |
| "step": 930 |
| }, |
| { |
| "grad_norm": 2.0563416481018066, |
| "learning_rate": 1.878e-05, |
| "loss": 0.1373, |
| "step": 940 |
| }, |
| { |
| "grad_norm": 1.5273425579071045, |
| "learning_rate": 1.898e-05, |
| "loss": 0.1417, |
| "step": 950 |
| }, |
| { |
| "grad_norm": 2.0723495483398438, |
| "learning_rate": 1.918e-05, |
| "loss": 0.1415, |
| "step": 960 |
| }, |
| { |
| "grad_norm": 1.857120156288147, |
| "learning_rate": 1.938e-05, |
| "loss": 0.1382, |
| "step": 970 |
| }, |
| { |
| "grad_norm": 2.464312791824341, |
| "learning_rate": 1.9580000000000002e-05, |
| "loss": 0.145, |
| "step": 980 |
| }, |
| { |
| "grad_norm": 2.2345287799835205, |
| "learning_rate": 1.978e-05, |
| "loss": 0.1388, |
| "step": 990 |
| }, |
| { |
| "grad_norm": 1.707922101020813, |
| "learning_rate": 1.9980000000000002e-05, |
| "loss": 0.1418, |
| "step": 1000 |
| }, |
| { |
| "grad_norm": 1.7697720527648926, |
| "learning_rate": 2.0180000000000003e-05, |
| "loss": 0.1341, |
| "step": 1010 |
| }, |
| { |
| "grad_norm": 1.818679690361023, |
| "learning_rate": 2.038e-05, |
| "loss": 0.1363, |
| "step": 1020 |
| }, |
| { |
| "grad_norm": 2.0030717849731445, |
| "learning_rate": 2.0580000000000003e-05, |
| "loss": 0.1364, |
| "step": 1030 |
| }, |
| { |
| "grad_norm": 1.891526222229004, |
| "learning_rate": 2.078e-05, |
| "loss": 0.1296, |
| "step": 1040 |
| }, |
| { |
| "grad_norm": 2.0210978984832764, |
| "learning_rate": 2.098e-05, |
| "loss": 0.1214, |
| "step": 1050 |
| }, |
| { |
| "grad_norm": 1.7460476160049438, |
| "learning_rate": 2.118e-05, |
| "loss": 0.1178, |
| "step": 1060 |
| }, |
| { |
| "grad_norm": 2.0199179649353027, |
| "learning_rate": 2.138e-05, |
| "loss": 0.1216, |
| "step": 1070 |
| }, |
| { |
| "grad_norm": 1.982852816581726, |
| "learning_rate": 2.158e-05, |
| "loss": 0.1176, |
| "step": 1080 |
| }, |
| { |
| "grad_norm": 2.053598403930664, |
| "learning_rate": 2.178e-05, |
| "loss": 0.1195, |
| "step": 1090 |
| }, |
| { |
| "grad_norm": 1.8797863721847534, |
| "learning_rate": 2.198e-05, |
| "loss": 0.1294, |
| "step": 1100 |
| }, |
| { |
| "grad_norm": 1.9531686305999756, |
| "learning_rate": 2.218e-05, |
| "loss": 0.1114, |
| "step": 1110 |
| }, |
| { |
| "grad_norm": 1.813335657119751, |
| "learning_rate": 2.2380000000000003e-05, |
| "loss": 0.1218, |
| "step": 1120 |
| }, |
| { |
| "grad_norm": 1.4582650661468506, |
| "learning_rate": 2.258e-05, |
| "loss": 0.1183, |
| "step": 1130 |
| }, |
| { |
| "grad_norm": 2.1429901123046875, |
| "learning_rate": 2.2780000000000002e-05, |
| "loss": 0.1156, |
| "step": 1140 |
| }, |
| { |
| "grad_norm": 1.813545823097229, |
| "learning_rate": 2.298e-05, |
| "loss": 0.1225, |
| "step": 1150 |
| }, |
| { |
| "grad_norm": 2.017956256866455, |
| "learning_rate": 2.318e-05, |
| "loss": 0.1151, |
| "step": 1160 |
| }, |
| { |
| "grad_norm": 1.8820877075195312, |
| "learning_rate": 2.3380000000000003e-05, |
| "loss": 0.1195, |
| "step": 1170 |
| }, |
| { |
| "grad_norm": 1.7476214170455933, |
| "learning_rate": 2.358e-05, |
| "loss": 0.1189, |
| "step": 1180 |
| }, |
| { |
| "grad_norm": 1.9252310991287231, |
| "learning_rate": 2.3780000000000003e-05, |
| "loss": 0.1126, |
| "step": 1190 |
| }, |
| { |
| "grad_norm": 1.512940526008606, |
| "learning_rate": 2.398e-05, |
| "loss": 0.1198, |
| "step": 1200 |
| }, |
| { |
| "grad_norm": 1.8000988960266113, |
| "learning_rate": 2.418e-05, |
| "loss": 0.1117, |
| "step": 1210 |
| }, |
| { |
| "grad_norm": 1.5912659168243408, |
| "learning_rate": 2.438e-05, |
| "loss": 0.105, |
| "step": 1220 |
| }, |
| { |
| "grad_norm": 2.5894980430603027, |
| "learning_rate": 2.4580000000000002e-05, |
| "loss": 0.1116, |
| "step": 1230 |
| }, |
| { |
| "grad_norm": 1.9173959493637085, |
| "learning_rate": 2.478e-05, |
| "loss": 0.1132, |
| "step": 1240 |
| }, |
| { |
| "grad_norm": 2.345515251159668, |
| "learning_rate": 2.498e-05, |
| "loss": 0.1153, |
| "step": 1250 |
| }, |
| { |
| "grad_norm": 2.1796514987945557, |
| "learning_rate": 2.5180000000000003e-05, |
| "loss": 0.0973, |
| "step": 1260 |
| }, |
| { |
| "grad_norm": 1.5757145881652832, |
| "learning_rate": 2.5380000000000004e-05, |
| "loss": 0.1166, |
| "step": 1270 |
| }, |
| { |
| "grad_norm": 1.9904712438583374, |
| "learning_rate": 2.5580000000000002e-05, |
| "loss": 0.1088, |
| "step": 1280 |
| }, |
| { |
| "grad_norm": 2.3049025535583496, |
| "learning_rate": 2.5779999999999997e-05, |
| "loss": 0.1079, |
| "step": 1290 |
| }, |
| { |
| "grad_norm": 1.8384699821472168, |
| "learning_rate": 2.598e-05, |
| "loss": 0.106, |
| "step": 1300 |
| }, |
| { |
| "grad_norm": 2.0108070373535156, |
| "learning_rate": 2.618e-05, |
| "loss": 0.1027, |
| "step": 1310 |
| }, |
| { |
| "grad_norm": 1.599073052406311, |
| "learning_rate": 2.6379999999999998e-05, |
| "loss": 0.1189, |
| "step": 1320 |
| }, |
| { |
| "grad_norm": 1.7642953395843506, |
| "learning_rate": 2.658e-05, |
| "loss": 0.1021, |
| "step": 1330 |
| }, |
| { |
| "grad_norm": 1.6248481273651123, |
| "learning_rate": 2.678e-05, |
| "loss": 0.0942, |
| "step": 1340 |
| }, |
| { |
| "grad_norm": 1.5131402015686035, |
| "learning_rate": 2.698e-05, |
| "loss": 0.1064, |
| "step": 1350 |
| }, |
| { |
| "grad_norm": 1.8669644594192505, |
| "learning_rate": 2.718e-05, |
| "loss": 0.1084, |
| "step": 1360 |
| }, |
| { |
| "grad_norm": 1.998981237411499, |
| "learning_rate": 2.738e-05, |
| "loss": 0.0964, |
| "step": 1370 |
| }, |
| { |
| "grad_norm": 1.9910788536071777, |
| "learning_rate": 2.758e-05, |
| "loss": 0.0933, |
| "step": 1380 |
| }, |
| { |
| "grad_norm": 1.8466424942016602, |
| "learning_rate": 2.778e-05, |
| "loss": 0.09, |
| "step": 1390 |
| }, |
| { |
| "grad_norm": 1.6654435396194458, |
| "learning_rate": 2.798e-05, |
| "loss": 0.0926, |
| "step": 1400 |
| }, |
| { |
| "grad_norm": 1.728222131729126, |
| "learning_rate": 2.818e-05, |
| "loss": 0.0945, |
| "step": 1410 |
| }, |
| { |
| "grad_norm": 1.7263716459274292, |
| "learning_rate": 2.8380000000000003e-05, |
| "loss": 0.0951, |
| "step": 1420 |
| }, |
| { |
| "grad_norm": 1.734931468963623, |
| "learning_rate": 2.858e-05, |
| "loss": 0.1099, |
| "step": 1430 |
| }, |
| { |
| "grad_norm": 1.6889578104019165, |
| "learning_rate": 2.8780000000000002e-05, |
| "loss": 0.0939, |
| "step": 1440 |
| }, |
| { |
| "grad_norm": 1.6777198314666748, |
| "learning_rate": 2.898e-05, |
| "loss": 0.087, |
| "step": 1450 |
| }, |
| { |
| "grad_norm": 1.748708963394165, |
| "learning_rate": 2.9180000000000002e-05, |
| "loss": 0.0993, |
| "step": 1460 |
| }, |
| { |
| "grad_norm": 1.6266566514968872, |
| "learning_rate": 2.9380000000000003e-05, |
| "loss": 0.0906, |
| "step": 1470 |
| }, |
| { |
| "grad_norm": 1.117978811264038, |
| "learning_rate": 2.958e-05, |
| "loss": 0.0901, |
| "step": 1480 |
| }, |
| { |
| "grad_norm": 1.562015175819397, |
| "learning_rate": 2.9780000000000003e-05, |
| "loss": 0.0984, |
| "step": 1490 |
| }, |
| { |
| "grad_norm": 1.7531830072402954, |
| "learning_rate": 2.998e-05, |
| "loss": 0.0924, |
| "step": 1500 |
| }, |
| { |
| "grad_norm": 1.4399970769882202, |
| "learning_rate": 3.0180000000000002e-05, |
| "loss": 0.0949, |
| "step": 1510 |
| }, |
| { |
| "grad_norm": 1.5905520915985107, |
| "learning_rate": 3.0380000000000004e-05, |
| "loss": 0.0951, |
| "step": 1520 |
| }, |
| { |
| "grad_norm": 1.9324289560317993, |
| "learning_rate": 3.058e-05, |
| "loss": 0.0875, |
| "step": 1530 |
| }, |
| { |
| "grad_norm": 1.8375229835510254, |
| "learning_rate": 3.078e-05, |
| "loss": 0.0925, |
| "step": 1540 |
| }, |
| { |
| "grad_norm": 1.8989778757095337, |
| "learning_rate": 3.0980000000000005e-05, |
| "loss": 0.0919, |
| "step": 1550 |
| }, |
| { |
| "grad_norm": 1.6824874877929688, |
| "learning_rate": 3.118e-05, |
| "loss": 0.0928, |
| "step": 1560 |
| }, |
| { |
| "grad_norm": 1.7934844493865967, |
| "learning_rate": 3.138e-05, |
| "loss": 0.0944, |
| "step": 1570 |
| }, |
| { |
| "grad_norm": 1.8108294010162354, |
| "learning_rate": 3.1580000000000006e-05, |
| "loss": 0.0895, |
| "step": 1580 |
| }, |
| { |
| "grad_norm": 1.4770379066467285, |
| "learning_rate": 3.1780000000000004e-05, |
| "loss": 0.085, |
| "step": 1590 |
| }, |
| { |
| "grad_norm": 1.576690673828125, |
| "learning_rate": 3.198e-05, |
| "loss": 0.0963, |
| "step": 1600 |
| }, |
| { |
| "grad_norm": 1.2559643983840942, |
| "learning_rate": 3.218e-05, |
| "loss": 0.0856, |
| "step": 1610 |
| }, |
| { |
| "grad_norm": 2.0229685306549072, |
| "learning_rate": 3.238e-05, |
| "loss": 0.0857, |
| "step": 1620 |
| }, |
| { |
| "grad_norm": 1.7076901197433472, |
| "learning_rate": 3.2579999999999996e-05, |
| "loss": 0.0954, |
| "step": 1630 |
| }, |
| { |
| "grad_norm": 2.049443244934082, |
| "learning_rate": 3.278e-05, |
| "loss": 0.0937, |
| "step": 1640 |
| }, |
| { |
| "grad_norm": 1.7413936853408813, |
| "learning_rate": 3.298e-05, |
| "loss": 0.0864, |
| "step": 1650 |
| }, |
| { |
| "grad_norm": 1.6512004137039185, |
| "learning_rate": 3.318e-05, |
| "loss": 0.086, |
| "step": 1660 |
| }, |
| { |
| "grad_norm": 1.6482833623886108, |
| "learning_rate": 3.338e-05, |
| "loss": 0.0921, |
| "step": 1670 |
| }, |
| { |
| "grad_norm": 1.646140694618225, |
| "learning_rate": 3.358e-05, |
| "loss": 0.0856, |
| "step": 1680 |
| }, |
| { |
| "grad_norm": 1.54812490940094, |
| "learning_rate": 3.378e-05, |
| "loss": 0.0819, |
| "step": 1690 |
| }, |
| { |
| "grad_norm": 1.646413803100586, |
| "learning_rate": 3.398e-05, |
| "loss": 0.0991, |
| "step": 1700 |
| }, |
| { |
| "grad_norm": 1.7182942628860474, |
| "learning_rate": 3.418e-05, |
| "loss": 0.0866, |
| "step": 1710 |
| }, |
| { |
| "grad_norm": 1.6291934251785278, |
| "learning_rate": 3.438e-05, |
| "loss": 0.0891, |
| "step": 1720 |
| }, |
| { |
| "grad_norm": 1.3367891311645508, |
| "learning_rate": 3.4580000000000004e-05, |
| "loss": 0.0864, |
| "step": 1730 |
| }, |
| { |
| "grad_norm": 1.670127511024475, |
| "learning_rate": 3.478e-05, |
| "loss": 0.0826, |
| "step": 1740 |
| }, |
| { |
| "grad_norm": 1.263794183731079, |
| "learning_rate": 3.498e-05, |
| "loss": 0.0823, |
| "step": 1750 |
| }, |
| { |
| "grad_norm": 1.2835052013397217, |
| "learning_rate": 3.518e-05, |
| "loss": 0.0789, |
| "step": 1760 |
| }, |
| { |
| "grad_norm": 1.7164093255996704, |
| "learning_rate": 3.5380000000000003e-05, |
| "loss": 0.0973, |
| "step": 1770 |
| }, |
| { |
| "grad_norm": 1.360939860343933, |
| "learning_rate": 3.558e-05, |
| "loss": 0.0882, |
| "step": 1780 |
| }, |
| { |
| "grad_norm": 1.6290953159332275, |
| "learning_rate": 3.578e-05, |
| "loss": 0.083, |
| "step": 1790 |
| }, |
| { |
| "grad_norm": 1.4504064321517944, |
| "learning_rate": 3.5980000000000004e-05, |
| "loss": 0.0926, |
| "step": 1800 |
| }, |
| { |
| "grad_norm": 1.495097041130066, |
| "learning_rate": 3.618e-05, |
| "loss": 0.0841, |
| "step": 1810 |
| }, |
| { |
| "grad_norm": 1.4847289323806763, |
| "learning_rate": 3.638e-05, |
| "loss": 0.0848, |
| "step": 1820 |
| }, |
| { |
| "grad_norm": 1.649829626083374, |
| "learning_rate": 3.6580000000000006e-05, |
| "loss": 0.0812, |
| "step": 1830 |
| }, |
| { |
| "grad_norm": 1.4198445081710815, |
| "learning_rate": 3.6780000000000004e-05, |
| "loss": 0.0798, |
| "step": 1840 |
| }, |
| { |
| "grad_norm": 1.7414052486419678, |
| "learning_rate": 3.698e-05, |
| "loss": 0.0799, |
| "step": 1850 |
| }, |
| { |
| "grad_norm": 1.5118916034698486, |
| "learning_rate": 3.7180000000000007e-05, |
| "loss": 0.0794, |
| "step": 1860 |
| }, |
| { |
| "grad_norm": 1.7728333473205566, |
| "learning_rate": 3.7380000000000005e-05, |
| "loss": 0.0816, |
| "step": 1870 |
| }, |
| { |
| "grad_norm": 1.4119099378585815, |
| "learning_rate": 3.758e-05, |
| "loss": 0.0795, |
| "step": 1880 |
| }, |
| { |
| "grad_norm": 1.4366050958633423, |
| "learning_rate": 3.778000000000001e-05, |
| "loss": 0.0771, |
| "step": 1890 |
| }, |
| { |
| "grad_norm": 1.3541799783706665, |
| "learning_rate": 3.7980000000000006e-05, |
| "loss": 0.0833, |
| "step": 1900 |
| }, |
| { |
| "grad_norm": 1.5392347574234009, |
| "learning_rate": 3.818e-05, |
| "loss": 0.085, |
| "step": 1910 |
| }, |
| { |
| "grad_norm": 1.500915765762329, |
| "learning_rate": 3.838e-05, |
| "loss": 0.0752, |
| "step": 1920 |
| }, |
| { |
| "grad_norm": 1.705000400543213, |
| "learning_rate": 3.858e-05, |
| "loss": 0.0816, |
| "step": 1930 |
| }, |
| { |
| "grad_norm": 1.6263823509216309, |
| "learning_rate": 3.878e-05, |
| "loss": 0.0925, |
| "step": 1940 |
| }, |
| { |
| "grad_norm": 1.429979681968689, |
| "learning_rate": 3.898e-05, |
| "loss": 0.0817, |
| "step": 1950 |
| }, |
| { |
| "grad_norm": 1.6098904609680176, |
| "learning_rate": 3.918e-05, |
| "loss": 0.0771, |
| "step": 1960 |
| }, |
| { |
| "grad_norm": 1.1158515214920044, |
| "learning_rate": 3.938e-05, |
| "loss": 0.0806, |
| "step": 1970 |
| }, |
| { |
| "grad_norm": 1.2952687740325928, |
| "learning_rate": 3.958e-05, |
| "loss": 0.0839, |
| "step": 1980 |
| }, |
| { |
| "grad_norm": 1.4825958013534546, |
| "learning_rate": 3.978e-05, |
| "loss": 0.0764, |
| "step": 1990 |
| }, |
| { |
| "grad_norm": 1.162972331047058, |
| "learning_rate": 3.998e-05, |
| "loss": 0.078, |
| "step": 2000 |
| }, |
| { |
| "grad_norm": 1.2341431379318237, |
| "learning_rate": 4.018e-05, |
| "loss": 0.0791, |
| "step": 2010 |
| }, |
| { |
| "grad_norm": 1.4787181615829468, |
| "learning_rate": 4.038e-05, |
| "loss": 0.0804, |
| "step": 2020 |
| }, |
| { |
| "grad_norm": 1.3419722318649292, |
| "learning_rate": 4.058e-05, |
| "loss": 0.0726, |
| "step": 2030 |
| }, |
| { |
| "grad_norm": 1.2373441457748413, |
| "learning_rate": 4.078e-05, |
| "loss": 0.0811, |
| "step": 2040 |
| }, |
| { |
| "grad_norm": 1.595677137374878, |
| "learning_rate": 4.0980000000000004e-05, |
| "loss": 0.084, |
| "step": 2050 |
| }, |
| { |
| "grad_norm": 1.3520156145095825, |
| "learning_rate": 4.118e-05, |
| "loss": 0.0811, |
| "step": 2060 |
| }, |
| { |
| "grad_norm": 1.1588202714920044, |
| "learning_rate": 4.138e-05, |
| "loss": 0.081, |
| "step": 2070 |
| }, |
| { |
| "grad_norm": 1.2145284414291382, |
| "learning_rate": 4.1580000000000005e-05, |
| "loss": 0.0783, |
| "step": 2080 |
| }, |
| { |
| "grad_norm": 1.0782665014266968, |
| "learning_rate": 4.178e-05, |
| "loss": 0.0804, |
| "step": 2090 |
| }, |
| { |
| "grad_norm": 1.3053752183914185, |
| "learning_rate": 4.198e-05, |
| "loss": 0.0785, |
| "step": 2100 |
| }, |
| { |
| "grad_norm": 1.4105626344680786, |
| "learning_rate": 4.2180000000000006e-05, |
| "loss": 0.0794, |
| "step": 2110 |
| }, |
| { |
| "grad_norm": 1.27247154712677, |
| "learning_rate": 4.2380000000000004e-05, |
| "loss": 0.0741, |
| "step": 2120 |
| }, |
| { |
| "grad_norm": 1.4848867654800415, |
| "learning_rate": 4.258e-05, |
| "loss": 0.0849, |
| "step": 2130 |
| }, |
| { |
| "grad_norm": 1.5192539691925049, |
| "learning_rate": 4.278e-05, |
| "loss": 0.0819, |
| "step": 2140 |
| }, |
| { |
| "grad_norm": 1.677297592163086, |
| "learning_rate": 4.2980000000000005e-05, |
| "loss": 0.0812, |
| "step": 2150 |
| }, |
| { |
| "grad_norm": 1.2418543100357056, |
| "learning_rate": 4.318e-05, |
| "loss": 0.0671, |
| "step": 2160 |
| }, |
| { |
| "grad_norm": 1.105080008506775, |
| "learning_rate": 4.338e-05, |
| "loss": 0.0777, |
| "step": 2170 |
| }, |
| { |
| "grad_norm": 1.1512064933776855, |
| "learning_rate": 4.3580000000000006e-05, |
| "loss": 0.0784, |
| "step": 2180 |
| }, |
| { |
| "grad_norm": 1.164589524269104, |
| "learning_rate": 4.3780000000000004e-05, |
| "loss": 0.0755, |
| "step": 2190 |
| }, |
| { |
| "grad_norm": 1.8655939102172852, |
| "learning_rate": 4.398e-05, |
| "loss": 0.0698, |
| "step": 2200 |
| }, |
| { |
| "grad_norm": 1.3707654476165771, |
| "learning_rate": 4.418000000000001e-05, |
| "loss": 0.0736, |
| "step": 2210 |
| }, |
| { |
| "grad_norm": 1.2294223308563232, |
| "learning_rate": 4.438e-05, |
| "loss": 0.0721, |
| "step": 2220 |
| }, |
| { |
| "grad_norm": 1.0154931545257568, |
| "learning_rate": 4.458e-05, |
| "loss": 0.0719, |
| "step": 2230 |
| }, |
| { |
| "grad_norm": 1.0982472896575928, |
| "learning_rate": 4.478e-05, |
| "loss": 0.0764, |
| "step": 2240 |
| }, |
| { |
| "grad_norm": 1.222943902015686, |
| "learning_rate": 4.498e-05, |
| "loss": 0.0828, |
| "step": 2250 |
| }, |
| { |
| "grad_norm": 1.0855426788330078, |
| "learning_rate": 4.518e-05, |
| "loss": 0.0834, |
| "step": 2260 |
| }, |
| { |
| "grad_norm": 1.4420665502548218, |
| "learning_rate": 4.538e-05, |
| "loss": 0.0811, |
| "step": 2270 |
| }, |
| { |
| "grad_norm": 0.8978270888328552, |
| "learning_rate": 4.558e-05, |
| "loss": 0.0743, |
| "step": 2280 |
| }, |
| { |
| "grad_norm": 0.984847366809845, |
| "learning_rate": 4.578e-05, |
| "loss": 0.0685, |
| "step": 2290 |
| }, |
| { |
| "grad_norm": 1.1125538349151611, |
| "learning_rate": 4.5980000000000004e-05, |
| "loss": 0.0728, |
| "step": 2300 |
| }, |
| { |
| "grad_norm": 1.0328806638717651, |
| "learning_rate": 4.618e-05, |
| "loss": 0.0748, |
| "step": 2310 |
| }, |
| { |
| "grad_norm": 0.9082860350608826, |
| "learning_rate": 4.638e-05, |
| "loss": 0.0753, |
| "step": 2320 |
| }, |
| { |
| "grad_norm": 1.2041865587234497, |
| "learning_rate": 4.6580000000000005e-05, |
| "loss": 0.0777, |
| "step": 2330 |
| }, |
| { |
| "grad_norm": 0.8943385481834412, |
| "learning_rate": 4.678e-05, |
| "loss": 0.0757, |
| "step": 2340 |
| }, |
| { |
| "grad_norm": 1.3722305297851562, |
| "learning_rate": 4.698e-05, |
| "loss": 0.0722, |
| "step": 2350 |
| }, |
| { |
| "grad_norm": 1.38225519657135, |
| "learning_rate": 4.718e-05, |
| "loss": 0.0782, |
| "step": 2360 |
| }, |
| { |
| "grad_norm": 1.0935956239700317, |
| "learning_rate": 4.7380000000000004e-05, |
| "loss": 0.0741, |
| "step": 2370 |
| }, |
| { |
| "grad_norm": 0.9566012024879456, |
| "learning_rate": 4.758e-05, |
| "loss": 0.0781, |
| "step": 2380 |
| }, |
| { |
| "grad_norm": 1.022346019744873, |
| "learning_rate": 4.778e-05, |
| "loss": 0.0712, |
| "step": 2390 |
| }, |
| { |
| "grad_norm": 0.8095312714576721, |
| "learning_rate": 4.7980000000000005e-05, |
| "loss": 0.0734, |
| "step": 2400 |
| }, |
| { |
| "grad_norm": 0.9501664638519287, |
| "learning_rate": 4.818e-05, |
| "loss": 0.069, |
| "step": 2410 |
| }, |
| { |
| "grad_norm": 1.172012209892273, |
| "learning_rate": 4.838e-05, |
| "loss": 0.0747, |
| "step": 2420 |
| }, |
| { |
| "grad_norm": 1.157981038093567, |
| "learning_rate": 4.8580000000000006e-05, |
| "loss": 0.0737, |
| "step": 2430 |
| }, |
| { |
| "grad_norm": 1.4430627822875977, |
| "learning_rate": 4.8780000000000004e-05, |
| "loss": 0.0732, |
| "step": 2440 |
| }, |
| { |
| "grad_norm": 1.2398930788040161, |
| "learning_rate": 4.898e-05, |
| "loss": 0.0734, |
| "step": 2450 |
| }, |
| { |
| "grad_norm": 1.2574845552444458, |
| "learning_rate": 4.918000000000001e-05, |
| "loss": 0.0814, |
| "step": 2460 |
| }, |
| { |
| "grad_norm": 1.1682604551315308, |
| "learning_rate": 4.9380000000000005e-05, |
| "loss": 0.07, |
| "step": 2470 |
| }, |
| { |
| "grad_norm": 1.03831946849823, |
| "learning_rate": 4.958e-05, |
| "loss": 0.076, |
| "step": 2480 |
| }, |
| { |
| "grad_norm": 1.021917462348938, |
| "learning_rate": 4.978e-05, |
| "loss": 0.071, |
| "step": 2490 |
| }, |
| { |
| "grad_norm": 0.910327136516571, |
| "learning_rate": 4.9980000000000006e-05, |
| "loss": 0.0737, |
| "step": 2500 |
| }, |
| { |
| "grad_norm": 0.8853943943977356, |
| "learning_rate": 5.0180000000000004e-05, |
| "loss": 0.0776, |
| "step": 2510 |
| }, |
| { |
| "grad_norm": 0.9305204749107361, |
| "learning_rate": 5.038e-05, |
| "loss": 0.0702, |
| "step": 2520 |
| }, |
| { |
| "grad_norm": 1.1481506824493408, |
| "learning_rate": 5.058000000000001e-05, |
| "loss": 0.0659, |
| "step": 2530 |
| }, |
| { |
| "grad_norm": 0.8893597722053528, |
| "learning_rate": 5.0780000000000005e-05, |
| "loss": 0.0664, |
| "step": 2540 |
| }, |
| { |
| "grad_norm": 1.3460512161254883, |
| "learning_rate": 5.098e-05, |
| "loss": 0.0759, |
| "step": 2550 |
| }, |
| { |
| "grad_norm": 1.0590441226959229, |
| "learning_rate": 5.118000000000001e-05, |
| "loss": 0.0679, |
| "step": 2560 |
| }, |
| { |
| "grad_norm": 0.8986999988555908, |
| "learning_rate": 5.1380000000000006e-05, |
| "loss": 0.0676, |
| "step": 2570 |
| }, |
| { |
| "grad_norm": 1.1072858572006226, |
| "learning_rate": 5.1580000000000004e-05, |
| "loss": 0.0754, |
| "step": 2580 |
| }, |
| { |
| "grad_norm": 1.118714451789856, |
| "learning_rate": 5.178000000000001e-05, |
| "loss": 0.0677, |
| "step": 2590 |
| }, |
| { |
| "grad_norm": 1.1817659139633179, |
| "learning_rate": 5.198000000000001e-05, |
| "loss": 0.0723, |
| "step": 2600 |
| }, |
| { |
| "grad_norm": 1.1865016222000122, |
| "learning_rate": 5.2180000000000005e-05, |
| "loss": 0.0714, |
| "step": 2610 |
| }, |
| { |
| "grad_norm": 1.191595435142517, |
| "learning_rate": 5.238000000000001e-05, |
| "loss": 0.0663, |
| "step": 2620 |
| }, |
| { |
| "grad_norm": 0.8069055676460266, |
| "learning_rate": 5.258000000000001e-05, |
| "loss": 0.0739, |
| "step": 2630 |
| }, |
| { |
| "grad_norm": 1.0193506479263306, |
| "learning_rate": 5.2780000000000006e-05, |
| "loss": 0.0694, |
| "step": 2640 |
| }, |
| { |
| "grad_norm": 0.9555947184562683, |
| "learning_rate": 5.2980000000000004e-05, |
| "loss": 0.0707, |
| "step": 2650 |
| }, |
| { |
| "grad_norm": 0.7661393284797668, |
| "learning_rate": 5.318000000000001e-05, |
| "loss": 0.0676, |
| "step": 2660 |
| }, |
| { |
| "grad_norm": 0.8302662968635559, |
| "learning_rate": 5.338000000000001e-05, |
| "loss": 0.0678, |
| "step": 2670 |
| }, |
| { |
| "grad_norm": 0.959805428981781, |
| "learning_rate": 5.3580000000000005e-05, |
| "loss": 0.0629, |
| "step": 2680 |
| }, |
| { |
| "grad_norm": 0.9672425985336304, |
| "learning_rate": 5.378e-05, |
| "loss": 0.0723, |
| "step": 2690 |
| }, |
| { |
| "grad_norm": 0.8574071526527405, |
| "learning_rate": 5.3979999999999995e-05, |
| "loss": 0.0697, |
| "step": 2700 |
| }, |
| { |
| "grad_norm": 1.0239274501800537, |
| "learning_rate": 5.418e-05, |
| "loss": 0.0719, |
| "step": 2710 |
| }, |
| { |
| "grad_norm": 1.321614384651184, |
| "learning_rate": 5.438e-05, |
| "loss": 0.0625, |
| "step": 2720 |
| }, |
| { |
| "grad_norm": 1.24257230758667, |
| "learning_rate": 5.4579999999999996e-05, |
| "loss": 0.0726, |
| "step": 2730 |
| }, |
| { |
| "grad_norm": 0.8882970809936523, |
| "learning_rate": 5.478e-05, |
| "loss": 0.0708, |
| "step": 2740 |
| }, |
| { |
| "grad_norm": 0.8559255003929138, |
| "learning_rate": 5.498e-05, |
| "loss": 0.0674, |
| "step": 2750 |
| }, |
| { |
| "grad_norm": 0.9045839309692383, |
| "learning_rate": 5.518e-05, |
| "loss": 0.0704, |
| "step": 2760 |
| }, |
| { |
| "grad_norm": 0.614384114742279, |
| "learning_rate": 5.538e-05, |
| "loss": 0.0623, |
| "step": 2770 |
| }, |
| { |
| "grad_norm": 0.795306384563446, |
| "learning_rate": 5.558e-05, |
| "loss": 0.0648, |
| "step": 2780 |
| }, |
| { |
| "grad_norm": 0.886047899723053, |
| "learning_rate": 5.578e-05, |
| "loss": 0.0651, |
| "step": 2790 |
| }, |
| { |
| "grad_norm": 0.6749942302703857, |
| "learning_rate": 5.5979999999999996e-05, |
| "loss": 0.0642, |
| "step": 2800 |
| }, |
| { |
| "grad_norm": 0.8019365072250366, |
| "learning_rate": 5.618e-05, |
| "loss": 0.0609, |
| "step": 2810 |
| }, |
| { |
| "grad_norm": 0.795505702495575, |
| "learning_rate": 5.638e-05, |
| "loss": 0.0545, |
| "step": 2820 |
| }, |
| { |
| "grad_norm": 0.960997998714447, |
| "learning_rate": 5.658e-05, |
| "loss": 0.0671, |
| "step": 2830 |
| }, |
| { |
| "grad_norm": 0.8016205430030823, |
| "learning_rate": 5.678e-05, |
| "loss": 0.0673, |
| "step": 2840 |
| }, |
| { |
| "grad_norm": 0.9191439747810364, |
| "learning_rate": 5.698e-05, |
| "loss": 0.0606, |
| "step": 2850 |
| }, |
| { |
| "grad_norm": 0.9075175523757935, |
| "learning_rate": 5.718e-05, |
| "loss": 0.0609, |
| "step": 2860 |
| }, |
| { |
| "grad_norm": 0.8477808833122253, |
| "learning_rate": 5.738e-05, |
| "loss": 0.0646, |
| "step": 2870 |
| }, |
| { |
| "grad_norm": 0.8051248788833618, |
| "learning_rate": 5.758e-05, |
| "loss": 0.0603, |
| "step": 2880 |
| }, |
| { |
| "grad_norm": 0.7463070750236511, |
| "learning_rate": 5.778e-05, |
| "loss": 0.0606, |
| "step": 2890 |
| }, |
| { |
| "grad_norm": 1.0391595363616943, |
| "learning_rate": 5.7980000000000004e-05, |
| "loss": 0.0625, |
| "step": 2900 |
| }, |
| { |
| "grad_norm": 1.04859459400177, |
| "learning_rate": 5.818e-05, |
| "loss": 0.0623, |
| "step": 2910 |
| }, |
| { |
| "grad_norm": 1.0546082258224487, |
| "learning_rate": 5.838e-05, |
| "loss": 0.0605, |
| "step": 2920 |
| }, |
| { |
| "grad_norm": 0.848749041557312, |
| "learning_rate": 5.858e-05, |
| "loss": 0.0707, |
| "step": 2930 |
| }, |
| { |
| "grad_norm": 0.9367526173591614, |
| "learning_rate": 5.878e-05, |
| "loss": 0.0692, |
| "step": 2940 |
| }, |
| { |
| "grad_norm": 0.7209922671318054, |
| "learning_rate": 5.898e-05, |
| "loss": 0.0655, |
| "step": 2950 |
| }, |
| { |
| "grad_norm": 0.8936607837677002, |
| "learning_rate": 5.918e-05, |
| "loss": 0.0668, |
| "step": 2960 |
| }, |
| { |
| "grad_norm": 0.8779922723770142, |
| "learning_rate": 5.9380000000000004e-05, |
| "loss": 0.0628, |
| "step": 2970 |
| }, |
| { |
| "grad_norm": 0.7889860272407532, |
| "learning_rate": 5.958e-05, |
| "loss": 0.0606, |
| "step": 2980 |
| }, |
| { |
| "grad_norm": 0.9793576598167419, |
| "learning_rate": 5.978e-05, |
| "loss": 0.0605, |
| "step": 2990 |
| }, |
| { |
| "grad_norm": 0.8431522250175476, |
| "learning_rate": 5.9980000000000005e-05, |
| "loss": 0.063, |
| "step": 3000 |
| }, |
| { |
| "grad_norm": 0.776141881942749, |
| "learning_rate": 6.018e-05, |
| "loss": 0.0714, |
| "step": 3010 |
| }, |
| { |
| "grad_norm": 1.036238193511963, |
| "learning_rate": 6.038e-05, |
| "loss": 0.0658, |
| "step": 3020 |
| }, |
| { |
| "grad_norm": 0.7154647707939148, |
| "learning_rate": 6.0580000000000006e-05, |
| "loss": 0.0628, |
| "step": 3030 |
| }, |
| { |
| "grad_norm": 0.7451269626617432, |
| "learning_rate": 6.0780000000000004e-05, |
| "loss": 0.0603, |
| "step": 3040 |
| }, |
| { |
| "grad_norm": 0.6692603826522827, |
| "learning_rate": 6.098e-05, |
| "loss": 0.0624, |
| "step": 3050 |
| }, |
| { |
| "grad_norm": 1.0105844736099243, |
| "learning_rate": 6.118000000000001e-05, |
| "loss": 0.0643, |
| "step": 3060 |
| }, |
| { |
| "grad_norm": 0.9448408484458923, |
| "learning_rate": 6.138e-05, |
| "loss": 0.0654, |
| "step": 3070 |
| }, |
| { |
| "grad_norm": 0.8452638983726501, |
| "learning_rate": 6.158e-05, |
| "loss": 0.0622, |
| "step": 3080 |
| }, |
| { |
| "grad_norm": 0.9193839430809021, |
| "learning_rate": 6.178000000000001e-05, |
| "loss": 0.0613, |
| "step": 3090 |
| }, |
| { |
| "grad_norm": 0.92941814661026, |
| "learning_rate": 6.198e-05, |
| "loss": 0.0639, |
| "step": 3100 |
| }, |
| { |
| "grad_norm": 0.8201425075531006, |
| "learning_rate": 6.218e-05, |
| "loss": 0.0711, |
| "step": 3110 |
| }, |
| { |
| "grad_norm": 0.7324569225311279, |
| "learning_rate": 6.238000000000001e-05, |
| "loss": 0.0567, |
| "step": 3120 |
| }, |
| { |
| "grad_norm": 0.8137140274047852, |
| "learning_rate": 6.258e-05, |
| "loss": 0.0673, |
| "step": 3130 |
| }, |
| { |
| "grad_norm": 0.8640687465667725, |
| "learning_rate": 6.278e-05, |
| "loss": 0.0591, |
| "step": 3140 |
| }, |
| { |
| "grad_norm": 0.9455040097236633, |
| "learning_rate": 6.298000000000001e-05, |
| "loss": 0.0665, |
| "step": 3150 |
| }, |
| { |
| "grad_norm": 0.84979248046875, |
| "learning_rate": 6.318e-05, |
| "loss": 0.0565, |
| "step": 3160 |
| }, |
| { |
| "grad_norm": 0.6208917498588562, |
| "learning_rate": 6.338e-05, |
| "loss": 0.0613, |
| "step": 3170 |
| }, |
| { |
| "grad_norm": 0.6841639280319214, |
| "learning_rate": 6.358000000000001e-05, |
| "loss": 0.0612, |
| "step": 3180 |
| }, |
| { |
| "grad_norm": 0.7783037424087524, |
| "learning_rate": 6.378e-05, |
| "loss": 0.0635, |
| "step": 3190 |
| }, |
| { |
| "grad_norm": 0.6852115392684937, |
| "learning_rate": 6.398000000000001e-05, |
| "loss": 0.0567, |
| "step": 3200 |
| }, |
| { |
| "grad_norm": 0.7794846296310425, |
| "learning_rate": 6.418000000000001e-05, |
| "loss": 0.0556, |
| "step": 3210 |
| }, |
| { |
| "grad_norm": 0.9447958469390869, |
| "learning_rate": 6.438e-05, |
| "loss": 0.0619, |
| "step": 3220 |
| }, |
| { |
| "grad_norm": 0.6817174553871155, |
| "learning_rate": 6.458000000000001e-05, |
| "loss": 0.0694, |
| "step": 3230 |
| }, |
| { |
| "grad_norm": 0.8658831119537354, |
| "learning_rate": 6.478000000000001e-05, |
| "loss": 0.0622, |
| "step": 3240 |
| }, |
| { |
| "grad_norm": 0.741489052772522, |
| "learning_rate": 6.498e-05, |
| "loss": 0.0619, |
| "step": 3250 |
| }, |
| { |
| "grad_norm": 0.7767813205718994, |
| "learning_rate": 6.518000000000001e-05, |
| "loss": 0.0618, |
| "step": 3260 |
| }, |
| { |
| "grad_norm": 0.8194187879562378, |
| "learning_rate": 6.538000000000001e-05, |
| "loss": 0.0651, |
| "step": 3270 |
| }, |
| { |
| "grad_norm": 0.6319522857666016, |
| "learning_rate": 6.558e-05, |
| "loss": 0.0569, |
| "step": 3280 |
| }, |
| { |
| "grad_norm": 0.7433358430862427, |
| "learning_rate": 6.578000000000001e-05, |
| "loss": 0.0605, |
| "step": 3290 |
| }, |
| { |
| "grad_norm": 0.7706665992736816, |
| "learning_rate": 6.598e-05, |
| "loss": 0.0611, |
| "step": 3300 |
| }, |
| { |
| "grad_norm": 0.6321914792060852, |
| "learning_rate": 6.618e-05, |
| "loss": 0.0599, |
| "step": 3310 |
| }, |
| { |
| "grad_norm": 0.6240274310112, |
| "learning_rate": 6.638e-05, |
| "loss": 0.0619, |
| "step": 3320 |
| }, |
| { |
| "grad_norm": 0.7866604328155518, |
| "learning_rate": 6.658e-05, |
| "loss": 0.0589, |
| "step": 3330 |
| }, |
| { |
| "grad_norm": 0.8589717745780945, |
| "learning_rate": 6.678e-05, |
| "loss": 0.0609, |
| "step": 3340 |
| }, |
| { |
| "grad_norm": 0.7344950437545776, |
| "learning_rate": 6.698e-05, |
| "loss": 0.0627, |
| "step": 3350 |
| }, |
| { |
| "grad_norm": 0.879833996295929, |
| "learning_rate": 6.718e-05, |
| "loss": 0.0586, |
| "step": 3360 |
| }, |
| { |
| "grad_norm": 0.5960578918457031, |
| "learning_rate": 6.738e-05, |
| "loss": 0.0574, |
| "step": 3370 |
| }, |
| { |
| "grad_norm": 0.8435735702514648, |
| "learning_rate": 6.758e-05, |
| "loss": 0.0576, |
| "step": 3380 |
| }, |
| { |
| "grad_norm": 0.6635606288909912, |
| "learning_rate": 6.778e-05, |
| "loss": 0.0615, |
| "step": 3390 |
| }, |
| { |
| "grad_norm": 0.656762421131134, |
| "learning_rate": 6.798e-05, |
| "loss": 0.0621, |
| "step": 3400 |
| }, |
| { |
| "grad_norm": 0.7073556780815125, |
| "learning_rate": 6.818e-05, |
| "loss": 0.0673, |
| "step": 3410 |
| }, |
| { |
| "grad_norm": 0.8063334226608276, |
| "learning_rate": 6.838e-05, |
| "loss": 0.0591, |
| "step": 3420 |
| }, |
| { |
| "grad_norm": 0.6699609160423279, |
| "learning_rate": 6.858e-05, |
| "loss": 0.0542, |
| "step": 3430 |
| }, |
| { |
| "grad_norm": 0.9240128993988037, |
| "learning_rate": 6.878e-05, |
| "loss": 0.0627, |
| "step": 3440 |
| }, |
| { |
| "grad_norm": 0.6918993592262268, |
| "learning_rate": 6.898e-05, |
| "loss": 0.051, |
| "step": 3450 |
| }, |
| { |
| "grad_norm": 0.5891866683959961, |
| "learning_rate": 6.918e-05, |
| "loss": 0.0568, |
| "step": 3460 |
| }, |
| { |
| "grad_norm": 0.8400129079818726, |
| "learning_rate": 6.938e-05, |
| "loss": 0.065, |
| "step": 3470 |
| }, |
| { |
| "grad_norm": 0.7416741251945496, |
| "learning_rate": 6.958e-05, |
| "loss": 0.0603, |
| "step": 3480 |
| }, |
| { |
| "grad_norm": 0.749572217464447, |
| "learning_rate": 6.978e-05, |
| "loss": 0.0595, |
| "step": 3490 |
| }, |
| { |
| "grad_norm": 0.8416393399238586, |
| "learning_rate": 6.998e-05, |
| "loss": 0.0554, |
| "step": 3500 |
| }, |
| { |
| "grad_norm": 0.8583645224571228, |
| "learning_rate": 7.018e-05, |
| "loss": 0.0632, |
| "step": 3510 |
| }, |
| { |
| "grad_norm": 0.5239070653915405, |
| "learning_rate": 7.038e-05, |
| "loss": 0.0579, |
| "step": 3520 |
| }, |
| { |
| "grad_norm": 0.6293121576309204, |
| "learning_rate": 7.058e-05, |
| "loss": 0.058, |
| "step": 3530 |
| }, |
| { |
| "grad_norm": 0.7208630442619324, |
| "learning_rate": 7.078e-05, |
| "loss": 0.0582, |
| "step": 3540 |
| }, |
| { |
| "grad_norm": 0.72175532579422, |
| "learning_rate": 7.098e-05, |
| "loss": 0.0573, |
| "step": 3550 |
| }, |
| { |
| "grad_norm": 0.8187160491943359, |
| "learning_rate": 7.118e-05, |
| "loss": 0.0594, |
| "step": 3560 |
| }, |
| { |
| "grad_norm": 0.7715169191360474, |
| "learning_rate": 7.138e-05, |
| "loss": 0.0611, |
| "step": 3570 |
| }, |
| { |
| "grad_norm": 0.5853291153907776, |
| "learning_rate": 7.158e-05, |
| "loss": 0.0615, |
| "step": 3580 |
| }, |
| { |
| "grad_norm": 0.6136788725852966, |
| "learning_rate": 7.178000000000001e-05, |
| "loss": 0.0528, |
| "step": 3590 |
| }, |
| { |
| "grad_norm": 0.74713134765625, |
| "learning_rate": 7.198e-05, |
| "loss": 0.0559, |
| "step": 3600 |
| }, |
| { |
| "grad_norm": 0.7303146123886108, |
| "learning_rate": 7.218e-05, |
| "loss": 0.0566, |
| "step": 3610 |
| }, |
| { |
| "grad_norm": 0.7068881988525391, |
| "learning_rate": 7.238000000000001e-05, |
| "loss": 0.0553, |
| "step": 3620 |
| }, |
| { |
| "grad_norm": 0.6625126600265503, |
| "learning_rate": 7.258e-05, |
| "loss": 0.056, |
| "step": 3630 |
| }, |
| { |
| "grad_norm": 0.7499129176139832, |
| "learning_rate": 7.278e-05, |
| "loss": 0.0511, |
| "step": 3640 |
| }, |
| { |
| "grad_norm": 0.8373027443885803, |
| "learning_rate": 7.298000000000001e-05, |
| "loss": 0.0519, |
| "step": 3650 |
| }, |
| { |
| "grad_norm": 0.6602651476860046, |
| "learning_rate": 7.318e-05, |
| "loss": 0.0558, |
| "step": 3660 |
| }, |
| { |
| "grad_norm": 0.7000454068183899, |
| "learning_rate": 7.338e-05, |
| "loss": 0.0535, |
| "step": 3670 |
| }, |
| { |
| "grad_norm": 0.6676427721977234, |
| "learning_rate": 7.358000000000001e-05, |
| "loss": 0.0499, |
| "step": 3680 |
| }, |
| { |
| "grad_norm": 0.6426597833633423, |
| "learning_rate": 7.378e-05, |
| "loss": 0.0617, |
| "step": 3690 |
| }, |
| { |
| "grad_norm": 0.830104649066925, |
| "learning_rate": 7.398e-05, |
| "loss": 0.0516, |
| "step": 3700 |
| }, |
| { |
| "grad_norm": 0.6411163210868835, |
| "learning_rate": 7.418000000000001e-05, |
| "loss": 0.0534, |
| "step": 3710 |
| }, |
| { |
| "grad_norm": 0.44452545046806335, |
| "learning_rate": 7.438e-05, |
| "loss": 0.0554, |
| "step": 3720 |
| }, |
| { |
| "grad_norm": 0.698541522026062, |
| "learning_rate": 7.458000000000001e-05, |
| "loss": 0.057, |
| "step": 3730 |
| }, |
| { |
| "grad_norm": 0.8207803964614868, |
| "learning_rate": 7.478e-05, |
| "loss": 0.0584, |
| "step": 3740 |
| }, |
| { |
| "grad_norm": 0.6441124081611633, |
| "learning_rate": 7.498e-05, |
| "loss": 0.0569, |
| "step": 3750 |
| }, |
| { |
| "grad_norm": 0.5994744896888733, |
| "learning_rate": 7.518000000000001e-05, |
| "loss": 0.0577, |
| "step": 3760 |
| }, |
| { |
| "grad_norm": 0.5829905867576599, |
| "learning_rate": 7.538e-05, |
| "loss": 0.0492, |
| "step": 3770 |
| }, |
| { |
| "grad_norm": 0.6664201021194458, |
| "learning_rate": 7.558e-05, |
| "loss": 0.0558, |
| "step": 3780 |
| }, |
| { |
| "grad_norm": 0.6259579062461853, |
| "learning_rate": 7.578000000000001e-05, |
| "loss": 0.0538, |
| "step": 3790 |
| }, |
| { |
| "grad_norm": 0.6106650233268738, |
| "learning_rate": 7.598e-05, |
| "loss": 0.0588, |
| "step": 3800 |
| }, |
| { |
| "grad_norm": 0.7316349744796753, |
| "learning_rate": 7.618e-05, |
| "loss": 0.0632, |
| "step": 3810 |
| }, |
| { |
| "grad_norm": 0.6063339114189148, |
| "learning_rate": 7.638000000000001e-05, |
| "loss": 0.0657, |
| "step": 3820 |
| }, |
| { |
| "grad_norm": 0.537717878818512, |
| "learning_rate": 7.658e-05, |
| "loss": 0.0518, |
| "step": 3830 |
| }, |
| { |
| "grad_norm": 0.717360258102417, |
| "learning_rate": 7.678000000000001e-05, |
| "loss": 0.0541, |
| "step": 3840 |
| }, |
| { |
| "grad_norm": 0.66494220495224, |
| "learning_rate": 7.698000000000001e-05, |
| "loss": 0.0578, |
| "step": 3850 |
| }, |
| { |
| "grad_norm": 0.5406110882759094, |
| "learning_rate": 7.718e-05, |
| "loss": 0.0558, |
| "step": 3860 |
| }, |
| { |
| "grad_norm": 0.529040515422821, |
| "learning_rate": 7.738000000000001e-05, |
| "loss": 0.0505, |
| "step": 3870 |
| }, |
| { |
| "grad_norm": 0.8478358387947083, |
| "learning_rate": 7.758000000000001e-05, |
| "loss": 0.0541, |
| "step": 3880 |
| }, |
| { |
| "grad_norm": 0.731211245059967, |
| "learning_rate": 7.778e-05, |
| "loss": 0.0602, |
| "step": 3890 |
| }, |
| { |
| "grad_norm": 0.5998793244361877, |
| "learning_rate": 7.798000000000001e-05, |
| "loss": 0.0605, |
| "step": 3900 |
| }, |
| { |
| "grad_norm": 0.6982648968696594, |
| "learning_rate": 7.818000000000001e-05, |
| "loss": 0.0567, |
| "step": 3910 |
| }, |
| { |
| "grad_norm": 0.8007875084877014, |
| "learning_rate": 7.838e-05, |
| "loss": 0.0661, |
| "step": 3920 |
| }, |
| { |
| "grad_norm": 0.5826359391212463, |
| "learning_rate": 7.858000000000001e-05, |
| "loss": 0.0528, |
| "step": 3930 |
| }, |
| { |
| "grad_norm": 0.6248698830604553, |
| "learning_rate": 7.878e-05, |
| "loss": 0.0547, |
| "step": 3940 |
| }, |
| { |
| "grad_norm": 0.659730851650238, |
| "learning_rate": 7.897999999999999e-05, |
| "loss": 0.0554, |
| "step": 3950 |
| }, |
| { |
| "grad_norm": 0.719498336315155, |
| "learning_rate": 7.918e-05, |
| "loss": 0.0524, |
| "step": 3960 |
| }, |
| { |
| "grad_norm": 0.6503873467445374, |
| "learning_rate": 7.938e-05, |
| "loss": 0.0508, |
| "step": 3970 |
| }, |
| { |
| "grad_norm": 0.47922801971435547, |
| "learning_rate": 7.958e-05, |
| "loss": 0.0496, |
| "step": 3980 |
| }, |
| { |
| "grad_norm": 0.44167473912239075, |
| "learning_rate": 7.978e-05, |
| "loss": 0.0525, |
| "step": 3990 |
| }, |
| { |
| "grad_norm": 0.5266247391700745, |
| "learning_rate": 7.998e-05, |
| "loss": 0.0565, |
| "step": 4000 |
| }, |
| { |
| "grad_norm": 0.45653995871543884, |
| "learning_rate": 8.018e-05, |
| "loss": 0.0508, |
| "step": 4010 |
| }, |
| { |
| "grad_norm": 0.48484233021736145, |
| "learning_rate": 8.038e-05, |
| "loss": 0.0498, |
| "step": 4020 |
| }, |
| { |
| "grad_norm": 0.6050201058387756, |
| "learning_rate": 8.058e-05, |
| "loss": 0.0563, |
| "step": 4030 |
| }, |
| { |
| "grad_norm": 0.4977985918521881, |
| "learning_rate": 8.078e-05, |
| "loss": 0.0504, |
| "step": 4040 |
| }, |
| { |
| "grad_norm": 0.5700753927230835, |
| "learning_rate": 8.098e-05, |
| "loss": 0.0541, |
| "step": 4050 |
| }, |
| { |
| "grad_norm": 0.6012455224990845, |
| "learning_rate": 8.118e-05, |
| "loss": 0.0541, |
| "step": 4060 |
| }, |
| { |
| "grad_norm": 0.642540693283081, |
| "learning_rate": 8.138e-05, |
| "loss": 0.0502, |
| "step": 4070 |
| }, |
| { |
| "grad_norm": 0.5907772779464722, |
| "learning_rate": 8.158e-05, |
| "loss": 0.0543, |
| "step": 4080 |
| }, |
| { |
| "grad_norm": 0.6549518704414368, |
| "learning_rate": 8.178e-05, |
| "loss": 0.0532, |
| "step": 4090 |
| }, |
| { |
| "grad_norm": 0.58327716588974, |
| "learning_rate": 8.198e-05, |
| "loss": 0.0501, |
| "step": 4100 |
| }, |
| { |
| "grad_norm": 0.5398632287979126, |
| "learning_rate": 8.218e-05, |
| "loss": 0.0569, |
| "step": 4110 |
| }, |
| { |
| "grad_norm": 0.3961046040058136, |
| "learning_rate": 8.238000000000001e-05, |
| "loss": 0.057, |
| "step": 4120 |
| }, |
| { |
| "grad_norm": 0.6423419117927551, |
| "learning_rate": 8.258e-05, |
| "loss": 0.06, |
| "step": 4130 |
| }, |
| { |
| "grad_norm": 0.4764283299446106, |
| "learning_rate": 8.278e-05, |
| "loss": 0.0525, |
| "step": 4140 |
| }, |
| { |
| "grad_norm": 0.6112470030784607, |
| "learning_rate": 8.298000000000001e-05, |
| "loss": 0.0551, |
| "step": 4150 |
| }, |
| { |
| "grad_norm": 0.6958449482917786, |
| "learning_rate": 8.318e-05, |
| "loss": 0.0519, |
| "step": 4160 |
| }, |
| { |
| "grad_norm": 0.5838958024978638, |
| "learning_rate": 8.338e-05, |
| "loss": 0.0556, |
| "step": 4170 |
| }, |
| { |
| "grad_norm": 0.5253514647483826, |
| "learning_rate": 8.358e-05, |
| "loss": 0.053, |
| "step": 4180 |
| }, |
| { |
| "grad_norm": 0.6679093837738037, |
| "learning_rate": 8.378e-05, |
| "loss": 0.0568, |
| "step": 4190 |
| }, |
| { |
| "grad_norm": 0.6899139285087585, |
| "learning_rate": 8.398e-05, |
| "loss": 0.0499, |
| "step": 4200 |
| }, |
| { |
| "grad_norm": 0.5431265234947205, |
| "learning_rate": 8.418e-05, |
| "loss": 0.0517, |
| "step": 4210 |
| }, |
| { |
| "grad_norm": 0.7031345963478088, |
| "learning_rate": 8.438e-05, |
| "loss": 0.0527, |
| "step": 4220 |
| }, |
| { |
| "grad_norm": 0.5682622790336609, |
| "learning_rate": 8.458e-05, |
| "loss": 0.0503, |
| "step": 4230 |
| }, |
| { |
| "grad_norm": 0.6090052127838135, |
| "learning_rate": 8.478e-05, |
| "loss": 0.0524, |
| "step": 4240 |
| }, |
| { |
| "grad_norm": 0.5961223840713501, |
| "learning_rate": 8.498e-05, |
| "loss": 0.0521, |
| "step": 4250 |
| }, |
| { |
| "grad_norm": 0.6355141401290894, |
| "learning_rate": 8.518000000000001e-05, |
| "loss": 0.0546, |
| "step": 4260 |
| }, |
| { |
| "grad_norm": 0.6242051124572754, |
| "learning_rate": 8.538e-05, |
| "loss": 0.0532, |
| "step": 4270 |
| }, |
| { |
| "grad_norm": 0.6339730024337769, |
| "learning_rate": 8.558e-05, |
| "loss": 0.0557, |
| "step": 4280 |
| }, |
| { |
| "grad_norm": 0.586755096912384, |
| "learning_rate": 8.578000000000001e-05, |
| "loss": 0.0502, |
| "step": 4290 |
| }, |
| { |
| "grad_norm": 0.5713655352592468, |
| "learning_rate": 8.598e-05, |
| "loss": 0.0472, |
| "step": 4300 |
| }, |
| { |
| "grad_norm": 0.8385711312294006, |
| "learning_rate": 8.618e-05, |
| "loss": 0.0515, |
| "step": 4310 |
| }, |
| { |
| "grad_norm": 0.5189787149429321, |
| "learning_rate": 8.638000000000001e-05, |
| "loss": 0.0553, |
| "step": 4320 |
| }, |
| { |
| "grad_norm": 0.5025926232337952, |
| "learning_rate": 8.658e-05, |
| "loss": 0.0519, |
| "step": 4330 |
| }, |
| { |
| "grad_norm": 0.6731868386268616, |
| "learning_rate": 8.678e-05, |
| "loss": 0.0538, |
| "step": 4340 |
| }, |
| { |
| "grad_norm": 0.48949819803237915, |
| "learning_rate": 8.698000000000001e-05, |
| "loss": 0.0584, |
| "step": 4350 |
| }, |
| { |
| "grad_norm": 0.5631033778190613, |
| "learning_rate": 8.718e-05, |
| "loss": 0.0483, |
| "step": 4360 |
| }, |
| { |
| "grad_norm": 0.5556790232658386, |
| "learning_rate": 8.738000000000001e-05, |
| "loss": 0.0541, |
| "step": 4370 |
| }, |
| { |
| "grad_norm": 0.48878926038742065, |
| "learning_rate": 8.758000000000001e-05, |
| "loss": 0.0544, |
| "step": 4380 |
| }, |
| { |
| "grad_norm": 0.5779238939285278, |
| "learning_rate": 8.778e-05, |
| "loss": 0.0595, |
| "step": 4390 |
| }, |
| { |
| "grad_norm": 0.6489167809486389, |
| "learning_rate": 8.798000000000001e-05, |
| "loss": 0.0514, |
| "step": 4400 |
| }, |
| { |
| "grad_norm": 0.5112762451171875, |
| "learning_rate": 8.818000000000001e-05, |
| "loss": 0.0519, |
| "step": 4410 |
| }, |
| { |
| "grad_norm": 0.4689539968967438, |
| "learning_rate": 8.838e-05, |
| "loss": 0.0521, |
| "step": 4420 |
| }, |
| { |
| "grad_norm": 0.681573212146759, |
| "learning_rate": 8.858000000000001e-05, |
| "loss": 0.0501, |
| "step": 4430 |
| }, |
| { |
| "grad_norm": 0.606069803237915, |
| "learning_rate": 8.878000000000001e-05, |
| "loss": 0.0523, |
| "step": 4440 |
| }, |
| { |
| "grad_norm": 0.5324500799179077, |
| "learning_rate": 8.898e-05, |
| "loss": 0.0527, |
| "step": 4450 |
| }, |
| { |
| "grad_norm": 0.6580777764320374, |
| "learning_rate": 8.918000000000001e-05, |
| "loss": 0.0505, |
| "step": 4460 |
| }, |
| { |
| "grad_norm": 0.527855396270752, |
| "learning_rate": 8.938e-05, |
| "loss": 0.0473, |
| "step": 4470 |
| }, |
| { |
| "grad_norm": 0.5778601765632629, |
| "learning_rate": 8.958e-05, |
| "loss": 0.0532, |
| "step": 4480 |
| }, |
| { |
| "grad_norm": 0.5860138535499573, |
| "learning_rate": 8.978000000000001e-05, |
| "loss": 0.058, |
| "step": 4490 |
| }, |
| { |
| "grad_norm": 0.4269658029079437, |
| "learning_rate": 8.998e-05, |
| "loss": 0.0491, |
| "step": 4500 |
| }, |
| { |
| "grad_norm": 0.5216055512428284, |
| "learning_rate": 9.018000000000001e-05, |
| "loss": 0.0465, |
| "step": 4510 |
| }, |
| { |
| "grad_norm": 0.4757327735424042, |
| "learning_rate": 9.038000000000001e-05, |
| "loss": 0.0482, |
| "step": 4520 |
| }, |
| { |
| "grad_norm": 0.6530254483222961, |
| "learning_rate": 9.058e-05, |
| "loss": 0.0531, |
| "step": 4530 |
| }, |
| { |
| "grad_norm": 0.6443282961845398, |
| "learning_rate": 9.078000000000001e-05, |
| "loss": 0.0527, |
| "step": 4540 |
| }, |
| { |
| "grad_norm": 0.3884166479110718, |
| "learning_rate": 9.098000000000001e-05, |
| "loss": 0.0453, |
| "step": 4550 |
| }, |
| { |
| "grad_norm": 0.4695942997932434, |
| "learning_rate": 9.118e-05, |
| "loss": 0.0469, |
| "step": 4560 |
| }, |
| { |
| "grad_norm": 0.515466034412384, |
| "learning_rate": 9.138e-05, |
| "loss": 0.0475, |
| "step": 4570 |
| }, |
| { |
| "grad_norm": 0.6207025647163391, |
| "learning_rate": 9.158e-05, |
| "loss": 0.0577, |
| "step": 4580 |
| }, |
| { |
| "grad_norm": 0.5003461837768555, |
| "learning_rate": 9.178e-05, |
| "loss": 0.0559, |
| "step": 4590 |
| }, |
| { |
| "grad_norm": 0.4500420093536377, |
| "learning_rate": 9.198e-05, |
| "loss": 0.0487, |
| "step": 4600 |
| }, |
| { |
| "grad_norm": 0.4946185350418091, |
| "learning_rate": 9.218e-05, |
| "loss": 0.0553, |
| "step": 4610 |
| }, |
| { |
| "grad_norm": 0.5304247736930847, |
| "learning_rate": 9.238e-05, |
| "loss": 0.0514, |
| "step": 4620 |
| }, |
| { |
| "grad_norm": 0.5968793630599976, |
| "learning_rate": 9.258e-05, |
| "loss": 0.0515, |
| "step": 4630 |
| }, |
| { |
| "grad_norm": 0.44556036591529846, |
| "learning_rate": 9.278e-05, |
| "loss": 0.0515, |
| "step": 4640 |
| }, |
| { |
| "grad_norm": 0.4487205445766449, |
| "learning_rate": 9.298e-05, |
| "loss": 0.0468, |
| "step": 4650 |
| }, |
| { |
| "grad_norm": 0.4759437143802643, |
| "learning_rate": 9.318e-05, |
| "loss": 0.0527, |
| "step": 4660 |
| }, |
| { |
| "grad_norm": 0.5389978885650635, |
| "learning_rate": 9.338e-05, |
| "loss": 0.0498, |
| "step": 4670 |
| }, |
| { |
| "grad_norm": 0.48506706953048706, |
| "learning_rate": 9.358e-05, |
| "loss": 0.0456, |
| "step": 4680 |
| }, |
| { |
| "grad_norm": 0.5717599391937256, |
| "learning_rate": 9.378e-05, |
| "loss": 0.0503, |
| "step": 4690 |
| }, |
| { |
| "grad_norm": 0.5160998702049255, |
| "learning_rate": 9.398e-05, |
| "loss": 0.0574, |
| "step": 4700 |
| }, |
| { |
| "grad_norm": 0.48355981707572937, |
| "learning_rate": 9.418e-05, |
| "loss": 0.0527, |
| "step": 4710 |
| }, |
| { |
| "grad_norm": 0.48949745297431946, |
| "learning_rate": 9.438e-05, |
| "loss": 0.0447, |
| "step": 4720 |
| }, |
| { |
| "grad_norm": 0.5389792323112488, |
| "learning_rate": 9.458e-05, |
| "loss": 0.0499, |
| "step": 4730 |
| }, |
| { |
| "grad_norm": 0.6133130192756653, |
| "learning_rate": 9.478e-05, |
| "loss": 0.0494, |
| "step": 4740 |
| }, |
| { |
| "grad_norm": 0.4897119998931885, |
| "learning_rate": 9.498e-05, |
| "loss": 0.0494, |
| "step": 4750 |
| }, |
| { |
| "grad_norm": 0.4778435230255127, |
| "learning_rate": 9.518000000000001e-05, |
| "loss": 0.0547, |
| "step": 4760 |
| }, |
| { |
| "grad_norm": 0.5236737728118896, |
| "learning_rate": 9.538e-05, |
| "loss": 0.0572, |
| "step": 4770 |
| }, |
| { |
| "grad_norm": 0.4555071294307709, |
| "learning_rate": 9.558e-05, |
| "loss": 0.0511, |
| "step": 4780 |
| }, |
| { |
| "grad_norm": 0.5759738087654114, |
| "learning_rate": 9.578000000000001e-05, |
| "loss": 0.054, |
| "step": 4790 |
| }, |
| { |
| "grad_norm": 0.48411187529563904, |
| "learning_rate": 9.598e-05, |
| "loss": 0.0526, |
| "step": 4800 |
| }, |
| { |
| "grad_norm": 0.38313448429107666, |
| "learning_rate": 9.618e-05, |
| "loss": 0.0474, |
| "step": 4810 |
| }, |
| { |
| "grad_norm": 0.5272445678710938, |
| "learning_rate": 9.638000000000001e-05, |
| "loss": 0.048, |
| "step": 4820 |
| }, |
| { |
| "grad_norm": 0.42718085646629333, |
| "learning_rate": 9.658e-05, |
| "loss": 0.0466, |
| "step": 4830 |
| }, |
| { |
| "grad_norm": 0.4786417484283447, |
| "learning_rate": 9.678e-05, |
| "loss": 0.0482, |
| "step": 4840 |
| }, |
| { |
| "grad_norm": 0.41158753633499146, |
| "learning_rate": 9.698000000000001e-05, |
| "loss": 0.0487, |
| "step": 4850 |
| }, |
| { |
| "grad_norm": 0.5833826661109924, |
| "learning_rate": 9.718e-05, |
| "loss": 0.0458, |
| "step": 4860 |
| }, |
| { |
| "grad_norm": 0.5385518074035645, |
| "learning_rate": 9.738e-05, |
| "loss": 0.0537, |
| "step": 4870 |
| }, |
| { |
| "grad_norm": 0.39039990305900574, |
| "learning_rate": 9.758000000000001e-05, |
| "loss": 0.0469, |
| "step": 4880 |
| }, |
| { |
| "grad_norm": 0.49037373065948486, |
| "learning_rate": 9.778e-05, |
| "loss": 0.0497, |
| "step": 4890 |
| }, |
| { |
| "grad_norm": 0.3821620047092438, |
| "learning_rate": 9.798000000000001e-05, |
| "loss": 0.0484, |
| "step": 4900 |
| }, |
| { |
| "grad_norm": 0.4072476327419281, |
| "learning_rate": 9.818000000000001e-05, |
| "loss": 0.0444, |
| "step": 4910 |
| }, |
| { |
| "grad_norm": 0.5461483001708984, |
| "learning_rate": 9.838e-05, |
| "loss": 0.0518, |
| "step": 4920 |
| }, |
| { |
| "grad_norm": 0.5809780359268188, |
| "learning_rate": 9.858000000000001e-05, |
| "loss": 0.052, |
| "step": 4930 |
| }, |
| { |
| "grad_norm": 0.459931880235672, |
| "learning_rate": 9.878e-05, |
| "loss": 0.0458, |
| "step": 4940 |
| }, |
| { |
| "grad_norm": 0.5790089964866638, |
| "learning_rate": 9.898e-05, |
| "loss": 0.0512, |
| "step": 4950 |
| }, |
| { |
| "grad_norm": 0.5393385291099548, |
| "learning_rate": 9.918000000000001e-05, |
| "loss": 0.0515, |
| "step": 4960 |
| }, |
| { |
| "grad_norm": 0.5185596942901611, |
| "learning_rate": 9.938e-05, |
| "loss": 0.0446, |
| "step": 4970 |
| }, |
| { |
| "grad_norm": 0.47808054089546204, |
| "learning_rate": 9.958e-05, |
| "loss": 0.0547, |
| "step": 4980 |
| }, |
| { |
| "grad_norm": 0.5483494400978088, |
| "learning_rate": 9.978000000000001e-05, |
| "loss": 0.0519, |
| "step": 4990 |
| }, |
| { |
| "grad_norm": 0.640078604221344, |
| "learning_rate": 9.998e-05, |
| "loss": 0.0492, |
| "step": 5000 |
| }, |
| { |
| "grad_norm": 0.5359305739402771, |
| "learning_rate": 9.999999778549045e-05, |
| "loss": 0.0478, |
| "step": 5010 |
| }, |
| { |
| "grad_norm": 0.5365594029426575, |
| "learning_rate": 9.999999013039593e-05, |
| "loss": 0.0537, |
| "step": 5020 |
| }, |
| { |
| "grad_norm": 0.44698792695999146, |
| "learning_rate": 9.999997700737766e-05, |
| "loss": 0.0479, |
| "step": 5030 |
| }, |
| { |
| "grad_norm": 0.5172343254089355, |
| "learning_rate": 9.999995841643709e-05, |
| "loss": 0.0456, |
| "step": 5040 |
| }, |
| { |
| "grad_norm": 0.6057367920875549, |
| "learning_rate": 9.999993435757623e-05, |
| "loss": 0.0479, |
| "step": 5050 |
| }, |
| { |
| "grad_norm": 0.6013140678405762, |
| "learning_rate": 9.999990483079773e-05, |
| "loss": 0.0512, |
| "step": 5060 |
| }, |
| { |
| "grad_norm": 0.6480153799057007, |
| "learning_rate": 9.999986983610481e-05, |
| "loss": 0.0488, |
| "step": 5070 |
| }, |
| { |
| "grad_norm": 0.5187014937400818, |
| "learning_rate": 9.99998293735013e-05, |
| "loss": 0.0468, |
| "step": 5080 |
| }, |
| { |
| "grad_norm": 0.6135046482086182, |
| "learning_rate": 9.999978344299161e-05, |
| "loss": 0.0499, |
| "step": 5090 |
| }, |
| { |
| "grad_norm": 0.5284314751625061, |
| "learning_rate": 9.99997320445808e-05, |
| "loss": 0.0486, |
| "step": 5100 |
| }, |
| { |
| "grad_norm": 0.4867863655090332, |
| "learning_rate": 9.999967517827444e-05, |
| "loss": 0.0497, |
| "step": 5110 |
| }, |
| { |
| "grad_norm": 0.4951011538505554, |
| "learning_rate": 9.999961284407879e-05, |
| "loss": 0.0454, |
| "step": 5120 |
| }, |
| { |
| "grad_norm": 0.38466599583625793, |
| "learning_rate": 9.999954504200067e-05, |
| "loss": 0.0481, |
| "step": 5130 |
| }, |
| { |
| "grad_norm": 0.4279499053955078, |
| "learning_rate": 9.999947177204744e-05, |
| "loss": 0.0498, |
| "step": 5140 |
| }, |
| { |
| "grad_norm": 0.5256782174110413, |
| "learning_rate": 9.999939303422718e-05, |
| "loss": 0.0485, |
| "step": 5150 |
| }, |
| { |
| "grad_norm": 0.5838196277618408, |
| "learning_rate": 9.999930882854847e-05, |
| "loss": 0.0484, |
| "step": 5160 |
| }, |
| { |
| "grad_norm": 0.4046095311641693, |
| "learning_rate": 9.999921915502051e-05, |
| "loss": 0.0445, |
| "step": 5170 |
| }, |
| { |
| "grad_norm": 0.47813159227371216, |
| "learning_rate": 9.99991240136531e-05, |
| "loss": 0.0416, |
| "step": 5180 |
| }, |
| { |
| "grad_norm": 0.38682985305786133, |
| "learning_rate": 9.999902340445668e-05, |
| "loss": 0.048, |
| "step": 5190 |
| }, |
| { |
| "grad_norm": 0.41376394033432007, |
| "learning_rate": 9.999891732744224e-05, |
| "loss": 0.0452, |
| "step": 5200 |
| }, |
| { |
| "grad_norm": 0.5256609320640564, |
| "learning_rate": 9.999880578262135e-05, |
| "loss": 0.0531, |
| "step": 5210 |
| }, |
| { |
| "grad_norm": 0.4892725944519043, |
| "learning_rate": 9.999868877000624e-05, |
| "loss": 0.055, |
| "step": 5220 |
| }, |
| { |
| "grad_norm": 0.4581248164176941, |
| "learning_rate": 9.99985662896097e-05, |
| "loss": 0.051, |
| "step": 5230 |
| }, |
| { |
| "grad_norm": 0.37346044182777405, |
| "learning_rate": 9.999843834144513e-05, |
| "loss": 0.0421, |
| "step": 5240 |
| }, |
| { |
| "grad_norm": 0.4182156026363373, |
| "learning_rate": 9.99983049255265e-05, |
| "loss": 0.0461, |
| "step": 5250 |
| }, |
| { |
| "grad_norm": 0.3648982048034668, |
| "learning_rate": 9.999816604186843e-05, |
| "loss": 0.0463, |
| "step": 5260 |
| }, |
| { |
| "grad_norm": 0.4943232834339142, |
| "learning_rate": 9.999802169048609e-05, |
| "loss": 0.0492, |
| "step": 5270 |
| }, |
| { |
| "grad_norm": 0.5068058967590332, |
| "learning_rate": 9.999787187139527e-05, |
| "loss": 0.0501, |
| "step": 5280 |
| }, |
| { |
| "grad_norm": 0.39948412775993347, |
| "learning_rate": 9.999771658461234e-05, |
| "loss": 0.0489, |
| "step": 5290 |
| }, |
| { |
| "grad_norm": 0.5398508310317993, |
| "learning_rate": 9.999755583015431e-05, |
| "loss": 0.0507, |
| "step": 5300 |
| }, |
| { |
| "grad_norm": 0.41905033588409424, |
| "learning_rate": 9.999738960803874e-05, |
| "loss": 0.0541, |
| "step": 5310 |
| }, |
| { |
| "grad_norm": 0.4965907037258148, |
| "learning_rate": 9.99972179182838e-05, |
| "loss": 0.0551, |
| "step": 5320 |
| }, |
| { |
| "grad_norm": 0.5021808743476868, |
| "learning_rate": 9.99970407609083e-05, |
| "loss": 0.0466, |
| "step": 5330 |
| }, |
| { |
| "grad_norm": 0.4862288534641266, |
| "learning_rate": 9.999685813593159e-05, |
| "loss": 0.0515, |
| "step": 5340 |
| }, |
| { |
| "grad_norm": 0.3867112398147583, |
| "learning_rate": 9.999667004337362e-05, |
| "loss": 0.049, |
| "step": 5350 |
| }, |
| { |
| "grad_norm": 0.3625693619251251, |
| "learning_rate": 9.9996476483255e-05, |
| "loss": 0.0423, |
| "step": 5360 |
| }, |
| { |
| "grad_norm": 0.5136606097221375, |
| "learning_rate": 9.999627745559688e-05, |
| "loss": 0.0441, |
| "step": 5370 |
| }, |
| { |
| "grad_norm": 0.3735745847225189, |
| "learning_rate": 9.999607296042101e-05, |
| "loss": 0.0439, |
| "step": 5380 |
| }, |
| { |
| "grad_norm": 0.4072813093662262, |
| "learning_rate": 9.99958629977498e-05, |
| "loss": 0.0452, |
| "step": 5390 |
| }, |
| { |
| "grad_norm": 0.49238812923431396, |
| "learning_rate": 9.999564756760615e-05, |
| "loss": 0.0409, |
| "step": 5400 |
| }, |
| { |
| "grad_norm": 0.5166733860969543, |
| "learning_rate": 9.999542667001366e-05, |
| "loss": 0.0443, |
| "step": 5410 |
| }, |
| { |
| "grad_norm": 0.3779316246509552, |
| "learning_rate": 9.999520030499647e-05, |
| "loss": 0.0444, |
| "step": 5420 |
| }, |
| { |
| "grad_norm": 0.5949298739433289, |
| "learning_rate": 9.999496847257936e-05, |
| "loss": 0.0479, |
| "step": 5430 |
| }, |
| { |
| "grad_norm": 0.4279668927192688, |
| "learning_rate": 9.999473117278764e-05, |
| "loss": 0.0463, |
| "step": 5440 |
| }, |
| { |
| "grad_norm": 0.48803475499153137, |
| "learning_rate": 9.999448840564731e-05, |
| "loss": 0.0488, |
| "step": 5450 |
| }, |
| { |
| "grad_norm": 0.4599801301956177, |
| "learning_rate": 9.999424017118488e-05, |
| "loss": 0.0448, |
| "step": 5460 |
| }, |
| { |
| "grad_norm": 0.5404091477394104, |
| "learning_rate": 9.999398646942751e-05, |
| "loss": 0.0452, |
| "step": 5470 |
| }, |
| { |
| "grad_norm": 0.3551173210144043, |
| "learning_rate": 9.999372730040296e-05, |
| "loss": 0.0464, |
| "step": 5480 |
| }, |
| { |
| "grad_norm": 0.4278314411640167, |
| "learning_rate": 9.999346266413953e-05, |
| "loss": 0.0439, |
| "step": 5490 |
| }, |
| { |
| "grad_norm": 0.38749974966049194, |
| "learning_rate": 9.99931925606662e-05, |
| "loss": 0.0411, |
| "step": 5500 |
| }, |
| { |
| "grad_norm": 0.4055821895599365, |
| "learning_rate": 9.99929169900125e-05, |
| "loss": 0.0475, |
| "step": 5510 |
| }, |
| { |
| "grad_norm": 0.4296989440917969, |
| "learning_rate": 9.999263595220855e-05, |
| "loss": 0.0484, |
| "step": 5520 |
| }, |
| { |
| "grad_norm": 0.42303040623664856, |
| "learning_rate": 9.99923494472851e-05, |
| "loss": 0.0465, |
| "step": 5530 |
| }, |
| { |
| "grad_norm": 0.37740418314933777, |
| "learning_rate": 9.999205747527348e-05, |
| "loss": 0.0429, |
| "step": 5540 |
| }, |
| { |
| "grad_norm": 0.4639616012573242, |
| "learning_rate": 9.999176003620561e-05, |
| "loss": 0.0454, |
| "step": 5550 |
| }, |
| { |
| "grad_norm": 0.4544775187969208, |
| "learning_rate": 9.999145713011405e-05, |
| "loss": 0.0461, |
| "step": 5560 |
| }, |
| { |
| "grad_norm": 0.4225658178329468, |
| "learning_rate": 9.999114875703186e-05, |
| "loss": 0.0466, |
| "step": 5570 |
| }, |
| { |
| "grad_norm": 0.41953185200691223, |
| "learning_rate": 9.999083491699281e-05, |
| "loss": 0.0415, |
| "step": 5580 |
| }, |
| { |
| "grad_norm": 0.4081561267375946, |
| "learning_rate": 9.999051561003123e-05, |
| "loss": 0.0452, |
| "step": 5590 |
| }, |
| { |
| "grad_norm": 0.4112689197063446, |
| "learning_rate": 9.999019083618202e-05, |
| "loss": 0.0415, |
| "step": 5600 |
| }, |
| { |
| "grad_norm": 0.3751114308834076, |
| "learning_rate": 9.99898605954807e-05, |
| "loss": 0.0431, |
| "step": 5610 |
| }, |
| { |
| "grad_norm": 0.5612055063247681, |
| "learning_rate": 9.998952488796338e-05, |
| "loss": 0.0435, |
| "step": 5620 |
| }, |
| { |
| "grad_norm": 0.44595974683761597, |
| "learning_rate": 9.998918371366676e-05, |
| "loss": 0.0427, |
| "step": 5630 |
| }, |
| { |
| "grad_norm": 0.4542756676673889, |
| "learning_rate": 9.99888370726282e-05, |
| "loss": 0.0431, |
| "step": 5640 |
| }, |
| { |
| "grad_norm": 0.4422266185283661, |
| "learning_rate": 9.998848496488556e-05, |
| "loss": 0.0425, |
| "step": 5650 |
| }, |
| { |
| "grad_norm": 0.5079705715179443, |
| "learning_rate": 9.998812739047736e-05, |
| "loss": 0.0509, |
| "step": 5660 |
| }, |
| { |
| "grad_norm": 0.37379220128059387, |
| "learning_rate": 9.99877643494427e-05, |
| "loss": 0.0435, |
| "step": 5670 |
| }, |
| { |
| "grad_norm": 0.5022728443145752, |
| "learning_rate": 9.998739584182128e-05, |
| "loss": 0.045, |
| "step": 5680 |
| }, |
| { |
| "grad_norm": 0.34019261598587036, |
| "learning_rate": 9.998702186765342e-05, |
| "loss": 0.0396, |
| "step": 5690 |
| }, |
| { |
| "grad_norm": 0.429571270942688, |
| "learning_rate": 9.998664242698e-05, |
| "loss": 0.0444, |
| "step": 5700 |
| }, |
| { |
| "grad_norm": 0.4040437638759613, |
| "learning_rate": 9.998625751984251e-05, |
| "loss": 0.0423, |
| "step": 5710 |
| }, |
| { |
| "grad_norm": 0.3098539710044861, |
| "learning_rate": 9.998586714628307e-05, |
| "loss": 0.0442, |
| "step": 5720 |
| }, |
| { |
| "grad_norm": 0.40934598445892334, |
| "learning_rate": 9.998547130634432e-05, |
| "loss": 0.0444, |
| "step": 5730 |
| }, |
| { |
| "grad_norm": 0.36247605085372925, |
| "learning_rate": 9.99850700000696e-05, |
| "loss": 0.046, |
| "step": 5740 |
| }, |
| { |
| "grad_norm": 0.3583070635795593, |
| "learning_rate": 9.998466322750278e-05, |
| "loss": 0.0446, |
| "step": 5750 |
| }, |
| { |
| "grad_norm": 0.4379226863384247, |
| "learning_rate": 9.998425098868834e-05, |
| "loss": 0.0421, |
| "step": 5760 |
| }, |
| { |
| "grad_norm": 0.44016969203948975, |
| "learning_rate": 9.998383328367136e-05, |
| "loss": 0.0451, |
| "step": 5770 |
| }, |
| { |
| "grad_norm": 0.36797237396240234, |
| "learning_rate": 9.99834101124975e-05, |
| "loss": 0.0432, |
| "step": 5780 |
| }, |
| { |
| "grad_norm": 0.5354440808296204, |
| "learning_rate": 9.998298147521309e-05, |
| "loss": 0.0471, |
| "step": 5790 |
| }, |
| { |
| "grad_norm": 0.42204543948173523, |
| "learning_rate": 9.998254737186496e-05, |
| "loss": 0.0437, |
| "step": 5800 |
| }, |
| { |
| "grad_norm": 0.37009891867637634, |
| "learning_rate": 9.99821078025006e-05, |
| "loss": 0.0418, |
| "step": 5810 |
| }, |
| { |
| "grad_norm": 0.3839789032936096, |
| "learning_rate": 9.998166276716807e-05, |
| "loss": 0.0461, |
| "step": 5820 |
| }, |
| { |
| "grad_norm": 0.3813186585903168, |
| "learning_rate": 9.998121226591606e-05, |
| "loss": 0.0453, |
| "step": 5830 |
| }, |
| { |
| "grad_norm": 0.41642969846725464, |
| "learning_rate": 9.998075629879382e-05, |
| "loss": 0.046, |
| "step": 5840 |
| }, |
| { |
| "grad_norm": 0.44692158699035645, |
| "learning_rate": 9.99802948658512e-05, |
| "loss": 0.0465, |
| "step": 5850 |
| }, |
| { |
| "grad_norm": 0.4168972373008728, |
| "learning_rate": 9.99798279671387e-05, |
| "loss": 0.0481, |
| "step": 5860 |
| }, |
| { |
| "grad_norm": 0.5036101341247559, |
| "learning_rate": 9.997935560270734e-05, |
| "loss": 0.0493, |
| "step": 5870 |
| }, |
| { |
| "grad_norm": 0.37841522693634033, |
| "learning_rate": 9.997887777260879e-05, |
| "loss": 0.0455, |
| "step": 5880 |
| }, |
| { |
| "grad_norm": 0.32745361328125, |
| "learning_rate": 9.997839447689532e-05, |
| "loss": 0.0411, |
| "step": 5890 |
| }, |
| { |
| "grad_norm": 0.6070501804351807, |
| "learning_rate": 9.997790571561978e-05, |
| "loss": 0.045, |
| "step": 5900 |
| }, |
| { |
| "grad_norm": 0.4841315448284149, |
| "learning_rate": 9.99774114888356e-05, |
| "loss": 0.0476, |
| "step": 5910 |
| }, |
| { |
| "grad_norm": 0.36092159152030945, |
| "learning_rate": 9.997691179659684e-05, |
| "loss": 0.0423, |
| "step": 5920 |
| }, |
| { |
| "grad_norm": 0.5659105777740479, |
| "learning_rate": 9.997640663895815e-05, |
| "loss": 0.0397, |
| "step": 5930 |
| }, |
| { |
| "grad_norm": 0.42008090019226074, |
| "learning_rate": 9.997589601597477e-05, |
| "loss": 0.0459, |
| "step": 5940 |
| }, |
| { |
| "grad_norm": 0.39080119132995605, |
| "learning_rate": 9.997537992770252e-05, |
| "loss": 0.0415, |
| "step": 5950 |
| }, |
| { |
| "grad_norm": 0.4096440374851227, |
| "learning_rate": 9.997485837419788e-05, |
| "loss": 0.0391, |
| "step": 5960 |
| }, |
| { |
| "grad_norm": 0.40421029925346375, |
| "learning_rate": 9.997433135551786e-05, |
| "loss": 0.0434, |
| "step": 5970 |
| }, |
| { |
| "grad_norm": 0.3745020925998688, |
| "learning_rate": 9.997379887172009e-05, |
| "loss": 0.0416, |
| "step": 5980 |
| }, |
| { |
| "grad_norm": 0.3250255584716797, |
| "learning_rate": 9.997326092286281e-05, |
| "loss": 0.0448, |
| "step": 5990 |
| }, |
| { |
| "grad_norm": 0.4551812708377838, |
| "learning_rate": 9.997271750900486e-05, |
| "loss": 0.0459, |
| "step": 6000 |
| }, |
| { |
| "grad_norm": 0.41356295347213745, |
| "learning_rate": 9.997216863020565e-05, |
| "loss": 0.0415, |
| "step": 6010 |
| }, |
| { |
| "grad_norm": 0.4534608721733093, |
| "learning_rate": 9.99716142865252e-05, |
| "loss": 0.0406, |
| "step": 6020 |
| }, |
| { |
| "grad_norm": 0.42121708393096924, |
| "learning_rate": 9.997105447802415e-05, |
| "loss": 0.0396, |
| "step": 6030 |
| }, |
| { |
| "grad_norm": 0.30873870849609375, |
| "learning_rate": 9.997048920476373e-05, |
| "loss": 0.0425, |
| "step": 6040 |
| }, |
| { |
| "grad_norm": 0.353882372379303, |
| "learning_rate": 9.996991846680572e-05, |
| "loss": 0.0438, |
| "step": 6050 |
| }, |
| { |
| "grad_norm": 0.3697453439235687, |
| "learning_rate": 9.996934226421257e-05, |
| "loss": 0.041, |
| "step": 6060 |
| }, |
| { |
| "grad_norm": 0.3074539005756378, |
| "learning_rate": 9.996876059704726e-05, |
| "loss": 0.0425, |
| "step": 6070 |
| }, |
| { |
| "grad_norm": 0.44980335235595703, |
| "learning_rate": 9.996817346537343e-05, |
| "loss": 0.0469, |
| "step": 6080 |
| }, |
| { |
| "grad_norm": 0.4123890697956085, |
| "learning_rate": 9.996758086925526e-05, |
| "loss": 0.0383, |
| "step": 6090 |
| }, |
| { |
| "grad_norm": 0.5198633074760437, |
| "learning_rate": 9.996698280875759e-05, |
| "loss": 0.0466, |
| "step": 6100 |
| }, |
| { |
| "grad_norm": 0.3925876319408417, |
| "learning_rate": 9.99663792839458e-05, |
| "loss": 0.043, |
| "step": 6110 |
| }, |
| { |
| "grad_norm": 0.46037840843200684, |
| "learning_rate": 9.99657702948859e-05, |
| "loss": 0.0502, |
| "step": 6120 |
| }, |
| { |
| "grad_norm": 0.3833174705505371, |
| "learning_rate": 9.996515584164448e-05, |
| "loss": 0.0428, |
| "step": 6130 |
| }, |
| { |
| "grad_norm": 0.4500272572040558, |
| "learning_rate": 9.996453592428873e-05, |
| "loss": 0.0433, |
| "step": 6140 |
| }, |
| { |
| "grad_norm": 0.5032421350479126, |
| "learning_rate": 9.996391054288646e-05, |
| "loss": 0.0468, |
| "step": 6150 |
| }, |
| { |
| "grad_norm": 0.5007117986679077, |
| "learning_rate": 9.996327969750605e-05, |
| "loss": 0.0425, |
| "step": 6160 |
| }, |
| { |
| "grad_norm": 0.4883004128932953, |
| "learning_rate": 9.996264338821649e-05, |
| "loss": 0.0464, |
| "step": 6170 |
| }, |
| { |
| "grad_norm": 0.43868330121040344, |
| "learning_rate": 9.996200161508735e-05, |
| "loss": 0.043, |
| "step": 6180 |
| }, |
| { |
| "grad_norm": 0.39309653639793396, |
| "learning_rate": 9.996135437818885e-05, |
| "loss": 0.0401, |
| "step": 6190 |
| }, |
| { |
| "grad_norm": 0.391730397939682, |
| "learning_rate": 9.996070167759175e-05, |
| "loss": 0.0423, |
| "step": 6200 |
| }, |
| { |
| "grad_norm": 0.45517247915267944, |
| "learning_rate": 9.996004351336743e-05, |
| "loss": 0.0434, |
| "step": 6210 |
| }, |
| { |
| "grad_norm": 0.4464564621448517, |
| "learning_rate": 9.995937988558785e-05, |
| "loss": 0.0466, |
| "step": 6220 |
| }, |
| { |
| "grad_norm": 0.4495352804660797, |
| "learning_rate": 9.995871079432561e-05, |
| "loss": 0.0432, |
| "step": 6230 |
| }, |
| { |
| "grad_norm": 0.45086175203323364, |
| "learning_rate": 9.995803623965389e-05, |
| "loss": 0.0415, |
| "step": 6240 |
| }, |
| { |
| "grad_norm": 0.37620794773101807, |
| "learning_rate": 9.995735622164641e-05, |
| "loss": 0.0448, |
| "step": 6250 |
| }, |
| { |
| "grad_norm": 0.42657265067100525, |
| "learning_rate": 9.995667074037758e-05, |
| "loss": 0.0432, |
| "step": 6260 |
| }, |
| { |
| "grad_norm": 0.3334987163543701, |
| "learning_rate": 9.995597979592232e-05, |
| "loss": 0.0379, |
| "step": 6270 |
| }, |
| { |
| "grad_norm": 0.44120296835899353, |
| "learning_rate": 9.995528338835625e-05, |
| "loss": 0.0416, |
| "step": 6280 |
| }, |
| { |
| "grad_norm": 0.32200494408607483, |
| "learning_rate": 9.995458151775547e-05, |
| "loss": 0.0378, |
| "step": 6290 |
| }, |
| { |
| "grad_norm": 0.3396989703178406, |
| "learning_rate": 9.995387418419677e-05, |
| "loss": 0.0466, |
| "step": 6300 |
| }, |
| { |
| "grad_norm": 0.42114686965942383, |
| "learning_rate": 9.99531613877575e-05, |
| "loss": 0.0425, |
| "step": 6310 |
| }, |
| { |
| "grad_norm": 0.474528044462204, |
| "learning_rate": 9.995244312851559e-05, |
| "loss": 0.041, |
| "step": 6320 |
| }, |
| { |
| "grad_norm": 0.46179601550102234, |
| "learning_rate": 9.995171940654961e-05, |
| "loss": 0.0438, |
| "step": 6330 |
| }, |
| { |
| "grad_norm": 0.3656163513660431, |
| "learning_rate": 9.995099022193871e-05, |
| "loss": 0.0433, |
| "step": 6340 |
| }, |
| { |
| "grad_norm": 0.4622880220413208, |
| "learning_rate": 9.995025557476261e-05, |
| "loss": 0.0438, |
| "step": 6350 |
| }, |
| { |
| "grad_norm": 0.3380992114543915, |
| "learning_rate": 9.994951546510165e-05, |
| "loss": 0.0455, |
| "step": 6360 |
| }, |
| { |
| "grad_norm": 0.4336497485637665, |
| "learning_rate": 9.994876989303679e-05, |
| "loss": 0.0434, |
| "step": 6370 |
| }, |
| { |
| "grad_norm": 0.4440676271915436, |
| "learning_rate": 9.994801885864955e-05, |
| "loss": 0.0469, |
| "step": 6380 |
| }, |
| { |
| "grad_norm": 0.40563204884529114, |
| "learning_rate": 9.994726236202205e-05, |
| "loss": 0.0437, |
| "step": 6390 |
| }, |
| { |
| "grad_norm": 0.4887060821056366, |
| "learning_rate": 9.994650040323704e-05, |
| "loss": 0.039, |
| "step": 6400 |
| }, |
| { |
| "grad_norm": 0.3582417964935303, |
| "learning_rate": 9.994573298237784e-05, |
| "loss": 0.0461, |
| "step": 6410 |
| }, |
| { |
| "grad_norm": 0.389259397983551, |
| "learning_rate": 9.994496009952837e-05, |
| "loss": 0.0454, |
| "step": 6420 |
| }, |
| { |
| "grad_norm": 0.4717545211315155, |
| "learning_rate": 9.994418175477316e-05, |
| "loss": 0.0409, |
| "step": 6430 |
| }, |
| { |
| "grad_norm": 0.4662460386753082, |
| "learning_rate": 9.994339794819733e-05, |
| "loss": 0.0395, |
| "step": 6440 |
| }, |
| { |
| "grad_norm": 0.4239952266216278, |
| "learning_rate": 9.994260867988658e-05, |
| "loss": 0.0421, |
| "step": 6450 |
| }, |
| { |
| "grad_norm": 0.3706813454627991, |
| "learning_rate": 9.994181394992723e-05, |
| "loss": 0.0409, |
| "step": 6460 |
| }, |
| { |
| "grad_norm": 0.311974436044693, |
| "learning_rate": 9.994101375840618e-05, |
| "loss": 0.0409, |
| "step": 6470 |
| }, |
| { |
| "grad_norm": 0.4108457565307617, |
| "learning_rate": 9.994020810541098e-05, |
| "loss": 0.0427, |
| "step": 6480 |
| }, |
| { |
| "grad_norm": 0.5299150347709656, |
| "learning_rate": 9.99393969910297e-05, |
| "loss": 0.0455, |
| "step": 6490 |
| }, |
| { |
| "grad_norm": 0.4159790873527527, |
| "learning_rate": 9.993858041535104e-05, |
| "loss": 0.0395, |
| "step": 6500 |
| }, |
| { |
| "grad_norm": 0.37181058526039124, |
| "learning_rate": 9.99377583784643e-05, |
| "loss": 0.0404, |
| "step": 6510 |
| }, |
| { |
| "grad_norm": 0.4369353950023651, |
| "learning_rate": 9.993693088045939e-05, |
| "loss": 0.04, |
| "step": 6520 |
| }, |
| { |
| "grad_norm": 0.31191983819007874, |
| "learning_rate": 9.99360979214268e-05, |
| "loss": 0.0392, |
| "step": 6530 |
| }, |
| { |
| "grad_norm": 0.41286006569862366, |
| "learning_rate": 9.99352595014576e-05, |
| "loss": 0.0401, |
| "step": 6540 |
| }, |
| { |
| "grad_norm": 0.5197806358337402, |
| "learning_rate": 9.993441562064354e-05, |
| "loss": 0.0429, |
| "step": 6550 |
| }, |
| { |
| "grad_norm": 0.4494584798812866, |
| "learning_rate": 9.993356627907685e-05, |
| "loss": 0.0439, |
| "step": 6560 |
| }, |
| { |
| "grad_norm": 0.3176094591617584, |
| "learning_rate": 9.99327114768504e-05, |
| "loss": 0.0449, |
| "step": 6570 |
| }, |
| { |
| "grad_norm": 0.3514400124549866, |
| "learning_rate": 9.99318512140577e-05, |
| "loss": 0.0378, |
| "step": 6580 |
| }, |
| { |
| "grad_norm": 0.3636268377304077, |
| "learning_rate": 9.993098549079284e-05, |
| "loss": 0.0407, |
| "step": 6590 |
| }, |
| { |
| "grad_norm": 0.3437356650829315, |
| "learning_rate": 9.993011430715047e-05, |
| "loss": 0.0458, |
| "step": 6600 |
| }, |
| { |
| "grad_norm": 0.3289712369441986, |
| "learning_rate": 9.992923766322586e-05, |
| "loss": 0.0434, |
| "step": 6610 |
| }, |
| { |
| "grad_norm": 0.4166223406791687, |
| "learning_rate": 9.99283555591149e-05, |
| "loss": 0.0348, |
| "step": 6620 |
| }, |
| { |
| "grad_norm": 0.48521310091018677, |
| "learning_rate": 9.992746799491404e-05, |
| "loss": 0.0458, |
| "step": 6630 |
| }, |
| { |
| "grad_norm": 0.3943791091442108, |
| "learning_rate": 9.992657497072033e-05, |
| "loss": 0.0432, |
| "step": 6640 |
| }, |
| { |
| "grad_norm": 0.4498193562030792, |
| "learning_rate": 9.992567648663147e-05, |
| "loss": 0.0384, |
| "step": 6650 |
| }, |
| { |
| "grad_norm": 0.37011364102363586, |
| "learning_rate": 9.992477254274568e-05, |
| "loss": 0.0437, |
| "step": 6660 |
| }, |
| { |
| "grad_norm": 0.39060381054878235, |
| "learning_rate": 9.992386313916183e-05, |
| "loss": 0.0447, |
| "step": 6670 |
| }, |
| { |
| "grad_norm": 0.3888266682624817, |
| "learning_rate": 9.992294827597934e-05, |
| "loss": 0.043, |
| "step": 6680 |
| }, |
| { |
| "grad_norm": 0.4197387099266052, |
| "learning_rate": 9.992202795329831e-05, |
| "loss": 0.0415, |
| "step": 6690 |
| }, |
| { |
| "grad_norm": 0.37308135628700256, |
| "learning_rate": 9.992110217121936e-05, |
| "loss": 0.0419, |
| "step": 6700 |
| }, |
| { |
| "grad_norm": 0.4515061378479004, |
| "learning_rate": 9.992017092984372e-05, |
| "loss": 0.0418, |
| "step": 6710 |
| }, |
| { |
| "grad_norm": 0.380657821893692, |
| "learning_rate": 9.991923422927326e-05, |
| "loss": 0.0416, |
| "step": 6720 |
| }, |
| { |
| "grad_norm": 0.34842175245285034, |
| "learning_rate": 9.991829206961037e-05, |
| "loss": 0.0394, |
| "step": 6730 |
| }, |
| { |
| "grad_norm": 0.4592163562774658, |
| "learning_rate": 9.991734445095813e-05, |
| "loss": 0.0411, |
| "step": 6740 |
| }, |
| { |
| "grad_norm": 0.35680124163627625, |
| "learning_rate": 9.991639137342015e-05, |
| "loss": 0.0383, |
| "step": 6750 |
| }, |
| { |
| "grad_norm": 0.408777117729187, |
| "learning_rate": 9.991543283710064e-05, |
| "loss": 0.0389, |
| "step": 6760 |
| }, |
| { |
| "grad_norm": 0.4205833077430725, |
| "learning_rate": 9.991446884210445e-05, |
| "loss": 0.0379, |
| "step": 6770 |
| }, |
| { |
| "grad_norm": 0.3167155385017395, |
| "learning_rate": 9.9913499388537e-05, |
| "loss": 0.0386, |
| "step": 6780 |
| }, |
| { |
| "grad_norm": 0.3687867522239685, |
| "learning_rate": 9.99125244765043e-05, |
| "loss": 0.0389, |
| "step": 6790 |
| }, |
| { |
| "grad_norm": 0.43708962202072144, |
| "learning_rate": 9.991154410611296e-05, |
| "loss": 0.0385, |
| "step": 6800 |
| }, |
| { |
| "grad_norm": 0.4002821743488312, |
| "learning_rate": 9.99105582774702e-05, |
| "loss": 0.0385, |
| "step": 6810 |
| }, |
| { |
| "grad_norm": 0.25123393535614014, |
| "learning_rate": 9.990956699068384e-05, |
| "loss": 0.04, |
| "step": 6820 |
| }, |
| { |
| "grad_norm": 0.33417415618896484, |
| "learning_rate": 9.990857024586224e-05, |
| "loss": 0.0389, |
| "step": 6830 |
| }, |
| { |
| "grad_norm": 0.36421480774879456, |
| "learning_rate": 9.990756804311446e-05, |
| "loss": 0.0389, |
| "step": 6840 |
| }, |
| { |
| "grad_norm": 0.4153655469417572, |
| "learning_rate": 9.990656038255006e-05, |
| "loss": 0.0422, |
| "step": 6850 |
| }, |
| { |
| "grad_norm": 0.3924720883369446, |
| "learning_rate": 9.990554726427926e-05, |
| "loss": 0.0441, |
| "step": 6860 |
| }, |
| { |
| "grad_norm": 0.26164504885673523, |
| "learning_rate": 9.990452868841284e-05, |
| "loss": 0.0374, |
| "step": 6870 |
| }, |
| { |
| "grad_norm": 0.3427872955799103, |
| "learning_rate": 9.99035046550622e-05, |
| "loss": 0.0431, |
| "step": 6880 |
| }, |
| { |
| "grad_norm": 0.3402078151702881, |
| "learning_rate": 9.99024751643393e-05, |
| "loss": 0.038, |
| "step": 6890 |
| }, |
| { |
| "grad_norm": 0.441245973110199, |
| "learning_rate": 9.990144021635677e-05, |
| "loss": 0.0382, |
| "step": 6900 |
| }, |
| { |
| "grad_norm": 0.5558918714523315, |
| "learning_rate": 9.990039981122775e-05, |
| "loss": 0.0391, |
| "step": 6910 |
| }, |
| { |
| "grad_norm": 0.3786897659301758, |
| "learning_rate": 9.989935394906602e-05, |
| "loss": 0.0416, |
| "step": 6920 |
| }, |
| { |
| "grad_norm": 0.4389326870441437, |
| "learning_rate": 9.989830262998598e-05, |
| "loss": 0.0405, |
| "step": 6930 |
| }, |
| { |
| "grad_norm": 0.3421044647693634, |
| "learning_rate": 9.989724585410259e-05, |
| "loss": 0.0387, |
| "step": 6940 |
| }, |
| { |
| "grad_norm": 0.4314367473125458, |
| "learning_rate": 9.989618362153139e-05, |
| "loss": 0.0367, |
| "step": 6950 |
| }, |
| { |
| "grad_norm": 0.4381873309612274, |
| "learning_rate": 9.989511593238859e-05, |
| "loss": 0.0407, |
| "step": 6960 |
| }, |
| { |
| "grad_norm": 0.317180335521698, |
| "learning_rate": 9.98940427867909e-05, |
| "loss": 0.0372, |
| "step": 6970 |
| }, |
| { |
| "grad_norm": 0.4186713695526123, |
| "learning_rate": 9.989296418485573e-05, |
| "loss": 0.0402, |
| "step": 6980 |
| }, |
| { |
| "grad_norm": 0.3646087050437927, |
| "learning_rate": 9.989188012670101e-05, |
| "loss": 0.0384, |
| "step": 6990 |
| }, |
| { |
| "grad_norm": 0.3898526430130005, |
| "learning_rate": 9.989079061244528e-05, |
| "loss": 0.0381, |
| "step": 7000 |
| }, |
| { |
| "grad_norm": 0.33809134364128113, |
| "learning_rate": 9.988969564220769e-05, |
| "loss": 0.0417, |
| "step": 7010 |
| }, |
| { |
| "grad_norm": 0.3403474986553192, |
| "learning_rate": 9.988859521610801e-05, |
| "loss": 0.04, |
| "step": 7020 |
| }, |
| { |
| "grad_norm": 0.33483290672302246, |
| "learning_rate": 9.988748933426656e-05, |
| "loss": 0.0357, |
| "step": 7030 |
| }, |
| { |
| "grad_norm": 0.4800526201725006, |
| "learning_rate": 9.988637799680428e-05, |
| "loss": 0.0388, |
| "step": 7040 |
| }, |
| { |
| "grad_norm": 0.4275050759315491, |
| "learning_rate": 9.98852612038427e-05, |
| "loss": 0.0383, |
| "step": 7050 |
| }, |
| { |
| "grad_norm": 0.3904567062854767, |
| "learning_rate": 9.988413895550397e-05, |
| "loss": 0.04, |
| "step": 7060 |
| }, |
| { |
| "grad_norm": 0.3881678581237793, |
| "learning_rate": 9.98830112519108e-05, |
| "loss": 0.0361, |
| "step": 7070 |
| }, |
| { |
| "grad_norm": 0.33301061391830444, |
| "learning_rate": 9.98818780931865e-05, |
| "loss": 0.0378, |
| "step": 7080 |
| }, |
| { |
| "grad_norm": 0.35649630427360535, |
| "learning_rate": 9.988073947945502e-05, |
| "loss": 0.039, |
| "step": 7090 |
| }, |
| { |
| "grad_norm": 0.29644250869750977, |
| "learning_rate": 9.987959541084087e-05, |
| "loss": 0.0332, |
| "step": 7100 |
| }, |
| { |
| "grad_norm": 0.34819456934928894, |
| "learning_rate": 9.987844588746915e-05, |
| "loss": 0.0385, |
| "step": 7110 |
| }, |
| { |
| "grad_norm": 0.35695385932922363, |
| "learning_rate": 9.987729090946558e-05, |
| "loss": 0.0385, |
| "step": 7120 |
| }, |
| { |
| "grad_norm": 0.3464387357234955, |
| "learning_rate": 9.987613047695647e-05, |
| "loss": 0.0336, |
| "step": 7130 |
| }, |
| { |
| "grad_norm": 0.4068809151649475, |
| "learning_rate": 9.987496459006871e-05, |
| "loss": 0.0396, |
| "step": 7140 |
| }, |
| { |
| "grad_norm": 0.3142383396625519, |
| "learning_rate": 9.987379324892982e-05, |
| "loss": 0.0344, |
| "step": 7150 |
| }, |
| { |
| "grad_norm": 0.5131341218948364, |
| "learning_rate": 9.987261645366788e-05, |
| "loss": 0.0361, |
| "step": 7160 |
| }, |
| { |
| "grad_norm": 0.33127930760383606, |
| "learning_rate": 9.987143420441158e-05, |
| "loss": 0.0347, |
| "step": 7170 |
| }, |
| { |
| "grad_norm": 0.2790936827659607, |
| "learning_rate": 9.987024650129022e-05, |
| "loss": 0.0388, |
| "step": 7180 |
| }, |
| { |
| "grad_norm": 0.4384543001651764, |
| "learning_rate": 9.986905334443368e-05, |
| "loss": 0.0365, |
| "step": 7190 |
| }, |
| { |
| "grad_norm": 0.35652369260787964, |
| "learning_rate": 9.986785473397245e-05, |
| "loss": 0.0385, |
| "step": 7200 |
| }, |
| { |
| "grad_norm": 0.29669007658958435, |
| "learning_rate": 9.98666506700376e-05, |
| "loss": 0.0389, |
| "step": 7210 |
| }, |
| { |
| "grad_norm": 0.3526216745376587, |
| "learning_rate": 9.986544115276081e-05, |
| "loss": 0.0387, |
| "step": 7220 |
| }, |
| { |
| "grad_norm": 0.34248629212379456, |
| "learning_rate": 9.986422618227433e-05, |
| "loss": 0.0401, |
| "step": 7230 |
| }, |
| { |
| "grad_norm": 0.36855003237724304, |
| "learning_rate": 9.986300575871106e-05, |
| "loss": 0.0343, |
| "step": 7240 |
| }, |
| { |
| "grad_norm": 0.4254695177078247, |
| "learning_rate": 9.986177988220444e-05, |
| "loss": 0.035, |
| "step": 7250 |
| }, |
| { |
| "grad_norm": 0.28067904710769653, |
| "learning_rate": 9.986054855288856e-05, |
| "loss": 0.0353, |
| "step": 7260 |
| }, |
| { |
| "grad_norm": 0.39105573296546936, |
| "learning_rate": 9.985931177089802e-05, |
| "loss": 0.0361, |
| "step": 7270 |
| }, |
| { |
| "grad_norm": 0.32079729437828064, |
| "learning_rate": 9.985806953636814e-05, |
| "loss": 0.032, |
| "step": 7280 |
| }, |
| { |
| "grad_norm": 0.318202406167984, |
| "learning_rate": 9.985682184943471e-05, |
| "loss": 0.0388, |
| "step": 7290 |
| }, |
| { |
| "grad_norm": 0.34409984946250916, |
| "learning_rate": 9.98555687102342e-05, |
| "loss": 0.0369, |
| "step": 7300 |
| }, |
| { |
| "grad_norm": 0.3259167969226837, |
| "learning_rate": 9.985431011890367e-05, |
| "loss": 0.0337, |
| "step": 7310 |
| }, |
| { |
| "grad_norm": 0.36389780044555664, |
| "learning_rate": 9.985304607558075e-05, |
| "loss": 0.0372, |
| "step": 7320 |
| }, |
| { |
| "grad_norm": 0.44165968894958496, |
| "learning_rate": 9.985177658040364e-05, |
| "loss": 0.041, |
| "step": 7330 |
| }, |
| { |
| "grad_norm": 0.3812031149864197, |
| "learning_rate": 9.985050163351119e-05, |
| "loss": 0.0391, |
| "step": 7340 |
| }, |
| { |
| "grad_norm": 0.411511093378067, |
| "learning_rate": 9.984922123504286e-05, |
| "loss": 0.0412, |
| "step": 7350 |
| }, |
| { |
| "grad_norm": 0.3777042031288147, |
| "learning_rate": 9.984793538513862e-05, |
| "loss": 0.0354, |
| "step": 7360 |
| }, |
| { |
| "grad_norm": 0.36922356486320496, |
| "learning_rate": 9.984664408393912e-05, |
| "loss": 0.0354, |
| "step": 7370 |
| }, |
| { |
| "grad_norm": 0.384548544883728, |
| "learning_rate": 9.984534733158556e-05, |
| "loss": 0.0361, |
| "step": 7380 |
| }, |
| { |
| "grad_norm": 0.3720257878303528, |
| "learning_rate": 9.984404512821977e-05, |
| "loss": 0.0386, |
| "step": 7390 |
| }, |
| { |
| "grad_norm": 0.41220420598983765, |
| "learning_rate": 9.984273747398411e-05, |
| "loss": 0.0366, |
| "step": 7400 |
| }, |
| { |
| "grad_norm": 0.40043506026268005, |
| "learning_rate": 9.984142436902165e-05, |
| "loss": 0.041, |
| "step": 7410 |
| }, |
| { |
| "grad_norm": 0.3745039701461792, |
| "learning_rate": 9.984010581347596e-05, |
| "loss": 0.0379, |
| "step": 7420 |
| }, |
| { |
| "grad_norm": 0.37869614362716675, |
| "learning_rate": 9.983878180749121e-05, |
| "loss": 0.0446, |
| "step": 7430 |
| }, |
| { |
| "grad_norm": 0.38546255230903625, |
| "learning_rate": 9.983745235121222e-05, |
| "loss": 0.0402, |
| "step": 7440 |
| }, |
| { |
| "grad_norm": 0.39511385560035706, |
| "learning_rate": 9.983611744478438e-05, |
| "loss": 0.0393, |
| "step": 7450 |
| }, |
| { |
| "grad_norm": 0.37394529581069946, |
| "learning_rate": 9.983477708835365e-05, |
| "loss": 0.0401, |
| "step": 7460 |
| }, |
| { |
| "grad_norm": 0.3911571204662323, |
| "learning_rate": 9.983343128206664e-05, |
| "loss": 0.0427, |
| "step": 7470 |
| }, |
| { |
| "grad_norm": 0.39284974336624146, |
| "learning_rate": 9.983208002607049e-05, |
| "loss": 0.0382, |
| "step": 7480 |
| }, |
| { |
| "grad_norm": 0.35614168643951416, |
| "learning_rate": 9.9830723320513e-05, |
| "loss": 0.038, |
| "step": 7490 |
| }, |
| { |
| "grad_norm": 0.24101810157299042, |
| "learning_rate": 9.982936116554254e-05, |
| "loss": 0.0384, |
| "step": 7500 |
| }, |
| { |
| "grad_norm": 0.3587195575237274, |
| "learning_rate": 9.982799356130803e-05, |
| "loss": 0.0343, |
| "step": 7510 |
| }, |
| { |
| "grad_norm": 0.32080957293510437, |
| "learning_rate": 9.982662050795908e-05, |
| "loss": 0.033, |
| "step": 7520 |
| }, |
| { |
| "grad_norm": 0.4107617437839508, |
| "learning_rate": 9.982524200564583e-05, |
| "loss": 0.0386, |
| "step": 7530 |
| }, |
| { |
| "grad_norm": 0.29533591866493225, |
| "learning_rate": 9.982385805451901e-05, |
| "loss": 0.0375, |
| "step": 7540 |
| }, |
| { |
| "grad_norm": 0.30470678210258484, |
| "learning_rate": 9.982246865472998e-05, |
| "loss": 0.037, |
| "step": 7550 |
| }, |
| { |
| "grad_norm": 0.27039673924446106, |
| "learning_rate": 9.982107380643069e-05, |
| "loss": 0.0391, |
| "step": 7560 |
| }, |
| { |
| "grad_norm": 0.3668907582759857, |
| "learning_rate": 9.981967350977368e-05, |
| "loss": 0.0332, |
| "step": 7570 |
| }, |
| { |
| "grad_norm": 0.3666737377643585, |
| "learning_rate": 9.981826776491208e-05, |
| "loss": 0.0375, |
| "step": 7580 |
| }, |
| { |
| "grad_norm": 0.2968786358833313, |
| "learning_rate": 9.98168565719996e-05, |
| "loss": 0.0314, |
| "step": 7590 |
| }, |
| { |
| "grad_norm": 0.32582610845565796, |
| "learning_rate": 9.98154399311906e-05, |
| "loss": 0.0342, |
| "step": 7600 |
| }, |
| { |
| "grad_norm": 0.39996621012687683, |
| "learning_rate": 9.981401784263997e-05, |
| "loss": 0.0355, |
| "step": 7610 |
| }, |
| { |
| "grad_norm": 0.43698522448539734, |
| "learning_rate": 9.981259030650326e-05, |
| "loss": 0.0395, |
| "step": 7620 |
| }, |
| { |
| "grad_norm": 0.3867236077785492, |
| "learning_rate": 9.981115732293655e-05, |
| "loss": 0.0346, |
| "step": 7630 |
| }, |
| { |
| "grad_norm": 0.37524399161338806, |
| "learning_rate": 9.980971889209659e-05, |
| "loss": 0.043, |
| "step": 7640 |
| }, |
| { |
| "grad_norm": 0.29783526062965393, |
| "learning_rate": 9.980827501414064e-05, |
| "loss": 0.0403, |
| "step": 7650 |
| }, |
| { |
| "grad_norm": 0.311921626329422, |
| "learning_rate": 9.980682568922663e-05, |
| "loss": 0.0316, |
| "step": 7660 |
| }, |
| { |
| "grad_norm": 0.287200003862381, |
| "learning_rate": 9.980537091751304e-05, |
| "loss": 0.0346, |
| "step": 7670 |
| }, |
| { |
| "grad_norm": 0.36688342690467834, |
| "learning_rate": 9.980391069915897e-05, |
| "loss": 0.0366, |
| "step": 7680 |
| }, |
| { |
| "grad_norm": 0.3305376470088959, |
| "learning_rate": 9.98024450343241e-05, |
| "loss": 0.0383, |
| "step": 7690 |
| }, |
| { |
| "grad_norm": 0.24818134307861328, |
| "learning_rate": 9.980097392316872e-05, |
| "loss": 0.0377, |
| "step": 7700 |
| }, |
| { |
| "grad_norm": 0.323808878660202, |
| "learning_rate": 9.97994973658537e-05, |
| "loss": 0.0384, |
| "step": 7710 |
| }, |
| { |
| "grad_norm": 0.3871549963951111, |
| "learning_rate": 9.979801536254054e-05, |
| "loss": 0.0311, |
| "step": 7720 |
| }, |
| { |
| "grad_norm": 0.34450316429138184, |
| "learning_rate": 9.979652791339127e-05, |
| "loss": 0.0367, |
| "step": 7730 |
| }, |
| { |
| "grad_norm": 0.3580043911933899, |
| "learning_rate": 9.97950350185686e-05, |
| "loss": 0.0344, |
| "step": 7740 |
| }, |
| { |
| "grad_norm": 0.3398709297180176, |
| "learning_rate": 9.979353667823574e-05, |
| "loss": 0.036, |
| "step": 7750 |
| }, |
| { |
| "grad_norm": 0.3904750943183899, |
| "learning_rate": 9.979203289255658e-05, |
| "loss": 0.0359, |
| "step": 7760 |
| }, |
| { |
| "grad_norm": 0.3727996051311493, |
| "learning_rate": 9.979052366169557e-05, |
| "loss": 0.0339, |
| "step": 7770 |
| }, |
| { |
| "grad_norm": 0.30604737997055054, |
| "learning_rate": 9.978900898581775e-05, |
| "loss": 0.0361, |
| "step": 7780 |
| }, |
| { |
| "grad_norm": 0.443472295999527, |
| "learning_rate": 9.978748886508875e-05, |
| "loss": 0.032, |
| "step": 7790 |
| }, |
| { |
| "grad_norm": 0.34036096930503845, |
| "learning_rate": 9.978596329967484e-05, |
| "loss": 0.0397, |
| "step": 7800 |
| }, |
| { |
| "grad_norm": 0.33206358551979065, |
| "learning_rate": 9.978443228974284e-05, |
| "loss": 0.0382, |
| "step": 7810 |
| }, |
| { |
| "grad_norm": 0.3829366862773895, |
| "learning_rate": 9.978289583546015e-05, |
| "loss": 0.0356, |
| "step": 7820 |
| }, |
| { |
| "grad_norm": 0.44401052594184875, |
| "learning_rate": 9.978135393699484e-05, |
| "loss": 0.0332, |
| "step": 7830 |
| }, |
| { |
| "grad_norm": 0.41893044114112854, |
| "learning_rate": 9.977980659451548e-05, |
| "loss": 0.036, |
| "step": 7840 |
| }, |
| { |
| "grad_norm": 0.35988131165504456, |
| "learning_rate": 9.977825380819135e-05, |
| "loss": 0.0312, |
| "step": 7850 |
| }, |
| { |
| "grad_norm": 0.41945135593414307, |
| "learning_rate": 9.97766955781922e-05, |
| "loss": 0.0346, |
| "step": 7860 |
| }, |
| { |
| "grad_norm": 0.2597813606262207, |
| "learning_rate": 9.977513190468848e-05, |
| "loss": 0.033, |
| "step": 7870 |
| }, |
| { |
| "grad_norm": 0.3056352734565735, |
| "learning_rate": 9.977356278785116e-05, |
| "loss": 0.0373, |
| "step": 7880 |
| }, |
| { |
| "grad_norm": 0.24130724370479584, |
| "learning_rate": 9.977198822785184e-05, |
| "loss": 0.0326, |
| "step": 7890 |
| }, |
| { |
| "grad_norm": 0.22455158829689026, |
| "learning_rate": 9.977040822486273e-05, |
| "loss": 0.0335, |
| "step": 7900 |
| }, |
| { |
| "grad_norm": 0.3280586302280426, |
| "learning_rate": 9.97688227790566e-05, |
| "loss": 0.0388, |
| "step": 7910 |
| }, |
| { |
| "grad_norm": 0.3441072702407837, |
| "learning_rate": 9.976723189060684e-05, |
| "loss": 0.0338, |
| "step": 7920 |
| }, |
| { |
| "grad_norm": 0.3744092583656311, |
| "learning_rate": 9.976563555968742e-05, |
| "loss": 0.0359, |
| "step": 7930 |
| }, |
| { |
| "grad_norm": 0.25684285163879395, |
| "learning_rate": 9.976403378647292e-05, |
| "loss": 0.0374, |
| "step": 7940 |
| }, |
| { |
| "grad_norm": 0.27023550868034363, |
| "learning_rate": 9.97624265711385e-05, |
| "loss": 0.0312, |
| "step": 7950 |
| }, |
| { |
| "grad_norm": 0.3883087635040283, |
| "learning_rate": 9.976081391385993e-05, |
| "loss": 0.0361, |
| "step": 7960 |
| }, |
| { |
| "grad_norm": 0.26038286089897156, |
| "learning_rate": 9.975919581481356e-05, |
| "loss": 0.0336, |
| "step": 7970 |
| }, |
| { |
| "grad_norm": 0.2714274227619171, |
| "learning_rate": 9.975757227417634e-05, |
| "loss": 0.0348, |
| "step": 7980 |
| }, |
| { |
| "grad_norm": 0.3606083393096924, |
| "learning_rate": 9.975594329212586e-05, |
| "loss": 0.0335, |
| "step": 7990 |
| }, |
| { |
| "grad_norm": 0.32764312624931335, |
| "learning_rate": 9.97543088688402e-05, |
| "loss": 0.0317, |
| "step": 8000 |
| }, |
| { |
| "grad_norm": 0.41286933422088623, |
| "learning_rate": 9.975266900449814e-05, |
| "loss": 0.0353, |
| "step": 8010 |
| }, |
| { |
| "grad_norm": 0.39461225271224976, |
| "learning_rate": 9.975102369927898e-05, |
| "loss": 0.0386, |
| "step": 8020 |
| }, |
| { |
| "grad_norm": 0.37906038761138916, |
| "learning_rate": 9.974937295336269e-05, |
| "loss": 0.0325, |
| "step": 8030 |
| }, |
| { |
| "grad_norm": 0.41981109976768494, |
| "learning_rate": 9.974771676692975e-05, |
| "loss": 0.0385, |
| "step": 8040 |
| }, |
| { |
| "grad_norm": 0.3024406433105469, |
| "learning_rate": 9.974605514016131e-05, |
| "loss": 0.0328, |
| "step": 8050 |
| }, |
| { |
| "grad_norm": 0.31535062193870544, |
| "learning_rate": 9.974438807323907e-05, |
| "loss": 0.0302, |
| "step": 8060 |
| }, |
| { |
| "grad_norm": 0.3659132421016693, |
| "learning_rate": 9.974271556634535e-05, |
| "loss": 0.0334, |
| "step": 8070 |
| }, |
| { |
| "grad_norm": 0.3096853792667389, |
| "learning_rate": 9.974103761966302e-05, |
| "loss": 0.0331, |
| "step": 8080 |
| }, |
| { |
| "grad_norm": 0.3210420310497284, |
| "learning_rate": 9.973935423337563e-05, |
| "loss": 0.0317, |
| "step": 8090 |
| }, |
| { |
| "grad_norm": 0.3665407598018646, |
| "learning_rate": 9.973766540766722e-05, |
| "loss": 0.0339, |
| "step": 8100 |
| }, |
| { |
| "grad_norm": 0.3262102007865906, |
| "learning_rate": 9.97359711427225e-05, |
| "loss": 0.037, |
| "step": 8110 |
| }, |
| { |
| "grad_norm": 0.33875203132629395, |
| "learning_rate": 9.973427143872677e-05, |
| "loss": 0.0361, |
| "step": 8120 |
| }, |
| { |
| "grad_norm": 0.33026519417762756, |
| "learning_rate": 9.973256629586589e-05, |
| "loss": 0.0323, |
| "step": 8130 |
| }, |
| { |
| "grad_norm": 0.3862609267234802, |
| "learning_rate": 9.973085571432632e-05, |
| "loss": 0.0366, |
| "step": 8140 |
| }, |
| { |
| "grad_norm": 0.3992314636707306, |
| "learning_rate": 9.972913969429513e-05, |
| "loss": 0.035, |
| "step": 8150 |
| }, |
| { |
| "grad_norm": 0.3993546962738037, |
| "learning_rate": 9.972741823596e-05, |
| "loss": 0.0344, |
| "step": 8160 |
| }, |
| { |
| "grad_norm": 0.38570424914360046, |
| "learning_rate": 9.972569133950917e-05, |
| "loss": 0.0351, |
| "step": 8170 |
| }, |
| { |
| "grad_norm": 0.29162174463272095, |
| "learning_rate": 9.972395900513151e-05, |
| "loss": 0.0344, |
| "step": 8180 |
| }, |
| { |
| "grad_norm": 0.3276876211166382, |
| "learning_rate": 9.972222123301645e-05, |
| "loss": 0.0333, |
| "step": 8190 |
| }, |
| { |
| "grad_norm": 0.3507404029369354, |
| "learning_rate": 9.972047802335403e-05, |
| "loss": 0.0307, |
| "step": 8200 |
| }, |
| { |
| "grad_norm": 0.32428833842277527, |
| "learning_rate": 9.971872937633488e-05, |
| "loss": 0.0345, |
| "step": 8210 |
| }, |
| { |
| "grad_norm": 0.3443835377693176, |
| "learning_rate": 9.971697529215024e-05, |
| "loss": 0.0325, |
| "step": 8220 |
| }, |
| { |
| "grad_norm": 0.3595477044582367, |
| "learning_rate": 9.971521577099192e-05, |
| "loss": 0.0326, |
| "step": 8230 |
| }, |
| { |
| "grad_norm": 0.3238294720649719, |
| "learning_rate": 9.971345081305236e-05, |
| "loss": 0.0294, |
| "step": 8240 |
| }, |
| { |
| "grad_norm": 0.29126474261283875, |
| "learning_rate": 9.971168041852456e-05, |
| "loss": 0.0315, |
| "step": 8250 |
| }, |
| { |
| "grad_norm": 0.29169949889183044, |
| "learning_rate": 9.970990458760215e-05, |
| "loss": 0.0345, |
| "step": 8260 |
| }, |
| { |
| "grad_norm": 0.3826935887336731, |
| "learning_rate": 9.970812332047929e-05, |
| "loss": 0.0365, |
| "step": 8270 |
| }, |
| { |
| "grad_norm": 0.30441099405288696, |
| "learning_rate": 9.97063366173508e-05, |
| "loss": 0.0358, |
| "step": 8280 |
| }, |
| { |
| "grad_norm": 0.37987688183784485, |
| "learning_rate": 9.970454447841207e-05, |
| "loss": 0.0335, |
| "step": 8290 |
| }, |
| { |
| "grad_norm": 0.31489792466163635, |
| "learning_rate": 9.970274690385909e-05, |
| "loss": 0.0302, |
| "step": 8300 |
| }, |
| { |
| "grad_norm": 0.3352809250354767, |
| "learning_rate": 9.970094389388844e-05, |
| "loss": 0.0357, |
| "step": 8310 |
| }, |
| { |
| "grad_norm": 0.2745957672595978, |
| "learning_rate": 9.969913544869728e-05, |
| "loss": 0.0333, |
| "step": 8320 |
| }, |
| { |
| "grad_norm": 0.3647013008594513, |
| "learning_rate": 9.96973215684834e-05, |
| "loss": 0.031, |
| "step": 8330 |
| }, |
| { |
| "grad_norm": 0.3371700644493103, |
| "learning_rate": 9.969550225344513e-05, |
| "loss": 0.032, |
| "step": 8340 |
| }, |
| { |
| "grad_norm": 0.3446926176548004, |
| "learning_rate": 9.969367750378147e-05, |
| "loss": 0.0308, |
| "step": 8350 |
| }, |
| { |
| "grad_norm": 0.2754344642162323, |
| "learning_rate": 9.969184731969194e-05, |
| "loss": 0.029, |
| "step": 8360 |
| }, |
| { |
| "grad_norm": 0.3323659300804138, |
| "learning_rate": 9.96900117013767e-05, |
| "loss": 0.0313, |
| "step": 8370 |
| }, |
| { |
| "grad_norm": 0.4204343557357788, |
| "learning_rate": 9.96881706490365e-05, |
| "loss": 0.0328, |
| "step": 8380 |
| }, |
| { |
| "grad_norm": 0.3801116943359375, |
| "learning_rate": 9.968632416287265e-05, |
| "loss": 0.0323, |
| "step": 8390 |
| }, |
| { |
| "grad_norm": 0.33352887630462646, |
| "learning_rate": 9.96844722430871e-05, |
| "loss": 0.0313, |
| "step": 8400 |
| }, |
| { |
| "grad_norm": 0.31583085656166077, |
| "learning_rate": 9.968261488988235e-05, |
| "loss": 0.0314, |
| "step": 8410 |
| }, |
| { |
| "grad_norm": 0.35499894618988037, |
| "learning_rate": 9.968075210346155e-05, |
| "loss": 0.0306, |
| "step": 8420 |
| }, |
| { |
| "grad_norm": 0.31201276183128357, |
| "learning_rate": 9.967888388402839e-05, |
| "loss": 0.0297, |
| "step": 8430 |
| }, |
| { |
| "grad_norm": 0.31827130913734436, |
| "learning_rate": 9.967701023178717e-05, |
| "loss": 0.0315, |
| "step": 8440 |
| }, |
| { |
| "grad_norm": 0.3984922170639038, |
| "learning_rate": 9.967513114694282e-05, |
| "loss": 0.0317, |
| "step": 8450 |
| }, |
| { |
| "grad_norm": 0.3843662738800049, |
| "learning_rate": 9.967324662970079e-05, |
| "loss": 0.0339, |
| "step": 8460 |
| }, |
| { |
| "grad_norm": 0.34678617119789124, |
| "learning_rate": 9.96713566802672e-05, |
| "loss": 0.0348, |
| "step": 8470 |
| }, |
| { |
| "grad_norm": 0.3988155424594879, |
| "learning_rate": 9.966946129884873e-05, |
| "loss": 0.0316, |
| "step": 8480 |
| }, |
| { |
| "grad_norm": 0.37476977705955505, |
| "learning_rate": 9.966756048565265e-05, |
| "loss": 0.0298, |
| "step": 8490 |
| }, |
| { |
| "grad_norm": 0.4248075783252716, |
| "learning_rate": 9.966565424088681e-05, |
| "loss": 0.0335, |
| "step": 8500 |
| }, |
| { |
| "grad_norm": 0.34596335887908936, |
| "learning_rate": 9.96637425647597e-05, |
| "loss": 0.0321, |
| "step": 8510 |
| }, |
| { |
| "grad_norm": 0.3652588129043579, |
| "learning_rate": 9.966182545748038e-05, |
| "loss": 0.0336, |
| "step": 8520 |
| }, |
| { |
| "grad_norm": 0.2632305920124054, |
| "learning_rate": 9.96599029192585e-05, |
| "loss": 0.0353, |
| "step": 8530 |
| }, |
| { |
| "grad_norm": 0.3150872588157654, |
| "learning_rate": 9.965797495030428e-05, |
| "loss": 0.0358, |
| "step": 8540 |
| }, |
| { |
| "grad_norm": 0.4308720529079437, |
| "learning_rate": 9.96560415508286e-05, |
| "loss": 0.0335, |
| "step": 8550 |
| }, |
| { |
| "grad_norm": 0.44409430027008057, |
| "learning_rate": 9.965410272104286e-05, |
| "loss": 0.0339, |
| "step": 8560 |
| }, |
| { |
| "grad_norm": 0.4050586223602295, |
| "learning_rate": 9.96521584611591e-05, |
| "loss": 0.0289, |
| "step": 8570 |
| }, |
| { |
| "grad_norm": 0.2942954897880554, |
| "learning_rate": 9.965020877138994e-05, |
| "loss": 0.0339, |
| "step": 8580 |
| }, |
| { |
| "grad_norm": 0.32356417179107666, |
| "learning_rate": 9.964825365194861e-05, |
| "loss": 0.0312, |
| "step": 8590 |
| }, |
| { |
| "grad_norm": 0.24951142072677612, |
| "learning_rate": 9.96462931030489e-05, |
| "loss": 0.0299, |
| "step": 8600 |
| }, |
| { |
| "grad_norm": 0.3212977349758148, |
| "learning_rate": 9.96443271249052e-05, |
| "loss": 0.0313, |
| "step": 8610 |
| }, |
| { |
| "grad_norm": 0.3393869996070862, |
| "learning_rate": 9.964235571773255e-05, |
| "loss": 0.0328, |
| "step": 8620 |
| }, |
| { |
| "grad_norm": 0.3633405566215515, |
| "learning_rate": 9.96403788817465e-05, |
| "loss": 0.0284, |
| "step": 8630 |
| }, |
| { |
| "grad_norm": 0.35588517785072327, |
| "learning_rate": 9.963839661716325e-05, |
| "loss": 0.0312, |
| "step": 8640 |
| }, |
| { |
| "grad_norm": 0.22534853219985962, |
| "learning_rate": 9.963640892419958e-05, |
| "loss": 0.0317, |
| "step": 8650 |
| }, |
| { |
| "grad_norm": 0.29241955280303955, |
| "learning_rate": 9.963441580307286e-05, |
| "loss": 0.0322, |
| "step": 8660 |
| }, |
| { |
| "grad_norm": 0.3198605477809906, |
| "learning_rate": 9.963241725400104e-05, |
| "loss": 0.0329, |
| "step": 8670 |
| }, |
| { |
| "grad_norm": 0.3798952102661133, |
| "learning_rate": 9.963041327720271e-05, |
| "loss": 0.0333, |
| "step": 8680 |
| }, |
| { |
| "grad_norm": 0.3572438955307007, |
| "learning_rate": 9.962840387289697e-05, |
| "loss": 0.033, |
| "step": 8690 |
| }, |
| { |
| "grad_norm": 0.37413084506988525, |
| "learning_rate": 9.962638904130363e-05, |
| "loss": 0.0292, |
| "step": 8700 |
| }, |
| { |
| "grad_norm": 0.2612970769405365, |
| "learning_rate": 9.962436878264298e-05, |
| "loss": 0.0287, |
| "step": 8710 |
| }, |
| { |
| "grad_norm": 0.30455777049064636, |
| "learning_rate": 9.962234309713598e-05, |
| "loss": 0.0266, |
| "step": 8720 |
| }, |
| { |
| "grad_norm": 0.37009328603744507, |
| "learning_rate": 9.962031198500414e-05, |
| "loss": 0.0335, |
| "step": 8730 |
| }, |
| { |
| "grad_norm": 0.3331073820590973, |
| "learning_rate": 9.961827544646958e-05, |
| "loss": 0.0295, |
| "step": 8740 |
| }, |
| { |
| "grad_norm": 0.3138063848018646, |
| "learning_rate": 9.961623348175501e-05, |
| "loss": 0.0334, |
| "step": 8750 |
| }, |
| { |
| "grad_norm": 0.2899344563484192, |
| "learning_rate": 9.961418609108377e-05, |
| "loss": 0.0301, |
| "step": 8760 |
| }, |
| { |
| "grad_norm": 0.23412205278873444, |
| "learning_rate": 9.961213327467971e-05, |
| "loss": 0.0255, |
| "step": 8770 |
| }, |
| { |
| "grad_norm": 0.2888118028640747, |
| "learning_rate": 9.961007503276736e-05, |
| "loss": 0.0287, |
| "step": 8780 |
| }, |
| { |
| "grad_norm": 0.3145732879638672, |
| "learning_rate": 9.960801136557179e-05, |
| "loss": 0.0281, |
| "step": 8790 |
| }, |
| { |
| "grad_norm": 0.34552791714668274, |
| "learning_rate": 9.960594227331866e-05, |
| "loss": 0.0339, |
| "step": 8800 |
| }, |
| { |
| "grad_norm": 0.43196219205856323, |
| "learning_rate": 9.960386775623429e-05, |
| "loss": 0.0325, |
| "step": 8810 |
| }, |
| { |
| "grad_norm": 0.39344289898872375, |
| "learning_rate": 9.96017878145455e-05, |
| "loss": 0.0336, |
| "step": 8820 |
| }, |
| { |
| "grad_norm": 0.37254130840301514, |
| "learning_rate": 9.959970244847977e-05, |
| "loss": 0.0358, |
| "step": 8830 |
| }, |
| { |
| "grad_norm": 0.3082648813724518, |
| "learning_rate": 9.959761165826518e-05, |
| "loss": 0.0328, |
| "step": 8840 |
| }, |
| { |
| "grad_norm": 0.342195063829422, |
| "learning_rate": 9.959551544413033e-05, |
| "loss": 0.0367, |
| "step": 8850 |
| }, |
| { |
| "grad_norm": 0.29704222083091736, |
| "learning_rate": 9.959341380630448e-05, |
| "loss": 0.0315, |
| "step": 8860 |
| }, |
| { |
| "grad_norm": 0.30029696226119995, |
| "learning_rate": 9.959130674501746e-05, |
| "loss": 0.0333, |
| "step": 8870 |
| }, |
| { |
| "grad_norm": 0.4003520607948303, |
| "learning_rate": 9.958919426049968e-05, |
| "loss": 0.0307, |
| "step": 8880 |
| }, |
| { |
| "grad_norm": 0.3678707778453827, |
| "learning_rate": 9.958707635298219e-05, |
| "loss": 0.0335, |
| "step": 8890 |
| }, |
| { |
| "grad_norm": 0.4012024402618408, |
| "learning_rate": 9.958495302269657e-05, |
| "loss": 0.0281, |
| "step": 8900 |
| }, |
| { |
| "grad_norm": 0.25086092948913574, |
| "learning_rate": 9.958282426987503e-05, |
| "loss": 0.0302, |
| "step": 8910 |
| }, |
| { |
| "grad_norm": 0.3453119695186615, |
| "learning_rate": 9.95806900947504e-05, |
| "loss": 0.028, |
| "step": 8920 |
| }, |
| { |
| "grad_norm": 0.31570178270339966, |
| "learning_rate": 9.957855049755604e-05, |
| "loss": 0.0293, |
| "step": 8930 |
| }, |
| { |
| "grad_norm": 0.292519211769104, |
| "learning_rate": 9.957640547852593e-05, |
| "loss": 0.0278, |
| "step": 8940 |
| }, |
| { |
| "grad_norm": 0.35711362957954407, |
| "learning_rate": 9.957425503789466e-05, |
| "loss": 0.0325, |
| "step": 8950 |
| }, |
| { |
| "grad_norm": 0.34886813163757324, |
| "learning_rate": 9.957209917589738e-05, |
| "loss": 0.035, |
| "step": 8960 |
| }, |
| { |
| "grad_norm": 0.3956030607223511, |
| "learning_rate": 9.956993789276987e-05, |
| "loss": 0.0332, |
| "step": 8970 |
| }, |
| { |
| "grad_norm": 0.32648199796676636, |
| "learning_rate": 9.956777118874847e-05, |
| "loss": 0.0334, |
| "step": 8980 |
| }, |
| { |
| "grad_norm": 0.36385777592658997, |
| "learning_rate": 9.956559906407016e-05, |
| "loss": 0.0314, |
| "step": 8990 |
| }, |
| { |
| "grad_norm": 0.30030733346939087, |
| "learning_rate": 9.956342151897245e-05, |
| "loss": 0.0369, |
| "step": 9000 |
| }, |
| { |
| "grad_norm": 0.2771800458431244, |
| "learning_rate": 9.956123855369346e-05, |
| "loss": 0.0342, |
| "step": 9010 |
| }, |
| { |
| "grad_norm": 0.2924581468105316, |
| "learning_rate": 9.955905016847196e-05, |
| "loss": 0.0265, |
| "step": 9020 |
| }, |
| { |
| "grad_norm": 0.2809629440307617, |
| "learning_rate": 9.955685636354723e-05, |
| "loss": 0.0337, |
| "step": 9030 |
| }, |
| { |
| "grad_norm": 0.32476192712783813, |
| "learning_rate": 9.95546571391592e-05, |
| "loss": 0.0329, |
| "step": 9040 |
| }, |
| { |
| "grad_norm": 0.4399145543575287, |
| "learning_rate": 9.955245249554837e-05, |
| "loss": 0.0286, |
| "step": 9050 |
| }, |
| { |
| "grad_norm": 0.277260422706604, |
| "learning_rate": 9.955024243295582e-05, |
| "loss": 0.0293, |
| "step": 9060 |
| }, |
| { |
| "grad_norm": 0.2997894883155823, |
| "learning_rate": 9.954802695162328e-05, |
| "loss": 0.0302, |
| "step": 9070 |
| }, |
| { |
| "grad_norm": 0.2895433306694031, |
| "learning_rate": 9.954580605179302e-05, |
| "loss": 0.0328, |
| "step": 9080 |
| }, |
| { |
| "grad_norm": 0.39543765783309937, |
| "learning_rate": 9.954357973370788e-05, |
| "loss": 0.0312, |
| "step": 9090 |
| }, |
| { |
| "grad_norm": 0.3004811704158783, |
| "learning_rate": 9.954134799761135e-05, |
| "loss": 0.0289, |
| "step": 9100 |
| }, |
| { |
| "grad_norm": 0.27135422825813293, |
| "learning_rate": 9.953911084374748e-05, |
| "loss": 0.031, |
| "step": 9110 |
| }, |
| { |
| "grad_norm": 0.280902236700058, |
| "learning_rate": 9.953686827236093e-05, |
| "loss": 0.0304, |
| "step": 9120 |
| }, |
| { |
| "grad_norm": 0.3989546597003937, |
| "learning_rate": 9.953462028369695e-05, |
| "loss": 0.029, |
| "step": 9130 |
| }, |
| { |
| "grad_norm": 0.30520907044410706, |
| "learning_rate": 9.953236687800136e-05, |
| "loss": 0.0299, |
| "step": 9140 |
| }, |
| { |
| "grad_norm": 0.2653387784957886, |
| "learning_rate": 9.95301080555206e-05, |
| "loss": 0.0273, |
| "step": 9150 |
| }, |
| { |
| "grad_norm": 0.2956410050392151, |
| "learning_rate": 9.952784381650171e-05, |
| "loss": 0.0287, |
| "step": 9160 |
| }, |
| { |
| "grad_norm": 0.3985331058502197, |
| "learning_rate": 9.952557416119226e-05, |
| "loss": 0.0313, |
| "step": 9170 |
| }, |
| { |
| "grad_norm": 0.33697912096977234, |
| "learning_rate": 9.95232990898405e-05, |
| "loss": 0.0273, |
| "step": 9180 |
| }, |
| { |
| "grad_norm": 0.3343970775604248, |
| "learning_rate": 9.95210186026952e-05, |
| "loss": 0.0291, |
| "step": 9190 |
| }, |
| { |
| "grad_norm": 0.30179843306541443, |
| "learning_rate": 9.951873270000576e-05, |
| "loss": 0.0308, |
| "step": 9200 |
| }, |
| { |
| "grad_norm": 0.33838656544685364, |
| "learning_rate": 9.951644138202216e-05, |
| "loss": 0.0314, |
| "step": 9210 |
| }, |
| { |
| "grad_norm": 0.44132405519485474, |
| "learning_rate": 9.951414464899498e-05, |
| "loss": 0.0324, |
| "step": 9220 |
| }, |
| { |
| "grad_norm": 0.3706565201282501, |
| "learning_rate": 9.951184250117538e-05, |
| "loss": 0.0286, |
| "step": 9230 |
| }, |
| { |
| "grad_norm": 0.321702778339386, |
| "learning_rate": 9.950953493881513e-05, |
| "loss": 0.0318, |
| "step": 9240 |
| }, |
| { |
| "grad_norm": 0.3702629506587982, |
| "learning_rate": 9.950722196216658e-05, |
| "loss": 0.0277, |
| "step": 9250 |
| }, |
| { |
| "grad_norm": 0.2816333472728729, |
| "learning_rate": 9.950490357148265e-05, |
| "loss": 0.0293, |
| "step": 9260 |
| }, |
| { |
| "grad_norm": 0.29560866951942444, |
| "learning_rate": 9.950257976701692e-05, |
| "loss": 0.0302, |
| "step": 9270 |
| }, |
| { |
| "grad_norm": 0.3872072696685791, |
| "learning_rate": 9.950025054902348e-05, |
| "loss": 0.0312, |
| "step": 9280 |
| }, |
| { |
| "grad_norm": 0.383853942155838, |
| "learning_rate": 9.949791591775706e-05, |
| "loss": 0.0296, |
| "step": 9290 |
| }, |
| { |
| "grad_norm": 0.2638419568538666, |
| "learning_rate": 9.949557587347298e-05, |
| "loss": 0.0312, |
| "step": 9300 |
| }, |
| { |
| "grad_norm": 0.2792360186576843, |
| "learning_rate": 9.949323041642713e-05, |
| "loss": 0.0266, |
| "step": 9310 |
| }, |
| { |
| "grad_norm": 0.34312698245048523, |
| "learning_rate": 9.949087954687602e-05, |
| "loss": 0.0274, |
| "step": 9320 |
| }, |
| { |
| "grad_norm": 0.2778584957122803, |
| "learning_rate": 9.948852326507672e-05, |
| "loss": 0.0274, |
| "step": 9330 |
| }, |
| { |
| "grad_norm": 0.35982704162597656, |
| "learning_rate": 9.948616157128694e-05, |
| "loss": 0.027, |
| "step": 9340 |
| }, |
| { |
| "grad_norm": 0.18458214402198792, |
| "learning_rate": 9.948379446576493e-05, |
| "loss": 0.0304, |
| "step": 9350 |
| }, |
| { |
| "grad_norm": 0.29817137122154236, |
| "learning_rate": 9.948142194876952e-05, |
| "loss": 0.0273, |
| "step": 9360 |
| }, |
| { |
| "grad_norm": 0.3318087160587311, |
| "learning_rate": 9.947904402056024e-05, |
| "loss": 0.0274, |
| "step": 9370 |
| }, |
| { |
| "grad_norm": 0.31133589148521423, |
| "learning_rate": 9.947666068139708e-05, |
| "loss": 0.0289, |
| "step": 9380 |
| }, |
| { |
| "grad_norm": 0.2739274799823761, |
| "learning_rate": 9.947427193154071e-05, |
| "loss": 0.0254, |
| "step": 9390 |
| }, |
| { |
| "grad_norm": 0.31195876002311707, |
| "learning_rate": 9.947187777125233e-05, |
| "loss": 0.029, |
| "step": 9400 |
| }, |
| { |
| "grad_norm": 0.3110937774181366, |
| "learning_rate": 9.946947820079377e-05, |
| "loss": 0.025, |
| "step": 9410 |
| }, |
| { |
| "grad_norm": 0.31233981251716614, |
| "learning_rate": 9.946707322042747e-05, |
| "loss": 0.0283, |
| "step": 9420 |
| }, |
| { |
| "grad_norm": 0.2944711148738861, |
| "learning_rate": 9.94646628304164e-05, |
| "loss": 0.0298, |
| "step": 9430 |
| }, |
| { |
| "grad_norm": 0.28662657737731934, |
| "learning_rate": 9.946224703102418e-05, |
| "loss": 0.0266, |
| "step": 9440 |
| }, |
| { |
| "grad_norm": 0.26988640427589417, |
| "learning_rate": 9.945982582251498e-05, |
| "loss": 0.0299, |
| "step": 9450 |
| }, |
| { |
| "grad_norm": 0.34679415822029114, |
| "learning_rate": 9.94573992051536e-05, |
| "loss": 0.0259, |
| "step": 9460 |
| }, |
| { |
| "grad_norm": 0.27944183349609375, |
| "learning_rate": 9.94549671792054e-05, |
| "loss": 0.0262, |
| "step": 9470 |
| }, |
| { |
| "grad_norm": 0.2616512179374695, |
| "learning_rate": 9.945252974493635e-05, |
| "loss": 0.0285, |
| "step": 9480 |
| }, |
| { |
| "grad_norm": 0.3048749566078186, |
| "learning_rate": 9.9450086902613e-05, |
| "loss": 0.028, |
| "step": 9490 |
| }, |
| { |
| "grad_norm": 0.27681034803390503, |
| "learning_rate": 9.944763865250248e-05, |
| "loss": 0.0289, |
| "step": 9500 |
| }, |
| { |
| "grad_norm": 0.34117570519447327, |
| "learning_rate": 9.944518499487254e-05, |
| "loss": 0.0303, |
| "step": 9510 |
| }, |
| { |
| "grad_norm": 0.30211806297302246, |
| "learning_rate": 9.944272592999151e-05, |
| "loss": 0.0261, |
| "step": 9520 |
| }, |
| { |
| "grad_norm": 0.2559513747692108, |
| "learning_rate": 9.94402614581283e-05, |
| "loss": 0.0234, |
| "step": 9530 |
| }, |
| { |
| "grad_norm": 0.2847338318824768, |
| "learning_rate": 9.943779157955244e-05, |
| "loss": 0.0251, |
| "step": 9540 |
| }, |
| { |
| "grad_norm": 0.27755439281463623, |
| "learning_rate": 9.943531629453403e-05, |
| "loss": 0.0325, |
| "step": 9550 |
| }, |
| { |
| "grad_norm": 0.2980590760707855, |
| "learning_rate": 9.943283560334375e-05, |
| "loss": 0.0292, |
| "step": 9560 |
| }, |
| { |
| "grad_norm": 0.3490363359451294, |
| "learning_rate": 9.943034950625288e-05, |
| "loss": 0.0281, |
| "step": 9570 |
| }, |
| { |
| "grad_norm": 0.3410196304321289, |
| "learning_rate": 9.942785800353332e-05, |
| "loss": 0.0326, |
| "step": 9580 |
| }, |
| { |
| "grad_norm": 0.28175175189971924, |
| "learning_rate": 9.942536109545751e-05, |
| "loss": 0.0313, |
| "step": 9590 |
| }, |
| { |
| "grad_norm": 0.310227632522583, |
| "learning_rate": 9.942285878229853e-05, |
| "loss": 0.0259, |
| "step": 9600 |
| }, |
| { |
| "grad_norm": 0.2682930827140808, |
| "learning_rate": 9.942035106433001e-05, |
| "loss": 0.0301, |
| "step": 9610 |
| }, |
| { |
| "grad_norm": 0.3313826620578766, |
| "learning_rate": 9.94178379418262e-05, |
| "loss": 0.0246, |
| "step": 9620 |
| }, |
| { |
| "grad_norm": 0.36503440141677856, |
| "learning_rate": 9.941531941506194e-05, |
| "loss": 0.0262, |
| "step": 9630 |
| }, |
| { |
| "grad_norm": 0.29071158170700073, |
| "learning_rate": 9.941279548431263e-05, |
| "loss": 0.0313, |
| "step": 9640 |
| }, |
| { |
| "grad_norm": 0.35149767994880676, |
| "learning_rate": 9.941026614985431e-05, |
| "loss": 0.0305, |
| "step": 9650 |
| }, |
| { |
| "grad_norm": 0.34794124960899353, |
| "learning_rate": 9.940773141196357e-05, |
| "loss": 0.0303, |
| "step": 9660 |
| }, |
| { |
| "grad_norm": 0.4192151427268982, |
| "learning_rate": 9.94051912709176e-05, |
| "loss": 0.0267, |
| "step": 9670 |
| }, |
| { |
| "grad_norm": 0.24447749555110931, |
| "learning_rate": 9.940264572699421e-05, |
| "loss": 0.0262, |
| "step": 9680 |
| }, |
| { |
| "grad_norm": 0.25707894563674927, |
| "learning_rate": 9.940009478047174e-05, |
| "loss": 0.0308, |
| "step": 9690 |
| }, |
| { |
| "grad_norm": 0.38117682933807373, |
| "learning_rate": 9.939753843162918e-05, |
| "loss": 0.0291, |
| "step": 9700 |
| }, |
| { |
| "grad_norm": 0.3183417022228241, |
| "learning_rate": 9.939497668074609e-05, |
| "loss": 0.0278, |
| "step": 9710 |
| }, |
| { |
| "grad_norm": 0.28810641169548035, |
| "learning_rate": 9.93924095281026e-05, |
| "loss": 0.0268, |
| "step": 9720 |
| }, |
| { |
| "grad_norm": 0.2791811525821686, |
| "learning_rate": 9.938983697397948e-05, |
| "loss": 0.0253, |
| "step": 9730 |
| }, |
| { |
| "grad_norm": 0.2798166573047638, |
| "learning_rate": 9.938725901865805e-05, |
| "loss": 0.0278, |
| "step": 9740 |
| }, |
| { |
| "grad_norm": 0.4268825054168701, |
| "learning_rate": 9.93846756624202e-05, |
| "loss": 0.0307, |
| "step": 9750 |
| }, |
| { |
| "grad_norm": 0.27204859256744385, |
| "learning_rate": 9.938208690554849e-05, |
| "loss": 0.0278, |
| "step": 9760 |
| }, |
| { |
| "grad_norm": 0.331249475479126, |
| "learning_rate": 9.9379492748326e-05, |
| "loss": 0.0262, |
| "step": 9770 |
| }, |
| { |
| "grad_norm": 0.2749841809272766, |
| "learning_rate": 9.937689319103641e-05, |
| "loss": 0.0246, |
| "step": 9780 |
| }, |
| { |
| "grad_norm": 0.2845693826675415, |
| "learning_rate": 9.937428823396404e-05, |
| "loss": 0.0256, |
| "step": 9790 |
| }, |
| { |
| "grad_norm": 0.39069777727127075, |
| "learning_rate": 9.937167787739372e-05, |
| "loss": 0.0265, |
| "step": 9800 |
| }, |
| { |
| "grad_norm": 0.3815779685974121, |
| "learning_rate": 9.936906212161095e-05, |
| "loss": 0.0361, |
| "step": 9810 |
| }, |
| { |
| "grad_norm": 0.3536510765552521, |
| "learning_rate": 9.936644096690176e-05, |
| "loss": 0.0292, |
| "step": 9820 |
| }, |
| { |
| "grad_norm": 0.24151362478733063, |
| "learning_rate": 9.936381441355282e-05, |
| "loss": 0.0307, |
| "step": 9830 |
| }, |
| { |
| "grad_norm": 0.2514011859893799, |
| "learning_rate": 9.936118246185136e-05, |
| "loss": 0.0286, |
| "step": 9840 |
| }, |
| { |
| "grad_norm": 0.2729114592075348, |
| "learning_rate": 9.935854511208518e-05, |
| "loss": 0.0331, |
| "step": 9850 |
| }, |
| { |
| "grad_norm": 0.24144065380096436, |
| "learning_rate": 9.935590236454272e-05, |
| "loss": 0.0271, |
| "step": 9860 |
| }, |
| { |
| "grad_norm": 0.24960418045520782, |
| "learning_rate": 9.935325421951298e-05, |
| "loss": 0.0226, |
| "step": 9870 |
| }, |
| { |
| "grad_norm": 0.33603090047836304, |
| "learning_rate": 9.935060067728557e-05, |
| "loss": 0.0251, |
| "step": 9880 |
| }, |
| { |
| "grad_norm": 0.2939727008342743, |
| "learning_rate": 9.934794173815067e-05, |
| "loss": 0.0264, |
| "step": 9890 |
| }, |
| { |
| "grad_norm": 0.3095412254333496, |
| "learning_rate": 9.934527740239906e-05, |
| "loss": 0.0253, |
| "step": 9900 |
| }, |
| { |
| "grad_norm": 0.31406790018081665, |
| "learning_rate": 9.934260767032209e-05, |
| "loss": 0.0276, |
| "step": 9910 |
| }, |
| { |
| "grad_norm": 0.43631061911582947, |
| "learning_rate": 9.933993254221172e-05, |
| "loss": 0.0264, |
| "step": 9920 |
| }, |
| { |
| "grad_norm": 0.3380442261695862, |
| "learning_rate": 9.933725201836053e-05, |
| "loss": 0.0258, |
| "step": 9930 |
| }, |
| { |
| "grad_norm": 0.32592689990997314, |
| "learning_rate": 9.933456609906162e-05, |
| "loss": 0.0232, |
| "step": 9940 |
| }, |
| { |
| "grad_norm": 0.36649060249328613, |
| "learning_rate": 9.933187478460875e-05, |
| "loss": 0.0318, |
| "step": 9950 |
| }, |
| { |
| "grad_norm": 0.34584930539131165, |
| "learning_rate": 9.93291780752962e-05, |
| "loss": 0.0272, |
| "step": 9960 |
| }, |
| { |
| "grad_norm": 0.3089320957660675, |
| "learning_rate": 9.932647597141893e-05, |
| "loss": 0.0261, |
| "step": 9970 |
| }, |
| { |
| "grad_norm": 0.262470006942749, |
| "learning_rate": 9.932376847327239e-05, |
| "loss": 0.0222, |
| "step": 9980 |
| }, |
| { |
| "grad_norm": 0.2766602337360382, |
| "learning_rate": 9.932105558115268e-05, |
| "loss": 0.0261, |
| "step": 9990 |
| }, |
| { |
| "grad_norm": 0.2999885678291321, |
| "learning_rate": 9.931833729535651e-05, |
| "loss": 0.027, |
| "step": 10000 |
| }, |
| { |
| "grad_norm": 0.24559569358825684, |
| "learning_rate": 9.931561361618111e-05, |
| "loss": 0.0225, |
| "step": 10010 |
| }, |
| { |
| "grad_norm": 0.32899388670921326, |
| "learning_rate": 9.931288454392435e-05, |
| "loss": 0.0259, |
| "step": 10020 |
| }, |
| { |
| "grad_norm": 0.28741541504859924, |
| "learning_rate": 9.931015007888467e-05, |
| "loss": 0.0287, |
| "step": 10030 |
| }, |
| { |
| "grad_norm": 0.3540486991405487, |
| "learning_rate": 9.930741022136112e-05, |
| "loss": 0.0253, |
| "step": 10040 |
| }, |
| { |
| "grad_norm": 0.29190653562545776, |
| "learning_rate": 9.930466497165333e-05, |
| "loss": 0.0243, |
| "step": 10050 |
| }, |
| { |
| "grad_norm": 0.3176731467247009, |
| "learning_rate": 9.93019143300615e-05, |
| "loss": 0.0232, |
| "step": 10060 |
| }, |
| { |
| "grad_norm": 0.3307768702507019, |
| "learning_rate": 9.929915829688644e-05, |
| "loss": 0.0255, |
| "step": 10070 |
| }, |
| { |
| "grad_norm": 0.2830311954021454, |
| "learning_rate": 9.929639687242955e-05, |
| "loss": 0.0346, |
| "step": 10080 |
| }, |
| { |
| "grad_norm": 0.27388209104537964, |
| "learning_rate": 9.929363005699281e-05, |
| "loss": 0.0266, |
| "step": 10090 |
| }, |
| { |
| "grad_norm": 0.2982178032398224, |
| "learning_rate": 9.92908578508788e-05, |
| "loss": 0.0247, |
| "step": 10100 |
| }, |
| { |
| "grad_norm": 0.2913467288017273, |
| "learning_rate": 9.928808025439069e-05, |
| "loss": 0.0226, |
| "step": 10110 |
| }, |
| { |
| "grad_norm": 0.22734889388084412, |
| "learning_rate": 9.928529726783223e-05, |
| "loss": 0.025, |
| "step": 10120 |
| }, |
| { |
| "grad_norm": 0.2596288323402405, |
| "learning_rate": 9.928250889150774e-05, |
| "loss": 0.0278, |
| "step": 10130 |
| }, |
| { |
| "grad_norm": 0.2922511100769043, |
| "learning_rate": 9.92797151257222e-05, |
| "loss": 0.0251, |
| "step": 10140 |
| }, |
| { |
| "grad_norm": 0.2565639615058899, |
| "learning_rate": 9.927691597078108e-05, |
| "loss": 0.0235, |
| "step": 10150 |
| }, |
| { |
| "grad_norm": 0.29596906900405884, |
| "learning_rate": 9.927411142699053e-05, |
| "loss": 0.0271, |
| "step": 10160 |
| }, |
| { |
| "grad_norm": 0.3740658760070801, |
| "learning_rate": 9.927130149465725e-05, |
| "loss": 0.0293, |
| "step": 10170 |
| }, |
| { |
| "grad_norm": 0.2871254086494446, |
| "learning_rate": 9.92684861740885e-05, |
| "loss": 0.0285, |
| "step": 10180 |
| }, |
| { |
| "grad_norm": 0.31287142634391785, |
| "learning_rate": 9.926566546559217e-05, |
| "loss": 0.0279, |
| "step": 10190 |
| }, |
| { |
| "grad_norm": 0.3119584619998932, |
| "learning_rate": 9.926283936947673e-05, |
| "loss": 0.0274, |
| "step": 10200 |
| }, |
| { |
| "grad_norm": 0.40003690123558044, |
| "learning_rate": 9.926000788605126e-05, |
| "loss": 0.0322, |
| "step": 10210 |
| }, |
| { |
| "grad_norm": 0.29922744631767273, |
| "learning_rate": 9.92571710156254e-05, |
| "loss": 0.0326, |
| "step": 10220 |
| }, |
| { |
| "grad_norm": 0.27242857217788696, |
| "learning_rate": 9.925432875850936e-05, |
| "loss": 0.0271, |
| "step": 10230 |
| }, |
| { |
| "grad_norm": 0.26456117630004883, |
| "learning_rate": 9.925148111501396e-05, |
| "loss": 0.0242, |
| "step": 10240 |
| }, |
| { |
| "grad_norm": 0.32744866609573364, |
| "learning_rate": 9.924862808545066e-05, |
| "loss": 0.0249, |
| "step": 10250 |
| }, |
| { |
| "grad_norm": 0.26610448956489563, |
| "learning_rate": 9.924576967013141e-05, |
| "loss": 0.0281, |
| "step": 10260 |
| }, |
| { |
| "grad_norm": 0.22870276868343353, |
| "learning_rate": 9.924290586936887e-05, |
| "loss": 0.0286, |
| "step": 10270 |
| }, |
| { |
| "grad_norm": 0.3537036180496216, |
| "learning_rate": 9.924003668347614e-05, |
| "loss": 0.0289, |
| "step": 10280 |
| }, |
| { |
| "grad_norm": 0.2901184856891632, |
| "learning_rate": 9.923716211276704e-05, |
| "loss": 0.0242, |
| "step": 10290 |
| }, |
| { |
| "grad_norm": 0.26121214032173157, |
| "learning_rate": 9.923428215755594e-05, |
| "loss": 0.0252, |
| "step": 10300 |
| }, |
| { |
| "grad_norm": 0.3089354634284973, |
| "learning_rate": 9.923139681815775e-05, |
| "loss": 0.0249, |
| "step": 10310 |
| }, |
| { |
| "grad_norm": 0.37658077478408813, |
| "learning_rate": 9.922850609488801e-05, |
| "loss": 0.0247, |
| "step": 10320 |
| }, |
| { |
| "grad_norm": 0.31644684076309204, |
| "learning_rate": 9.922560998806287e-05, |
| "loss": 0.026, |
| "step": 10330 |
| }, |
| { |
| "grad_norm": 0.26241806149482727, |
| "learning_rate": 9.922270849799905e-05, |
| "loss": 0.0233, |
| "step": 10340 |
| }, |
| { |
| "grad_norm": 0.2751232087612152, |
| "learning_rate": 9.92198016250138e-05, |
| "loss": 0.0298, |
| "step": 10350 |
| }, |
| { |
| "grad_norm": 0.38322126865386963, |
| "learning_rate": 9.921688936942506e-05, |
| "loss": 0.029, |
| "step": 10360 |
| }, |
| { |
| "grad_norm": 0.37642014026641846, |
| "learning_rate": 9.921397173155129e-05, |
| "loss": 0.0248, |
| "step": 10370 |
| }, |
| { |
| "grad_norm": 0.27151739597320557, |
| "learning_rate": 9.921104871171157e-05, |
| "loss": 0.0239, |
| "step": 10380 |
| }, |
| { |
| "grad_norm": 0.2456636130809784, |
| "learning_rate": 9.920812031022554e-05, |
| "loss": 0.0252, |
| "step": 10390 |
| }, |
| { |
| "grad_norm": 0.2831108570098877, |
| "learning_rate": 9.920518652741348e-05, |
| "loss": 0.024, |
| "step": 10400 |
| }, |
| { |
| "grad_norm": 0.3394615650177002, |
| "learning_rate": 9.920224736359618e-05, |
| "loss": 0.0237, |
| "step": 10410 |
| }, |
| { |
| "grad_norm": 0.2199944406747818, |
| "learning_rate": 9.91993028190951e-05, |
| "loss": 0.0221, |
| "step": 10420 |
| }, |
| { |
| "grad_norm": 0.2913322150707245, |
| "learning_rate": 9.919635289423222e-05, |
| "loss": 0.0263, |
| "step": 10430 |
| }, |
| { |
| "grad_norm": 0.2598559856414795, |
| "learning_rate": 9.919339758933015e-05, |
| "loss": 0.0258, |
| "step": 10440 |
| }, |
| { |
| "grad_norm": 0.26241859793663025, |
| "learning_rate": 9.919043690471209e-05, |
| "loss": 0.0234, |
| "step": 10450 |
| }, |
| { |
| "grad_norm": 0.20276913046836853, |
| "learning_rate": 9.91874708407018e-05, |
| "loss": 0.0238, |
| "step": 10460 |
| }, |
| { |
| "grad_norm": 0.3424587845802307, |
| "learning_rate": 9.918449939762367e-05, |
| "loss": 0.0263, |
| "step": 10470 |
| }, |
| { |
| "grad_norm": 0.3462485373020172, |
| "learning_rate": 9.91815225758026e-05, |
| "loss": 0.0266, |
| "step": 10480 |
| }, |
| { |
| "grad_norm": 0.2496924102306366, |
| "learning_rate": 9.917854037556419e-05, |
| "loss": 0.0247, |
| "step": 10490 |
| }, |
| { |
| "grad_norm": 0.32007890939712524, |
| "learning_rate": 9.917555279723454e-05, |
| "loss": 0.025, |
| "step": 10500 |
| }, |
| { |
| "grad_norm": 0.32690268754959106, |
| "learning_rate": 9.917255984114036e-05, |
| "loss": 0.0279, |
| "step": 10510 |
| }, |
| { |
| "grad_norm": 0.3497851490974426, |
| "learning_rate": 9.916956150760896e-05, |
| "loss": 0.0286, |
| "step": 10520 |
| }, |
| { |
| "grad_norm": 0.36993327736854553, |
| "learning_rate": 9.916655779696826e-05, |
| "loss": 0.0263, |
| "step": 10530 |
| }, |
| { |
| "grad_norm": 0.2767674922943115, |
| "learning_rate": 9.916354870954671e-05, |
| "loss": 0.026, |
| "step": 10540 |
| }, |
| { |
| "grad_norm": 0.30353987216949463, |
| "learning_rate": 9.91605342456734e-05, |
| "loss": 0.0291, |
| "step": 10550 |
| }, |
| { |
| "grad_norm": 0.2900550365447998, |
| "learning_rate": 9.915751440567795e-05, |
| "loss": 0.0236, |
| "step": 10560 |
| }, |
| { |
| "grad_norm": 0.28913983702659607, |
| "learning_rate": 9.915448918989066e-05, |
| "loss": 0.0262, |
| "step": 10570 |
| }, |
| { |
| "grad_norm": 0.25887352228164673, |
| "learning_rate": 9.915145859864232e-05, |
| "loss": 0.0227, |
| "step": 10580 |
| }, |
| { |
| "grad_norm": 0.3585141599178314, |
| "learning_rate": 9.914842263226437e-05, |
| "loss": 0.0285, |
| "step": 10590 |
| }, |
| { |
| "grad_norm": 0.2905959188938141, |
| "learning_rate": 9.914538129108882e-05, |
| "loss": 0.0272, |
| "step": 10600 |
| }, |
| { |
| "grad_norm": 0.3087858259677887, |
| "learning_rate": 9.914233457544825e-05, |
| "loss": 0.029, |
| "step": 10610 |
| }, |
| { |
| "grad_norm": 0.3118123710155487, |
| "learning_rate": 9.913928248567586e-05, |
| "loss": 0.032, |
| "step": 10620 |
| }, |
| { |
| "grad_norm": 0.28956368565559387, |
| "learning_rate": 9.913622502210542e-05, |
| "loss": 0.0248, |
| "step": 10630 |
| }, |
| { |
| "grad_norm": 0.2686956524848938, |
| "learning_rate": 9.913316218507128e-05, |
| "loss": 0.028, |
| "step": 10640 |
| }, |
| { |
| "grad_norm": 0.32353708148002625, |
| "learning_rate": 9.91300939749084e-05, |
| "loss": 0.0247, |
| "step": 10650 |
| }, |
| { |
| "grad_norm": 0.3663651645183563, |
| "learning_rate": 9.91270203919523e-05, |
| "loss": 0.0255, |
| "step": 10660 |
| }, |
| { |
| "grad_norm": 0.2726764678955078, |
| "learning_rate": 9.912394143653912e-05, |
| "loss": 0.0228, |
| "step": 10670 |
| }, |
| { |
| "grad_norm": 0.26007094979286194, |
| "learning_rate": 9.912085710900555e-05, |
| "loss": 0.0204, |
| "step": 10680 |
| }, |
| { |
| "grad_norm": 0.32784974575042725, |
| "learning_rate": 9.911776740968892e-05, |
| "loss": 0.022, |
| "step": 10690 |
| }, |
| { |
| "grad_norm": 0.2323165386915207, |
| "learning_rate": 9.911467233892709e-05, |
| "loss": 0.0245, |
| "step": 10700 |
| }, |
| { |
| "grad_norm": 0.32837867736816406, |
| "learning_rate": 9.911157189705853e-05, |
| "loss": 0.0219, |
| "step": 10710 |
| }, |
| { |
| "grad_norm": 0.31170499324798584, |
| "learning_rate": 9.910846608442229e-05, |
| "loss": 0.0269, |
| "step": 10720 |
| }, |
| { |
| "grad_norm": 0.315674751996994, |
| "learning_rate": 9.910535490135805e-05, |
| "loss": 0.0264, |
| "step": 10730 |
| }, |
| { |
| "grad_norm": 0.3200323283672333, |
| "learning_rate": 9.910223834820603e-05, |
| "loss": 0.0224, |
| "step": 10740 |
| }, |
| { |
| "grad_norm": 0.32182928919792175, |
| "learning_rate": 9.909911642530703e-05, |
| "loss": 0.0228, |
| "step": 10750 |
| }, |
| { |
| "grad_norm": 0.19040712714195251, |
| "learning_rate": 9.909598913300249e-05, |
| "loss": 0.024, |
| "step": 10760 |
| }, |
| { |
| "grad_norm": 0.25349172949790955, |
| "learning_rate": 9.909285647163438e-05, |
| "loss": 0.0248, |
| "step": 10770 |
| }, |
| { |
| "grad_norm": 0.2588328719139099, |
| "learning_rate": 9.908971844154531e-05, |
| "loss": 0.0226, |
| "step": 10780 |
| }, |
| { |
| "grad_norm": 0.38836732506752014, |
| "learning_rate": 9.908657504307843e-05, |
| "loss": 0.0284, |
| "step": 10790 |
| }, |
| { |
| "grad_norm": 0.29620569944381714, |
| "learning_rate": 9.908342627657751e-05, |
| "loss": 0.025, |
| "step": 10800 |
| }, |
| { |
| "grad_norm": 0.2874799072742462, |
| "learning_rate": 9.908027214238689e-05, |
| "loss": 0.0207, |
| "step": 10810 |
| }, |
| { |
| "grad_norm": 0.3038623332977295, |
| "learning_rate": 9.90771126408515e-05, |
| "loss": 0.0271, |
| "step": 10820 |
| }, |
| { |
| "grad_norm": 0.2247525006532669, |
| "learning_rate": 9.907394777231685e-05, |
| "loss": 0.0262, |
| "step": 10830 |
| }, |
| { |
| "grad_norm": 0.37762928009033203, |
| "learning_rate": 9.907077753712905e-05, |
| "loss": 0.0243, |
| "step": 10840 |
| }, |
| { |
| "grad_norm": 0.2622915506362915, |
| "learning_rate": 9.906760193563482e-05, |
| "loss": 0.0214, |
| "step": 10850 |
| }, |
| { |
| "grad_norm": 0.27154263854026794, |
| "learning_rate": 9.906442096818139e-05, |
| "loss": 0.0266, |
| "step": 10860 |
| }, |
| { |
| "grad_norm": 0.3040686845779419, |
| "learning_rate": 9.906123463511665e-05, |
| "loss": 0.025, |
| "step": 10870 |
| }, |
| { |
| "grad_norm": 0.3142922520637512, |
| "learning_rate": 9.905804293678907e-05, |
| "loss": 0.0245, |
| "step": 10880 |
| }, |
| { |
| "grad_norm": 0.34915608167648315, |
| "learning_rate": 9.905484587354766e-05, |
| "loss": 0.0277, |
| "step": 10890 |
| }, |
| { |
| "grad_norm": 0.259529709815979, |
| "learning_rate": 9.905164344574205e-05, |
| "loss": 0.0217, |
| "step": 10900 |
| }, |
| { |
| "grad_norm": 0.2816735506057739, |
| "learning_rate": 9.904843565372248e-05, |
| "loss": 0.0243, |
| "step": 10910 |
| }, |
| { |
| "grad_norm": 0.24057213962078094, |
| "learning_rate": 9.904522249783972e-05, |
| "loss": 0.027, |
| "step": 10920 |
| }, |
| { |
| "grad_norm": 0.32466766238212585, |
| "learning_rate": 9.904200397844517e-05, |
| "loss": 0.0263, |
| "step": 10930 |
| }, |
| { |
| "grad_norm": 0.23282542824745178, |
| "learning_rate": 9.903878009589078e-05, |
| "loss": 0.0251, |
| "step": 10940 |
| }, |
| { |
| "grad_norm": 0.21603845059871674, |
| "learning_rate": 9.903555085052915e-05, |
| "loss": 0.0208, |
| "step": 10950 |
| }, |
| { |
| "grad_norm": 0.2442980855703354, |
| "learning_rate": 9.903231624271338e-05, |
| "loss": 0.0225, |
| "step": 10960 |
| }, |
| { |
| "grad_norm": 0.22851979732513428, |
| "learning_rate": 9.902907627279724e-05, |
| "loss": 0.0222, |
| "step": 10970 |
| }, |
| { |
| "grad_norm": 0.24267442524433136, |
| "learning_rate": 9.902583094113504e-05, |
| "loss": 0.0256, |
| "step": 10980 |
| }, |
| { |
| "grad_norm": 0.31775084137916565, |
| "learning_rate": 9.902258024808168e-05, |
| "loss": 0.0291, |
| "step": 10990 |
| }, |
| { |
| "grad_norm": 0.2924094498157501, |
| "learning_rate": 9.901932419399264e-05, |
| "loss": 0.0257, |
| "step": 11000 |
| }, |
| { |
| "grad_norm": 0.2933579981327057, |
| "learning_rate": 9.9016062779224e-05, |
| "loss": 0.0229, |
| "step": 11010 |
| }, |
| { |
| "grad_norm": 0.3177044689655304, |
| "learning_rate": 9.901279600413242e-05, |
| "loss": 0.0245, |
| "step": 11020 |
| }, |
| { |
| "grad_norm": 0.28048941493034363, |
| "learning_rate": 9.900952386907518e-05, |
| "loss": 0.0262, |
| "step": 11030 |
| }, |
| { |
| "grad_norm": 0.2474532425403595, |
| "learning_rate": 9.90062463744101e-05, |
| "loss": 0.0233, |
| "step": 11040 |
| }, |
| { |
| "grad_norm": 0.28395146131515503, |
| "learning_rate": 9.900296352049558e-05, |
| "loss": 0.024, |
| "step": 11050 |
| }, |
| { |
| "grad_norm": 0.2619207203388214, |
| "learning_rate": 9.899967530769065e-05, |
| "loss": 0.0235, |
| "step": 11060 |
| }, |
| { |
| "grad_norm": 0.30180880427360535, |
| "learning_rate": 9.899638173635489e-05, |
| "loss": 0.0245, |
| "step": 11070 |
| }, |
| { |
| "grad_norm": 0.32666757702827454, |
| "learning_rate": 9.899308280684849e-05, |
| "loss": 0.0254, |
| "step": 11080 |
| }, |
| { |
| "grad_norm": 0.29050329327583313, |
| "learning_rate": 9.898977851953222e-05, |
| "loss": 0.0248, |
| "step": 11090 |
| }, |
| { |
| "grad_norm": 0.26069408655166626, |
| "learning_rate": 9.898646887476741e-05, |
| "loss": 0.0253, |
| "step": 11100 |
| }, |
| { |
| "grad_norm": 0.2715095281600952, |
| "learning_rate": 9.898315387291603e-05, |
| "loss": 0.024, |
| "step": 11110 |
| }, |
| { |
| "grad_norm": 0.30262792110443115, |
| "learning_rate": 9.89798335143406e-05, |
| "loss": 0.0229, |
| "step": 11120 |
| }, |
| { |
| "grad_norm": 0.250119149684906, |
| "learning_rate": 9.897650779940419e-05, |
| "loss": 0.0234, |
| "step": 11130 |
| }, |
| { |
| "grad_norm": 0.25867602229118347, |
| "learning_rate": 9.897317672847054e-05, |
| "loss": 0.0253, |
| "step": 11140 |
| }, |
| { |
| "grad_norm": 0.24908185005187988, |
| "learning_rate": 9.89698403019039e-05, |
| "loss": 0.0253, |
| "step": 11150 |
| }, |
| { |
| "grad_norm": 0.2526894509792328, |
| "learning_rate": 9.896649852006917e-05, |
| "loss": 0.0202, |
| "step": 11160 |
| }, |
| { |
| "grad_norm": 0.2598399519920349, |
| "learning_rate": 9.896315138333177e-05, |
| "loss": 0.0218, |
| "step": 11170 |
| }, |
| { |
| "grad_norm": 0.3139992356300354, |
| "learning_rate": 9.895979889205774e-05, |
| "loss": 0.0256, |
| "step": 11180 |
| }, |
| { |
| "grad_norm": 0.3360525071620941, |
| "learning_rate": 9.895644104661372e-05, |
| "loss": 0.024, |
| "step": 11190 |
| }, |
| { |
| "grad_norm": 0.29311561584472656, |
| "learning_rate": 9.895307784736691e-05, |
| "loss": 0.0242, |
| "step": 11200 |
| }, |
| { |
| "grad_norm": 0.28087183833122253, |
| "learning_rate": 9.894970929468512e-05, |
| "loss": 0.0274, |
| "step": 11210 |
| }, |
| { |
| "grad_norm": 0.3319748640060425, |
| "learning_rate": 9.89463353889367e-05, |
| "loss": 0.0225, |
| "step": 11220 |
| }, |
| { |
| "grad_norm": 0.24747245013713837, |
| "learning_rate": 9.894295613049065e-05, |
| "loss": 0.0242, |
| "step": 11230 |
| }, |
| { |
| "grad_norm": 0.24396617710590363, |
| "learning_rate": 9.893957151971649e-05, |
| "loss": 0.0213, |
| "step": 11240 |
| }, |
| { |
| "grad_norm": 0.3030615746974945, |
| "learning_rate": 9.893618155698436e-05, |
| "loss": 0.024, |
| "step": 11250 |
| }, |
| { |
| "grad_norm": 0.4588127136230469, |
| "learning_rate": 9.8932786242665e-05, |
| "loss": 0.0255, |
| "step": 11260 |
| }, |
| { |
| "grad_norm": 0.3490641415119171, |
| "learning_rate": 9.89293855771297e-05, |
| "loss": 0.0288, |
| "step": 11270 |
| }, |
| { |
| "grad_norm": 0.27173346281051636, |
| "learning_rate": 9.892597956075036e-05, |
| "loss": 0.0229, |
| "step": 11280 |
| }, |
| { |
| "grad_norm": 0.2763062119483948, |
| "learning_rate": 9.892256819389947e-05, |
| "loss": 0.021, |
| "step": 11290 |
| }, |
| { |
| "grad_norm": 0.28217893838882446, |
| "learning_rate": 9.891915147695006e-05, |
| "loss": 0.023, |
| "step": 11300 |
| }, |
| { |
| "grad_norm": 0.3276444375514984, |
| "learning_rate": 9.891572941027577e-05, |
| "loss": 0.028, |
| "step": 11310 |
| }, |
| { |
| "grad_norm": 0.32746949791908264, |
| "learning_rate": 9.89123019942509e-05, |
| "loss": 0.0216, |
| "step": 11320 |
| }, |
| { |
| "grad_norm": 0.24576155841350555, |
| "learning_rate": 9.89088692292502e-05, |
| "loss": 0.0238, |
| "step": 11330 |
| }, |
| { |
| "grad_norm": 0.25501197576522827, |
| "learning_rate": 9.89054311156491e-05, |
| "loss": 0.0261, |
| "step": 11340 |
| }, |
| { |
| "grad_norm": 0.2533109486103058, |
| "learning_rate": 9.890198765382357e-05, |
| "loss": 0.0262, |
| "step": 11350 |
| }, |
| { |
| "grad_norm": 0.2431899756193161, |
| "learning_rate": 9.889853884415021e-05, |
| "loss": 0.0209, |
| "step": 11360 |
| }, |
| { |
| "grad_norm": 0.36103177070617676, |
| "learning_rate": 9.889508468700614e-05, |
| "loss": 0.0231, |
| "step": 11370 |
| }, |
| { |
| "grad_norm": 0.33830827474594116, |
| "learning_rate": 9.889162518276915e-05, |
| "loss": 0.0259, |
| "step": 11380 |
| }, |
| { |
| "grad_norm": 0.23270663619041443, |
| "learning_rate": 9.888816033181752e-05, |
| "loss": 0.0244, |
| "step": 11390 |
| }, |
| { |
| "grad_norm": 0.20536769926548004, |
| "learning_rate": 9.888469013453018e-05, |
| "loss": 0.0219, |
| "step": 11400 |
| }, |
| { |
| "grad_norm": 0.3499191105365753, |
| "learning_rate": 9.888121459128663e-05, |
| "loss": 0.0264, |
| "step": 11410 |
| }, |
| { |
| "grad_norm": 0.30719149112701416, |
| "learning_rate": 9.887773370246693e-05, |
| "loss": 0.0293, |
| "step": 11420 |
| }, |
| { |
| "grad_norm": 0.251043438911438, |
| "learning_rate": 9.887424746845177e-05, |
| "loss": 0.0269, |
| "step": 11430 |
| }, |
| { |
| "grad_norm": 0.3854045271873474, |
| "learning_rate": 9.887075588962239e-05, |
| "loss": 0.0258, |
| "step": 11440 |
| }, |
| { |
| "grad_norm": 0.34220167994499207, |
| "learning_rate": 9.88672589663606e-05, |
| "loss": 0.0215, |
| "step": 11450 |
| }, |
| { |
| "grad_norm": 0.2944101393222809, |
| "learning_rate": 9.886375669904886e-05, |
| "loss": 0.0246, |
| "step": 11460 |
| }, |
| { |
| "grad_norm": 0.31172212958335876, |
| "learning_rate": 9.886024908807014e-05, |
| "loss": 0.0259, |
| "step": 11470 |
| }, |
| { |
| "grad_norm": 0.2905510663986206, |
| "learning_rate": 9.885673613380806e-05, |
| "loss": 0.0245, |
| "step": 11480 |
| }, |
| { |
| "grad_norm": 0.25882405042648315, |
| "learning_rate": 9.885321783664676e-05, |
| "loss": 0.0215, |
| "step": 11490 |
| }, |
| { |
| "grad_norm": 0.2980867922306061, |
| "learning_rate": 9.884969419697101e-05, |
| "loss": 0.0269, |
| "step": 11500 |
| }, |
| { |
| "grad_norm": 0.23805956542491913, |
| "learning_rate": 9.884616521516614e-05, |
| "loss": 0.0231, |
| "step": 11510 |
| }, |
| { |
| "grad_norm": 0.26967158913612366, |
| "learning_rate": 9.88426308916181e-05, |
| "loss": 0.0251, |
| "step": 11520 |
| }, |
| { |
| "grad_norm": 0.2954730987548828, |
| "learning_rate": 9.883909122671335e-05, |
| "loss": 0.0248, |
| "step": 11530 |
| }, |
| { |
| "grad_norm": 0.2668631076812744, |
| "learning_rate": 9.883554622083904e-05, |
| "loss": 0.0266, |
| "step": 11540 |
| }, |
| { |
| "grad_norm": 0.24218979477882385, |
| "learning_rate": 9.88319958743828e-05, |
| "loss": 0.0206, |
| "step": 11550 |
| }, |
| { |
| "grad_norm": 0.23225300014019012, |
| "learning_rate": 9.882844018773291e-05, |
| "loss": 0.026, |
| "step": 11560 |
| }, |
| { |
| "grad_norm": 0.37825363874435425, |
| "learning_rate": 9.882487916127823e-05, |
| "loss": 0.0251, |
| "step": 11570 |
| }, |
| { |
| "grad_norm": 0.25535348057746887, |
| "learning_rate": 9.882131279540815e-05, |
| "loss": 0.0218, |
| "step": 11580 |
| }, |
| { |
| "grad_norm": 0.29476526379585266, |
| "learning_rate": 9.881774109051271e-05, |
| "loss": 0.0217, |
| "step": 11590 |
| }, |
| { |
| "grad_norm": 0.24866236746311188, |
| "learning_rate": 9.881416404698252e-05, |
| "loss": 0.02, |
| "step": 11600 |
| }, |
| { |
| "grad_norm": 0.28169095516204834, |
| "learning_rate": 9.881058166520873e-05, |
| "loss": 0.0225, |
| "step": 11610 |
| }, |
| { |
| "grad_norm": 0.22448545694351196, |
| "learning_rate": 9.880699394558311e-05, |
| "loss": 0.026, |
| "step": 11620 |
| }, |
| { |
| "grad_norm": 0.2574785351753235, |
| "learning_rate": 9.880340088849801e-05, |
| "loss": 0.0212, |
| "step": 11630 |
| }, |
| { |
| "grad_norm": 0.2824767827987671, |
| "learning_rate": 9.879980249434637e-05, |
| "loss": 0.0229, |
| "step": 11640 |
| }, |
| { |
| "grad_norm": 0.30236056447029114, |
| "learning_rate": 9.879619876352168e-05, |
| "loss": 0.0235, |
| "step": 11650 |
| }, |
| { |
| "grad_norm": 0.2773892283439636, |
| "learning_rate": 9.879258969641809e-05, |
| "loss": 0.0226, |
| "step": 11660 |
| }, |
| { |
| "grad_norm": 0.2351124882698059, |
| "learning_rate": 9.878897529343023e-05, |
| "loss": 0.02, |
| "step": 11670 |
| }, |
| { |
| "grad_norm": 0.2677806317806244, |
| "learning_rate": 9.878535555495338e-05, |
| "loss": 0.0209, |
| "step": 11680 |
| }, |
| { |
| "grad_norm": 0.307692289352417, |
| "learning_rate": 9.87817304813834e-05, |
| "loss": 0.0264, |
| "step": 11690 |
| }, |
| { |
| "grad_norm": 0.32135266065597534, |
| "learning_rate": 9.877810007311671e-05, |
| "loss": 0.025, |
| "step": 11700 |
| }, |
| { |
| "grad_norm": 0.22899632155895233, |
| "learning_rate": 9.877446433055035e-05, |
| "loss": 0.0224, |
| "step": 11710 |
| }, |
| { |
| "grad_norm": 0.26118001341819763, |
| "learning_rate": 9.877082325408191e-05, |
| "loss": 0.0248, |
| "step": 11720 |
| }, |
| { |
| "grad_norm": 0.24061886966228485, |
| "learning_rate": 9.876717684410954e-05, |
| "loss": 0.0186, |
| "step": 11730 |
| }, |
| { |
| "grad_norm": 0.1753281205892563, |
| "learning_rate": 9.876352510103204e-05, |
| "loss": 0.0188, |
| "step": 11740 |
| }, |
| { |
| "grad_norm": 0.2684527635574341, |
| "learning_rate": 9.875986802524875e-05, |
| "loss": 0.0218, |
| "step": 11750 |
| }, |
| { |
| "grad_norm": 0.260416716337204, |
| "learning_rate": 9.87562056171596e-05, |
| "loss": 0.0236, |
| "step": 11760 |
| }, |
| { |
| "grad_norm": 0.33505305647850037, |
| "learning_rate": 9.875253787716511e-05, |
| "loss": 0.0225, |
| "step": 11770 |
| }, |
| { |
| "grad_norm": 0.27778613567352295, |
| "learning_rate": 9.874886480566637e-05, |
| "loss": 0.0264, |
| "step": 11780 |
| }, |
| { |
| "grad_norm": 0.28708258271217346, |
| "learning_rate": 9.874518640306507e-05, |
| "loss": 0.0206, |
| "step": 11790 |
| }, |
| { |
| "grad_norm": 0.28900209069252014, |
| "learning_rate": 9.874150266976347e-05, |
| "loss": 0.0277, |
| "step": 11800 |
| }, |
| { |
| "grad_norm": 0.29606348276138306, |
| "learning_rate": 9.873781360616443e-05, |
| "loss": 0.0209, |
| "step": 11810 |
| }, |
| { |
| "grad_norm": 0.29875603318214417, |
| "learning_rate": 9.873411921267137e-05, |
| "loss": 0.0256, |
| "step": 11820 |
| }, |
| { |
| "grad_norm": 0.2832305133342743, |
| "learning_rate": 9.873041948968829e-05, |
| "loss": 0.0248, |
| "step": 11830 |
| }, |
| { |
| "grad_norm": 0.2795998752117157, |
| "learning_rate": 9.872671443761981e-05, |
| "loss": 0.0199, |
| "step": 11840 |
| }, |
| { |
| "grad_norm": 0.2344684898853302, |
| "learning_rate": 9.872300405687109e-05, |
| "loss": 0.0247, |
| "step": 11850 |
| }, |
| { |
| "grad_norm": 0.25030753016471863, |
| "learning_rate": 9.871928834784792e-05, |
| "loss": 0.0247, |
| "step": 11860 |
| }, |
| { |
| "grad_norm": 0.24843540787696838, |
| "learning_rate": 9.871556731095661e-05, |
| "loss": 0.0262, |
| "step": 11870 |
| }, |
| { |
| "grad_norm": 0.2917851507663727, |
| "learning_rate": 9.871184094660411e-05, |
| "loss": 0.0229, |
| "step": 11880 |
| }, |
| { |
| "grad_norm": 0.30386418104171753, |
| "learning_rate": 9.870810925519791e-05, |
| "loss": 0.0224, |
| "step": 11890 |
| }, |
| { |
| "grad_norm": 0.29892122745513916, |
| "learning_rate": 9.870437223714612e-05, |
| "loss": 0.0303, |
| "step": 11900 |
| }, |
| { |
| "grad_norm": 0.2550772428512573, |
| "learning_rate": 9.87006298928574e-05, |
| "loss": 0.0224, |
| "step": 11910 |
| }, |
| { |
| "grad_norm": 0.24139836430549622, |
| "learning_rate": 9.869688222274103e-05, |
| "loss": 0.0229, |
| "step": 11920 |
| }, |
| { |
| "grad_norm": 0.25056102871894836, |
| "learning_rate": 9.869312922720681e-05, |
| "loss": 0.0223, |
| "step": 11930 |
| }, |
| { |
| "grad_norm": 0.27529701590538025, |
| "learning_rate": 9.868937090666521e-05, |
| "loss": 0.0203, |
| "step": 11940 |
| }, |
| { |
| "grad_norm": 0.23277588188648224, |
| "learning_rate": 9.86856072615272e-05, |
| "loss": 0.0252, |
| "step": 11950 |
| }, |
| { |
| "grad_norm": 0.22750338912010193, |
| "learning_rate": 9.868183829220438e-05, |
| "loss": 0.0237, |
| "step": 11960 |
| }, |
| { |
| "grad_norm": 0.32950612902641296, |
| "learning_rate": 9.867806399910893e-05, |
| "loss": 0.0232, |
| "step": 11970 |
| }, |
| { |
| "grad_norm": 0.2424909621477127, |
| "learning_rate": 9.867428438265356e-05, |
| "loss": 0.0271, |
| "step": 11980 |
| }, |
| { |
| "grad_norm": 0.31055083870887756, |
| "learning_rate": 9.867049944325165e-05, |
| "loss": 0.0237, |
| "step": 11990 |
| }, |
| { |
| "grad_norm": 0.2964540123939514, |
| "learning_rate": 9.86667091813171e-05, |
| "loss": 0.0242, |
| "step": 12000 |
| }, |
| { |
| "grad_norm": 0.22154490649700165, |
| "learning_rate": 9.866291359726438e-05, |
| "loss": 0.025, |
| "step": 12010 |
| }, |
| { |
| "grad_norm": 0.2891708016395569, |
| "learning_rate": 9.865911269150861e-05, |
| "loss": 0.0238, |
| "step": 12020 |
| }, |
| { |
| "grad_norm": 0.33079537749290466, |
| "learning_rate": 9.865530646446544e-05, |
| "loss": 0.0244, |
| "step": 12030 |
| }, |
| { |
| "grad_norm": 0.338015079498291, |
| "learning_rate": 9.86514949165511e-05, |
| "loss": 0.0257, |
| "step": 12040 |
| }, |
| { |
| "grad_norm": 0.3109557628631592, |
| "learning_rate": 9.864767804818243e-05, |
| "loss": 0.0217, |
| "step": 12050 |
| }, |
| { |
| "grad_norm": 0.25862064957618713, |
| "learning_rate": 9.86438558597768e-05, |
| "loss": 0.0228, |
| "step": 12060 |
| }, |
| { |
| "grad_norm": 0.2686013877391815, |
| "learning_rate": 9.864002835175225e-05, |
| "loss": 0.0225, |
| "step": 12070 |
| }, |
| { |
| "grad_norm": 0.29204437136650085, |
| "learning_rate": 9.863619552452734e-05, |
| "loss": 0.023, |
| "step": 12080 |
| }, |
| { |
| "grad_norm": 0.29866817593574524, |
| "learning_rate": 9.863235737852119e-05, |
| "loss": 0.0208, |
| "step": 12090 |
| }, |
| { |
| "grad_norm": 0.2732008695602417, |
| "learning_rate": 9.862851391415356e-05, |
| "loss": 0.0232, |
| "step": 12100 |
| }, |
| { |
| "grad_norm": 0.22158949077129364, |
| "learning_rate": 9.862466513184477e-05, |
| "loss": 0.0224, |
| "step": 12110 |
| }, |
| { |
| "grad_norm": 0.262530118227005, |
| "learning_rate": 9.86208110320157e-05, |
| "loss": 0.0254, |
| "step": 12120 |
| }, |
| { |
| "grad_norm": 0.22451408207416534, |
| "learning_rate": 9.861695161508784e-05, |
| "loss": 0.0225, |
| "step": 12130 |
| }, |
| { |
| "grad_norm": 0.2171703577041626, |
| "learning_rate": 9.861308688148324e-05, |
| "loss": 0.0193, |
| "step": 12140 |
| }, |
| { |
| "grad_norm": 0.22081957757472992, |
| "learning_rate": 9.860921683162455e-05, |
| "loss": 0.0214, |
| "step": 12150 |
| }, |
| { |
| "grad_norm": 0.26277512311935425, |
| "learning_rate": 9.860534146593499e-05, |
| "loss": 0.0245, |
| "step": 12160 |
| }, |
| { |
| "grad_norm": 0.23533903062343597, |
| "learning_rate": 9.860146078483836e-05, |
| "loss": 0.024, |
| "step": 12170 |
| }, |
| { |
| "grad_norm": 0.29710593819618225, |
| "learning_rate": 9.859757478875905e-05, |
| "loss": 0.021, |
| "step": 12180 |
| }, |
| { |
| "grad_norm": 0.2295854687690735, |
| "learning_rate": 9.859368347812204e-05, |
| "loss": 0.0201, |
| "step": 12190 |
| }, |
| { |
| "grad_norm": 0.30774781107902527, |
| "learning_rate": 9.858978685335285e-05, |
| "loss": 0.0264, |
| "step": 12200 |
| }, |
| { |
| "grad_norm": 0.20226168632507324, |
| "learning_rate": 9.858588491487763e-05, |
| "loss": 0.0215, |
| "step": 12210 |
| }, |
| { |
| "grad_norm": 0.27741652727127075, |
| "learning_rate": 9.858197766312308e-05, |
| "loss": 0.0216, |
| "step": 12220 |
| }, |
| { |
| "grad_norm": 0.24613800644874573, |
| "learning_rate": 9.857806509851649e-05, |
| "loss": 0.0205, |
| "step": 12230 |
| }, |
| { |
| "grad_norm": 0.3640504777431488, |
| "learning_rate": 9.857414722148574e-05, |
| "loss": 0.0211, |
| "step": 12240 |
| }, |
| { |
| "grad_norm": 0.28440797328948975, |
| "learning_rate": 9.857022403245928e-05, |
| "loss": 0.0239, |
| "step": 12250 |
| }, |
| { |
| "grad_norm": 0.29080355167388916, |
| "learning_rate": 9.856629553186615e-05, |
| "loss": 0.0227, |
| "step": 12260 |
| }, |
| { |
| "grad_norm": 0.27145934104919434, |
| "learning_rate": 9.856236172013595e-05, |
| "loss": 0.027, |
| "step": 12270 |
| }, |
| { |
| "grad_norm": 0.2906491458415985, |
| "learning_rate": 9.85584225976989e-05, |
| "loss": 0.0233, |
| "step": 12280 |
| }, |
| { |
| "grad_norm": 0.28224772214889526, |
| "learning_rate": 9.855447816498575e-05, |
| "loss": 0.0231, |
| "step": 12290 |
| }, |
| { |
| "grad_norm": 0.24714432656764984, |
| "learning_rate": 9.855052842242787e-05, |
| "loss": 0.0228, |
| "step": 12300 |
| }, |
| { |
| "grad_norm": 0.3262035846710205, |
| "learning_rate": 9.85465733704572e-05, |
| "loss": 0.0237, |
| "step": 12310 |
| }, |
| { |
| "grad_norm": 0.32095709443092346, |
| "learning_rate": 9.854261300950624e-05, |
| "loss": 0.0205, |
| "step": 12320 |
| }, |
| { |
| "grad_norm": 0.2512904107570648, |
| "learning_rate": 9.853864734000813e-05, |
| "loss": 0.0221, |
| "step": 12330 |
| }, |
| { |
| "grad_norm": 0.3358675241470337, |
| "learning_rate": 9.85346763623965e-05, |
| "loss": 0.0255, |
| "step": 12340 |
| }, |
| { |
| "grad_norm": 0.20079484581947327, |
| "learning_rate": 9.853070007710564e-05, |
| "loss": 0.0222, |
| "step": 12350 |
| }, |
| { |
| "grad_norm": 0.33502423763275146, |
| "learning_rate": 9.85267184845704e-05, |
| "loss": 0.0278, |
| "step": 12360 |
| }, |
| { |
| "grad_norm": 0.25794944167137146, |
| "learning_rate": 9.852273158522616e-05, |
| "loss": 0.0238, |
| "step": 12370 |
| }, |
| { |
| "grad_norm": 0.25033092498779297, |
| "learning_rate": 9.851873937950896e-05, |
| "loss": 0.0239, |
| "step": 12380 |
| }, |
| { |
| "grad_norm": 0.22771897912025452, |
| "learning_rate": 9.851474186785537e-05, |
| "loss": 0.022, |
| "step": 12390 |
| }, |
| { |
| "grad_norm": 0.2817453145980835, |
| "learning_rate": 9.851073905070254e-05, |
| "loss": 0.0249, |
| "step": 12400 |
| }, |
| { |
| "grad_norm": 0.2184731364250183, |
| "learning_rate": 9.850673092848824e-05, |
| "loss": 0.0216, |
| "step": 12410 |
| }, |
| { |
| "grad_norm": 0.3057407736778259, |
| "learning_rate": 9.850271750165077e-05, |
| "loss": 0.0209, |
| "step": 12420 |
| }, |
| { |
| "grad_norm": 0.23111845552921295, |
| "learning_rate": 9.849869877062902e-05, |
| "loss": 0.0185, |
| "step": 12430 |
| }, |
| { |
| "grad_norm": 0.3109472990036011, |
| "learning_rate": 9.849467473586252e-05, |
| "loss": 0.0206, |
| "step": 12440 |
| }, |
| { |
| "grad_norm": 0.2345299869775772, |
| "learning_rate": 9.849064539779127e-05, |
| "loss": 0.0296, |
| "step": 12450 |
| }, |
| { |
| "grad_norm": 0.2816222012042999, |
| "learning_rate": 9.848661075685594e-05, |
| "loss": 0.0216, |
| "step": 12460 |
| }, |
| { |
| "grad_norm": 0.27281153202056885, |
| "learning_rate": 9.848257081349778e-05, |
| "loss": 0.0256, |
| "step": 12470 |
| }, |
| { |
| "grad_norm": 0.24961847066879272, |
| "learning_rate": 9.847852556815856e-05, |
| "loss": 0.0202, |
| "step": 12480 |
| }, |
| { |
| "grad_norm": 0.24978555738925934, |
| "learning_rate": 9.847447502128067e-05, |
| "loss": 0.0194, |
| "step": 12490 |
| }, |
| { |
| "grad_norm": 0.2963050305843353, |
| "learning_rate": 9.847041917330708e-05, |
| "loss": 0.0207, |
| "step": 12500 |
| }, |
| { |
| "grad_norm": 0.19255203008651733, |
| "learning_rate": 9.846635802468132e-05, |
| "loss": 0.0203, |
| "step": 12510 |
| }, |
| { |
| "grad_norm": 0.2264157235622406, |
| "learning_rate": 9.84622915758475e-05, |
| "loss": 0.0219, |
| "step": 12520 |
| }, |
| { |
| "grad_norm": 0.2677379548549652, |
| "learning_rate": 9.845821982725034e-05, |
| "loss": 0.0236, |
| "step": 12530 |
| }, |
| { |
| "grad_norm": 0.2737414240837097, |
| "learning_rate": 9.845414277933514e-05, |
| "loss": 0.0224, |
| "step": 12540 |
| }, |
| { |
| "grad_norm": 0.2770121097564697, |
| "learning_rate": 9.845006043254771e-05, |
| "loss": 0.023, |
| "step": 12550 |
| }, |
| { |
| "grad_norm": 0.30070507526397705, |
| "learning_rate": 9.844597278733451e-05, |
| "loss": 0.0208, |
| "step": 12560 |
| }, |
| { |
| "grad_norm": 0.21809224784374237, |
| "learning_rate": 9.844187984414259e-05, |
| "loss": 0.0197, |
| "step": 12570 |
| }, |
| { |
| "grad_norm": 0.2893144190311432, |
| "learning_rate": 9.84377816034195e-05, |
| "loss": 0.0202, |
| "step": 12580 |
| }, |
| { |
| "grad_norm": 0.25457078218460083, |
| "learning_rate": 9.843367806561345e-05, |
| "loss": 0.0209, |
| "step": 12590 |
| }, |
| { |
| "grad_norm": 0.23254764080047607, |
| "learning_rate": 9.842956923117317e-05, |
| "loss": 0.0229, |
| "step": 12600 |
| }, |
| { |
| "grad_norm": 0.26205703616142273, |
| "learning_rate": 9.842545510054802e-05, |
| "loss": 0.0214, |
| "step": 12610 |
| }, |
| { |
| "grad_norm": 0.2194891721010208, |
| "learning_rate": 9.842133567418792e-05, |
| "loss": 0.0242, |
| "step": 12620 |
| }, |
| { |
| "grad_norm": 0.27267029881477356, |
| "learning_rate": 9.841721095254333e-05, |
| "loss": 0.0209, |
| "step": 12630 |
| }, |
| { |
| "grad_norm": 0.2827804982662201, |
| "learning_rate": 9.841308093606537e-05, |
| "loss": 0.0217, |
| "step": 12640 |
| }, |
| { |
| "grad_norm": 0.24511675536632538, |
| "learning_rate": 9.840894562520565e-05, |
| "loss": 0.0225, |
| "step": 12650 |
| }, |
| { |
| "grad_norm": 0.2633540630340576, |
| "learning_rate": 9.840480502041642e-05, |
| "loss": 0.018, |
| "step": 12660 |
| }, |
| { |
| "grad_norm": 0.3520581126213074, |
| "learning_rate": 9.840065912215049e-05, |
| "loss": 0.0199, |
| "step": 12670 |
| }, |
| { |
| "grad_norm": 0.29521462321281433, |
| "learning_rate": 9.839650793086124e-05, |
| "loss": 0.0224, |
| "step": 12680 |
| }, |
| { |
| "grad_norm": 0.22336840629577637, |
| "learning_rate": 9.839235144700265e-05, |
| "loss": 0.0224, |
| "step": 12690 |
| }, |
| { |
| "grad_norm": 0.2869412302970886, |
| "learning_rate": 9.838818967102926e-05, |
| "loss": 0.0204, |
| "step": 12700 |
| }, |
| { |
| "grad_norm": 0.26980113983154297, |
| "learning_rate": 9.83840226033962e-05, |
| "loss": 0.0191, |
| "step": 12710 |
| }, |
| { |
| "grad_norm": 0.2633763551712036, |
| "learning_rate": 9.837985024455918e-05, |
| "loss": 0.0218, |
| "step": 12720 |
| }, |
| { |
| "grad_norm": 0.26450225710868835, |
| "learning_rate": 9.837567259497447e-05, |
| "loss": 0.0211, |
| "step": 12730 |
| }, |
| { |
| "grad_norm": 0.3228972554206848, |
| "learning_rate": 9.837148965509894e-05, |
| "loss": 0.0217, |
| "step": 12740 |
| }, |
| { |
| "grad_norm": 0.25866827368736267, |
| "learning_rate": 9.836730142539001e-05, |
| "loss": 0.0219, |
| "step": 12750 |
| }, |
| { |
| "grad_norm": 0.1909390091896057, |
| "learning_rate": 9.836310790630574e-05, |
| "loss": 0.0206, |
| "step": 12760 |
| }, |
| { |
| "grad_norm": 0.282137393951416, |
| "learning_rate": 9.83589090983047e-05, |
| "loss": 0.0219, |
| "step": 12770 |
| }, |
| { |
| "grad_norm": 0.29815107583999634, |
| "learning_rate": 9.835470500184605e-05, |
| "loss": 0.0251, |
| "step": 12780 |
| }, |
| { |
| "grad_norm": 0.21475166082382202, |
| "learning_rate": 9.835049561738957e-05, |
| "loss": 0.0201, |
| "step": 12790 |
| }, |
| { |
| "grad_norm": 0.16367527842521667, |
| "learning_rate": 9.834628094539558e-05, |
| "loss": 0.0207, |
| "step": 12800 |
| }, |
| { |
| "grad_norm": 0.2078922986984253, |
| "learning_rate": 9.834206098632499e-05, |
| "loss": 0.0181, |
| "step": 12810 |
| }, |
| { |
| "grad_norm": 0.32321515679359436, |
| "learning_rate": 9.833783574063931e-05, |
| "loss": 0.0224, |
| "step": 12820 |
| }, |
| { |
| "grad_norm": 0.254509299993515, |
| "learning_rate": 9.833360520880058e-05, |
| "loss": 0.0215, |
| "step": 12830 |
| }, |
| { |
| "grad_norm": 0.20796674489974976, |
| "learning_rate": 9.832936939127144e-05, |
| "loss": 0.0218, |
| "step": 12840 |
| }, |
| { |
| "grad_norm": 0.2586333155632019, |
| "learning_rate": 9.832512828851515e-05, |
| "loss": 0.0213, |
| "step": 12850 |
| }, |
| { |
| "grad_norm": 0.3397057354450226, |
| "learning_rate": 9.832088190099546e-05, |
| "loss": 0.0229, |
| "step": 12860 |
| }, |
| { |
| "grad_norm": 0.2928526997566223, |
| "learning_rate": 9.831663022917679e-05, |
| "loss": 0.0208, |
| "step": 12870 |
| }, |
| { |
| "grad_norm": 0.2452399730682373, |
| "learning_rate": 9.831237327352407e-05, |
| "loss": 0.0205, |
| "step": 12880 |
| }, |
| { |
| "grad_norm": 0.2470838874578476, |
| "learning_rate": 9.830811103450286e-05, |
| "loss": 0.0225, |
| "step": 12890 |
| }, |
| { |
| "grad_norm": 0.23086868226528168, |
| "learning_rate": 9.830384351257924e-05, |
| "loss": 0.0215, |
| "step": 12900 |
| }, |
| { |
| "grad_norm": 0.2385035753250122, |
| "learning_rate": 9.829957070821993e-05, |
| "loss": 0.0201, |
| "step": 12910 |
| }, |
| { |
| "grad_norm": 0.27688005566596985, |
| "learning_rate": 9.829529262189218e-05, |
| "loss": 0.0217, |
| "step": 12920 |
| }, |
| { |
| "grad_norm": 0.22894538938999176, |
| "learning_rate": 9.829100925406385e-05, |
| "loss": 0.0222, |
| "step": 12930 |
| }, |
| { |
| "grad_norm": 0.24850626289844513, |
| "learning_rate": 9.828672060520333e-05, |
| "loss": 0.0228, |
| "step": 12940 |
| }, |
| { |
| "grad_norm": 0.2373901605606079, |
| "learning_rate": 9.828242667577966e-05, |
| "loss": 0.0195, |
| "step": 12950 |
| }, |
| { |
| "grad_norm": 0.3337841331958771, |
| "learning_rate": 9.82781274662624e-05, |
| "loss": 0.0224, |
| "step": 12960 |
| }, |
| { |
| "grad_norm": 0.2683013677597046, |
| "learning_rate": 9.82738229771217e-05, |
| "loss": 0.0201, |
| "step": 12970 |
| }, |
| { |
| "grad_norm": 0.22946369647979736, |
| "learning_rate": 9.826951320882829e-05, |
| "loss": 0.0232, |
| "step": 12980 |
| }, |
| { |
| "grad_norm": 0.3642555773258209, |
| "learning_rate": 9.826519816185351e-05, |
| "loss": 0.0295, |
| "step": 12990 |
| }, |
| { |
| "grad_norm": 0.2674192488193512, |
| "learning_rate": 9.826087783666921e-05, |
| "loss": 0.0197, |
| "step": 13000 |
| }, |
| { |
| "grad_norm": 0.2643831670284271, |
| "learning_rate": 9.825655223374787e-05, |
| "loss": 0.0213, |
| "step": 13010 |
| }, |
| { |
| "grad_norm": 0.2896205484867096, |
| "learning_rate": 9.825222135356253e-05, |
| "loss": 0.0219, |
| "step": 13020 |
| }, |
| { |
| "grad_norm": 0.2912057042121887, |
| "learning_rate": 9.82478851965868e-05, |
| "loss": 0.0198, |
| "step": 13030 |
| }, |
| { |
| "grad_norm": 0.2574427127838135, |
| "learning_rate": 9.82435437632949e-05, |
| "loss": 0.026, |
| "step": 13040 |
| }, |
| { |
| "grad_norm": 0.26649150252342224, |
| "learning_rate": 9.823919705416158e-05, |
| "loss": 0.0235, |
| "step": 13050 |
| }, |
| { |
| "grad_norm": 0.27899083495140076, |
| "learning_rate": 9.82348450696622e-05, |
| "loss": 0.0187, |
| "step": 13060 |
| }, |
| { |
| "grad_norm": 0.26895758509635925, |
| "learning_rate": 9.823048781027268e-05, |
| "loss": 0.0191, |
| "step": 13070 |
| }, |
| { |
| "grad_norm": 0.3179563879966736, |
| "learning_rate": 9.822612527646953e-05, |
| "loss": 0.0195, |
| "step": 13080 |
| }, |
| { |
| "grad_norm": 0.29025205969810486, |
| "learning_rate": 9.822175746872984e-05, |
| "loss": 0.0204, |
| "step": 13090 |
| }, |
| { |
| "grad_norm": 0.3235120177268982, |
| "learning_rate": 9.821738438753123e-05, |
| "loss": 0.0181, |
| "step": 13100 |
| }, |
| { |
| "grad_norm": 0.3427630364894867, |
| "learning_rate": 9.821300603335196e-05, |
| "loss": 0.023, |
| "step": 13110 |
| }, |
| { |
| "grad_norm": 0.2155032753944397, |
| "learning_rate": 9.820862240667085e-05, |
| "loss": 0.0194, |
| "step": 13120 |
| }, |
| { |
| "grad_norm": 0.1895771026611328, |
| "learning_rate": 9.820423350796726e-05, |
| "loss": 0.0222, |
| "step": 13130 |
| }, |
| { |
| "grad_norm": 0.21924172341823578, |
| "learning_rate": 9.819983933772118e-05, |
| "loss": 0.0222, |
| "step": 13140 |
| }, |
| { |
| "grad_norm": 0.28095996379852295, |
| "learning_rate": 9.819543989641314e-05, |
| "loss": 0.0241, |
| "step": 13150 |
| }, |
| { |
| "grad_norm": 0.30448660254478455, |
| "learning_rate": 9.819103518452423e-05, |
| "loss": 0.0216, |
| "step": 13160 |
| }, |
| { |
| "grad_norm": 0.22671166062355042, |
| "learning_rate": 9.818662520253618e-05, |
| "loss": 0.0239, |
| "step": 13170 |
| }, |
| { |
| "grad_norm": 0.26471537351608276, |
| "learning_rate": 9.818220995093126e-05, |
| "loss": 0.0208, |
| "step": 13180 |
| }, |
| { |
| "grad_norm": 0.2450067400932312, |
| "learning_rate": 9.817778943019228e-05, |
| "loss": 0.0224, |
| "step": 13190 |
| }, |
| { |
| "grad_norm": 0.2603633403778076, |
| "learning_rate": 9.81733636408027e-05, |
| "loss": 0.0212, |
| "step": 13200 |
| }, |
| { |
| "grad_norm": 0.21010948717594147, |
| "learning_rate": 9.816893258324649e-05, |
| "loss": 0.02, |
| "step": 13210 |
| }, |
| { |
| "grad_norm": 0.24279074370861053, |
| "learning_rate": 9.816449625800823e-05, |
| "loss": 0.0248, |
| "step": 13220 |
| }, |
| { |
| "grad_norm": 0.3085545301437378, |
| "learning_rate": 9.816005466557308e-05, |
| "loss": 0.0242, |
| "step": 13230 |
| }, |
| { |
| "grad_norm": 0.40831342339515686, |
| "learning_rate": 9.815560780642674e-05, |
| "loss": 0.0271, |
| "step": 13240 |
| }, |
| { |
| "grad_norm": 0.3233740031719208, |
| "learning_rate": 9.815115568105555e-05, |
| "loss": 0.0218, |
| "step": 13250 |
| }, |
| { |
| "grad_norm": 0.34060555696487427, |
| "learning_rate": 9.814669828994638e-05, |
| "loss": 0.0225, |
| "step": 13260 |
| }, |
| { |
| "grad_norm": 0.15952211618423462, |
| "learning_rate": 9.814223563358665e-05, |
| "loss": 0.0195, |
| "step": 13270 |
| }, |
| { |
| "grad_norm": 0.24137960374355316, |
| "learning_rate": 9.813776771246443e-05, |
| "loss": 0.0215, |
| "step": 13280 |
| }, |
| { |
| "grad_norm": 0.2525627613067627, |
| "learning_rate": 9.813329452706829e-05, |
| "loss": 0.0177, |
| "step": 13290 |
| }, |
| { |
| "grad_norm": 0.254814088344574, |
| "learning_rate": 9.812881607788744e-05, |
| "loss": 0.0225, |
| "step": 13300 |
| }, |
| { |
| "grad_norm": 0.23892925679683685, |
| "learning_rate": 9.812433236541163e-05, |
| "loss": 0.0202, |
| "step": 13310 |
| }, |
| { |
| "grad_norm": 0.23065580427646637, |
| "learning_rate": 9.811984339013116e-05, |
| "loss": 0.0205, |
| "step": 13320 |
| }, |
| { |
| "grad_norm": 0.22503262758255005, |
| "learning_rate": 9.811534915253698e-05, |
| "loss": 0.0201, |
| "step": 13330 |
| }, |
| { |
| "grad_norm": 0.2968944311141968, |
| "learning_rate": 9.811084965312056e-05, |
| "loss": 0.0197, |
| "step": 13340 |
| }, |
| { |
| "grad_norm": 0.24379391968250275, |
| "learning_rate": 9.810634489237396e-05, |
| "loss": 0.02, |
| "step": 13350 |
| }, |
| { |
| "grad_norm": 0.2536276876926422, |
| "learning_rate": 9.81018348707898e-05, |
| "loss": 0.0211, |
| "step": 13360 |
| }, |
| { |
| "grad_norm": 0.29255765676498413, |
| "learning_rate": 9.809731958886131e-05, |
| "loss": 0.022, |
| "step": 13370 |
| }, |
| { |
| "grad_norm": 0.2708306610584259, |
| "learning_rate": 9.809279904708224e-05, |
| "loss": 0.0215, |
| "step": 13380 |
| }, |
| { |
| "grad_norm": 0.28281521797180176, |
| "learning_rate": 9.808827324594699e-05, |
| "loss": 0.0184, |
| "step": 13390 |
| }, |
| { |
| "grad_norm": 0.21512441337108612, |
| "learning_rate": 9.808374218595046e-05, |
| "loss": 0.0206, |
| "step": 13400 |
| }, |
| { |
| "grad_norm": 0.24113260209560394, |
| "learning_rate": 9.80792058675882e-05, |
| "loss": 0.0177, |
| "step": 13410 |
| }, |
| { |
| "grad_norm": 0.24429504573345184, |
| "learning_rate": 9.807466429135627e-05, |
| "loss": 0.0184, |
| "step": 13420 |
| }, |
| { |
| "grad_norm": 0.2660101056098938, |
| "learning_rate": 9.807011745775132e-05, |
| "loss": 0.0217, |
| "step": 13430 |
| }, |
| { |
| "grad_norm": 0.30305543541908264, |
| "learning_rate": 9.806556536727061e-05, |
| "loss": 0.0245, |
| "step": 13440 |
| }, |
| { |
| "grad_norm": 0.19905737042427063, |
| "learning_rate": 9.806100802041193e-05, |
| "loss": 0.0191, |
| "step": 13450 |
| }, |
| { |
| "grad_norm": 0.3235015273094177, |
| "learning_rate": 9.805644541767368e-05, |
| "loss": 0.0216, |
| "step": 13460 |
| }, |
| { |
| "grad_norm": 0.21118152141571045, |
| "learning_rate": 9.805187755955478e-05, |
| "loss": 0.025, |
| "step": 13470 |
| }, |
| { |
| "grad_norm": 0.3211771845817566, |
| "learning_rate": 9.804730444655483e-05, |
| "loss": 0.0213, |
| "step": 13480 |
| }, |
| { |
| "grad_norm": 0.3155767619609833, |
| "learning_rate": 9.804272607917388e-05, |
| "loss": 0.0196, |
| "step": 13490 |
| }, |
| { |
| "grad_norm": 0.24246951937675476, |
| "learning_rate": 9.803814245791265e-05, |
| "loss": 0.0195, |
| "step": 13500 |
| }, |
| { |
| "grad_norm": 0.22159545123577118, |
| "learning_rate": 9.803355358327239e-05, |
| "loss": 0.0223, |
| "step": 13510 |
| }, |
| { |
| "grad_norm": 0.2195427268743515, |
| "learning_rate": 9.802895945575492e-05, |
| "loss": 0.0181, |
| "step": 13520 |
| }, |
| { |
| "grad_norm": 0.2641836702823639, |
| "learning_rate": 9.802436007586266e-05, |
| "loss": 0.0179, |
| "step": 13530 |
| }, |
| { |
| "grad_norm": 0.31182828545570374, |
| "learning_rate": 9.801975544409858e-05, |
| "loss": 0.0186, |
| "step": 13540 |
| }, |
| { |
| "grad_norm": 0.3050755262374878, |
| "learning_rate": 9.801514556096625e-05, |
| "loss": 0.0198, |
| "step": 13550 |
| }, |
| { |
| "grad_norm": 0.28792455792427063, |
| "learning_rate": 9.801053042696977e-05, |
| "loss": 0.0251, |
| "step": 13560 |
| }, |
| { |
| "grad_norm": 0.26547062397003174, |
| "learning_rate": 9.800591004261388e-05, |
| "loss": 0.0204, |
| "step": 13570 |
| }, |
| { |
| "grad_norm": 0.2921355962753296, |
| "learning_rate": 9.800128440840385e-05, |
| "loss": 0.0193, |
| "step": 13580 |
| }, |
| { |
| "grad_norm": 0.25927549600601196, |
| "learning_rate": 9.799665352484552e-05, |
| "loss": 0.0193, |
| "step": 13590 |
| }, |
| { |
| "grad_norm": 0.2548466622829437, |
| "learning_rate": 9.799201739244532e-05, |
| "loss": 0.0171, |
| "step": 13600 |
| }, |
| { |
| "grad_norm": 0.25240445137023926, |
| "learning_rate": 9.798737601171025e-05, |
| "loss": 0.0205, |
| "step": 13610 |
| }, |
| { |
| "grad_norm": 0.28021761775016785, |
| "learning_rate": 9.79827293831479e-05, |
| "loss": 0.0206, |
| "step": 13620 |
| }, |
| { |
| "grad_norm": 0.23501914739608765, |
| "learning_rate": 9.797807750726638e-05, |
| "loss": 0.0197, |
| "step": 13630 |
| }, |
| { |
| "grad_norm": 0.23027177155017853, |
| "learning_rate": 9.797342038457446e-05, |
| "loss": 0.02, |
| "step": 13640 |
| }, |
| { |
| "grad_norm": 0.2953159213066101, |
| "learning_rate": 9.796875801558141e-05, |
| "loss": 0.0209, |
| "step": 13650 |
| }, |
| { |
| "grad_norm": 0.28940150141716003, |
| "learning_rate": 9.79640904007971e-05, |
| "loss": 0.0185, |
| "step": 13660 |
| }, |
| { |
| "grad_norm": 0.2605695128440857, |
| "learning_rate": 9.795941754073199e-05, |
| "loss": 0.0204, |
| "step": 13670 |
| }, |
| { |
| "grad_norm": 0.2594354748725891, |
| "learning_rate": 9.795473943589705e-05, |
| "loss": 0.0198, |
| "step": 13680 |
| }, |
| { |
| "grad_norm": 0.3043878376483917, |
| "learning_rate": 9.795005608680394e-05, |
| "loss": 0.0205, |
| "step": 13690 |
| }, |
| { |
| "grad_norm": 0.2961346507072449, |
| "learning_rate": 9.794536749396477e-05, |
| "loss": 0.0207, |
| "step": 13700 |
| }, |
| { |
| "grad_norm": 0.25186237692832947, |
| "learning_rate": 9.79406736578923e-05, |
| "loss": 0.0224, |
| "step": 13710 |
| }, |
| { |
| "grad_norm": 0.2529714107513428, |
| "learning_rate": 9.793597457909984e-05, |
| "loss": 0.0231, |
| "step": 13720 |
| }, |
| { |
| "grad_norm": 0.2206045389175415, |
| "learning_rate": 9.793127025810127e-05, |
| "loss": 0.0197, |
| "step": 13730 |
| }, |
| { |
| "grad_norm": 0.24219457805156708, |
| "learning_rate": 9.792656069541104e-05, |
| "loss": 0.023, |
| "step": 13740 |
| }, |
| { |
| "grad_norm": 0.2709461450576782, |
| "learning_rate": 9.79218458915442e-05, |
| "loss": 0.0184, |
| "step": 13750 |
| }, |
| { |
| "grad_norm": 0.25277814269065857, |
| "learning_rate": 9.791712584701634e-05, |
| "loss": 0.0189, |
| "step": 13760 |
| }, |
| { |
| "grad_norm": 0.3072836399078369, |
| "learning_rate": 9.791240056234364e-05, |
| "loss": 0.0226, |
| "step": 13770 |
| }, |
| { |
| "grad_norm": 0.24425631761550903, |
| "learning_rate": 9.790767003804283e-05, |
| "loss": 0.0235, |
| "step": 13780 |
| }, |
| { |
| "grad_norm": 0.2431863695383072, |
| "learning_rate": 9.790293427463126e-05, |
| "loss": 0.0195, |
| "step": 13790 |
| }, |
| { |
| "grad_norm": 0.2647920548915863, |
| "learning_rate": 9.789819327262684e-05, |
| "loss": 0.0204, |
| "step": 13800 |
| }, |
| { |
| "grad_norm": 0.2761968672275543, |
| "learning_rate": 9.7893447032548e-05, |
| "loss": 0.0202, |
| "step": 13810 |
| }, |
| { |
| "grad_norm": 0.3204233944416046, |
| "learning_rate": 9.78886955549138e-05, |
| "loss": 0.0223, |
| "step": 13820 |
| }, |
| { |
| "grad_norm": 0.3227749466896057, |
| "learning_rate": 9.788393884024387e-05, |
| "loss": 0.0199, |
| "step": 13830 |
| }, |
| { |
| "grad_norm": 0.2341262400150299, |
| "learning_rate": 9.787917688905836e-05, |
| "loss": 0.0173, |
| "step": 13840 |
| }, |
| { |
| "grad_norm": 0.24751389026641846, |
| "learning_rate": 9.787440970187807e-05, |
| "loss": 0.0212, |
| "step": 13850 |
| }, |
| { |
| "grad_norm": 0.3391607105731964, |
| "learning_rate": 9.786963727922429e-05, |
| "loss": 0.0217, |
| "step": 13860 |
| }, |
| { |
| "grad_norm": 0.24142494797706604, |
| "learning_rate": 9.786485962161897e-05, |
| "loss": 0.0207, |
| "step": 13870 |
| }, |
| { |
| "grad_norm": 0.22415021061897278, |
| "learning_rate": 9.786007672958455e-05, |
| "loss": 0.0214, |
| "step": 13880 |
| }, |
| { |
| "grad_norm": 0.24598553776741028, |
| "learning_rate": 9.78552886036441e-05, |
| "loss": 0.0192, |
| "step": 13890 |
| }, |
| { |
| "grad_norm": 0.2527545988559723, |
| "learning_rate": 9.785049524432124e-05, |
| "loss": 0.0218, |
| "step": 13900 |
| }, |
| { |
| "grad_norm": 0.25652772188186646, |
| "learning_rate": 9.784569665214016e-05, |
| "loss": 0.0174, |
| "step": 13910 |
| }, |
| { |
| "grad_norm": 0.2599169611930847, |
| "learning_rate": 9.784089282762563e-05, |
| "loss": 0.0178, |
| "step": 13920 |
| }, |
| { |
| "grad_norm": 0.22017325460910797, |
| "learning_rate": 9.7836083771303e-05, |
| "loss": 0.0191, |
| "step": 13930 |
| }, |
| { |
| "grad_norm": 0.26483476161956787, |
| "learning_rate": 9.783126948369817e-05, |
| "loss": 0.02, |
| "step": 13940 |
| }, |
| { |
| "grad_norm": 0.24816817045211792, |
| "learning_rate": 9.78264499653376e-05, |
| "loss": 0.0225, |
| "step": 13950 |
| }, |
| { |
| "grad_norm": 0.301017701625824, |
| "learning_rate": 9.782162521674838e-05, |
| "loss": 0.0202, |
| "step": 13960 |
| }, |
| { |
| "grad_norm": 0.21281808614730835, |
| "learning_rate": 9.781679523845812e-05, |
| "loss": 0.0224, |
| "step": 13970 |
| }, |
| { |
| "grad_norm": 0.27746960520744324, |
| "learning_rate": 9.781196003099502e-05, |
| "loss": 0.0227, |
| "step": 13980 |
| }, |
| { |
| "grad_norm": 0.23323650658130646, |
| "learning_rate": 9.780711959488786e-05, |
| "loss": 0.024, |
| "step": 13990 |
| }, |
| { |
| "grad_norm": 0.23771823942661285, |
| "learning_rate": 9.780227393066599e-05, |
| "loss": 0.0253, |
| "step": 14000 |
| }, |
| { |
| "grad_norm": 0.2411212921142578, |
| "learning_rate": 9.77974230388593e-05, |
| "loss": 0.0171, |
| "step": 14010 |
| }, |
| { |
| "grad_norm": 0.23389559984207153, |
| "learning_rate": 9.779256691999829e-05, |
| "loss": 0.0201, |
| "step": 14020 |
| }, |
| { |
| "grad_norm": 0.21322768926620483, |
| "learning_rate": 9.778770557461403e-05, |
| "loss": 0.0196, |
| "step": 14030 |
| }, |
| { |
| "grad_norm": 0.37174296379089355, |
| "learning_rate": 9.778283900323812e-05, |
| "loss": 0.0225, |
| "step": 14040 |
| }, |
| { |
| "grad_norm": 0.3192085921764374, |
| "learning_rate": 9.777796720640277e-05, |
| "loss": 0.0217, |
| "step": 14050 |
| }, |
| { |
| "grad_norm": 0.21999326348304749, |
| "learning_rate": 9.777309018464078e-05, |
| "loss": 0.0206, |
| "step": 14060 |
| }, |
| { |
| "grad_norm": 0.24725963175296783, |
| "learning_rate": 9.776820793848547e-05, |
| "loss": 0.021, |
| "step": 14070 |
| }, |
| { |
| "grad_norm": 0.28076156973838806, |
| "learning_rate": 9.776332046847075e-05, |
| "loss": 0.0194, |
| "step": 14080 |
| }, |
| { |
| "grad_norm": 0.20957839488983154, |
| "learning_rate": 9.775842777513111e-05, |
| "loss": 0.0204, |
| "step": 14090 |
| }, |
| { |
| "grad_norm": 0.2436676174402237, |
| "learning_rate": 9.775352985900163e-05, |
| "loss": 0.0188, |
| "step": 14100 |
| }, |
| { |
| "grad_norm": 0.19794858992099762, |
| "learning_rate": 9.774862672061791e-05, |
| "loss": 0.0173, |
| "step": 14110 |
| }, |
| { |
| "grad_norm": 0.27602648735046387, |
| "learning_rate": 9.774371836051616e-05, |
| "loss": 0.0205, |
| "step": 14120 |
| }, |
| { |
| "grad_norm": 0.345101922750473, |
| "learning_rate": 9.773880477923315e-05, |
| "loss": 0.0188, |
| "step": 14130 |
| }, |
| { |
| "grad_norm": 0.23460453748703003, |
| "learning_rate": 9.773388597730623e-05, |
| "loss": 0.0171, |
| "step": 14140 |
| }, |
| { |
| "grad_norm": 0.29975447058677673, |
| "learning_rate": 9.77289619552733e-05, |
| "loss": 0.021, |
| "step": 14150 |
| }, |
| { |
| "grad_norm": 0.2128962129354477, |
| "learning_rate": 9.772403271367285e-05, |
| "loss": 0.0205, |
| "step": 14160 |
| }, |
| { |
| "grad_norm": 0.3548290729522705, |
| "learning_rate": 9.771909825304396e-05, |
| "loss": 0.0203, |
| "step": 14170 |
| }, |
| { |
| "grad_norm": 0.2556484043598175, |
| "learning_rate": 9.771415857392619e-05, |
| "loss": 0.0244, |
| "step": 14180 |
| }, |
| { |
| "grad_norm": 0.23943066596984863, |
| "learning_rate": 9.770921367685978e-05, |
| "loss": 0.0202, |
| "step": 14190 |
| }, |
| { |
| "grad_norm": 0.21408414840698242, |
| "learning_rate": 9.770426356238551e-05, |
| "loss": 0.0224, |
| "step": 14200 |
| }, |
| { |
| "grad_norm": 0.2104213684797287, |
| "learning_rate": 9.769930823104469e-05, |
| "loss": 0.0159, |
| "step": 14210 |
| }, |
| { |
| "grad_norm": 0.21027202904224396, |
| "learning_rate": 9.769434768337926e-05, |
| "loss": 0.0187, |
| "step": 14220 |
| }, |
| { |
| "grad_norm": 0.2199854850769043, |
| "learning_rate": 9.768938191993164e-05, |
| "loss": 0.0188, |
| "step": 14230 |
| }, |
| { |
| "grad_norm": 0.2762155532836914, |
| "learning_rate": 9.768441094124494e-05, |
| "loss": 0.0227, |
| "step": 14240 |
| }, |
| { |
| "grad_norm": 0.29159846901893616, |
| "learning_rate": 9.767943474786275e-05, |
| "loss": 0.0222, |
| "step": 14250 |
| }, |
| { |
| "grad_norm": 0.21874549984931946, |
| "learning_rate": 9.767445334032923e-05, |
| "loss": 0.0193, |
| "step": 14260 |
| }, |
| { |
| "grad_norm": 0.22311334311962128, |
| "learning_rate": 9.766946671918919e-05, |
| "loss": 0.022, |
| "step": 14270 |
| }, |
| { |
| "grad_norm": 0.22728633880615234, |
| "learning_rate": 9.766447488498796e-05, |
| "loss": 0.0197, |
| "step": 14280 |
| }, |
| { |
| "grad_norm": 0.3445588946342468, |
| "learning_rate": 9.765947783827139e-05, |
| "loss": 0.0204, |
| "step": 14290 |
| }, |
| { |
| "grad_norm": 0.2580435872077942, |
| "learning_rate": 9.765447557958599e-05, |
| "loss": 0.0235, |
| "step": 14300 |
| }, |
| { |
| "grad_norm": 0.2422706037759781, |
| "learning_rate": 9.764946810947879e-05, |
| "loss": 0.0199, |
| "step": 14310 |
| }, |
| { |
| "grad_norm": 0.3658464252948761, |
| "learning_rate": 9.764445542849738e-05, |
| "loss": 0.0219, |
| "step": 14320 |
| }, |
| { |
| "grad_norm": 0.2647552788257599, |
| "learning_rate": 9.763943753718998e-05, |
| "loss": 0.0218, |
| "step": 14330 |
| }, |
| { |
| "grad_norm": 0.22951330244541168, |
| "learning_rate": 9.76344144361053e-05, |
| "loss": 0.0195, |
| "step": 14340 |
| }, |
| { |
| "grad_norm": 0.20395617187023163, |
| "learning_rate": 9.762938612579269e-05, |
| "loss": 0.0202, |
| "step": 14350 |
| }, |
| { |
| "grad_norm": 0.2150609940290451, |
| "learning_rate": 9.762435260680202e-05, |
| "loss": 0.018, |
| "step": 14360 |
| }, |
| { |
| "grad_norm": 0.219925656914711, |
| "learning_rate": 9.761931387968373e-05, |
| "loss": 0.0175, |
| "step": 14370 |
| }, |
| { |
| "grad_norm": 0.21585464477539062, |
| "learning_rate": 9.76142699449889e-05, |
| "loss": 0.0183, |
| "step": 14380 |
| }, |
| { |
| "grad_norm": 0.21576769649982452, |
| "learning_rate": 9.760922080326908e-05, |
| "loss": 0.0203, |
| "step": 14390 |
| }, |
| { |
| "grad_norm": 0.23750628530979156, |
| "learning_rate": 9.760416645507644e-05, |
| "loss": 0.0202, |
| "step": 14400 |
| }, |
| { |
| "grad_norm": 0.255420982837677, |
| "learning_rate": 9.759910690096375e-05, |
| "loss": 0.0212, |
| "step": 14410 |
| }, |
| { |
| "grad_norm": 0.3221551179885864, |
| "learning_rate": 9.759404214148429e-05, |
| "loss": 0.0192, |
| "step": 14420 |
| }, |
| { |
| "grad_norm": 0.2521008551120758, |
| "learning_rate": 9.758897217719191e-05, |
| "loss": 0.0175, |
| "step": 14430 |
| }, |
| { |
| "grad_norm": 0.2588962912559509, |
| "learning_rate": 9.758389700864113e-05, |
| "loss": 0.0182, |
| "step": 14440 |
| }, |
| { |
| "grad_norm": 0.2366463840007782, |
| "learning_rate": 9.757881663638688e-05, |
| "loss": 0.0186, |
| "step": 14450 |
| }, |
| { |
| "grad_norm": 0.2948257327079773, |
| "learning_rate": 9.757373106098478e-05, |
| "loss": 0.02, |
| "step": 14460 |
| }, |
| { |
| "grad_norm": 0.30074384808540344, |
| "learning_rate": 9.756864028299097e-05, |
| "loss": 0.0215, |
| "step": 14470 |
| }, |
| { |
| "grad_norm": 0.28855177760124207, |
| "learning_rate": 9.75635443029622e-05, |
| "loss": 0.0196, |
| "step": 14480 |
| }, |
| { |
| "grad_norm": 0.1691199094057083, |
| "learning_rate": 9.755844312145572e-05, |
| "loss": 0.0191, |
| "step": 14490 |
| }, |
| { |
| "grad_norm": 0.27137795090675354, |
| "learning_rate": 9.755333673902941e-05, |
| "loss": 0.0181, |
| "step": 14500 |
| }, |
| { |
| "grad_norm": 0.24129636585712433, |
| "learning_rate": 9.75482251562417e-05, |
| "loss": 0.0214, |
| "step": 14510 |
| }, |
| { |
| "grad_norm": 0.2521432340145111, |
| "learning_rate": 9.754310837365155e-05, |
| "loss": 0.0181, |
| "step": 14520 |
| }, |
| { |
| "grad_norm": 0.28629830479621887, |
| "learning_rate": 9.753798639181856e-05, |
| "loss": 0.0185, |
| "step": 14530 |
| }, |
| { |
| "grad_norm": 0.25693315267562866, |
| "learning_rate": 9.753285921130286e-05, |
| "loss": 0.0224, |
| "step": 14540 |
| }, |
| { |
| "grad_norm": 0.23772841691970825, |
| "learning_rate": 9.752772683266512e-05, |
| "loss": 0.0178, |
| "step": 14550 |
| }, |
| { |
| "grad_norm": 0.20092158019542694, |
| "learning_rate": 9.752258925646665e-05, |
| "loss": 0.0188, |
| "step": 14560 |
| }, |
| { |
| "grad_norm": 0.25228646397590637, |
| "learning_rate": 9.751744648326926e-05, |
| "loss": 0.0195, |
| "step": 14570 |
| }, |
| { |
| "grad_norm": 0.2741185426712036, |
| "learning_rate": 9.751229851363536e-05, |
| "loss": 0.0203, |
| "step": 14580 |
| }, |
| { |
| "grad_norm": 0.3167235255241394, |
| "learning_rate": 9.750714534812793e-05, |
| "loss": 0.0201, |
| "step": 14590 |
| }, |
| { |
| "grad_norm": 0.2366606742143631, |
| "learning_rate": 9.750198698731053e-05, |
| "loss": 0.0186, |
| "step": 14600 |
| }, |
| { |
| "grad_norm": 0.29967039823532104, |
| "learning_rate": 9.749682343174722e-05, |
| "loss": 0.02, |
| "step": 14610 |
| }, |
| { |
| "grad_norm": 0.26064541935920715, |
| "learning_rate": 9.749165468200272e-05, |
| "loss": 0.0203, |
| "step": 14620 |
| }, |
| { |
| "grad_norm": 0.28458788990974426, |
| "learning_rate": 9.748648073864229e-05, |
| "loss": 0.022, |
| "step": 14630 |
| }, |
| { |
| "grad_norm": 0.18595005571842194, |
| "learning_rate": 9.748130160223168e-05, |
| "loss": 0.0183, |
| "step": 14640 |
| }, |
| { |
| "grad_norm": 0.266886830329895, |
| "learning_rate": 9.747611727333734e-05, |
| "loss": 0.0166, |
| "step": 14650 |
| }, |
| { |
| "grad_norm": 0.1858520209789276, |
| "learning_rate": 9.74709277525262e-05, |
| "loss": 0.0236, |
| "step": 14660 |
| }, |
| { |
| "grad_norm": 0.282135009765625, |
| "learning_rate": 9.746573304036576e-05, |
| "loss": 0.0191, |
| "step": 14670 |
| }, |
| { |
| "grad_norm": 0.22414512932300568, |
| "learning_rate": 9.746053313742412e-05, |
| "loss": 0.0198, |
| "step": 14680 |
| }, |
| { |
| "grad_norm": 0.2562432289123535, |
| "learning_rate": 9.745532804426994e-05, |
| "loss": 0.0192, |
| "step": 14690 |
| }, |
| { |
| "grad_norm": 0.25288599729537964, |
| "learning_rate": 9.745011776147242e-05, |
| "loss": 0.0191, |
| "step": 14700 |
| }, |
| { |
| "grad_norm": 0.19824698567390442, |
| "learning_rate": 9.744490228960138e-05, |
| "loss": 0.0171, |
| "step": 14710 |
| }, |
| { |
| "grad_norm": 0.256298303604126, |
| "learning_rate": 9.743968162922713e-05, |
| "loss": 0.0205, |
| "step": 14720 |
| }, |
| { |
| "grad_norm": 0.2583651542663574, |
| "learning_rate": 9.743445578092064e-05, |
| "loss": 0.02, |
| "step": 14730 |
| }, |
| { |
| "grad_norm": 0.3547195792198181, |
| "learning_rate": 9.742922474525338e-05, |
| "loss": 0.0244, |
| "step": 14740 |
| }, |
| { |
| "grad_norm": 0.22905513644218445, |
| "learning_rate": 9.742398852279741e-05, |
| "loss": 0.0196, |
| "step": 14750 |
| }, |
| { |
| "grad_norm": 0.24030999839305878, |
| "learning_rate": 9.741874711412535e-05, |
| "loss": 0.0184, |
| "step": 14760 |
| }, |
| { |
| "grad_norm": 0.19056032598018646, |
| "learning_rate": 9.741350051981042e-05, |
| "loss": 0.0209, |
| "step": 14770 |
| }, |
| { |
| "grad_norm": 0.22818054258823395, |
| "learning_rate": 9.740824874042633e-05, |
| "loss": 0.0197, |
| "step": 14780 |
| }, |
| { |
| "grad_norm": 0.2896696627140045, |
| "learning_rate": 9.740299177654746e-05, |
| "loss": 0.0205, |
| "step": 14790 |
| }, |
| { |
| "grad_norm": 0.23162594437599182, |
| "learning_rate": 9.739772962874867e-05, |
| "loss": 0.0192, |
| "step": 14800 |
| }, |
| { |
| "grad_norm": 0.23662790656089783, |
| "learning_rate": 9.739246229760541e-05, |
| "loss": 0.0195, |
| "step": 14810 |
| }, |
| { |
| "grad_norm": 0.30700597167015076, |
| "learning_rate": 9.738718978369376e-05, |
| "loss": 0.0207, |
| "step": 14820 |
| }, |
| { |
| "grad_norm": 0.17646534740924835, |
| "learning_rate": 9.738191208759025e-05, |
| "loss": 0.0201, |
| "step": 14830 |
| }, |
| { |
| "grad_norm": 0.2281806915998459, |
| "learning_rate": 9.73766292098721e-05, |
| "loss": 0.0204, |
| "step": 14840 |
| }, |
| { |
| "grad_norm": 0.25043541193008423, |
| "learning_rate": 9.737134115111699e-05, |
| "loss": 0.0196, |
| "step": 14850 |
| }, |
| { |
| "grad_norm": 0.278923362493515, |
| "learning_rate": 9.736604791190323e-05, |
| "loss": 0.0165, |
| "step": 14860 |
| }, |
| { |
| "grad_norm": 0.1961689442396164, |
| "learning_rate": 9.73607494928097e-05, |
| "loss": 0.0183, |
| "step": 14870 |
| }, |
| { |
| "grad_norm": 0.234564870595932, |
| "learning_rate": 9.735544589441581e-05, |
| "loss": 0.0206, |
| "step": 14880 |
| }, |
| { |
| "grad_norm": 0.23098857700824738, |
| "learning_rate": 9.735013711730154e-05, |
| "loss": 0.0198, |
| "step": 14890 |
| }, |
| { |
| "grad_norm": 0.17101159691810608, |
| "learning_rate": 9.734482316204747e-05, |
| "loss": 0.0186, |
| "step": 14900 |
| }, |
| { |
| "grad_norm": 0.2384091466665268, |
| "learning_rate": 9.733950402923473e-05, |
| "loss": 0.0169, |
| "step": 14910 |
| }, |
| { |
| "grad_norm": 0.2314639389514923, |
| "learning_rate": 9.7334179719445e-05, |
| "loss": 0.021, |
| "step": 14920 |
| }, |
| { |
| "grad_norm": 0.3325411081314087, |
| "learning_rate": 9.732885023326053e-05, |
| "loss": 0.0193, |
| "step": 14930 |
| }, |
| { |
| "grad_norm": 0.2880927324295044, |
| "learning_rate": 9.732351557126418e-05, |
| "loss": 0.0229, |
| "step": 14940 |
| }, |
| { |
| "grad_norm": 0.29757729172706604, |
| "learning_rate": 9.731817573403929e-05, |
| "loss": 0.019, |
| "step": 14950 |
| }, |
| { |
| "grad_norm": 0.2836707532405853, |
| "learning_rate": 9.731283072216985e-05, |
| "loss": 0.0185, |
| "step": 14960 |
| }, |
| { |
| "grad_norm": 0.2036943882703781, |
| "learning_rate": 9.730748053624039e-05, |
| "loss": 0.0246, |
| "step": 14970 |
| }, |
| { |
| "grad_norm": 0.23931244015693665, |
| "learning_rate": 9.730212517683598e-05, |
| "loss": 0.0187, |
| "step": 14980 |
| }, |
| { |
| "grad_norm": 0.23276709020137787, |
| "learning_rate": 9.729676464454228e-05, |
| "loss": 0.0173, |
| "step": 14990 |
| }, |
| { |
| "grad_norm": 0.25916847586631775, |
| "learning_rate": 9.72913989399455e-05, |
| "loss": 0.0206, |
| "step": 15000 |
| }, |
| { |
| "grad_norm": 0.2327517718076706, |
| "learning_rate": 9.728602806363242e-05, |
| "loss": 0.0175, |
| "step": 15010 |
| }, |
| { |
| "grad_norm": 0.2676856517791748, |
| "learning_rate": 9.728065201619043e-05, |
| "loss": 0.0195, |
| "step": 15020 |
| }, |
| { |
| "grad_norm": 0.26770108938217163, |
| "learning_rate": 9.727527079820742e-05, |
| "loss": 0.0171, |
| "step": 15030 |
| }, |
| { |
| "grad_norm": 0.2789733409881592, |
| "learning_rate": 9.726988441027186e-05, |
| "loss": 0.0159, |
| "step": 15040 |
| }, |
| { |
| "grad_norm": 0.19679932296276093, |
| "learning_rate": 9.726449285297281e-05, |
| "loss": 0.0177, |
| "step": 15050 |
| }, |
| { |
| "grad_norm": 0.2140314131975174, |
| "learning_rate": 9.72590961268999e-05, |
| "loss": 0.0168, |
| "step": 15060 |
| }, |
| { |
| "grad_norm": 0.2961437702178955, |
| "learning_rate": 9.725369423264328e-05, |
| "loss": 0.0168, |
| "step": 15070 |
| }, |
| { |
| "grad_norm": 0.23276185989379883, |
| "learning_rate": 9.72482871707937e-05, |
| "loss": 0.0189, |
| "step": 15080 |
| }, |
| { |
| "grad_norm": 0.2881372272968292, |
| "learning_rate": 9.724287494194247e-05, |
| "loss": 0.0176, |
| "step": 15090 |
| }, |
| { |
| "grad_norm": 0.3392719626426697, |
| "learning_rate": 9.723745754668147e-05, |
| "loss": 0.0186, |
| "step": 15100 |
| }, |
| { |
| "grad_norm": 0.28607383370399475, |
| "learning_rate": 9.723203498560313e-05, |
| "loss": 0.02, |
| "step": 15110 |
| }, |
| { |
| "grad_norm": 0.27815496921539307, |
| "learning_rate": 9.722660725930046e-05, |
| "loss": 0.0191, |
| "step": 15120 |
| }, |
| { |
| "grad_norm": 0.26892364025115967, |
| "learning_rate": 9.722117436836702e-05, |
| "loss": 0.0199, |
| "step": 15130 |
| }, |
| { |
| "grad_norm": 0.22978749871253967, |
| "learning_rate": 9.721573631339696e-05, |
| "loss": 0.0207, |
| "step": 15140 |
| }, |
| { |
| "grad_norm": 0.20402418076992035, |
| "learning_rate": 9.721029309498494e-05, |
| "loss": 0.0201, |
| "step": 15150 |
| }, |
| { |
| "grad_norm": 0.24020260572433472, |
| "learning_rate": 9.720484471372627e-05, |
| "loss": 0.0197, |
| "step": 15160 |
| }, |
| { |
| "grad_norm": 0.20848724246025085, |
| "learning_rate": 9.719939117021673e-05, |
| "loss": 0.0199, |
| "step": 15170 |
| }, |
| { |
| "grad_norm": 0.29797449707984924, |
| "learning_rate": 9.719393246505275e-05, |
| "loss": 0.0194, |
| "step": 15180 |
| }, |
| { |
| "grad_norm": 0.30025288462638855, |
| "learning_rate": 9.718846859883128e-05, |
| "loss": 0.0219, |
| "step": 15190 |
| }, |
| { |
| "grad_norm": 0.2575812339782715, |
| "learning_rate": 9.718299957214982e-05, |
| "loss": 0.0205, |
| "step": 15200 |
| }, |
| { |
| "grad_norm": 0.3019827902317047, |
| "learning_rate": 9.717752538560646e-05, |
| "loss": 0.0203, |
| "step": 15210 |
| }, |
| { |
| "grad_norm": 0.2757616639137268, |
| "learning_rate": 9.717204603979986e-05, |
| "loss": 0.0191, |
| "step": 15220 |
| }, |
| { |
| "grad_norm": 0.3016200363636017, |
| "learning_rate": 9.716656153532922e-05, |
| "loss": 0.0171, |
| "step": 15230 |
| }, |
| { |
| "grad_norm": 0.23993328213691711, |
| "learning_rate": 9.716107187279434e-05, |
| "loss": 0.0198, |
| "step": 15240 |
| }, |
| { |
| "grad_norm": 0.22839435935020447, |
| "learning_rate": 9.715557705279555e-05, |
| "loss": 0.0173, |
| "step": 15250 |
| }, |
| { |
| "grad_norm": 0.22564002871513367, |
| "learning_rate": 9.715007707593372e-05, |
| "loss": 0.0202, |
| "step": 15260 |
| }, |
| { |
| "grad_norm": 0.19124171137809753, |
| "learning_rate": 9.714457194281036e-05, |
| "loss": 0.0181, |
| "step": 15270 |
| }, |
| { |
| "grad_norm": 0.32519978284835815, |
| "learning_rate": 9.713906165402751e-05, |
| "loss": 0.0202, |
| "step": 15280 |
| }, |
| { |
| "grad_norm": 0.21475209295749664, |
| "learning_rate": 9.713354621018774e-05, |
| "loss": 0.0188, |
| "step": 15290 |
| }, |
| { |
| "grad_norm": 0.19821572303771973, |
| "learning_rate": 9.712802561189422e-05, |
| "loss": 0.0172, |
| "step": 15300 |
| }, |
| { |
| "grad_norm": 0.32506078481674194, |
| "learning_rate": 9.712249985975069e-05, |
| "loss": 0.0179, |
| "step": 15310 |
| }, |
| { |
| "grad_norm": 0.28665006160736084, |
| "learning_rate": 9.71169689543614e-05, |
| "loss": 0.0201, |
| "step": 15320 |
| }, |
| { |
| "grad_norm": 0.1634398251771927, |
| "learning_rate": 9.711143289633123e-05, |
| "loss": 0.0169, |
| "step": 15330 |
| }, |
| { |
| "grad_norm": 0.25451749563217163, |
| "learning_rate": 9.710589168626561e-05, |
| "loss": 0.0194, |
| "step": 15340 |
| }, |
| { |
| "grad_norm": 0.25250038504600525, |
| "learning_rate": 9.710034532477048e-05, |
| "loss": 0.0191, |
| "step": 15350 |
| }, |
| { |
| "grad_norm": 0.20981234312057495, |
| "learning_rate": 9.709479381245239e-05, |
| "loss": 0.0195, |
| "step": 15360 |
| }, |
| { |
| "grad_norm": 0.26432904601097107, |
| "learning_rate": 9.708923714991847e-05, |
| "loss": 0.0234, |
| "step": 15370 |
| }, |
| { |
| "grad_norm": 0.21719463169574738, |
| "learning_rate": 9.708367533777638e-05, |
| "loss": 0.0226, |
| "step": 15380 |
| }, |
| { |
| "grad_norm": 0.247014582157135, |
| "learning_rate": 9.707810837663431e-05, |
| "loss": 0.0195, |
| "step": 15390 |
| }, |
| { |
| "grad_norm": 0.22526317834854126, |
| "learning_rate": 9.707253626710113e-05, |
| "loss": 0.0221, |
| "step": 15400 |
| }, |
| { |
| "grad_norm": 0.2209337204694748, |
| "learning_rate": 9.706695900978613e-05, |
| "loss": 0.0197, |
| "step": 15410 |
| }, |
| { |
| "grad_norm": 0.24105608463287354, |
| "learning_rate": 9.706137660529926e-05, |
| "loss": 0.0196, |
| "step": 15420 |
| }, |
| { |
| "grad_norm": 0.25565317273139954, |
| "learning_rate": 9.705578905425101e-05, |
| "loss": 0.02, |
| "step": 15430 |
| }, |
| { |
| "grad_norm": 0.2159339338541031, |
| "learning_rate": 9.705019635725241e-05, |
| "loss": 0.0214, |
| "step": 15440 |
| }, |
| { |
| "grad_norm": 0.27560457587242126, |
| "learning_rate": 9.704459851491508e-05, |
| "loss": 0.0184, |
| "step": 15450 |
| }, |
| { |
| "grad_norm": 0.2387838512659073, |
| "learning_rate": 9.703899552785118e-05, |
| "loss": 0.0193, |
| "step": 15460 |
| }, |
| { |
| "grad_norm": 0.2243328094482422, |
| "learning_rate": 9.703338739667346e-05, |
| "loss": 0.0223, |
| "step": 15470 |
| }, |
| { |
| "grad_norm": 0.229559987783432, |
| "learning_rate": 9.70277741219952e-05, |
| "loss": 0.0208, |
| "step": 15480 |
| }, |
| { |
| "grad_norm": 0.28965842723846436, |
| "learning_rate": 9.702215570443027e-05, |
| "loss": 0.0257, |
| "step": 15490 |
| }, |
| { |
| "grad_norm": 0.3054785430431366, |
| "learning_rate": 9.701653214459309e-05, |
| "loss": 0.0241, |
| "step": 15500 |
| }, |
| { |
| "grad_norm": 0.34266725182533264, |
| "learning_rate": 9.701090344309865e-05, |
| "loss": 0.0211, |
| "step": 15510 |
| }, |
| { |
| "grad_norm": 0.2969988286495209, |
| "learning_rate": 9.700526960056247e-05, |
| "loss": 0.0204, |
| "step": 15520 |
| }, |
| { |
| "grad_norm": 0.2747593820095062, |
| "learning_rate": 9.699963061760068e-05, |
| "loss": 0.0197, |
| "step": 15530 |
| }, |
| { |
| "grad_norm": 0.16242137551307678, |
| "learning_rate": 9.699398649482997e-05, |
| "loss": 0.0195, |
| "step": 15540 |
| }, |
| { |
| "grad_norm": 0.395180881023407, |
| "learning_rate": 9.698833723286753e-05, |
| "loss": 0.0205, |
| "step": 15550 |
| }, |
| { |
| "grad_norm": 0.22436875104904175, |
| "learning_rate": 9.698268283233118e-05, |
| "loss": 0.0187, |
| "step": 15560 |
| }, |
| { |
| "grad_norm": 0.23885248601436615, |
| "learning_rate": 9.697702329383929e-05, |
| "loss": 0.0182, |
| "step": 15570 |
| }, |
| { |
| "grad_norm": 0.24845798313617706, |
| "learning_rate": 9.697135861801074e-05, |
| "loss": 0.0158, |
| "step": 15580 |
| }, |
| { |
| "grad_norm": 0.23030376434326172, |
| "learning_rate": 9.696568880546505e-05, |
| "loss": 0.0191, |
| "step": 15590 |
| }, |
| { |
| "grad_norm": 0.2087312787771225, |
| "learning_rate": 9.696001385682223e-05, |
| "loss": 0.0203, |
| "step": 15600 |
| }, |
| { |
| "grad_norm": 0.20425690710544586, |
| "learning_rate": 9.695433377270291e-05, |
| "loss": 0.0181, |
| "step": 15610 |
| }, |
| { |
| "grad_norm": 0.2104547619819641, |
| "learning_rate": 9.694864855372824e-05, |
| "loss": 0.0166, |
| "step": 15620 |
| }, |
| { |
| "grad_norm": 0.2342647910118103, |
| "learning_rate": 9.694295820051995e-05, |
| "loss": 0.017, |
| "step": 15630 |
| }, |
| { |
| "grad_norm": 0.27400583028793335, |
| "learning_rate": 9.693726271370032e-05, |
| "loss": 0.0192, |
| "step": 15640 |
| }, |
| { |
| "grad_norm": 0.19918233156204224, |
| "learning_rate": 9.693156209389221e-05, |
| "loss": 0.0183, |
| "step": 15650 |
| }, |
| { |
| "grad_norm": 0.2711070477962494, |
| "learning_rate": 9.692585634171905e-05, |
| "loss": 0.0209, |
| "step": 15660 |
| }, |
| { |
| "grad_norm": 0.24387693405151367, |
| "learning_rate": 9.692014545780476e-05, |
| "loss": 0.0195, |
| "step": 15670 |
| }, |
| { |
| "grad_norm": 0.2516650855541229, |
| "learning_rate": 9.691442944277393e-05, |
| "loss": 0.0206, |
| "step": 15680 |
| }, |
| { |
| "grad_norm": 0.22077317535877228, |
| "learning_rate": 9.690870829725162e-05, |
| "loss": 0.0171, |
| "step": 15690 |
| }, |
| { |
| "grad_norm": 0.19970019161701202, |
| "learning_rate": 9.69029820218635e-05, |
| "loss": 0.0153, |
| "step": 15700 |
| }, |
| { |
| "grad_norm": 0.2740638554096222, |
| "learning_rate": 9.689725061723579e-05, |
| "loss": 0.0195, |
| "step": 15710 |
| }, |
| { |
| "grad_norm": 0.2535877525806427, |
| "learning_rate": 9.689151408399527e-05, |
| "loss": 0.0195, |
| "step": 15720 |
| }, |
| { |
| "grad_norm": 0.3257971704006195, |
| "learning_rate": 9.688577242276924e-05, |
| "loss": 0.0208, |
| "step": 15730 |
| }, |
| { |
| "grad_norm": 0.28868475556373596, |
| "learning_rate": 9.688002563418566e-05, |
| "loss": 0.0195, |
| "step": 15740 |
| }, |
| { |
| "grad_norm": 0.2290724515914917, |
| "learning_rate": 9.687427371887293e-05, |
| "loss": 0.0195, |
| "step": 15750 |
| }, |
| { |
| "grad_norm": 0.2544545531272888, |
| "learning_rate": 9.686851667746012e-05, |
| "loss": 0.0211, |
| "step": 15760 |
| }, |
| { |
| "grad_norm": 0.17678581178188324, |
| "learning_rate": 9.686275451057677e-05, |
| "loss": 0.0197, |
| "step": 15770 |
| }, |
| { |
| "grad_norm": 0.33382585644721985, |
| "learning_rate": 9.685698721885308e-05, |
| "loss": 0.0191, |
| "step": 15780 |
| }, |
| { |
| "grad_norm": 0.24783273041248322, |
| "learning_rate": 9.68512148029197e-05, |
| "loss": 0.0183, |
| "step": 15790 |
| }, |
| { |
| "grad_norm": 0.2595932185649872, |
| "learning_rate": 9.684543726340791e-05, |
| "loss": 0.0195, |
| "step": 15800 |
| }, |
| { |
| "grad_norm": 0.23103168606758118, |
| "learning_rate": 9.683965460094952e-05, |
| "loss": 0.0173, |
| "step": 15810 |
| }, |
| { |
| "grad_norm": 0.19797135889530182, |
| "learning_rate": 9.683386681617694e-05, |
| "loss": 0.0212, |
| "step": 15820 |
| }, |
| { |
| "grad_norm": 0.24339400231838226, |
| "learning_rate": 9.68280739097231e-05, |
| "loss": 0.0171, |
| "step": 15830 |
| }, |
| { |
| "grad_norm": 0.23345626890659332, |
| "learning_rate": 9.682227588222148e-05, |
| "loss": 0.0234, |
| "step": 15840 |
| }, |
| { |
| "grad_norm": 0.221258282661438, |
| "learning_rate": 9.681647273430618e-05, |
| "loss": 0.0182, |
| "step": 15850 |
| }, |
| { |
| "grad_norm": 0.28538239002227783, |
| "learning_rate": 9.681066446661182e-05, |
| "loss": 0.0205, |
| "step": 15860 |
| }, |
| { |
| "grad_norm": 0.24031752347946167, |
| "learning_rate": 9.680485107977357e-05, |
| "loss": 0.0183, |
| "step": 15870 |
| }, |
| { |
| "grad_norm": 0.22477011382579803, |
| "learning_rate": 9.679903257442716e-05, |
| "loss": 0.0205, |
| "step": 15880 |
| }, |
| { |
| "grad_norm": 0.23822762072086334, |
| "learning_rate": 9.679320895120891e-05, |
| "loss": 0.0199, |
| "step": 15890 |
| }, |
| { |
| "grad_norm": 0.24312853813171387, |
| "learning_rate": 9.67873802107557e-05, |
| "loss": 0.0224, |
| "step": 15900 |
| }, |
| { |
| "grad_norm": 0.1952960044145584, |
| "learning_rate": 9.67815463537049e-05, |
| "loss": 0.0208, |
| "step": 15910 |
| }, |
| { |
| "grad_norm": 0.24588929116725922, |
| "learning_rate": 9.677570738069457e-05, |
| "loss": 0.0249, |
| "step": 15920 |
| }, |
| { |
| "grad_norm": 0.3298582434654236, |
| "learning_rate": 9.676986329236318e-05, |
| "loss": 0.0194, |
| "step": 15930 |
| }, |
| { |
| "grad_norm": 0.263412743806839, |
| "learning_rate": 9.676401408934987e-05, |
| "loss": 0.0193, |
| "step": 15940 |
| }, |
| { |
| "grad_norm": 0.21354520320892334, |
| "learning_rate": 9.675815977229428e-05, |
| "loss": 0.0223, |
| "step": 15950 |
| }, |
| { |
| "grad_norm": 0.22564445436000824, |
| "learning_rate": 9.675230034183664e-05, |
| "loss": 0.0194, |
| "step": 15960 |
| }, |
| { |
| "grad_norm": 0.216091126203537, |
| "learning_rate": 9.674643579861773e-05, |
| "loss": 0.0188, |
| "step": 15970 |
| }, |
| { |
| "grad_norm": 0.182041734457016, |
| "learning_rate": 9.674056614327886e-05, |
| "loss": 0.0174, |
| "step": 15980 |
| }, |
| { |
| "grad_norm": 0.2135583609342575, |
| "learning_rate": 9.673469137646198e-05, |
| "loss": 0.0172, |
| "step": 15990 |
| }, |
| { |
| "grad_norm": 0.2592829465866089, |
| "learning_rate": 9.67288114988095e-05, |
| "loss": 0.0224, |
| "step": 16000 |
| }, |
| { |
| "grad_norm": 0.30103909969329834, |
| "learning_rate": 9.672292651096447e-05, |
| "loss": 0.02, |
| "step": 16010 |
| }, |
| { |
| "grad_norm": 0.18954713642597198, |
| "learning_rate": 9.671703641357042e-05, |
| "loss": 0.0212, |
| "step": 16020 |
| }, |
| { |
| "grad_norm": 0.2024218887090683, |
| "learning_rate": 9.67111412072715e-05, |
| "loss": 0.0186, |
| "step": 16030 |
| }, |
| { |
| "grad_norm": 0.23591367900371552, |
| "learning_rate": 9.670524089271242e-05, |
| "loss": 0.0175, |
| "step": 16040 |
| }, |
| { |
| "grad_norm": 0.2588534951210022, |
| "learning_rate": 9.669933547053842e-05, |
| "loss": 0.0189, |
| "step": 16050 |
| }, |
| { |
| "grad_norm": 0.28336942195892334, |
| "learning_rate": 9.669342494139531e-05, |
| "loss": 0.0188, |
| "step": 16060 |
| }, |
| { |
| "grad_norm": 0.21044746041297913, |
| "learning_rate": 9.668750930592943e-05, |
| "loss": 0.0169, |
| "step": 16070 |
| }, |
| { |
| "grad_norm": 0.2560344934463501, |
| "learning_rate": 9.668158856478775e-05, |
| "loss": 0.019, |
| "step": 16080 |
| }, |
| { |
| "grad_norm": 0.2520523965358734, |
| "learning_rate": 9.66756627186177e-05, |
| "loss": 0.0174, |
| "step": 16090 |
| }, |
| { |
| "grad_norm": 0.27171728014945984, |
| "learning_rate": 9.666973176806737e-05, |
| "loss": 0.018, |
| "step": 16100 |
| }, |
| { |
| "grad_norm": 0.28915536403656006, |
| "learning_rate": 9.666379571378534e-05, |
| "loss": 0.0186, |
| "step": 16110 |
| }, |
| { |
| "grad_norm": 0.2225058674812317, |
| "learning_rate": 9.665785455642076e-05, |
| "loss": 0.0181, |
| "step": 16120 |
| }, |
| { |
| "grad_norm": 0.26176416873931885, |
| "learning_rate": 9.665190829662337e-05, |
| "loss": 0.0198, |
| "step": 16130 |
| }, |
| { |
| "grad_norm": 0.29473400115966797, |
| "learning_rate": 9.664595693504342e-05, |
| "loss": 0.0191, |
| "step": 16140 |
| }, |
| { |
| "grad_norm": 0.29856547713279724, |
| "learning_rate": 9.664000047233175e-05, |
| "loss": 0.0198, |
| "step": 16150 |
| }, |
| { |
| "grad_norm": 0.2818152904510498, |
| "learning_rate": 9.663403890913976e-05, |
| "loss": 0.0201, |
| "step": 16160 |
| }, |
| { |
| "grad_norm": 0.25227588415145874, |
| "learning_rate": 9.662807224611938e-05, |
| "loss": 0.0178, |
| "step": 16170 |
| }, |
| { |
| "grad_norm": 0.2221623659133911, |
| "learning_rate": 9.662210048392311e-05, |
| "loss": 0.0182, |
| "step": 16180 |
| }, |
| { |
| "grad_norm": 0.2256096452474594, |
| "learning_rate": 9.661612362320405e-05, |
| "loss": 0.0173, |
| "step": 16190 |
| }, |
| { |
| "grad_norm": 0.23687534034252167, |
| "learning_rate": 9.661014166461579e-05, |
| "loss": 0.0176, |
| "step": 16200 |
| }, |
| { |
| "grad_norm": 0.22864700853824615, |
| "learning_rate": 9.66041546088125e-05, |
| "loss": 0.0175, |
| "step": 16210 |
| }, |
| { |
| "grad_norm": 0.2747984528541565, |
| "learning_rate": 9.659816245644895e-05, |
| "loss": 0.0205, |
| "step": 16220 |
| }, |
| { |
| "grad_norm": 0.2141098976135254, |
| "learning_rate": 9.65921652081804e-05, |
| "loss": 0.0168, |
| "step": 16230 |
| }, |
| { |
| "grad_norm": 0.32376036047935486, |
| "learning_rate": 9.658616286466271e-05, |
| "loss": 0.0203, |
| "step": 16240 |
| }, |
| { |
| "grad_norm": 0.24155114591121674, |
| "learning_rate": 9.65801554265523e-05, |
| "loss": 0.0158, |
| "step": 16250 |
| }, |
| { |
| "grad_norm": 0.2217189073562622, |
| "learning_rate": 9.657414289450612e-05, |
| "loss": 0.0227, |
| "step": 16260 |
| }, |
| { |
| "grad_norm": 0.28855669498443604, |
| "learning_rate": 9.656812526918171e-05, |
| "loss": 0.0186, |
| "step": 16270 |
| }, |
| { |
| "grad_norm": 0.203464537858963, |
| "learning_rate": 9.656210255123712e-05, |
| "loss": 0.0219, |
| "step": 16280 |
| }, |
| { |
| "grad_norm": 0.18729417026042938, |
| "learning_rate": 9.6556074741331e-05, |
| "loss": 0.0162, |
| "step": 16290 |
| }, |
| { |
| "grad_norm": 0.26334065198898315, |
| "learning_rate": 9.655004184012256e-05, |
| "loss": 0.0197, |
| "step": 16300 |
| }, |
| { |
| "grad_norm": 0.2082899808883667, |
| "learning_rate": 9.654400384827152e-05, |
| "loss": 0.019, |
| "step": 16310 |
| }, |
| { |
| "grad_norm": 0.21090373396873474, |
| "learning_rate": 9.653796076643818e-05, |
| "loss": 0.0185, |
| "step": 16320 |
| }, |
| { |
| "grad_norm": 0.23750464618206024, |
| "learning_rate": 9.653191259528344e-05, |
| "loss": 0.0166, |
| "step": 16330 |
| }, |
| { |
| "grad_norm": 0.23250509798526764, |
| "learning_rate": 9.65258593354687e-05, |
| "loss": 0.0166, |
| "step": 16340 |
| }, |
| { |
| "grad_norm": 0.31120097637176514, |
| "learning_rate": 9.651980098765591e-05, |
| "loss": 0.0196, |
| "step": 16350 |
| }, |
| { |
| "grad_norm": 0.27687424421310425, |
| "learning_rate": 9.651373755250765e-05, |
| "loss": 0.0215, |
| "step": 16360 |
| }, |
| { |
| "grad_norm": 0.19686377048492432, |
| "learning_rate": 9.650766903068697e-05, |
| "loss": 0.0159, |
| "step": 16370 |
| }, |
| { |
| "grad_norm": 0.20663948357105255, |
| "learning_rate": 9.650159542285753e-05, |
| "loss": 0.0161, |
| "step": 16380 |
| }, |
| { |
| "grad_norm": 0.24702349305152893, |
| "learning_rate": 9.649551672968353e-05, |
| "loss": 0.0177, |
| "step": 16390 |
| }, |
| { |
| "grad_norm": 0.21502424776554108, |
| "learning_rate": 9.648943295182973e-05, |
| "loss": 0.0217, |
| "step": 16400 |
| }, |
| { |
| "grad_norm": 0.20918725430965424, |
| "learning_rate": 9.648334408996144e-05, |
| "loss": 0.0195, |
| "step": 16410 |
| }, |
| { |
| "grad_norm": 0.2542378306388855, |
| "learning_rate": 9.647725014474452e-05, |
| "loss": 0.0203, |
| "step": 16420 |
| }, |
| { |
| "grad_norm": 0.22581025958061218, |
| "learning_rate": 9.64711511168454e-05, |
| "loss": 0.0166, |
| "step": 16430 |
| }, |
| { |
| "grad_norm": 0.21214455366134644, |
| "learning_rate": 9.646504700693108e-05, |
| "loss": 0.0164, |
| "step": 16440 |
| }, |
| { |
| "grad_norm": 0.22136463224887848, |
| "learning_rate": 9.645893781566907e-05, |
| "loss": 0.0203, |
| "step": 16450 |
| }, |
| { |
| "grad_norm": 0.3346709907054901, |
| "learning_rate": 9.645282354372744e-05, |
| "loss": 0.0199, |
| "step": 16460 |
| }, |
| { |
| "grad_norm": 0.22477203607559204, |
| "learning_rate": 9.644670419177491e-05, |
| "loss": 0.0165, |
| "step": 16470 |
| }, |
| { |
| "grad_norm": 0.22344274818897247, |
| "learning_rate": 9.644057976048062e-05, |
| "loss": 0.0184, |
| "step": 16480 |
| }, |
| { |
| "grad_norm": 0.20499740540981293, |
| "learning_rate": 9.643445025051435e-05, |
| "loss": 0.0168, |
| "step": 16490 |
| }, |
| { |
| "grad_norm": 0.1834016740322113, |
| "learning_rate": 9.642831566254641e-05, |
| "loss": 0.0179, |
| "step": 16500 |
| }, |
| { |
| "grad_norm": 0.2168210744857788, |
| "learning_rate": 9.642217599724769e-05, |
| "loss": 0.0133, |
| "step": 16510 |
| }, |
| { |
| "grad_norm": 0.22100216150283813, |
| "learning_rate": 9.64160312552896e-05, |
| "loss": 0.0173, |
| "step": 16520 |
| }, |
| { |
| "grad_norm": 0.24819809198379517, |
| "learning_rate": 9.64098814373441e-05, |
| "loss": 0.0192, |
| "step": 16530 |
| }, |
| { |
| "grad_norm": 0.27600330114364624, |
| "learning_rate": 9.640372654408374e-05, |
| "loss": 0.0169, |
| "step": 16540 |
| }, |
| { |
| "grad_norm": 0.1754896193742752, |
| "learning_rate": 9.639756657618162e-05, |
| "loss": 0.0153, |
| "step": 16550 |
| }, |
| { |
| "grad_norm": 0.2038646787405014, |
| "learning_rate": 9.639140153431138e-05, |
| "loss": 0.0196, |
| "step": 16560 |
| }, |
| { |
| "grad_norm": 0.22871485352516174, |
| "learning_rate": 9.638523141914721e-05, |
| "loss": 0.0203, |
| "step": 16570 |
| }, |
| { |
| "grad_norm": 0.2602187991142273, |
| "learning_rate": 9.637905623136388e-05, |
| "loss": 0.0169, |
| "step": 16580 |
| }, |
| { |
| "grad_norm": 0.19301439821720123, |
| "learning_rate": 9.637287597163669e-05, |
| "loss": 0.0178, |
| "step": 16590 |
| }, |
| { |
| "grad_norm": 0.20549316704273224, |
| "learning_rate": 9.63666906406415e-05, |
| "loss": 0.0172, |
| "step": 16600 |
| }, |
| { |
| "grad_norm": 0.18847408890724182, |
| "learning_rate": 9.636050023905473e-05, |
| "loss": 0.0147, |
| "step": 16610 |
| }, |
| { |
| "grad_norm": 0.20408432185649872, |
| "learning_rate": 9.635430476755336e-05, |
| "loss": 0.0174, |
| "step": 16620 |
| }, |
| { |
| "grad_norm": 0.19497902691364288, |
| "learning_rate": 9.63481042268149e-05, |
| "loss": 0.0195, |
| "step": 16630 |
| }, |
| { |
| "grad_norm": 0.22574973106384277, |
| "learning_rate": 9.634189861751745e-05, |
| "loss": 0.0159, |
| "step": 16640 |
| }, |
| { |
| "grad_norm": 0.21444794535636902, |
| "learning_rate": 9.633568794033967e-05, |
| "loss": 0.0226, |
| "step": 16650 |
| }, |
| { |
| "grad_norm": 0.24873623251914978, |
| "learning_rate": 9.63294721959607e-05, |
| "loss": 0.0161, |
| "step": 16660 |
| }, |
| { |
| "grad_norm": 0.26096534729003906, |
| "learning_rate": 9.63232513850603e-05, |
| "loss": 0.0203, |
| "step": 16670 |
| }, |
| { |
| "grad_norm": 0.22449441254138947, |
| "learning_rate": 9.631702550831878e-05, |
| "loss": 0.0177, |
| "step": 16680 |
| }, |
| { |
| "grad_norm": 0.23263201117515564, |
| "learning_rate": 9.631079456641698e-05, |
| "loss": 0.0157, |
| "step": 16690 |
| }, |
| { |
| "grad_norm": 0.22929039597511292, |
| "learning_rate": 9.630455856003632e-05, |
| "loss": 0.0165, |
| "step": 16700 |
| }, |
| { |
| "grad_norm": 0.16631029546260834, |
| "learning_rate": 9.629831748985876e-05, |
| "loss": 0.0177, |
| "step": 16710 |
| }, |
| { |
| "grad_norm": 0.23983977735042572, |
| "learning_rate": 9.629207135656679e-05, |
| "loss": 0.015, |
| "step": 16720 |
| }, |
| { |
| "grad_norm": 0.25655442476272583, |
| "learning_rate": 9.628582016084353e-05, |
| "loss": 0.0165, |
| "step": 16730 |
| }, |
| { |
| "grad_norm": 0.29186007380485535, |
| "learning_rate": 9.627956390337254e-05, |
| "loss": 0.0199, |
| "step": 16740 |
| }, |
| { |
| "grad_norm": 0.27269017696380615, |
| "learning_rate": 9.627330258483802e-05, |
| "loss": 0.0191, |
| "step": 16750 |
| }, |
| { |
| "grad_norm": 0.34987184405326843, |
| "learning_rate": 9.62670362059247e-05, |
| "loss": 0.0193, |
| "step": 16760 |
| }, |
| { |
| "grad_norm": 0.32363274693489075, |
| "learning_rate": 9.626076476731786e-05, |
| "loss": 0.0201, |
| "step": 16770 |
| }, |
| { |
| "grad_norm": 0.2766304314136505, |
| "learning_rate": 9.625448826970336e-05, |
| "loss": 0.0181, |
| "step": 16780 |
| }, |
| { |
| "grad_norm": 0.27444687485694885, |
| "learning_rate": 9.624820671376755e-05, |
| "loss": 0.0158, |
| "step": 16790 |
| }, |
| { |
| "grad_norm": 0.22991421818733215, |
| "learning_rate": 9.62419201001974e-05, |
| "loss": 0.0168, |
| "step": 16800 |
| }, |
| { |
| "grad_norm": 0.26918646693229675, |
| "learning_rate": 9.623562842968037e-05, |
| "loss": 0.0143, |
| "step": 16810 |
| }, |
| { |
| "grad_norm": 0.29095256328582764, |
| "learning_rate": 9.622933170290454e-05, |
| "loss": 0.0172, |
| "step": 16820 |
| }, |
| { |
| "grad_norm": 0.21555963158607483, |
| "learning_rate": 9.622302992055849e-05, |
| "loss": 0.0183, |
| "step": 16830 |
| }, |
| { |
| "grad_norm": 0.2406519651412964, |
| "learning_rate": 9.62167230833314e-05, |
| "loss": 0.0159, |
| "step": 16840 |
| }, |
| { |
| "grad_norm": 0.26476937532424927, |
| "learning_rate": 9.621041119191295e-05, |
| "loss": 0.0196, |
| "step": 16850 |
| }, |
| { |
| "grad_norm": 0.22450637817382812, |
| "learning_rate": 9.620409424699342e-05, |
| "loss": 0.0169, |
| "step": 16860 |
| }, |
| { |
| "grad_norm": 0.2418346256017685, |
| "learning_rate": 9.619777224926359e-05, |
| "loss": 0.0173, |
| "step": 16870 |
| }, |
| { |
| "grad_norm": 0.254596084356308, |
| "learning_rate": 9.619144519941485e-05, |
| "loss": 0.0167, |
| "step": 16880 |
| }, |
| { |
| "grad_norm": 0.22944311797618866, |
| "learning_rate": 9.618511309813912e-05, |
| "loss": 0.0179, |
| "step": 16890 |
| }, |
| { |
| "grad_norm": 0.2638665735721588, |
| "learning_rate": 9.617877594612886e-05, |
| "loss": 0.0193, |
| "step": 16900 |
| }, |
| { |
| "grad_norm": 0.24032092094421387, |
| "learning_rate": 9.617243374407707e-05, |
| "loss": 0.0174, |
| "step": 16910 |
| }, |
| { |
| "grad_norm": 0.25759685039520264, |
| "learning_rate": 9.616608649267736e-05, |
| "loss": 0.0202, |
| "step": 16920 |
| }, |
| { |
| "grad_norm": 0.24273432791233063, |
| "learning_rate": 9.615973419262385e-05, |
| "loss": 0.0165, |
| "step": 16930 |
| }, |
| { |
| "grad_norm": 0.24848264455795288, |
| "learning_rate": 9.615337684461119e-05, |
| "loss": 0.0176, |
| "step": 16940 |
| }, |
| { |
| "grad_norm": 0.26598796248435974, |
| "learning_rate": 9.614701444933465e-05, |
| "loss": 0.0202, |
| "step": 16950 |
| }, |
| { |
| "grad_norm": 0.2356976568698883, |
| "learning_rate": 9.614064700748997e-05, |
| "loss": 0.0213, |
| "step": 16960 |
| }, |
| { |
| "grad_norm": 0.2209087312221527, |
| "learning_rate": 9.613427451977352e-05, |
| "loss": 0.0215, |
| "step": 16970 |
| }, |
| { |
| "grad_norm": 0.2411251813173294, |
| "learning_rate": 9.612789698688216e-05, |
| "loss": 0.0175, |
| "step": 16980 |
| }, |
| { |
| "grad_norm": 0.20533424615859985, |
| "learning_rate": 9.612151440951334e-05, |
| "loss": 0.015, |
| "step": 16990 |
| }, |
| { |
| "grad_norm": 0.2369515299797058, |
| "learning_rate": 9.611512678836506e-05, |
| "loss": 0.0191, |
| "step": 17000 |
| }, |
| { |
| "grad_norm": 0.22637242078781128, |
| "learning_rate": 9.610873412413584e-05, |
| "loss": 0.0241, |
| "step": 17010 |
| }, |
| { |
| "grad_norm": 0.2567742168903351, |
| "learning_rate": 9.610233641752476e-05, |
| "loss": 0.0204, |
| "step": 17020 |
| }, |
| { |
| "grad_norm": 0.23093710839748383, |
| "learning_rate": 9.609593366923151e-05, |
| "loss": 0.0183, |
| "step": 17030 |
| }, |
| { |
| "grad_norm": 0.38678979873657227, |
| "learning_rate": 9.608952587995625e-05, |
| "loss": 0.0214, |
| "step": 17040 |
| }, |
| { |
| "grad_norm": 0.2566768229007721, |
| "learning_rate": 9.608311305039972e-05, |
| "loss": 0.0234, |
| "step": 17050 |
| }, |
| { |
| "grad_norm": 0.22188995778560638, |
| "learning_rate": 9.607669518126326e-05, |
| "loss": 0.0201, |
| "step": 17060 |
| }, |
| { |
| "grad_norm": 0.2851111590862274, |
| "learning_rate": 9.607027227324866e-05, |
| "loss": 0.0207, |
| "step": 17070 |
| }, |
| { |
| "grad_norm": 0.25374385714530945, |
| "learning_rate": 9.606384432705837e-05, |
| "loss": 0.0166, |
| "step": 17080 |
| }, |
| { |
| "grad_norm": 0.20746755599975586, |
| "learning_rate": 9.60574113433953e-05, |
| "loss": 0.0203, |
| "step": 17090 |
| }, |
| { |
| "grad_norm": 0.25531473755836487, |
| "learning_rate": 9.6050973322963e-05, |
| "loss": 0.0159, |
| "step": 17100 |
| }, |
| { |
| "grad_norm": 0.2727971076965332, |
| "learning_rate": 9.604453026646547e-05, |
| "loss": 0.0167, |
| "step": 17110 |
| }, |
| { |
| "grad_norm": 0.24038639664649963, |
| "learning_rate": 9.603808217460735e-05, |
| "loss": 0.0183, |
| "step": 17120 |
| }, |
| { |
| "grad_norm": 0.25764548778533936, |
| "learning_rate": 9.603162904809377e-05, |
| "loss": 0.0181, |
| "step": 17130 |
| }, |
| { |
| "grad_norm": 0.19836722314357758, |
| "learning_rate": 9.602517088763045e-05, |
| "loss": 0.0191, |
| "step": 17140 |
| }, |
| { |
| "grad_norm": 0.275120347738266, |
| "learning_rate": 9.601870769392365e-05, |
| "loss": 0.0176, |
| "step": 17150 |
| }, |
| { |
| "grad_norm": 0.22377800941467285, |
| "learning_rate": 9.601223946768017e-05, |
| "loss": 0.0194, |
| "step": 17160 |
| }, |
| { |
| "grad_norm": 0.31283488869667053, |
| "learning_rate": 9.600576620960734e-05, |
| "loss": 0.0165, |
| "step": 17170 |
| }, |
| { |
| "grad_norm": 0.16364149749279022, |
| "learning_rate": 9.599928792041308e-05, |
| "loss": 0.0166, |
| "step": 17180 |
| }, |
| { |
| "grad_norm": 0.23527780175209045, |
| "learning_rate": 9.599280460080587e-05, |
| "loss": 0.0209, |
| "step": 17190 |
| }, |
| { |
| "grad_norm": 0.2357402741909027, |
| "learning_rate": 9.59863162514947e-05, |
| "loss": 0.0161, |
| "step": 17200 |
| }, |
| { |
| "grad_norm": 0.22466665506362915, |
| "learning_rate": 9.597982287318911e-05, |
| "loss": 0.0146, |
| "step": 17210 |
| }, |
| { |
| "grad_norm": 0.21593722701072693, |
| "learning_rate": 9.597332446659923e-05, |
| "loss": 0.0178, |
| "step": 17220 |
| }, |
| { |
| "grad_norm": 0.27758243680000305, |
| "learning_rate": 9.59668210324357e-05, |
| "loss": 0.0209, |
| "step": 17230 |
| }, |
| { |
| "grad_norm": 0.24260929226875305, |
| "learning_rate": 9.596031257140974e-05, |
| "loss": 0.0191, |
| "step": 17240 |
| }, |
| { |
| "grad_norm": 0.3037336468696594, |
| "learning_rate": 9.59537990842331e-05, |
| "loss": 0.0168, |
| "step": 17250 |
| }, |
| { |
| "grad_norm": 0.17514759302139282, |
| "learning_rate": 9.594728057161806e-05, |
| "loss": 0.0194, |
| "step": 17260 |
| }, |
| { |
| "grad_norm": 0.2643806040287018, |
| "learning_rate": 9.594075703427752e-05, |
| "loss": 0.0171, |
| "step": 17270 |
| }, |
| { |
| "grad_norm": 0.12826664745807648, |
| "learning_rate": 9.593422847292486e-05, |
| "loss": 0.0168, |
| "step": 17280 |
| }, |
| { |
| "grad_norm": 0.18956172466278076, |
| "learning_rate": 9.592769488827402e-05, |
| "loss": 0.0159, |
| "step": 17290 |
| }, |
| { |
| "grad_norm": 0.20451869070529938, |
| "learning_rate": 9.592115628103952e-05, |
| "loss": 0.0185, |
| "step": 17300 |
| }, |
| { |
| "grad_norm": 0.23827993869781494, |
| "learning_rate": 9.591461265193643e-05, |
| "loss": 0.0167, |
| "step": 17310 |
| }, |
| { |
| "grad_norm": 0.20346641540527344, |
| "learning_rate": 9.590806400168032e-05, |
| "loss": 0.018, |
| "step": 17320 |
| }, |
| { |
| "grad_norm": 0.20496748387813568, |
| "learning_rate": 9.590151033098735e-05, |
| "loss": 0.0199, |
| "step": 17330 |
| }, |
| { |
| "grad_norm": 0.24242371320724487, |
| "learning_rate": 9.589495164057423e-05, |
| "loss": 0.0171, |
| "step": 17340 |
| }, |
| { |
| "grad_norm": 0.24139827489852905, |
| "learning_rate": 9.58883879311582e-05, |
| "loss": 0.0193, |
| "step": 17350 |
| }, |
| { |
| "grad_norm": 0.20769353210926056, |
| "learning_rate": 9.588181920345705e-05, |
| "loss": 0.0172, |
| "step": 17360 |
| }, |
| { |
| "grad_norm": 0.19775322079658508, |
| "learning_rate": 9.587524545818913e-05, |
| "loss": 0.0153, |
| "step": 17370 |
| }, |
| { |
| "grad_norm": 0.2737868130207062, |
| "learning_rate": 9.586866669607335e-05, |
| "loss": 0.0168, |
| "step": 17380 |
| }, |
| { |
| "grad_norm": 0.19795580208301544, |
| "learning_rate": 9.586208291782915e-05, |
| "loss": 0.0178, |
| "step": 17390 |
| }, |
| { |
| "grad_norm": 0.2711477279663086, |
| "learning_rate": 9.58554941241765e-05, |
| "loss": 0.0201, |
| "step": 17400 |
| }, |
| { |
| "grad_norm": 0.24536965787410736, |
| "learning_rate": 9.584890031583596e-05, |
| "loss": 0.0183, |
| "step": 17410 |
| }, |
| { |
| "grad_norm": 0.34444183111190796, |
| "learning_rate": 9.584230149352861e-05, |
| "loss": 0.0177, |
| "step": 17420 |
| }, |
| { |
| "grad_norm": 0.27383139729499817, |
| "learning_rate": 9.58356976579761e-05, |
| "loss": 0.0176, |
| "step": 17430 |
| }, |
| { |
| "grad_norm": 0.2097596675157547, |
| "learning_rate": 9.58290888099006e-05, |
| "loss": 0.019, |
| "step": 17440 |
| }, |
| { |
| "grad_norm": 0.25391826033592224, |
| "learning_rate": 9.582247495002486e-05, |
| "loss": 0.0187, |
| "step": 17450 |
| }, |
| { |
| "grad_norm": 0.19036152958869934, |
| "learning_rate": 9.581585607907214e-05, |
| "loss": 0.0167, |
| "step": 17460 |
| }, |
| { |
| "grad_norm": 0.22091791033744812, |
| "learning_rate": 9.580923219776628e-05, |
| "loss": 0.0188, |
| "step": 17470 |
| }, |
| { |
| "grad_norm": 0.2458617240190506, |
| "learning_rate": 9.580260330683167e-05, |
| "loss": 0.0183, |
| "step": 17480 |
| }, |
| { |
| "grad_norm": 0.23816366493701935, |
| "learning_rate": 9.579596940699322e-05, |
| "loss": 0.0172, |
| "step": 17490 |
| }, |
| { |
| "grad_norm": 0.2567879855632782, |
| "learning_rate": 9.578933049897643e-05, |
| "loss": 0.0179, |
| "step": 17500 |
| }, |
| { |
| "grad_norm": 0.24667951464653015, |
| "learning_rate": 9.578268658350728e-05, |
| "loss": 0.0178, |
| "step": 17510 |
| }, |
| { |
| "grad_norm": 0.24973371624946594, |
| "learning_rate": 9.577603766131235e-05, |
| "loss": 0.0152, |
| "step": 17520 |
| }, |
| { |
| "grad_norm": 0.2715742886066437, |
| "learning_rate": 9.576938373311878e-05, |
| "loss": 0.0186, |
| "step": 17530 |
| }, |
| { |
| "grad_norm": 0.25942331552505493, |
| "learning_rate": 9.576272479965421e-05, |
| "loss": 0.0177, |
| "step": 17540 |
| }, |
| { |
| "grad_norm": 0.2839871346950531, |
| "learning_rate": 9.575606086164687e-05, |
| "loss": 0.0163, |
| "step": 17550 |
| }, |
| { |
| "grad_norm": 0.22360889613628387, |
| "learning_rate": 9.57493919198255e-05, |
| "loss": 0.0169, |
| "step": 17560 |
| }, |
| { |
| "grad_norm": 0.2493814378976822, |
| "learning_rate": 9.57427179749194e-05, |
| "loss": 0.0189, |
| "step": 17570 |
| }, |
| { |
| "grad_norm": 0.2246805876493454, |
| "learning_rate": 9.573603902765846e-05, |
| "loss": 0.0182, |
| "step": 17580 |
| }, |
| { |
| "grad_norm": 0.2053459733724594, |
| "learning_rate": 9.572935507877304e-05, |
| "loss": 0.0178, |
| "step": 17590 |
| }, |
| { |
| "grad_norm": 0.24530400335788727, |
| "learning_rate": 9.57226661289941e-05, |
| "loss": 0.0153, |
| "step": 17600 |
| }, |
| { |
| "grad_norm": 0.2642321288585663, |
| "learning_rate": 9.571597217905315e-05, |
| "loss": 0.017, |
| "step": 17610 |
| }, |
| { |
| "grad_norm": 0.18858280777931213, |
| "learning_rate": 9.57092732296822e-05, |
| "loss": 0.0162, |
| "step": 17620 |
| }, |
| { |
| "grad_norm": 0.24414698779582977, |
| "learning_rate": 9.570256928161385e-05, |
| "loss": 0.0184, |
| "step": 17630 |
| }, |
| { |
| "grad_norm": 0.25567445158958435, |
| "learning_rate": 9.569586033558126e-05, |
| "loss": 0.018, |
| "step": 17640 |
| }, |
| { |
| "grad_norm": 0.23591777682304382, |
| "learning_rate": 9.568914639231807e-05, |
| "loss": 0.0141, |
| "step": 17650 |
| }, |
| { |
| "grad_norm": 0.2599583864212036, |
| "learning_rate": 9.568242745255852e-05, |
| "loss": 0.0154, |
| "step": 17660 |
| }, |
| { |
| "grad_norm": 0.17688848078250885, |
| "learning_rate": 9.567570351703739e-05, |
| "loss": 0.0181, |
| "step": 17670 |
| }, |
| { |
| "grad_norm": 0.15748238563537598, |
| "learning_rate": 9.566897458649001e-05, |
| "loss": 0.0213, |
| "step": 17680 |
| }, |
| { |
| "grad_norm": 0.26078203320503235, |
| "learning_rate": 9.566224066165221e-05, |
| "loss": 0.0164, |
| "step": 17690 |
| }, |
| { |
| "grad_norm": 0.2592557966709137, |
| "learning_rate": 9.565550174326043e-05, |
| "loss": 0.0187, |
| "step": 17700 |
| }, |
| { |
| "grad_norm": 0.1889854073524475, |
| "learning_rate": 9.564875783205162e-05, |
| "loss": 0.0163, |
| "step": 17710 |
| }, |
| { |
| "grad_norm": 0.1754608303308487, |
| "learning_rate": 9.564200892876328e-05, |
| "loss": 0.019, |
| "step": 17720 |
| }, |
| { |
| "grad_norm": 0.18894585967063904, |
| "learning_rate": 9.563525503413348e-05, |
| "loss": 0.016, |
| "step": 17730 |
| }, |
| { |
| "grad_norm": 0.22855456173419952, |
| "learning_rate": 9.562849614890079e-05, |
| "loss": 0.0198, |
| "step": 17740 |
| }, |
| { |
| "grad_norm": 0.24553902447223663, |
| "learning_rate": 9.562173227380436e-05, |
| "loss": 0.0144, |
| "step": 17750 |
| }, |
| { |
| "grad_norm": 0.28026923537254333, |
| "learning_rate": 9.561496340958389e-05, |
| "loss": 0.0248, |
| "step": 17760 |
| }, |
| { |
| "grad_norm": 0.24462267756462097, |
| "learning_rate": 9.560818955697959e-05, |
| "loss": 0.0191, |
| "step": 17770 |
| }, |
| { |
| "grad_norm": 0.2001575380563736, |
| "learning_rate": 9.560141071673228e-05, |
| "loss": 0.017, |
| "step": 17780 |
| }, |
| { |
| "grad_norm": 0.22568632662296295, |
| "learning_rate": 9.559462688958323e-05, |
| "loss": 0.0188, |
| "step": 17790 |
| }, |
| { |
| "grad_norm": 0.26937243342399597, |
| "learning_rate": 9.558783807627434e-05, |
| "loss": 0.0172, |
| "step": 17800 |
| }, |
| { |
| "grad_norm": 0.27089959383010864, |
| "learning_rate": 9.558104427754801e-05, |
| "loss": 0.0209, |
| "step": 17810 |
| }, |
| { |
| "grad_norm": 0.2073383927345276, |
| "learning_rate": 9.557424549414722e-05, |
| "loss": 0.0177, |
| "step": 17820 |
| }, |
| { |
| "grad_norm": 0.21644099056720734, |
| "learning_rate": 9.556744172681546e-05, |
| "loss": 0.0152, |
| "step": 17830 |
| }, |
| { |
| "grad_norm": 0.21143971383571625, |
| "learning_rate": 9.556063297629677e-05, |
| "loss": 0.0176, |
| "step": 17840 |
| }, |
| { |
| "grad_norm": 0.20659197866916656, |
| "learning_rate": 9.555381924333578e-05, |
| "loss": 0.0164, |
| "step": 17850 |
| }, |
| { |
| "grad_norm": 0.24294114112854004, |
| "learning_rate": 9.554700052867758e-05, |
| "loss": 0.0173, |
| "step": 17860 |
| }, |
| { |
| "grad_norm": 0.27616581320762634, |
| "learning_rate": 9.554017683306789e-05, |
| "loss": 0.0211, |
| "step": 17870 |
| }, |
| { |
| "grad_norm": 0.224766805768013, |
| "learning_rate": 9.553334815725294e-05, |
| "loss": 0.0162, |
| "step": 17880 |
| }, |
| { |
| "grad_norm": 0.271637886762619, |
| "learning_rate": 9.552651450197949e-05, |
| "loss": 0.0193, |
| "step": 17890 |
| }, |
| { |
| "grad_norm": 0.2578735649585724, |
| "learning_rate": 9.551967586799486e-05, |
| "loss": 0.017, |
| "step": 17900 |
| }, |
| { |
| "grad_norm": 0.1847911775112152, |
| "learning_rate": 9.551283225604692e-05, |
| "loss": 0.0198, |
| "step": 17910 |
| }, |
| { |
| "grad_norm": 0.23736201226711273, |
| "learning_rate": 9.550598366688406e-05, |
| "loss": 0.0198, |
| "step": 17920 |
| }, |
| { |
| "grad_norm": 0.22950343787670135, |
| "learning_rate": 9.549913010125526e-05, |
| "loss": 0.0164, |
| "step": 17930 |
| }, |
| { |
| "grad_norm": 0.24767309427261353, |
| "learning_rate": 9.549227155990999e-05, |
| "loss": 0.0176, |
| "step": 17940 |
| }, |
| { |
| "grad_norm": 0.18506084382534027, |
| "learning_rate": 9.548540804359828e-05, |
| "loss": 0.0154, |
| "step": 17950 |
| }, |
| { |
| "grad_norm": 0.2415982335805893, |
| "learning_rate": 9.547853955307077e-05, |
| "loss": 0.0206, |
| "step": 17960 |
| }, |
| { |
| "grad_norm": 0.24404023587703705, |
| "learning_rate": 9.547166608907853e-05, |
| "loss": 0.0178, |
| "step": 17970 |
| }, |
| { |
| "grad_norm": 0.2148423045873642, |
| "learning_rate": 9.546478765237326e-05, |
| "loss": 0.0161, |
| "step": 17980 |
| }, |
| { |
| "grad_norm": 0.18518278002738953, |
| "learning_rate": 9.545790424370715e-05, |
| "loss": 0.0181, |
| "step": 17990 |
| }, |
| { |
| "grad_norm": 0.2136387825012207, |
| "learning_rate": 9.5451015863833e-05, |
| "loss": 0.0161, |
| "step": 18000 |
| }, |
| { |
| "grad_norm": 0.27653440833091736, |
| "learning_rate": 9.544412251350408e-05, |
| "loss": 0.0197, |
| "step": 18010 |
| }, |
| { |
| "grad_norm": 0.27941060066223145, |
| "learning_rate": 9.543722419347422e-05, |
| "loss": 0.0159, |
| "step": 18020 |
| }, |
| { |
| "grad_norm": 0.24115628004074097, |
| "learning_rate": 9.543032090449788e-05, |
| "loss": 0.0149, |
| "step": 18030 |
| }, |
| { |
| "grad_norm": 0.1998482495546341, |
| "learning_rate": 9.542341264732992e-05, |
| "loss": 0.0161, |
| "step": 18040 |
| }, |
| { |
| "grad_norm": 0.20984019339084625, |
| "learning_rate": 9.541649942272585e-05, |
| "loss": 0.0174, |
| "step": 18050 |
| }, |
| { |
| "grad_norm": 0.25274404883384705, |
| "learning_rate": 9.54095812314417e-05, |
| "loss": 0.0203, |
| "step": 18060 |
| }, |
| { |
| "grad_norm": 0.253617525100708, |
| "learning_rate": 9.540265807423401e-05, |
| "loss": 0.0189, |
| "step": 18070 |
| }, |
| { |
| "grad_norm": 0.24331985414028168, |
| "learning_rate": 9.53957299518599e-05, |
| "loss": 0.0161, |
| "step": 18080 |
| }, |
| { |
| "grad_norm": 0.2861975133419037, |
| "learning_rate": 9.5388796865077e-05, |
| "loss": 0.0181, |
| "step": 18090 |
| }, |
| { |
| "grad_norm": 0.21068565547466278, |
| "learning_rate": 9.538185881464353e-05, |
| "loss": 0.0184, |
| "step": 18100 |
| }, |
| { |
| "grad_norm": 0.23860912024974823, |
| "learning_rate": 9.537491580131821e-05, |
| "loss": 0.016, |
| "step": 18110 |
| }, |
| { |
| "grad_norm": 0.20223286747932434, |
| "learning_rate": 9.53679678258603e-05, |
| "loss": 0.0211, |
| "step": 18120 |
| }, |
| { |
| "grad_norm": 0.2065197378396988, |
| "learning_rate": 9.536101488902966e-05, |
| "loss": 0.0186, |
| "step": 18130 |
| }, |
| { |
| "grad_norm": 0.26642438769340515, |
| "learning_rate": 9.535405699158663e-05, |
| "loss": 0.0147, |
| "step": 18140 |
| }, |
| { |
| "grad_norm": 0.21723781526088715, |
| "learning_rate": 9.53470941342921e-05, |
| "loss": 0.0137, |
| "step": 18150 |
| }, |
| { |
| "grad_norm": 0.2028735876083374, |
| "learning_rate": 9.534012631790756e-05, |
| "loss": 0.0158, |
| "step": 18160 |
| }, |
| { |
| "grad_norm": 0.1708681732416153, |
| "learning_rate": 9.533315354319494e-05, |
| "loss": 0.0146, |
| "step": 18170 |
| }, |
| { |
| "grad_norm": 0.2970765233039856, |
| "learning_rate": 9.532617581091682e-05, |
| "loss": 0.0161, |
| "step": 18180 |
| }, |
| { |
| "grad_norm": 0.2009182721376419, |
| "learning_rate": 9.531919312183629e-05, |
| "loss": 0.0174, |
| "step": 18190 |
| }, |
| { |
| "grad_norm": 0.22930586338043213, |
| "learning_rate": 9.531220547671688e-05, |
| "loss": 0.018, |
| "step": 18200 |
| }, |
| { |
| "grad_norm": 0.220820352435112, |
| "learning_rate": 9.530521287632285e-05, |
| "loss": 0.0157, |
| "step": 18210 |
| }, |
| { |
| "grad_norm": 0.23223398625850677, |
| "learning_rate": 9.529821532141884e-05, |
| "loss": 0.0169, |
| "step": 18220 |
| }, |
| { |
| "grad_norm": 0.259183794260025, |
| "learning_rate": 9.52912128127701e-05, |
| "loss": 0.0199, |
| "step": 18230 |
| }, |
| { |
| "grad_norm": 0.2559663951396942, |
| "learning_rate": 9.528420535114244e-05, |
| "loss": 0.0207, |
| "step": 18240 |
| }, |
| { |
| "grad_norm": 0.22421777248382568, |
| "learning_rate": 9.527719293730215e-05, |
| "loss": 0.0186, |
| "step": 18250 |
| }, |
| { |
| "grad_norm": 0.2555665969848633, |
| "learning_rate": 9.527017557201611e-05, |
| "loss": 0.0175, |
| "step": 18260 |
| }, |
| { |
| "grad_norm": 0.2150653451681137, |
| "learning_rate": 9.526315325605176e-05, |
| "loss": 0.0191, |
| "step": 18270 |
| }, |
| { |
| "grad_norm": 0.18184694647789001, |
| "learning_rate": 9.525612599017699e-05, |
| "loss": 0.0182, |
| "step": 18280 |
| }, |
| { |
| "grad_norm": 0.21569839119911194, |
| "learning_rate": 9.524909377516033e-05, |
| "loss": 0.0163, |
| "step": 18290 |
| }, |
| { |
| "grad_norm": 0.22885504364967346, |
| "learning_rate": 9.524205661177081e-05, |
| "loss": 0.0146, |
| "step": 18300 |
| }, |
| { |
| "grad_norm": 0.2409166693687439, |
| "learning_rate": 9.523501450077801e-05, |
| "loss": 0.0205, |
| "step": 18310 |
| }, |
| { |
| "grad_norm": 0.2402997761964798, |
| "learning_rate": 9.522796744295202e-05, |
| "loss": 0.0156, |
| "step": 18320 |
| }, |
| { |
| "grad_norm": 0.24736376106739044, |
| "learning_rate": 9.522091543906352e-05, |
| "loss": 0.0213, |
| "step": 18330 |
| }, |
| { |
| "grad_norm": 0.2833060622215271, |
| "learning_rate": 9.521385848988369e-05, |
| "loss": 0.0181, |
| "step": 18340 |
| }, |
| { |
| "grad_norm": 0.2343091368675232, |
| "learning_rate": 9.520679659618428e-05, |
| "loss": 0.0175, |
| "step": 18350 |
| }, |
| { |
| "grad_norm": 0.212591752409935, |
| "learning_rate": 9.519972975873754e-05, |
| "loss": 0.0168, |
| "step": 18360 |
| }, |
| { |
| "grad_norm": 0.23697754740715027, |
| "learning_rate": 9.519265797831633e-05, |
| "loss": 0.0162, |
| "step": 18370 |
| }, |
| { |
| "grad_norm": 0.21745391190052032, |
| "learning_rate": 9.518558125569399e-05, |
| "loss": 0.0169, |
| "step": 18380 |
| }, |
| { |
| "grad_norm": 0.2738904058933258, |
| "learning_rate": 9.517849959164442e-05, |
| "loss": 0.0149, |
| "step": 18390 |
| }, |
| { |
| "grad_norm": 0.22212840616703033, |
| "learning_rate": 9.517141298694205e-05, |
| "loss": 0.0158, |
| "step": 18400 |
| }, |
| { |
| "grad_norm": 0.2424498349428177, |
| "learning_rate": 9.516432144236188e-05, |
| "loss": 0.0171, |
| "step": 18410 |
| }, |
| { |
| "grad_norm": 0.2065114676952362, |
| "learning_rate": 9.515722495867941e-05, |
| "loss": 0.0163, |
| "step": 18420 |
| }, |
| { |
| "grad_norm": 0.2845364809036255, |
| "learning_rate": 9.515012353667072e-05, |
| "loss": 0.0187, |
| "step": 18430 |
| }, |
| { |
| "grad_norm": 0.23354214429855347, |
| "learning_rate": 9.51430171771124e-05, |
| "loss": 0.0195, |
| "step": 18440 |
| }, |
| { |
| "grad_norm": 0.261211097240448, |
| "learning_rate": 9.513590588078159e-05, |
| "loss": 0.0187, |
| "step": 18450 |
| }, |
| { |
| "grad_norm": 0.17390166223049164, |
| "learning_rate": 9.512878964845597e-05, |
| "loss": 0.0149, |
| "step": 18460 |
| }, |
| { |
| "grad_norm": 0.2599153518676758, |
| "learning_rate": 9.512166848091377e-05, |
| "loss": 0.0149, |
| "step": 18470 |
| }, |
| { |
| "grad_norm": 0.20262955129146576, |
| "learning_rate": 9.511454237893376e-05, |
| "loss": 0.0161, |
| "step": 18480 |
| }, |
| { |
| "grad_norm": 0.2117965966463089, |
| "learning_rate": 9.51074113432952e-05, |
| "loss": 0.0207, |
| "step": 18490 |
| }, |
| { |
| "grad_norm": 0.2957056760787964, |
| "learning_rate": 9.510027537477797e-05, |
| "loss": 0.0169, |
| "step": 18500 |
| }, |
| { |
| "grad_norm": 0.2731700539588928, |
| "learning_rate": 9.509313447416242e-05, |
| "loss": 0.0176, |
| "step": 18510 |
| }, |
| { |
| "grad_norm": 0.26647451519966125, |
| "learning_rate": 9.508598864222949e-05, |
| "loss": 0.0186, |
| "step": 18520 |
| }, |
| { |
| "grad_norm": 0.1898316591978073, |
| "learning_rate": 9.507883787976062e-05, |
| "loss": 0.0171, |
| "step": 18530 |
| }, |
| { |
| "grad_norm": 0.26751694083213806, |
| "learning_rate": 9.507168218753781e-05, |
| "loss": 0.0188, |
| "step": 18540 |
| }, |
| { |
| "grad_norm": 0.21939140558242798, |
| "learning_rate": 9.506452156634362e-05, |
| "loss": 0.0183, |
| "step": 18550 |
| }, |
| { |
| "grad_norm": 0.19924594461917877, |
| "learning_rate": 9.505735601696109e-05, |
| "loss": 0.0163, |
| "step": 18560 |
| }, |
| { |
| "grad_norm": 0.19633571803569794, |
| "learning_rate": 9.505018554017385e-05, |
| "loss": 0.0145, |
| "step": 18570 |
| }, |
| { |
| "grad_norm": 0.214951291680336, |
| "learning_rate": 9.504301013676604e-05, |
| "loss": 0.0147, |
| "step": 18580 |
| }, |
| { |
| "grad_norm": 0.2576771080493927, |
| "learning_rate": 9.503582980752238e-05, |
| "loss": 0.0162, |
| "step": 18590 |
| }, |
| { |
| "grad_norm": 0.2438470870256424, |
| "learning_rate": 9.502864455322809e-05, |
| "loss": 0.0168, |
| "step": 18600 |
| }, |
| { |
| "grad_norm": 0.21890531480312347, |
| "learning_rate": 9.502145437466891e-05, |
| "loss": 0.0185, |
| "step": 18610 |
| }, |
| { |
| "grad_norm": 0.2068784534931183, |
| "learning_rate": 9.501425927263116e-05, |
| "loss": 0.0189, |
| "step": 18620 |
| }, |
| { |
| "grad_norm": 0.2585833668708801, |
| "learning_rate": 9.500705924790172e-05, |
| "loss": 0.0184, |
| "step": 18630 |
| }, |
| { |
| "grad_norm": 0.18611527979373932, |
| "learning_rate": 9.499985430126794e-05, |
| "loss": 0.0148, |
| "step": 18640 |
| }, |
| { |
| "grad_norm": 0.21463438868522644, |
| "learning_rate": 9.499264443351775e-05, |
| "loss": 0.0158, |
| "step": 18650 |
| }, |
| { |
| "grad_norm": 0.19403278827667236, |
| "learning_rate": 9.498542964543961e-05, |
| "loss": 0.0165, |
| "step": 18660 |
| }, |
| { |
| "grad_norm": 0.22292093932628632, |
| "learning_rate": 9.497820993782252e-05, |
| "loss": 0.0151, |
| "step": 18670 |
| }, |
| { |
| "grad_norm": 0.17556989192962646, |
| "learning_rate": 9.497098531145601e-05, |
| "loss": 0.0178, |
| "step": 18680 |
| }, |
| { |
| "grad_norm": 0.26126596331596375, |
| "learning_rate": 9.496375576713017e-05, |
| "loss": 0.0165, |
| "step": 18690 |
| }, |
| { |
| "grad_norm": 0.17960461974143982, |
| "learning_rate": 9.49565213056356e-05, |
| "loss": 0.0165, |
| "step": 18700 |
| }, |
| { |
| "grad_norm": 0.2099495530128479, |
| "learning_rate": 9.494928192776342e-05, |
| "loss": 0.0142, |
| "step": 18710 |
| }, |
| { |
| "grad_norm": 0.19007565081119537, |
| "learning_rate": 9.494203763430538e-05, |
| "loss": 0.0154, |
| "step": 18720 |
| }, |
| { |
| "grad_norm": 0.23223096132278442, |
| "learning_rate": 9.493478842605366e-05, |
| "loss": 0.0185, |
| "step": 18730 |
| }, |
| { |
| "grad_norm": 0.2481299638748169, |
| "learning_rate": 9.492753430380105e-05, |
| "loss": 0.016, |
| "step": 18740 |
| }, |
| { |
| "grad_norm": 0.2259487509727478, |
| "learning_rate": 9.492027526834083e-05, |
| "loss": 0.0161, |
| "step": 18750 |
| }, |
| { |
| "grad_norm": 0.17821355164051056, |
| "learning_rate": 9.491301132046684e-05, |
| "loss": 0.0129, |
| "step": 18760 |
| }, |
| { |
| "grad_norm": 0.18956951797008514, |
| "learning_rate": 9.490574246097345e-05, |
| "loss": 0.0143, |
| "step": 18770 |
| }, |
| { |
| "grad_norm": 0.1658146232366562, |
| "learning_rate": 9.48984686906556e-05, |
| "loss": 0.0161, |
| "step": 18780 |
| }, |
| { |
| "grad_norm": 0.1845589280128479, |
| "learning_rate": 9.489119001030871e-05, |
| "loss": 0.0144, |
| "step": 18790 |
| }, |
| { |
| "grad_norm": 0.22135934233665466, |
| "learning_rate": 9.488390642072878e-05, |
| "loss": 0.0166, |
| "step": 18800 |
| }, |
| { |
| "grad_norm": 0.23337242007255554, |
| "learning_rate": 9.48766179227123e-05, |
| "loss": 0.0157, |
| "step": 18810 |
| }, |
| { |
| "grad_norm": 0.24434435367584229, |
| "learning_rate": 9.486932451705636e-05, |
| "loss": 0.0209, |
| "step": 18820 |
| }, |
| { |
| "grad_norm": 0.21083883941173553, |
| "learning_rate": 9.486202620455857e-05, |
| "loss": 0.0167, |
| "step": 18830 |
| }, |
| { |
| "grad_norm": 0.26065054535865784, |
| "learning_rate": 9.485472298601704e-05, |
| "loss": 0.0165, |
| "step": 18840 |
| }, |
| { |
| "grad_norm": 0.2413932830095291, |
| "learning_rate": 9.484741486223043e-05, |
| "loss": 0.0166, |
| "step": 18850 |
| }, |
| { |
| "grad_norm": 0.21706706285476685, |
| "learning_rate": 9.484010183399797e-05, |
| "loss": 0.0148, |
| "step": 18860 |
| }, |
| { |
| "grad_norm": 0.19135063886642456, |
| "learning_rate": 9.483278390211938e-05, |
| "loss": 0.0148, |
| "step": 18870 |
| }, |
| { |
| "grad_norm": 0.19527922570705414, |
| "learning_rate": 9.482546106739496e-05, |
| "loss": 0.0176, |
| "step": 18880 |
| }, |
| { |
| "grad_norm": 0.24653393030166626, |
| "learning_rate": 9.48181333306255e-05, |
| "loss": 0.0193, |
| "step": 18890 |
| }, |
| { |
| "grad_norm": 0.26578599214553833, |
| "learning_rate": 9.481080069261237e-05, |
| "loss": 0.0156, |
| "step": 18900 |
| }, |
| { |
| "grad_norm": 0.2269006073474884, |
| "learning_rate": 9.480346315415745e-05, |
| "loss": 0.0173, |
| "step": 18910 |
| }, |
| { |
| "grad_norm": 0.20283561944961548, |
| "learning_rate": 9.479612071606314e-05, |
| "loss": 0.0167, |
| "step": 18920 |
| }, |
| { |
| "grad_norm": 0.21275149285793304, |
| "learning_rate": 9.478877337913244e-05, |
| "loss": 0.0185, |
| "step": 18930 |
| }, |
| { |
| "grad_norm": 0.23655854165554047, |
| "learning_rate": 9.478142114416881e-05, |
| "loss": 0.0166, |
| "step": 18940 |
| }, |
| { |
| "grad_norm": 0.27066153287887573, |
| "learning_rate": 9.47740640119763e-05, |
| "loss": 0.0173, |
| "step": 18950 |
| }, |
| { |
| "grad_norm": 0.22452524304389954, |
| "learning_rate": 9.476670198335947e-05, |
| "loss": 0.0188, |
| "step": 18960 |
| }, |
| { |
| "grad_norm": 0.21628102660179138, |
| "learning_rate": 9.47593350591234e-05, |
| "loss": 0.0164, |
| "step": 18970 |
| }, |
| { |
| "grad_norm": 0.22073863446712494, |
| "learning_rate": 9.475196324007376e-05, |
| "loss": 0.0164, |
| "step": 18980 |
| }, |
| { |
| "grad_norm": 0.1865048110485077, |
| "learning_rate": 9.474458652701669e-05, |
| "loss": 0.0176, |
| "step": 18990 |
| }, |
| { |
| "grad_norm": 0.2237597554922104, |
| "learning_rate": 9.473720492075892e-05, |
| "loss": 0.0184, |
| "step": 19000 |
| }, |
| { |
| "grad_norm": 0.21400532126426697, |
| "learning_rate": 9.472981842210768e-05, |
| "loss": 0.0159, |
| "step": 19010 |
| }, |
| { |
| "grad_norm": 0.1770801693201065, |
| "learning_rate": 9.472242703187074e-05, |
| "loss": 0.016, |
| "step": 19020 |
| }, |
| { |
| "grad_norm": 0.21562041342258453, |
| "learning_rate": 9.471503075085643e-05, |
| "loss": 0.0184, |
| "step": 19030 |
| }, |
| { |
| "grad_norm": 0.25867006182670593, |
| "learning_rate": 9.470762957987359e-05, |
| "loss": 0.0147, |
| "step": 19040 |
| }, |
| { |
| "grad_norm": 0.17780904471874237, |
| "learning_rate": 9.470022351973158e-05, |
| "loss": 0.0153, |
| "step": 19050 |
| }, |
| { |
| "grad_norm": 0.18168801069259644, |
| "learning_rate": 9.469281257124034e-05, |
| "loss": 0.015, |
| "step": 19060 |
| }, |
| { |
| "grad_norm": 0.21822762489318848, |
| "learning_rate": 9.46853967352103e-05, |
| "loss": 0.0173, |
| "step": 19070 |
| }, |
| { |
| "grad_norm": 0.22131718695163727, |
| "learning_rate": 9.467797601245246e-05, |
| "loss": 0.0203, |
| "step": 19080 |
| }, |
| { |
| "grad_norm": 0.18401233851909637, |
| "learning_rate": 9.467055040377834e-05, |
| "loss": 0.0177, |
| "step": 19090 |
| }, |
| { |
| "grad_norm": 0.2105451226234436, |
| "learning_rate": 9.466311990999999e-05, |
| "loss": 0.0165, |
| "step": 19100 |
| }, |
| { |
| "grad_norm": 0.2062365561723709, |
| "learning_rate": 9.465568453193e-05, |
| "loss": 0.0165, |
| "step": 19110 |
| }, |
| { |
| "grad_norm": 0.3424419164657593, |
| "learning_rate": 9.464824427038148e-05, |
| "loss": 0.0185, |
| "step": 19120 |
| }, |
| { |
| "grad_norm": 0.20042471587657928, |
| "learning_rate": 9.46407991261681e-05, |
| "loss": 0.0159, |
| "step": 19130 |
| }, |
| { |
| "grad_norm": 0.21363820135593414, |
| "learning_rate": 9.463334910010404e-05, |
| "loss": 0.0155, |
| "step": 19140 |
| }, |
| { |
| "grad_norm": 0.1989668905735016, |
| "learning_rate": 9.462589419300403e-05, |
| "loss": 0.0168, |
| "step": 19150 |
| }, |
| { |
| "grad_norm": 0.18865354359149933, |
| "learning_rate": 9.461843440568333e-05, |
| "loss": 0.0147, |
| "step": 19160 |
| }, |
| { |
| "grad_norm": 0.21299093961715698, |
| "learning_rate": 9.461096973895773e-05, |
| "loss": 0.0158, |
| "step": 19170 |
| }, |
| { |
| "grad_norm": 0.2013334184885025, |
| "learning_rate": 9.460350019364355e-05, |
| "loss": 0.0196, |
| "step": 19180 |
| }, |
| { |
| "grad_norm": 0.2315259426832199, |
| "learning_rate": 9.459602577055764e-05, |
| "loss": 0.0178, |
| "step": 19190 |
| }, |
| { |
| "grad_norm": 0.19243505597114563, |
| "learning_rate": 9.45885464705174e-05, |
| "loss": 0.0163, |
| "step": 19200 |
| }, |
| { |
| "grad_norm": 0.19622260332107544, |
| "learning_rate": 9.458106229434076e-05, |
| "loss": 0.014, |
| "step": 19210 |
| }, |
| { |
| "grad_norm": 0.20563949644565582, |
| "learning_rate": 9.457357324284617e-05, |
| "loss": 0.0133, |
| "step": 19220 |
| }, |
| { |
| "grad_norm": 0.20626185834407806, |
| "learning_rate": 9.456607931685262e-05, |
| "loss": 0.0149, |
| "step": 19230 |
| }, |
| { |
| "grad_norm": 0.20093387365341187, |
| "learning_rate": 9.455858051717965e-05, |
| "loss": 0.0149, |
| "step": 19240 |
| }, |
| { |
| "grad_norm": 0.20239786803722382, |
| "learning_rate": 9.45510768446473e-05, |
| "loss": 0.0179, |
| "step": 19250 |
| }, |
| { |
| "grad_norm": 0.23519350588321686, |
| "learning_rate": 9.454356830007618e-05, |
| "loss": 0.0136, |
| "step": 19260 |
| }, |
| { |
| "grad_norm": 0.19403128325939178, |
| "learning_rate": 9.45360548842874e-05, |
| "loss": 0.0149, |
| "step": 19270 |
| }, |
| { |
| "grad_norm": 0.25772014260292053, |
| "learning_rate": 9.452853659810261e-05, |
| "loss": 0.0171, |
| "step": 19280 |
| }, |
| { |
| "grad_norm": 0.18387538194656372, |
| "learning_rate": 9.452101344234401e-05, |
| "loss": 0.0165, |
| "step": 19290 |
| }, |
| { |
| "grad_norm": 0.2201598435640335, |
| "learning_rate": 9.451348541783431e-05, |
| "loss": 0.0125, |
| "step": 19300 |
| }, |
| { |
| "grad_norm": 0.17011705040931702, |
| "learning_rate": 9.450595252539678e-05, |
| "loss": 0.0136, |
| "step": 19310 |
| }, |
| { |
| "grad_norm": 0.16841693222522736, |
| "learning_rate": 9.449841476585518e-05, |
| "loss": 0.0151, |
| "step": 19320 |
| }, |
| { |
| "grad_norm": 0.17615078389644623, |
| "learning_rate": 9.449087214003384e-05, |
| "loss": 0.0186, |
| "step": 19330 |
| }, |
| { |
| "grad_norm": 0.2136228233575821, |
| "learning_rate": 9.448332464875765e-05, |
| "loss": 0.0178, |
| "step": 19340 |
| }, |
| { |
| "grad_norm": 0.17352472245693207, |
| "learning_rate": 9.447577229285192e-05, |
| "loss": 0.0152, |
| "step": 19350 |
| }, |
| { |
| "grad_norm": 0.2533184289932251, |
| "learning_rate": 9.446821507314261e-05, |
| "loss": 0.0161, |
| "step": 19360 |
| }, |
| { |
| "grad_norm": 0.2478211224079132, |
| "learning_rate": 9.446065299045617e-05, |
| "loss": 0.0158, |
| "step": 19370 |
| }, |
| { |
| "grad_norm": 0.184827521443367, |
| "learning_rate": 9.445308604561955e-05, |
| "loss": 0.016, |
| "step": 19380 |
| }, |
| { |
| "grad_norm": 0.19070421159267426, |
| "learning_rate": 9.444551423946028e-05, |
| "loss": 0.018, |
| "step": 19390 |
| }, |
| { |
| "grad_norm": 0.17417584359645844, |
| "learning_rate": 9.443793757280638e-05, |
| "loss": 0.0167, |
| "step": 19400 |
| }, |
| { |
| "grad_norm": 0.19516630470752716, |
| "learning_rate": 9.443035604648646e-05, |
| "loss": 0.0183, |
| "step": 19410 |
| }, |
| { |
| "grad_norm": 0.21007031202316284, |
| "learning_rate": 9.44227696613296e-05, |
| "loss": 0.0147, |
| "step": 19420 |
| }, |
| { |
| "grad_norm": 0.22228588163852692, |
| "learning_rate": 9.441517841816542e-05, |
| "loss": 0.0146, |
| "step": 19430 |
| }, |
| { |
| "grad_norm": 0.1996290683746338, |
| "learning_rate": 9.440758231782413e-05, |
| "loss": 0.0145, |
| "step": 19440 |
| }, |
| { |
| "grad_norm": 0.2389068454504013, |
| "learning_rate": 9.439998136113639e-05, |
| "loss": 0.0161, |
| "step": 19450 |
| }, |
| { |
| "grad_norm": 0.2581833302974701, |
| "learning_rate": 9.439237554893344e-05, |
| "loss": 0.0142, |
| "step": 19460 |
| }, |
| { |
| "grad_norm": 0.1934615671634674, |
| "learning_rate": 9.438476488204705e-05, |
| "loss": 0.0189, |
| "step": 19470 |
| }, |
| { |
| "grad_norm": 0.2351747751235962, |
| "learning_rate": 9.43771493613095e-05, |
| "loss": 0.0177, |
| "step": 19480 |
| }, |
| { |
| "grad_norm": 0.20744049549102783, |
| "learning_rate": 9.436952898755362e-05, |
| "loss": 0.0152, |
| "step": 19490 |
| }, |
| { |
| "grad_norm": 0.2298344522714615, |
| "learning_rate": 9.436190376161276e-05, |
| "loss": 0.0129, |
| "step": 19500 |
| }, |
| { |
| "grad_norm": 0.19509683549404144, |
| "learning_rate": 9.43542736843208e-05, |
| "loss": 0.0193, |
| "step": 19510 |
| }, |
| { |
| "grad_norm": 0.28534120321273804, |
| "learning_rate": 9.434663875651216e-05, |
| "loss": 0.0149, |
| "step": 19520 |
| }, |
| { |
| "grad_norm": 0.26728376746177673, |
| "learning_rate": 9.433899897902177e-05, |
| "loss": 0.0169, |
| "step": 19530 |
| }, |
| { |
| "grad_norm": 0.17947261035442352, |
| "learning_rate": 9.433135435268511e-05, |
| "loss": 0.0145, |
| "step": 19540 |
| }, |
| { |
| "grad_norm": 0.2602401673793793, |
| "learning_rate": 9.432370487833819e-05, |
| "loss": 0.0178, |
| "step": 19550 |
| }, |
| { |
| "grad_norm": 0.20871134102344513, |
| "learning_rate": 9.431605055681756e-05, |
| "loss": 0.0149, |
| "step": 19560 |
| }, |
| { |
| "grad_norm": 0.17870604991912842, |
| "learning_rate": 9.430839138896026e-05, |
| "loss": 0.0172, |
| "step": 19570 |
| }, |
| { |
| "grad_norm": 0.20555749535560608, |
| "learning_rate": 9.43007273756039e-05, |
| "loss": 0.0128, |
| "step": 19580 |
| }, |
| { |
| "grad_norm": 0.24009230732917786, |
| "learning_rate": 9.429305851758658e-05, |
| "loss": 0.0168, |
| "step": 19590 |
| }, |
| { |
| "grad_norm": 0.2157120704650879, |
| "learning_rate": 9.428538481574699e-05, |
| "loss": 0.0182, |
| "step": 19600 |
| }, |
| { |
| "grad_norm": 0.22232243418693542, |
| "learning_rate": 9.42777062709243e-05, |
| "loss": 0.0155, |
| "step": 19610 |
| }, |
| { |
| "grad_norm": 0.24278074502944946, |
| "learning_rate": 9.427002288395821e-05, |
| "loss": 0.0167, |
| "step": 19620 |
| }, |
| { |
| "grad_norm": 0.2559744715690613, |
| "learning_rate": 9.426233465568898e-05, |
| "loss": 0.015, |
| "step": 19630 |
| }, |
| { |
| "grad_norm": 0.22816607356071472, |
| "learning_rate": 9.42546415869574e-05, |
| "loss": 0.0185, |
| "step": 19640 |
| }, |
| { |
| "grad_norm": 0.2328685224056244, |
| "learning_rate": 9.424694367860473e-05, |
| "loss": 0.0162, |
| "step": 19650 |
| }, |
| { |
| "grad_norm": 0.20901796221733093, |
| "learning_rate": 9.423924093147284e-05, |
| "loss": 0.0152, |
| "step": 19660 |
| }, |
| { |
| "grad_norm": 0.2872743010520935, |
| "learning_rate": 9.423153334640407e-05, |
| "loss": 0.016, |
| "step": 19670 |
| }, |
| { |
| "grad_norm": 0.17910341918468475, |
| "learning_rate": 9.42238209242413e-05, |
| "loss": 0.0178, |
| "step": 19680 |
| }, |
| { |
| "grad_norm": 0.20844197273254395, |
| "learning_rate": 9.421610366582798e-05, |
| "loss": 0.0147, |
| "step": 19690 |
| }, |
| { |
| "grad_norm": 0.2056276798248291, |
| "learning_rate": 9.420838157200803e-05, |
| "loss": 0.0141, |
| "step": 19700 |
| }, |
| { |
| "grad_norm": 0.2005912810564041, |
| "learning_rate": 9.420065464362594e-05, |
| "loss": 0.0167, |
| "step": 19710 |
| }, |
| { |
| "grad_norm": 0.1822415292263031, |
| "learning_rate": 9.419292288152673e-05, |
| "loss": 0.0158, |
| "step": 19720 |
| }, |
| { |
| "grad_norm": 0.19640015065670013, |
| "learning_rate": 9.418518628655588e-05, |
| "loss": 0.019, |
| "step": 19730 |
| }, |
| { |
| "grad_norm": 0.20280957221984863, |
| "learning_rate": 9.417744485955951e-05, |
| "loss": 0.0165, |
| "step": 19740 |
| }, |
| { |
| "grad_norm": 0.21713192760944366, |
| "learning_rate": 9.41696986013842e-05, |
| "loss": 0.0148, |
| "step": 19750 |
| }, |
| { |
| "grad_norm": 0.1741454154253006, |
| "learning_rate": 9.416194751287705e-05, |
| "loss": 0.0139, |
| "step": 19760 |
| }, |
| { |
| "grad_norm": 0.22186775505542755, |
| "learning_rate": 9.415419159488572e-05, |
| "loss": 0.0176, |
| "step": 19770 |
| }, |
| { |
| "grad_norm": 0.2876559793949127, |
| "learning_rate": 9.414643084825837e-05, |
| "loss": 0.0153, |
| "step": 19780 |
| }, |
| { |
| "grad_norm": 0.17327694594860077, |
| "learning_rate": 9.413866527384372e-05, |
| "loss": 0.0155, |
| "step": 19790 |
| }, |
| { |
| "grad_norm": 0.20157267153263092, |
| "learning_rate": 9.4130894872491e-05, |
| "loss": 0.0129, |
| "step": 19800 |
| }, |
| { |
| "grad_norm": 0.22156211733818054, |
| "learning_rate": 9.412311964504998e-05, |
| "loss": 0.0155, |
| "step": 19810 |
| }, |
| { |
| "grad_norm": 0.23381781578063965, |
| "learning_rate": 9.411533959237091e-05, |
| "loss": 0.0162, |
| "step": 19820 |
| }, |
| { |
| "grad_norm": 0.23688384890556335, |
| "learning_rate": 9.410755471530464e-05, |
| "loss": 0.0175, |
| "step": 19830 |
| }, |
| { |
| "grad_norm": 0.15220996737480164, |
| "learning_rate": 9.40997650147025e-05, |
| "loss": 0.0164, |
| "step": 19840 |
| }, |
| { |
| "grad_norm": 0.1738215535879135, |
| "learning_rate": 9.409197049141637e-05, |
| "loss": 0.0143, |
| "step": 19850 |
| }, |
| { |
| "grad_norm": 0.16357380151748657, |
| "learning_rate": 9.408417114629863e-05, |
| "loss": 0.0173, |
| "step": 19860 |
| }, |
| { |
| "grad_norm": 0.17979925870895386, |
| "learning_rate": 9.40763669802022e-05, |
| "loss": 0.0184, |
| "step": 19870 |
| }, |
| { |
| "grad_norm": 0.19944514334201813, |
| "learning_rate": 9.406855799398056e-05, |
| "loss": 0.0182, |
| "step": 19880 |
| }, |
| { |
| "grad_norm": 0.2147817760705948, |
| "learning_rate": 9.406074418848767e-05, |
| "loss": 0.017, |
| "step": 19890 |
| }, |
| { |
| "grad_norm": 0.21749483048915863, |
| "learning_rate": 9.405292556457805e-05, |
| "loss": 0.018, |
| "step": 19900 |
| }, |
| { |
| "grad_norm": 0.26651832461357117, |
| "learning_rate": 9.404510212310671e-05, |
| "loss": 0.0201, |
| "step": 19910 |
| }, |
| { |
| "grad_norm": 0.1734253466129303, |
| "learning_rate": 9.403727386492924e-05, |
| "loss": 0.0163, |
| "step": 19920 |
| }, |
| { |
| "grad_norm": 0.22233816981315613, |
| "learning_rate": 9.40294407909017e-05, |
| "loss": 0.0161, |
| "step": 19930 |
| }, |
| { |
| "grad_norm": 0.17614853382110596, |
| "learning_rate": 9.40216029018807e-05, |
| "loss": 0.0161, |
| "step": 19940 |
| }, |
| { |
| "grad_norm": 0.20520544052124023, |
| "learning_rate": 9.401376019872338e-05, |
| "loss": 0.0134, |
| "step": 19950 |
| }, |
| { |
| "grad_norm": 0.22282147407531738, |
| "learning_rate": 9.400591268228746e-05, |
| "loss": 0.0168, |
| "step": 19960 |
| }, |
| { |
| "grad_norm": 0.22932884097099304, |
| "learning_rate": 9.399806035343106e-05, |
| "loss": 0.0194, |
| "step": 19970 |
| }, |
| { |
| "grad_norm": 0.25537630915641785, |
| "learning_rate": 9.399020321301294e-05, |
| "loss": 0.0155, |
| "step": 19980 |
| }, |
| { |
| "grad_norm": 0.16550827026367188, |
| "learning_rate": 9.398234126189234e-05, |
| "loss": 0.0154, |
| "step": 19990 |
| }, |
| { |
| "grad_norm": 0.21164970099925995, |
| "learning_rate": 9.397447450092902e-05, |
| "loss": 0.0173, |
| "step": 20000 |
| }, |
| { |
| "grad_norm": 0.24687914550304413, |
| "learning_rate": 9.39666029309833e-05, |
| "loss": 0.0201, |
| "step": 20010 |
| }, |
| { |
| "grad_norm": 0.2638387382030487, |
| "learning_rate": 9.395872655291596e-05, |
| "loss": 0.0164, |
| "step": 20020 |
| }, |
| { |
| "grad_norm": 0.21427874267101288, |
| "learning_rate": 9.395084536758838e-05, |
| "loss": 0.0159, |
| "step": 20030 |
| }, |
| { |
| "grad_norm": 0.2500334680080414, |
| "learning_rate": 9.394295937586243e-05, |
| "loss": 0.0156, |
| "step": 20040 |
| }, |
| { |
| "grad_norm": 0.3239850103855133, |
| "learning_rate": 9.393506857860052e-05, |
| "loss": 0.0148, |
| "step": 20050 |
| }, |
| { |
| "grad_norm": 0.17661415040493011, |
| "learning_rate": 9.392717297666555e-05, |
| "loss": 0.0161, |
| "step": 20060 |
| }, |
| { |
| "grad_norm": 0.19975602626800537, |
| "learning_rate": 9.391927257092101e-05, |
| "loss": 0.0179, |
| "step": 20070 |
| }, |
| { |
| "grad_norm": 0.23306478559970856, |
| "learning_rate": 9.391136736223085e-05, |
| "loss": 0.0145, |
| "step": 20080 |
| }, |
| { |
| "grad_norm": 0.2603534162044525, |
| "learning_rate": 9.390345735145956e-05, |
| "loss": 0.0186, |
| "step": 20090 |
| }, |
| { |
| "grad_norm": 0.3492478132247925, |
| "learning_rate": 9.389554253947219e-05, |
| "loss": 0.0209, |
| "step": 20100 |
| }, |
| { |
| "grad_norm": 0.2827115058898926, |
| "learning_rate": 9.388762292713428e-05, |
| "loss": 0.0203, |
| "step": 20110 |
| }, |
| { |
| "grad_norm": 0.2664572298526764, |
| "learning_rate": 9.38796985153119e-05, |
| "loss": 0.019, |
| "step": 20120 |
| }, |
| { |
| "grad_norm": 0.25753986835479736, |
| "learning_rate": 9.387176930487169e-05, |
| "loss": 0.018, |
| "step": 20130 |
| }, |
| { |
| "grad_norm": 0.17414173483848572, |
| "learning_rate": 9.386383529668072e-05, |
| "loss": 0.016, |
| "step": 20140 |
| }, |
| { |
| "grad_norm": 0.202108234167099, |
| "learning_rate": 9.385589649160669e-05, |
| "loss": 0.0186, |
| "step": 20150 |
| }, |
| { |
| "grad_norm": 0.23871807754039764, |
| "learning_rate": 9.384795289051775e-05, |
| "loss": 0.0151, |
| "step": 20160 |
| }, |
| { |
| "grad_norm": 0.22860831022262573, |
| "learning_rate": 9.384000449428261e-05, |
| "loss": 0.0141, |
| "step": 20170 |
| }, |
| { |
| "grad_norm": 0.1896970570087433, |
| "learning_rate": 9.383205130377048e-05, |
| "loss": 0.0151, |
| "step": 20180 |
| }, |
| { |
| "grad_norm": 0.20248743891716003, |
| "learning_rate": 9.382409331985114e-05, |
| "loss": 0.0172, |
| "step": 20190 |
| }, |
| { |
| "grad_norm": 0.18169482052326202, |
| "learning_rate": 9.381613054339482e-05, |
| "loss": 0.0146, |
| "step": 20200 |
| }, |
| { |
| "grad_norm": 0.23816770315170288, |
| "learning_rate": 9.380816297527235e-05, |
| "loss": 0.0159, |
| "step": 20210 |
| }, |
| { |
| "grad_norm": 0.23363809287548065, |
| "learning_rate": 9.380019061635506e-05, |
| "loss": 0.019, |
| "step": 20220 |
| }, |
| { |
| "grad_norm": 0.2147899717092514, |
| "learning_rate": 9.379221346751474e-05, |
| "loss": 0.0136, |
| "step": 20230 |
| }, |
| { |
| "grad_norm": 0.2629246413707733, |
| "learning_rate": 9.378423152962382e-05, |
| "loss": 0.0153, |
| "step": 20240 |
| }, |
| { |
| "grad_norm": 0.24167490005493164, |
| "learning_rate": 9.377624480355517e-05, |
| "loss": 0.0184, |
| "step": 20250 |
| }, |
| { |
| "grad_norm": 0.19889342784881592, |
| "learning_rate": 9.376825329018219e-05, |
| "loss": 0.019, |
| "step": 20260 |
| }, |
| { |
| "grad_norm": 0.2099040448665619, |
| "learning_rate": 9.376025699037884e-05, |
| "loss": 0.0143, |
| "step": 20270 |
| }, |
| { |
| "grad_norm": 0.25897860527038574, |
| "learning_rate": 9.37522559050196e-05, |
| "loss": 0.0153, |
| "step": 20280 |
| }, |
| { |
| "grad_norm": 0.1966579705476761, |
| "learning_rate": 9.37442500349794e-05, |
| "loss": 0.0164, |
| "step": 20290 |
| }, |
| { |
| "grad_norm": 0.26289817690849304, |
| "learning_rate": 9.373623938113381e-05, |
| "loss": 0.0154, |
| "step": 20300 |
| }, |
| { |
| "grad_norm": 0.19626647233963013, |
| "learning_rate": 9.372822394435883e-05, |
| "loss": 0.02, |
| "step": 20310 |
| }, |
| { |
| "grad_norm": 0.1939084380865097, |
| "learning_rate": 9.372020372553102e-05, |
| "loss": 0.016, |
| "step": 20320 |
| }, |
| { |
| "grad_norm": 0.1999606341123581, |
| "learning_rate": 9.371217872552746e-05, |
| "loss": 0.0154, |
| "step": 20330 |
| }, |
| { |
| "grad_norm": 0.19327686727046967, |
| "learning_rate": 9.370414894522576e-05, |
| "loss": 0.0153, |
| "step": 20340 |
| }, |
| { |
| "grad_norm": 0.22085945308208466, |
| "learning_rate": 9.369611438550406e-05, |
| "loss": 0.0177, |
| "step": 20350 |
| }, |
| { |
| "grad_norm": 0.26224273443222046, |
| "learning_rate": 9.368807504724095e-05, |
| "loss": 0.016, |
| "step": 20360 |
| }, |
| { |
| "grad_norm": 0.19646985828876495, |
| "learning_rate": 9.368003093131565e-05, |
| "loss": 0.0154, |
| "step": 20370 |
| }, |
| { |
| "grad_norm": 0.2008865922689438, |
| "learning_rate": 9.367198203860785e-05, |
| "loss": 0.018, |
| "step": 20380 |
| }, |
| { |
| "grad_norm": 0.19568470120429993, |
| "learning_rate": 9.366392836999774e-05, |
| "loss": 0.0181, |
| "step": 20390 |
| }, |
| { |
| "grad_norm": 0.18460358679294586, |
| "learning_rate": 9.365586992636607e-05, |
| "loss": 0.0168, |
| "step": 20400 |
| }, |
| { |
| "grad_norm": 0.25228118896484375, |
| "learning_rate": 9.364780670859412e-05, |
| "loss": 0.0164, |
| "step": 20410 |
| }, |
| { |
| "grad_norm": 0.23357588052749634, |
| "learning_rate": 9.363973871756364e-05, |
| "loss": 0.0129, |
| "step": 20420 |
| }, |
| { |
| "grad_norm": 0.2548837661743164, |
| "learning_rate": 9.363166595415696e-05, |
| "loss": 0.0121, |
| "step": 20430 |
| }, |
| { |
| "grad_norm": 0.16061502695083618, |
| "learning_rate": 9.362358841925686e-05, |
| "loss": 0.0157, |
| "step": 20440 |
| }, |
| { |
| "grad_norm": 0.2448650449514389, |
| "learning_rate": 9.361550611374674e-05, |
| "loss": 0.0148, |
| "step": 20450 |
| }, |
| { |
| "grad_norm": 0.21893996000289917, |
| "learning_rate": 9.360741903851043e-05, |
| "loss": 0.0165, |
| "step": 20460 |
| }, |
| { |
| "grad_norm": 0.22877977788448334, |
| "learning_rate": 9.359932719443236e-05, |
| "loss": 0.0164, |
| "step": 20470 |
| }, |
| { |
| "grad_norm": 0.22097840905189514, |
| "learning_rate": 9.35912305823974e-05, |
| "loss": 0.0175, |
| "step": 20480 |
| }, |
| { |
| "grad_norm": 0.2514427602291107, |
| "learning_rate": 9.358312920329101e-05, |
| "loss": 0.0193, |
| "step": 20490 |
| }, |
| { |
| "grad_norm": 0.17240166664123535, |
| "learning_rate": 9.357502305799914e-05, |
| "loss": 0.0176, |
| "step": 20500 |
| }, |
| { |
| "grad_norm": 0.21320685744285583, |
| "learning_rate": 9.356691214740824e-05, |
| "loss": 0.0142, |
| "step": 20510 |
| }, |
| { |
| "grad_norm": 0.21088501811027527, |
| "learning_rate": 9.355879647240535e-05, |
| "loss": 0.0198, |
| "step": 20520 |
| }, |
| { |
| "grad_norm": 0.21060901880264282, |
| "learning_rate": 9.355067603387798e-05, |
| "loss": 0.0175, |
| "step": 20530 |
| }, |
| { |
| "grad_norm": 0.19547854363918304, |
| "learning_rate": 9.354255083271412e-05, |
| "loss": 0.0131, |
| "step": 20540 |
| }, |
| { |
| "grad_norm": 0.17447546124458313, |
| "learning_rate": 9.353442086980239e-05, |
| "loss": 0.016, |
| "step": 20550 |
| }, |
| { |
| "grad_norm": 0.2414809912443161, |
| "learning_rate": 9.352628614603185e-05, |
| "loss": 0.0183, |
| "step": 20560 |
| }, |
| { |
| "grad_norm": 0.19553828239440918, |
| "learning_rate": 9.351814666229209e-05, |
| "loss": 0.0157, |
| "step": 20570 |
| }, |
| { |
| "grad_norm": 0.21528875827789307, |
| "learning_rate": 9.351000241947324e-05, |
| "loss": 0.0174, |
| "step": 20580 |
| }, |
| { |
| "grad_norm": 0.21160905063152313, |
| "learning_rate": 9.350185341846594e-05, |
| "loss": 0.0159, |
| "step": 20590 |
| }, |
| { |
| "grad_norm": 0.2174428254365921, |
| "learning_rate": 9.349369966016134e-05, |
| "loss": 0.014, |
| "step": 20600 |
| }, |
| { |
| "grad_norm": 0.192820206284523, |
| "learning_rate": 9.348554114545117e-05, |
| "loss": 0.015, |
| "step": 20610 |
| }, |
| { |
| "grad_norm": 0.2764177620410919, |
| "learning_rate": 9.347737787522758e-05, |
| "loss": 0.0136, |
| "step": 20620 |
| }, |
| { |
| "grad_norm": 0.20158930122852325, |
| "learning_rate": 9.346920985038332e-05, |
| "loss": 0.0162, |
| "step": 20630 |
| }, |
| { |
| "grad_norm": 0.1876758635044098, |
| "learning_rate": 9.346103707181162e-05, |
| "loss": 0.0164, |
| "step": 20640 |
| }, |
| { |
| "grad_norm": 0.1939225047826767, |
| "learning_rate": 9.345285954040626e-05, |
| "loss": 0.0176, |
| "step": 20650 |
| }, |
| { |
| "grad_norm": 0.20861920714378357, |
| "learning_rate": 9.34446772570615e-05, |
| "loss": 0.0179, |
| "step": 20660 |
| }, |
| { |
| "grad_norm": 0.23533208668231964, |
| "learning_rate": 9.343649022267214e-05, |
| "loss": 0.0174, |
| "step": 20670 |
| }, |
| { |
| "grad_norm": 0.24126026034355164, |
| "learning_rate": 9.342829843813353e-05, |
| "loss": 0.0163, |
| "step": 20680 |
| }, |
| { |
| "grad_norm": 0.21258068084716797, |
| "learning_rate": 9.342010190434149e-05, |
| "loss": 0.0159, |
| "step": 20690 |
| }, |
| { |
| "grad_norm": 0.20059002935886383, |
| "learning_rate": 9.34119006221924e-05, |
| "loss": 0.0173, |
| "step": 20700 |
| }, |
| { |
| "grad_norm": 0.23242899775505066, |
| "learning_rate": 9.340369459258313e-05, |
| "loss": 0.0161, |
| "step": 20710 |
| }, |
| { |
| "grad_norm": 0.2322058081626892, |
| "learning_rate": 9.339548381641106e-05, |
| "loss": 0.0163, |
| "step": 20720 |
| }, |
| { |
| "grad_norm": 0.21510280668735504, |
| "learning_rate": 9.338726829457413e-05, |
| "loss": 0.0174, |
| "step": 20730 |
| }, |
| { |
| "grad_norm": 0.24406039714813232, |
| "learning_rate": 9.337904802797078e-05, |
| "loss": 0.0188, |
| "step": 20740 |
| }, |
| { |
| "grad_norm": 0.21909946203231812, |
| "learning_rate": 9.337082301749993e-05, |
| "loss": 0.0155, |
| "step": 20750 |
| }, |
| { |
| "grad_norm": 0.17972180247306824, |
| "learning_rate": 9.336259326406109e-05, |
| "loss": 0.0171, |
| "step": 20760 |
| }, |
| { |
| "grad_norm": 0.2577114999294281, |
| "learning_rate": 9.335435876855427e-05, |
| "loss": 0.0193, |
| "step": 20770 |
| }, |
| { |
| "grad_norm": 0.1579129993915558, |
| "learning_rate": 9.334611953187994e-05, |
| "loss": 0.0139, |
| "step": 20780 |
| }, |
| { |
| "grad_norm": 0.25546857714653015, |
| "learning_rate": 9.333787555493914e-05, |
| "loss": 0.0153, |
| "step": 20790 |
| }, |
| { |
| "grad_norm": 0.2245105355978012, |
| "learning_rate": 9.332962683863345e-05, |
| "loss": 0.0156, |
| "step": 20800 |
| }, |
| { |
| "grad_norm": 0.28271248936653137, |
| "learning_rate": 9.332137338386489e-05, |
| "loss": 0.0147, |
| "step": 20810 |
| }, |
| { |
| "grad_norm": 0.19915854930877686, |
| "learning_rate": 9.33131151915361e-05, |
| "loss": 0.0153, |
| "step": 20820 |
| }, |
| { |
| "grad_norm": 0.18703337013721466, |
| "learning_rate": 9.330485226255012e-05, |
| "loss": 0.017, |
| "step": 20830 |
| }, |
| { |
| "grad_norm": 0.14349418878555298, |
| "learning_rate": 9.329658459781061e-05, |
| "loss": 0.014, |
| "step": 20840 |
| }, |
| { |
| "grad_norm": 0.2399742752313614, |
| "learning_rate": 9.328831219822172e-05, |
| "loss": 0.0151, |
| "step": 20850 |
| }, |
| { |
| "grad_norm": 0.20987749099731445, |
| "learning_rate": 9.328003506468808e-05, |
| "loss": 0.016, |
| "step": 20860 |
| }, |
| { |
| "grad_norm": 0.22967706620693207, |
| "learning_rate": 9.327175319811488e-05, |
| "loss": 0.0141, |
| "step": 20870 |
| }, |
| { |
| "grad_norm": 0.22494818270206451, |
| "learning_rate": 9.326346659940781e-05, |
| "loss": 0.0161, |
| "step": 20880 |
| }, |
| { |
| "grad_norm": 0.23504319787025452, |
| "learning_rate": 9.325517526947308e-05, |
| "loss": 0.0144, |
| "step": 20890 |
| }, |
| { |
| "grad_norm": 0.21370932459831238, |
| "learning_rate": 9.32468792092174e-05, |
| "loss": 0.0129, |
| "step": 20900 |
| }, |
| { |
| "grad_norm": 0.19011889398097992, |
| "learning_rate": 9.323857841954803e-05, |
| "loss": 0.0159, |
| "step": 20910 |
| }, |
| { |
| "grad_norm": 0.18483437597751617, |
| "learning_rate": 9.323027290137276e-05, |
| "loss": 0.0149, |
| "step": 20920 |
| }, |
| { |
| "grad_norm": 0.18339982628822327, |
| "learning_rate": 9.322196265559981e-05, |
| "loss": 0.0145, |
| "step": 20930 |
| }, |
| { |
| "grad_norm": 0.24630165100097656, |
| "learning_rate": 9.321364768313803e-05, |
| "loss": 0.0183, |
| "step": 20940 |
| }, |
| { |
| "grad_norm": 0.2203260064125061, |
| "learning_rate": 9.32053279848967e-05, |
| "loss": 0.0137, |
| "step": 20950 |
| }, |
| { |
| "grad_norm": 0.25817951560020447, |
| "learning_rate": 9.319700356178567e-05, |
| "loss": 0.0144, |
| "step": 20960 |
| }, |
| { |
| "grad_norm": 0.20414921641349792, |
| "learning_rate": 9.318867441471527e-05, |
| "loss": 0.0149, |
| "step": 20970 |
| }, |
| { |
| "grad_norm": 0.16952864825725555, |
| "learning_rate": 9.318034054459637e-05, |
| "loss": 0.0161, |
| "step": 20980 |
| }, |
| { |
| "grad_norm": 0.21634019911289215, |
| "learning_rate": 9.317200195234034e-05, |
| "loss": 0.0145, |
| "step": 20990 |
| }, |
| { |
| "grad_norm": 0.23945434391498566, |
| "learning_rate": 9.316365863885909e-05, |
| "loss": 0.0192, |
| "step": 21000 |
| }, |
| { |
| "grad_norm": 0.1654985100030899, |
| "learning_rate": 9.315531060506502e-05, |
| "loss": 0.0157, |
| "step": 21010 |
| }, |
| { |
| "grad_norm": 0.19484049081802368, |
| "learning_rate": 9.314695785187108e-05, |
| "loss": 0.0176, |
| "step": 21020 |
| }, |
| { |
| "grad_norm": 0.21691042184829712, |
| "learning_rate": 9.313860038019069e-05, |
| "loss": 0.0153, |
| "step": 21030 |
| }, |
| { |
| "grad_norm": 0.26378920674324036, |
| "learning_rate": 9.313023819093782e-05, |
| "loss": 0.0178, |
| "step": 21040 |
| }, |
| { |
| "grad_norm": 0.28963178396224976, |
| "learning_rate": 9.312187128502695e-05, |
| "loss": 0.0203, |
| "step": 21050 |
| }, |
| { |
| "grad_norm": 0.21935831010341644, |
| "learning_rate": 9.311349966337307e-05, |
| "loss": 0.0144, |
| "step": 21060 |
| }, |
| { |
| "grad_norm": 0.17089690268039703, |
| "learning_rate": 9.310512332689169e-05, |
| "loss": 0.0189, |
| "step": 21070 |
| }, |
| { |
| "grad_norm": 0.18906037509441376, |
| "learning_rate": 9.309674227649883e-05, |
| "loss": 0.0159, |
| "step": 21080 |
| }, |
| { |
| "grad_norm": 0.19601677358150482, |
| "learning_rate": 9.308835651311103e-05, |
| "loss": 0.0158, |
| "step": 21090 |
| }, |
| { |
| "grad_norm": 0.21335552632808685, |
| "learning_rate": 9.307996603764533e-05, |
| "loss": 0.0148, |
| "step": 21100 |
| }, |
| { |
| "grad_norm": 0.24205279350280762, |
| "learning_rate": 9.307157085101932e-05, |
| "loss": 0.0179, |
| "step": 21110 |
| }, |
| { |
| "grad_norm": 0.19505095481872559, |
| "learning_rate": 9.306317095415109e-05, |
| "loss": 0.0148, |
| "step": 21120 |
| }, |
| { |
| "grad_norm": 0.3368663787841797, |
| "learning_rate": 9.305476634795922e-05, |
| "loss": 0.0172, |
| "step": 21130 |
| }, |
| { |
| "grad_norm": 0.24451889097690582, |
| "learning_rate": 9.304635703336284e-05, |
| "loss": 0.0177, |
| "step": 21140 |
| }, |
| { |
| "grad_norm": 0.2271011620759964, |
| "learning_rate": 9.303794301128157e-05, |
| "loss": 0.0172, |
| "step": 21150 |
| }, |
| { |
| "grad_norm": 0.21570229530334473, |
| "learning_rate": 9.302952428263555e-05, |
| "loss": 0.016, |
| "step": 21160 |
| }, |
| { |
| "grad_norm": 0.2091713696718216, |
| "learning_rate": 9.302110084834545e-05, |
| "loss": 0.0152, |
| "step": 21170 |
| }, |
| { |
| "grad_norm": 0.2397819459438324, |
| "learning_rate": 9.301267270933245e-05, |
| "loss": 0.0167, |
| "step": 21180 |
| }, |
| { |
| "grad_norm": 0.23981717228889465, |
| "learning_rate": 9.300423986651823e-05, |
| "loss": 0.019, |
| "step": 21190 |
| }, |
| { |
| "grad_norm": 0.23592530190944672, |
| "learning_rate": 9.299580232082501e-05, |
| "loss": 0.0167, |
| "step": 21200 |
| }, |
| { |
| "grad_norm": 0.24473746120929718, |
| "learning_rate": 9.298736007317547e-05, |
| "loss": 0.0143, |
| "step": 21210 |
| }, |
| { |
| "grad_norm": 0.2057255357503891, |
| "learning_rate": 9.297891312449288e-05, |
| "loss": 0.0167, |
| "step": 21220 |
| }, |
| { |
| "grad_norm": 0.22062143683433533, |
| "learning_rate": 9.297046147570094e-05, |
| "loss": 0.0169, |
| "step": 21230 |
| }, |
| { |
| "grad_norm": 0.1654421091079712, |
| "learning_rate": 9.296200512772396e-05, |
| "loss": 0.0155, |
| "step": 21240 |
| }, |
| { |
| "grad_norm": 0.2394648939371109, |
| "learning_rate": 9.295354408148668e-05, |
| "loss": 0.014, |
| "step": 21250 |
| }, |
| { |
| "grad_norm": 0.20795094966888428, |
| "learning_rate": 9.294507833791441e-05, |
| "loss": 0.0152, |
| "step": 21260 |
| }, |
| { |
| "grad_norm": 0.2580958604812622, |
| "learning_rate": 9.293660789793295e-05, |
| "loss": 0.0172, |
| "step": 21270 |
| }, |
| { |
| "grad_norm": 0.18256065249443054, |
| "learning_rate": 9.292813276246858e-05, |
| "loss": 0.0161, |
| "step": 21280 |
| }, |
| { |
| "grad_norm": 0.19090405106544495, |
| "learning_rate": 9.291965293244816e-05, |
| "loss": 0.0166, |
| "step": 21290 |
| }, |
| { |
| "grad_norm": 0.26941341161727905, |
| "learning_rate": 9.291116840879904e-05, |
| "loss": 0.0145, |
| "step": 21300 |
| }, |
| { |
| "grad_norm": 0.18729177117347717, |
| "learning_rate": 9.290267919244904e-05, |
| "loss": 0.0184, |
| "step": 21310 |
| }, |
| { |
| "grad_norm": 0.15456588566303253, |
| "learning_rate": 9.289418528432655e-05, |
| "loss": 0.0165, |
| "step": 21320 |
| }, |
| { |
| "grad_norm": 0.20851314067840576, |
| "learning_rate": 9.288568668536045e-05, |
| "loss": 0.0123, |
| "step": 21330 |
| }, |
| { |
| "grad_norm": 0.2293507605791092, |
| "learning_rate": 9.287718339648013e-05, |
| "loss": 0.0162, |
| "step": 21340 |
| }, |
| { |
| "grad_norm": 0.2490718513727188, |
| "learning_rate": 9.28686754186155e-05, |
| "loss": 0.0163, |
| "step": 21350 |
| }, |
| { |
| "grad_norm": 0.1911923587322235, |
| "learning_rate": 9.286016275269698e-05, |
| "loss": 0.015, |
| "step": 21360 |
| }, |
| { |
| "grad_norm": 0.188841313123703, |
| "learning_rate": 9.285164539965551e-05, |
| "loss": 0.0139, |
| "step": 21370 |
| }, |
| { |
| "grad_norm": 0.26565125584602356, |
| "learning_rate": 9.284312336042251e-05, |
| "loss": 0.0132, |
| "step": 21380 |
| }, |
| { |
| "grad_norm": 0.2394629269838333, |
| "learning_rate": 9.283459663592996e-05, |
| "loss": 0.0168, |
| "step": 21390 |
| }, |
| { |
| "grad_norm": 0.19435308873653412, |
| "learning_rate": 9.282606522711033e-05, |
| "loss": 0.0146, |
| "step": 21400 |
| }, |
| { |
| "grad_norm": 0.19722171127796173, |
| "learning_rate": 9.281752913489657e-05, |
| "loss": 0.0147, |
| "step": 21410 |
| }, |
| { |
| "grad_norm": 0.16051748394966125, |
| "learning_rate": 9.280898836022222e-05, |
| "loss": 0.0151, |
| "step": 21420 |
| }, |
| { |
| "grad_norm": 0.22683489322662354, |
| "learning_rate": 9.280044290402126e-05, |
| "loss": 0.0157, |
| "step": 21430 |
| }, |
| { |
| "grad_norm": 0.20663219690322876, |
| "learning_rate": 9.279189276722821e-05, |
| "loss": 0.0171, |
| "step": 21440 |
| }, |
| { |
| "grad_norm": 0.2679346203804016, |
| "learning_rate": 9.278333795077812e-05, |
| "loss": 0.0195, |
| "step": 21450 |
| }, |
| { |
| "grad_norm": 0.21232666075229645, |
| "learning_rate": 9.27747784556065e-05, |
| "loss": 0.0136, |
| "step": 21460 |
| }, |
| { |
| "grad_norm": 0.25075575709342957, |
| "learning_rate": 9.276621428264942e-05, |
| "loss": 0.0127, |
| "step": 21470 |
| }, |
| { |
| "grad_norm": 0.21216872334480286, |
| "learning_rate": 9.275764543284345e-05, |
| "loss": 0.0142, |
| "step": 21480 |
| }, |
| { |
| "grad_norm": 0.2122514694929123, |
| "learning_rate": 9.274907190712566e-05, |
| "loss": 0.0154, |
| "step": 21490 |
| }, |
| { |
| "grad_norm": 0.19179250299930573, |
| "learning_rate": 9.274049370643363e-05, |
| "loss": 0.0169, |
| "step": 21500 |
| }, |
| { |
| "grad_norm": 0.21835725009441376, |
| "learning_rate": 9.273191083170547e-05, |
| "loss": 0.0127, |
| "step": 21510 |
| }, |
| { |
| "grad_norm": 0.2395332008600235, |
| "learning_rate": 9.27233232838798e-05, |
| "loss": 0.0161, |
| "step": 21520 |
| }, |
| { |
| "grad_norm": 0.22382347285747528, |
| "learning_rate": 9.27147310638957e-05, |
| "loss": 0.0155, |
| "step": 21530 |
| }, |
| { |
| "grad_norm": 0.21594788134098053, |
| "learning_rate": 9.270613417269286e-05, |
| "loss": 0.0169, |
| "step": 21540 |
| }, |
| { |
| "grad_norm": 0.2419319599866867, |
| "learning_rate": 9.269753261121138e-05, |
| "loss": 0.0154, |
| "step": 21550 |
| }, |
| { |
| "grad_norm": 0.2286730855703354, |
| "learning_rate": 9.268892638039194e-05, |
| "loss": 0.0159, |
| "step": 21560 |
| }, |
| { |
| "grad_norm": 0.1851305514574051, |
| "learning_rate": 9.268031548117569e-05, |
| "loss": 0.0144, |
| "step": 21570 |
| }, |
| { |
| "grad_norm": 0.16771528124809265, |
| "learning_rate": 9.26716999145043e-05, |
| "loss": 0.0185, |
| "step": 21580 |
| }, |
| { |
| "grad_norm": 0.24980562925338745, |
| "learning_rate": 9.266307968131998e-05, |
| "loss": 0.0168, |
| "step": 21590 |
| }, |
| { |
| "grad_norm": 0.2545185089111328, |
| "learning_rate": 9.26544547825654e-05, |
| "loss": 0.0174, |
| "step": 21600 |
| }, |
| { |
| "grad_norm": 0.20524127781391144, |
| "learning_rate": 9.264582521918376e-05, |
| "loss": 0.0157, |
| "step": 21610 |
| }, |
| { |
| "grad_norm": 0.18211904168128967, |
| "learning_rate": 9.263719099211881e-05, |
| "loss": 0.0166, |
| "step": 21620 |
| }, |
| { |
| "grad_norm": 0.15426014363765717, |
| "learning_rate": 9.262855210231476e-05, |
| "loss": 0.0166, |
| "step": 21630 |
| }, |
| { |
| "grad_norm": 0.15671566128730774, |
| "learning_rate": 9.261990855071633e-05, |
| "loss": 0.0161, |
| "step": 21640 |
| }, |
| { |
| "grad_norm": 0.2167554795742035, |
| "learning_rate": 9.261126033826878e-05, |
| "loss": 0.0141, |
| "step": 21650 |
| }, |
| { |
| "grad_norm": 0.19651272892951965, |
| "learning_rate": 9.260260746591786e-05, |
| "loss": 0.0159, |
| "step": 21660 |
| }, |
| { |
| "grad_norm": 0.21581247448921204, |
| "learning_rate": 9.259394993460985e-05, |
| "loss": 0.0159, |
| "step": 21670 |
| }, |
| { |
| "grad_norm": 0.22810497879981995, |
| "learning_rate": 9.258528774529151e-05, |
| "loss": 0.0161, |
| "step": 21680 |
| }, |
| { |
| "grad_norm": 0.22470343112945557, |
| "learning_rate": 9.257662089891013e-05, |
| "loss": 0.0166, |
| "step": 21690 |
| }, |
| { |
| "grad_norm": 0.19753903150558472, |
| "learning_rate": 9.25679493964135e-05, |
| "loss": 0.016, |
| "step": 21700 |
| }, |
| { |
| "grad_norm": 0.20362699031829834, |
| "learning_rate": 9.255927323874994e-05, |
| "loss": 0.016, |
| "step": 21710 |
| }, |
| { |
| "grad_norm": 0.24484552443027496, |
| "learning_rate": 9.255059242686822e-05, |
| "loss": 0.0218, |
| "step": 21720 |
| }, |
| { |
| "grad_norm": 0.28058183193206787, |
| "learning_rate": 9.254190696171769e-05, |
| "loss": 0.0186, |
| "step": 21730 |
| }, |
| { |
| "grad_norm": 0.2401166409254074, |
| "learning_rate": 9.25332168442482e-05, |
| "loss": 0.0153, |
| "step": 21740 |
| }, |
| { |
| "grad_norm": 0.18627475202083588, |
| "learning_rate": 9.252452207541004e-05, |
| "loss": 0.0151, |
| "step": 21750 |
| }, |
| { |
| "grad_norm": 0.18535223603248596, |
| "learning_rate": 9.251582265615409e-05, |
| "loss": 0.0155, |
| "step": 21760 |
| }, |
| { |
| "grad_norm": 0.17740526795387268, |
| "learning_rate": 9.250711858743169e-05, |
| "loss": 0.0175, |
| "step": 21770 |
| }, |
| { |
| "grad_norm": 0.26088157296180725, |
| "learning_rate": 9.24984098701947e-05, |
| "loss": 0.0155, |
| "step": 21780 |
| }, |
| { |
| "grad_norm": 0.21444660425186157, |
| "learning_rate": 9.248969650539552e-05, |
| "loss": 0.0142, |
| "step": 21790 |
| }, |
| { |
| "grad_norm": 0.23764176666736603, |
| "learning_rate": 9.2480978493987e-05, |
| "loss": 0.0146, |
| "step": 21800 |
| }, |
| { |
| "grad_norm": 0.24186286330223083, |
| "learning_rate": 9.247225583692256e-05, |
| "loss": 0.0163, |
| "step": 21810 |
| }, |
| { |
| "grad_norm": 0.26205840706825256, |
| "learning_rate": 9.246352853515607e-05, |
| "loss": 0.0134, |
| "step": 21820 |
| }, |
| { |
| "grad_norm": 0.22400116920471191, |
| "learning_rate": 9.245479658964194e-05, |
| "loss": 0.0151, |
| "step": 21830 |
| }, |
| { |
| "grad_norm": 0.20029689371585846, |
| "learning_rate": 9.244606000133507e-05, |
| "loss": 0.0155, |
| "step": 21840 |
| }, |
| { |
| "grad_norm": 0.2253129929304123, |
| "learning_rate": 9.24373187711909e-05, |
| "loss": 0.017, |
| "step": 21850 |
| }, |
| { |
| "grad_norm": 0.1444292813539505, |
| "learning_rate": 9.242857290016537e-05, |
| "loss": 0.0149, |
| "step": 21860 |
| }, |
| { |
| "grad_norm": 0.2714897394180298, |
| "learning_rate": 9.241982238921488e-05, |
| "loss": 0.0169, |
| "step": 21870 |
| }, |
| { |
| "grad_norm": 0.22298938035964966, |
| "learning_rate": 9.24110672392964e-05, |
| "loss": 0.0146, |
| "step": 21880 |
| }, |
| { |
| "grad_norm": 0.21249057352542877, |
| "learning_rate": 9.240230745136737e-05, |
| "loss": 0.0163, |
| "step": 21890 |
| }, |
| { |
| "grad_norm": 0.24658262729644775, |
| "learning_rate": 9.239354302638575e-05, |
| "loss": 0.0164, |
| "step": 21900 |
| }, |
| { |
| "grad_norm": 0.18486113846302032, |
| "learning_rate": 9.238477396531e-05, |
| "loss": 0.0148, |
| "step": 21910 |
| }, |
| { |
| "grad_norm": 0.17722158133983612, |
| "learning_rate": 9.23760002690991e-05, |
| "loss": 0.0147, |
| "step": 21920 |
| }, |
| { |
| "grad_norm": 0.2587215006351471, |
| "learning_rate": 9.236722193871252e-05, |
| "loss": 0.0155, |
| "step": 21930 |
| }, |
| { |
| "grad_norm": 0.16541887819766998, |
| "learning_rate": 9.235843897511023e-05, |
| "loss": 0.0153, |
| "step": 21940 |
| }, |
| { |
| "grad_norm": 0.15841276943683624, |
| "learning_rate": 9.234965137925276e-05, |
| "loss": 0.0129, |
| "step": 21950 |
| }, |
| { |
| "grad_norm": 0.23887085914611816, |
| "learning_rate": 9.234085915210108e-05, |
| "loss": 0.0146, |
| "step": 21960 |
| }, |
| { |
| "grad_norm": 0.181078240275383, |
| "learning_rate": 9.23320622946167e-05, |
| "loss": 0.0173, |
| "step": 21970 |
| }, |
| { |
| "grad_norm": 0.22938847541809082, |
| "learning_rate": 9.232326080776163e-05, |
| "loss": 0.0178, |
| "step": 21980 |
| }, |
| { |
| "grad_norm": 0.16398344933986664, |
| "learning_rate": 9.23144546924984e-05, |
| "loss": 0.0166, |
| "step": 21990 |
| }, |
| { |
| "grad_norm": 0.30050989985466003, |
| "learning_rate": 9.230564394979e-05, |
| "loss": 0.0135, |
| "step": 22000 |
| }, |
| { |
| "grad_norm": 0.1774117350578308, |
| "learning_rate": 9.22968285806e-05, |
| "loss": 0.0147, |
| "step": 22010 |
| }, |
| { |
| "grad_norm": 0.19958597421646118, |
| "learning_rate": 9.228800858589242e-05, |
| "loss": 0.0146, |
| "step": 22020 |
| }, |
| { |
| "grad_norm": 0.18918026983737946, |
| "learning_rate": 9.227918396663179e-05, |
| "loss": 0.0139, |
| "step": 22030 |
| }, |
| { |
| "grad_norm": 0.22280901670455933, |
| "learning_rate": 9.227035472378319e-05, |
| "loss": 0.0154, |
| "step": 22040 |
| }, |
| { |
| "grad_norm": 0.24022266268730164, |
| "learning_rate": 9.226152085831213e-05, |
| "loss": 0.013, |
| "step": 22050 |
| }, |
| { |
| "grad_norm": 0.22554033994674683, |
| "learning_rate": 9.22526823711847e-05, |
| "loss": 0.0131, |
| "step": 22060 |
| }, |
| { |
| "grad_norm": 0.18703500926494598, |
| "learning_rate": 9.224383926336745e-05, |
| "loss": 0.0154, |
| "step": 22070 |
| }, |
| { |
| "grad_norm": 0.22093725204467773, |
| "learning_rate": 9.223499153582744e-05, |
| "loss": 0.0143, |
| "step": 22080 |
| }, |
| { |
| "grad_norm": 0.21581970155239105, |
| "learning_rate": 9.222613918953226e-05, |
| "loss": 0.014, |
| "step": 22090 |
| }, |
| { |
| "grad_norm": 0.16765768826007843, |
| "learning_rate": 9.221728222544999e-05, |
| "loss": 0.0178, |
| "step": 22100 |
| }, |
| { |
| "grad_norm": 0.2585461139678955, |
| "learning_rate": 9.22084206445492e-05, |
| "loss": 0.014, |
| "step": 22110 |
| }, |
| { |
| "grad_norm": 0.21266622841358185, |
| "learning_rate": 9.2199554447799e-05, |
| "loss": 0.015, |
| "step": 22120 |
| }, |
| { |
| "grad_norm": 0.2095349133014679, |
| "learning_rate": 9.219068363616897e-05, |
| "loss": 0.018, |
| "step": 22130 |
| }, |
| { |
| "grad_norm": 0.1255059838294983, |
| "learning_rate": 9.218180821062919e-05, |
| "loss": 0.0152, |
| "step": 22140 |
| }, |
| { |
| "grad_norm": 0.20356354117393494, |
| "learning_rate": 9.21729281721503e-05, |
| "loss": 0.0142, |
| "step": 22150 |
| }, |
| { |
| "grad_norm": 0.19390271604061127, |
| "learning_rate": 9.216404352170339e-05, |
| "loss": 0.0139, |
| "step": 22160 |
| }, |
| { |
| "grad_norm": 0.29199182987213135, |
| "learning_rate": 9.215515426026007e-05, |
| "loss": 0.015, |
| "step": 22170 |
| }, |
| { |
| "grad_norm": 0.27190372347831726, |
| "learning_rate": 9.214626038879246e-05, |
| "loss": 0.016, |
| "step": 22180 |
| }, |
| { |
| "grad_norm": 0.21578331291675568, |
| "learning_rate": 9.21373619082732e-05, |
| "loss": 0.0151, |
| "step": 22190 |
| }, |
| { |
| "grad_norm": 0.2100040465593338, |
| "learning_rate": 9.212845881967535e-05, |
| "loss": 0.0161, |
| "step": 22200 |
| }, |
| { |
| "grad_norm": 0.18035203218460083, |
| "learning_rate": 9.211955112397262e-05, |
| "loss": 0.0151, |
| "step": 22210 |
| }, |
| { |
| "grad_norm": 0.21208736300468445, |
| "learning_rate": 9.211063882213909e-05, |
| "loss": 0.017, |
| "step": 22220 |
| }, |
| { |
| "grad_norm": 0.19938045740127563, |
| "learning_rate": 9.210172191514942e-05, |
| "loss": 0.0149, |
| "step": 22230 |
| }, |
| { |
| "grad_norm": 0.17603902518749237, |
| "learning_rate": 9.209280040397874e-05, |
| "loss": 0.0154, |
| "step": 22240 |
| }, |
| { |
| "grad_norm": 0.23635786771774292, |
| "learning_rate": 9.208387428960268e-05, |
| "loss": 0.0199, |
| "step": 22250 |
| }, |
| { |
| "grad_norm": 0.19247859716415405, |
| "learning_rate": 9.20749435729974e-05, |
| "loss": 0.0166, |
| "step": 22260 |
| }, |
| { |
| "grad_norm": 0.17111243307590485, |
| "learning_rate": 9.206600825513957e-05, |
| "loss": 0.0139, |
| "step": 22270 |
| }, |
| { |
| "grad_norm": 0.17398229241371155, |
| "learning_rate": 9.20570683370063e-05, |
| "loss": 0.0147, |
| "step": 22280 |
| }, |
| { |
| "grad_norm": 0.14633597433567047, |
| "learning_rate": 9.204812381957528e-05, |
| "loss": 0.0144, |
| "step": 22290 |
| }, |
| { |
| "grad_norm": 0.18436771631240845, |
| "learning_rate": 9.203917470382465e-05, |
| "loss": 0.0121, |
| "step": 22300 |
| }, |
| { |
| "grad_norm": 0.23613686859607697, |
| "learning_rate": 9.203022099073309e-05, |
| "loss": 0.0152, |
| "step": 22310 |
| }, |
| { |
| "grad_norm": 0.30081671476364136, |
| "learning_rate": 9.202126268127976e-05, |
| "loss": 0.0146, |
| "step": 22320 |
| }, |
| { |
| "grad_norm": 0.24067933857440948, |
| "learning_rate": 9.20122997764443e-05, |
| "loss": 0.0182, |
| "step": 22330 |
| }, |
| { |
| "grad_norm": 0.20103171467781067, |
| "learning_rate": 9.200333227720692e-05, |
| "loss": 0.016, |
| "step": 22340 |
| }, |
| { |
| "grad_norm": 0.19687743484973907, |
| "learning_rate": 9.199436018454826e-05, |
| "loss": 0.0149, |
| "step": 22350 |
| }, |
| { |
| "grad_norm": 0.25894713401794434, |
| "learning_rate": 9.198538349944952e-05, |
| "loss": 0.0147, |
| "step": 22360 |
| }, |
| { |
| "grad_norm": 0.1842220425605774, |
| "learning_rate": 9.197640222289234e-05, |
| "loss": 0.015, |
| "step": 22370 |
| }, |
| { |
| "grad_norm": 0.14746177196502686, |
| "learning_rate": 9.196741635585895e-05, |
| "loss": 0.0129, |
| "step": 22380 |
| }, |
| { |
| "grad_norm": 0.1876509040594101, |
| "learning_rate": 9.195842589933199e-05, |
| "loss": 0.0174, |
| "step": 22390 |
| }, |
| { |
| "grad_norm": 0.19653862714767456, |
| "learning_rate": 9.194943085429466e-05, |
| "loss": 0.0166, |
| "step": 22400 |
| }, |
| { |
| "grad_norm": 0.18987902998924255, |
| "learning_rate": 9.194043122173065e-05, |
| "loss": 0.014, |
| "step": 22410 |
| }, |
| { |
| "grad_norm": 0.16859357059001923, |
| "learning_rate": 9.193142700262413e-05, |
| "loss": 0.0173, |
| "step": 22420 |
| }, |
| { |
| "grad_norm": 0.2265775054693222, |
| "learning_rate": 9.192241819795979e-05, |
| "loss": 0.0159, |
| "step": 22430 |
| }, |
| { |
| "grad_norm": 0.2058074176311493, |
| "learning_rate": 9.191340480872284e-05, |
| "loss": 0.017, |
| "step": 22440 |
| }, |
| { |
| "grad_norm": 0.24698784947395325, |
| "learning_rate": 9.190438683589895e-05, |
| "loss": 0.0154, |
| "step": 22450 |
| }, |
| { |
| "grad_norm": 0.20847007632255554, |
| "learning_rate": 9.189536428047432e-05, |
| "loss": 0.0141, |
| "step": 22460 |
| }, |
| { |
| "grad_norm": 0.16210098564624786, |
| "learning_rate": 9.188633714343564e-05, |
| "loss": 0.0148, |
| "step": 22470 |
| }, |
| { |
| "grad_norm": 0.27115052938461304, |
| "learning_rate": 9.18773054257701e-05, |
| "loss": 0.0134, |
| "step": 22480 |
| }, |
| { |
| "grad_norm": 0.15684786438941956, |
| "learning_rate": 9.18682691284654e-05, |
| "loss": 0.0132, |
| "step": 22490 |
| }, |
| { |
| "grad_norm": 0.2694545090198517, |
| "learning_rate": 9.185922825250974e-05, |
| "loss": 0.0147, |
| "step": 22500 |
| }, |
| { |
| "grad_norm": 0.2178843766450882, |
| "learning_rate": 9.185018279889181e-05, |
| "loss": 0.0155, |
| "step": 22510 |
| }, |
| { |
| "grad_norm": 0.17947521805763245, |
| "learning_rate": 9.184113276860082e-05, |
| "loss": 0.014, |
| "step": 22520 |
| }, |
| { |
| "grad_norm": 0.16482184827327728, |
| "learning_rate": 9.183207816262645e-05, |
| "loss": 0.0153, |
| "step": 22530 |
| }, |
| { |
| "grad_norm": 0.2352295070886612, |
| "learning_rate": 9.182301898195891e-05, |
| "loss": 0.0163, |
| "step": 22540 |
| }, |
| { |
| "grad_norm": 0.2273463010787964, |
| "learning_rate": 9.181395522758889e-05, |
| "loss": 0.0145, |
| "step": 22550 |
| }, |
| { |
| "grad_norm": 0.18331757187843323, |
| "learning_rate": 9.180488690050759e-05, |
| "loss": 0.0158, |
| "step": 22560 |
| }, |
| { |
| "grad_norm": 0.30773892998695374, |
| "learning_rate": 9.179581400170671e-05, |
| "loss": 0.0143, |
| "step": 22570 |
| }, |
| { |
| "grad_norm": 0.23257498443126678, |
| "learning_rate": 9.178673653217845e-05, |
| "loss": 0.0124, |
| "step": 22580 |
| }, |
| { |
| "grad_norm": 0.19392426311969757, |
| "learning_rate": 9.177765449291551e-05, |
| "loss": 0.0141, |
| "step": 22590 |
| }, |
| { |
| "grad_norm": 0.220121830701828, |
| "learning_rate": 9.176856788491109e-05, |
| "loss": 0.0161, |
| "step": 22600 |
| }, |
| { |
| "grad_norm": 0.19694368541240692, |
| "learning_rate": 9.175947670915887e-05, |
| "loss": 0.015, |
| "step": 22610 |
| }, |
| { |
| "grad_norm": 0.1852843016386032, |
| "learning_rate": 9.175038096665309e-05, |
| "loss": 0.012, |
| "step": 22620 |
| }, |
| { |
| "grad_norm": 0.1881941854953766, |
| "learning_rate": 9.17412806583884e-05, |
| "loss": 0.0143, |
| "step": 22630 |
| }, |
| { |
| "grad_norm": 0.21659281849861145, |
| "learning_rate": 9.173217578536002e-05, |
| "loss": 0.0142, |
| "step": 22640 |
| }, |
| { |
| "grad_norm": 0.21505822241306305, |
| "learning_rate": 9.172306634856362e-05, |
| "loss": 0.0165, |
| "step": 22650 |
| }, |
| { |
| "grad_norm": 0.19481819868087769, |
| "learning_rate": 9.171395234899545e-05, |
| "loss": 0.0138, |
| "step": 22660 |
| }, |
| { |
| "grad_norm": 0.22649617493152618, |
| "learning_rate": 9.170483378765214e-05, |
| "loss": 0.0192, |
| "step": 22670 |
| }, |
| { |
| "grad_norm": 0.2129245400428772, |
| "learning_rate": 9.169571066553091e-05, |
| "loss": 0.0148, |
| "step": 22680 |
| }, |
| { |
| "grad_norm": 0.1935233324766159, |
| "learning_rate": 9.168658298362946e-05, |
| "loss": 0.0154, |
| "step": 22690 |
| }, |
| { |
| "grad_norm": 0.18985940515995026, |
| "learning_rate": 9.167745074294598e-05, |
| "loss": 0.0147, |
| "step": 22700 |
| }, |
| { |
| "grad_norm": 0.21253089606761932, |
| "learning_rate": 9.166831394447913e-05, |
| "loss": 0.0167, |
| "step": 22710 |
| }, |
| { |
| "grad_norm": 0.21559609472751617, |
| "learning_rate": 9.165917258922812e-05, |
| "loss": 0.0137, |
| "step": 22720 |
| }, |
| { |
| "grad_norm": 0.20335878431797028, |
| "learning_rate": 9.165002667819262e-05, |
| "loss": 0.0142, |
| "step": 22730 |
| }, |
| { |
| "grad_norm": 0.1861836463212967, |
| "learning_rate": 9.164087621237282e-05, |
| "loss": 0.0149, |
| "step": 22740 |
| }, |
| { |
| "grad_norm": 0.20750901103019714, |
| "learning_rate": 9.163172119276942e-05, |
| "loss": 0.0145, |
| "step": 22750 |
| }, |
| { |
| "grad_norm": 0.17172884941101074, |
| "learning_rate": 9.162256162038358e-05, |
| "loss": 0.0129, |
| "step": 22760 |
| }, |
| { |
| "grad_norm": 0.17812736332416534, |
| "learning_rate": 9.161339749621698e-05, |
| "loss": 0.016, |
| "step": 22770 |
| }, |
| { |
| "grad_norm": 0.1545380800962448, |
| "learning_rate": 9.160422882127177e-05, |
| "loss": 0.0142, |
| "step": 22780 |
| }, |
| { |
| "grad_norm": 0.20724381506443024, |
| "learning_rate": 9.159505559655069e-05, |
| "loss": 0.0145, |
| "step": 22790 |
| }, |
| { |
| "grad_norm": 0.21726366877555847, |
| "learning_rate": 9.158587782305684e-05, |
| "loss": 0.0148, |
| "step": 22800 |
| }, |
| { |
| "grad_norm": 0.1817689836025238, |
| "learning_rate": 9.157669550179391e-05, |
| "loss": 0.0172, |
| "step": 22810 |
| }, |
| { |
| "grad_norm": 0.20288459956645966, |
| "learning_rate": 9.156750863376609e-05, |
| "loss": 0.012, |
| "step": 22820 |
| }, |
| { |
| "grad_norm": 0.27603045105934143, |
| "learning_rate": 9.155831721997801e-05, |
| "loss": 0.016, |
| "step": 22830 |
| }, |
| { |
| "grad_norm": 0.1986236870288849, |
| "learning_rate": 9.154912126143484e-05, |
| "loss": 0.0142, |
| "step": 22840 |
| }, |
| { |
| "grad_norm": 0.26990512013435364, |
| "learning_rate": 9.153992075914224e-05, |
| "loss": 0.0169, |
| "step": 22850 |
| }, |
| { |
| "grad_norm": 0.19406422972679138, |
| "learning_rate": 9.153071571410635e-05, |
| "loss": 0.0154, |
| "step": 22860 |
| }, |
| { |
| "grad_norm": 0.20222462713718414, |
| "learning_rate": 9.152150612733384e-05, |
| "loss": 0.014, |
| "step": 22870 |
| }, |
| { |
| "grad_norm": 0.1865585893392563, |
| "learning_rate": 9.151229199983184e-05, |
| "loss": 0.0145, |
| "step": 22880 |
| }, |
| { |
| "grad_norm": 0.1840989887714386, |
| "learning_rate": 9.150307333260802e-05, |
| "loss": 0.0141, |
| "step": 22890 |
| }, |
| { |
| "grad_norm": 0.21253526210784912, |
| "learning_rate": 9.149385012667048e-05, |
| "loss": 0.0168, |
| "step": 22900 |
| }, |
| { |
| "grad_norm": 0.1894034892320633, |
| "learning_rate": 9.148462238302788e-05, |
| "loss": 0.0157, |
| "step": 22910 |
| }, |
| { |
| "grad_norm": 0.17287315428256989, |
| "learning_rate": 9.147539010268936e-05, |
| "loss": 0.0169, |
| "step": 22920 |
| }, |
| { |
| "grad_norm": 0.22205814719200134, |
| "learning_rate": 9.14661532866645e-05, |
| "loss": 0.0151, |
| "step": 22930 |
| }, |
| { |
| "grad_norm": 0.1727629154920578, |
| "learning_rate": 9.145691193596348e-05, |
| "loss": 0.0149, |
| "step": 22940 |
| }, |
| { |
| "grad_norm": 0.21813349425792694, |
| "learning_rate": 9.144766605159691e-05, |
| "loss": 0.0156, |
| "step": 22950 |
| }, |
| { |
| "grad_norm": 0.22872646152973175, |
| "learning_rate": 9.14384156345759e-05, |
| "loss": 0.016, |
| "step": 22960 |
| }, |
| { |
| "grad_norm": 0.18975701928138733, |
| "learning_rate": 9.142916068591204e-05, |
| "loss": 0.0163, |
| "step": 22970 |
| }, |
| { |
| "grad_norm": 0.2046053558588028, |
| "learning_rate": 9.141990120661746e-05, |
| "loss": 0.0141, |
| "step": 22980 |
| }, |
| { |
| "grad_norm": 0.2238152176141739, |
| "learning_rate": 9.141063719770475e-05, |
| "loss": 0.0128, |
| "step": 22990 |
| }, |
| { |
| "grad_norm": 0.20782746374607086, |
| "learning_rate": 9.140136866018704e-05, |
| "loss": 0.0167, |
| "step": 23000 |
| }, |
| { |
| "grad_norm": 0.19436068832874298, |
| "learning_rate": 9.139209559507788e-05, |
| "loss": 0.015, |
| "step": 23010 |
| }, |
| { |
| "grad_norm": 0.19544769823551178, |
| "learning_rate": 9.13828180033914e-05, |
| "loss": 0.0139, |
| "step": 23020 |
| }, |
| { |
| "grad_norm": 0.2249060720205307, |
| "learning_rate": 9.137353588614212e-05, |
| "loss": 0.0127, |
| "step": 23030 |
| }, |
| { |
| "grad_norm": 0.15047655999660492, |
| "learning_rate": 9.136424924434519e-05, |
| "loss": 0.0154, |
| "step": 23040 |
| }, |
| { |
| "grad_norm": 0.2030155062675476, |
| "learning_rate": 9.135495807901615e-05, |
| "loss": 0.0134, |
| "step": 23050 |
| }, |
| { |
| "grad_norm": 0.2389880269765854, |
| "learning_rate": 9.134566239117108e-05, |
| "loss": 0.0157, |
| "step": 23060 |
| }, |
| { |
| "grad_norm": 0.21059682965278625, |
| "learning_rate": 9.13363621818265e-05, |
| "loss": 0.0154, |
| "step": 23070 |
| }, |
| { |
| "grad_norm": 0.2781943678855896, |
| "learning_rate": 9.132705745199953e-05, |
| "loss": 0.0176, |
| "step": 23080 |
| }, |
| { |
| "grad_norm": 0.25491416454315186, |
| "learning_rate": 9.131774820270768e-05, |
| "loss": 0.0169, |
| "step": 23090 |
| }, |
| { |
| "grad_norm": 0.23441153764724731, |
| "learning_rate": 9.130843443496901e-05, |
| "loss": 0.0174, |
| "step": 23100 |
| }, |
| { |
| "grad_norm": 0.1828630417585373, |
| "learning_rate": 9.129911614980206e-05, |
| "loss": 0.0139, |
| "step": 23110 |
| }, |
| { |
| "grad_norm": 0.14871282875537872, |
| "learning_rate": 9.128979334822584e-05, |
| "loss": 0.0133, |
| "step": 23120 |
| }, |
| { |
| "grad_norm": 0.22658561170101166, |
| "learning_rate": 9.128046603125992e-05, |
| "loss": 0.0142, |
| "step": 23130 |
| }, |
| { |
| "grad_norm": 0.22888781130313873, |
| "learning_rate": 9.12711341999243e-05, |
| "loss": 0.0132, |
| "step": 23140 |
| }, |
| { |
| "grad_norm": 0.20940542221069336, |
| "learning_rate": 9.12617978552395e-05, |
| "loss": 0.0149, |
| "step": 23150 |
| }, |
| { |
| "grad_norm": 0.21227090060710907, |
| "learning_rate": 9.12524569982265e-05, |
| "loss": 0.0142, |
| "step": 23160 |
| }, |
| { |
| "grad_norm": 0.24097922444343567, |
| "learning_rate": 9.124311162990684e-05, |
| "loss": 0.0139, |
| "step": 23170 |
| }, |
| { |
| "grad_norm": 0.23145781457424164, |
| "learning_rate": 9.12337617513025e-05, |
| "loss": 0.0149, |
| "step": 23180 |
| }, |
| { |
| "grad_norm": 0.20231445133686066, |
| "learning_rate": 9.122440736343596e-05, |
| "loss": 0.0181, |
| "step": 23190 |
| }, |
| { |
| "grad_norm": 0.17131511867046356, |
| "learning_rate": 9.12150484673302e-05, |
| "loss": 0.0194, |
| "step": 23200 |
| }, |
| { |
| "grad_norm": 0.18629176914691925, |
| "learning_rate": 9.120568506400873e-05, |
| "loss": 0.0138, |
| "step": 23210 |
| }, |
| { |
| "grad_norm": 0.18614289164543152, |
| "learning_rate": 9.119631715449548e-05, |
| "loss": 0.0124, |
| "step": 23220 |
| }, |
| { |
| "grad_norm": 0.17157837748527527, |
| "learning_rate": 9.118694473981493e-05, |
| "loss": 0.0147, |
| "step": 23230 |
| }, |
| { |
| "grad_norm": 0.13805267214775085, |
| "learning_rate": 9.117756782099203e-05, |
| "loss": 0.0139, |
| "step": 23240 |
| }, |
| { |
| "grad_norm": 0.20274634659290314, |
| "learning_rate": 9.11681863990522e-05, |
| "loss": 0.0114, |
| "step": 23250 |
| }, |
| { |
| "grad_norm": 0.19627954065799713, |
| "learning_rate": 9.115880047502142e-05, |
| "loss": 0.0127, |
| "step": 23260 |
| }, |
| { |
| "grad_norm": 0.1853030025959015, |
| "learning_rate": 9.114941004992609e-05, |
| "loss": 0.0138, |
| "step": 23270 |
| }, |
| { |
| "grad_norm": 0.18201330304145813, |
| "learning_rate": 9.114001512479317e-05, |
| "loss": 0.0154, |
| "step": 23280 |
| }, |
| { |
| "grad_norm": 0.2335299700498581, |
| "learning_rate": 9.113061570065003e-05, |
| "loss": 0.0136, |
| "step": 23290 |
| }, |
| { |
| "grad_norm": 0.163480743765831, |
| "learning_rate": 9.112121177852459e-05, |
| "loss": 0.0139, |
| "step": 23300 |
| }, |
| { |
| "grad_norm": 0.22948700189590454, |
| "learning_rate": 9.111180335944527e-05, |
| "loss": 0.0139, |
| "step": 23310 |
| }, |
| { |
| "grad_norm": 0.1871514916419983, |
| "learning_rate": 9.110239044444093e-05, |
| "loss": 0.0152, |
| "step": 23320 |
| }, |
| { |
| "grad_norm": 0.1818545013666153, |
| "learning_rate": 9.109297303454099e-05, |
| "loss": 0.0131, |
| "step": 23330 |
| }, |
| { |
| "grad_norm": 0.16701292991638184, |
| "learning_rate": 9.108355113077526e-05, |
| "loss": 0.0123, |
| "step": 23340 |
| }, |
| { |
| "grad_norm": 0.28000277280807495, |
| "learning_rate": 9.107412473417419e-05, |
| "loss": 0.0144, |
| "step": 23350 |
| }, |
| { |
| "grad_norm": 0.2582252621650696, |
| "learning_rate": 9.106469384576858e-05, |
| "loss": 0.0158, |
| "step": 23360 |
| }, |
| { |
| "grad_norm": 0.17312873899936676, |
| "learning_rate": 9.105525846658978e-05, |
| "loss": 0.0158, |
| "step": 23370 |
| }, |
| { |
| "grad_norm": 0.2704509198665619, |
| "learning_rate": 9.104581859766965e-05, |
| "loss": 0.0177, |
| "step": 23380 |
| }, |
| { |
| "grad_norm": 0.23072083294391632, |
| "learning_rate": 9.10363742400405e-05, |
| "loss": 0.0152, |
| "step": 23390 |
| }, |
| { |
| "grad_norm": 0.16527651250362396, |
| "learning_rate": 9.102692539473518e-05, |
| "loss": 0.0144, |
| "step": 23400 |
| }, |
| { |
| "grad_norm": 0.20676404237747192, |
| "learning_rate": 9.101747206278697e-05, |
| "loss": 0.0159, |
| "step": 23410 |
| }, |
| { |
| "grad_norm": 0.2146616131067276, |
| "learning_rate": 9.100801424522968e-05, |
| "loss": 0.0178, |
| "step": 23420 |
| }, |
| { |
| "grad_norm": 0.2614075839519501, |
| "learning_rate": 9.099855194309762e-05, |
| "loss": 0.0173, |
| "step": 23430 |
| }, |
| { |
| "grad_norm": 0.17789052426815033, |
| "learning_rate": 9.098908515742554e-05, |
| "loss": 0.0135, |
| "step": 23440 |
| }, |
| { |
| "grad_norm": 0.2318316549062729, |
| "learning_rate": 9.097961388924873e-05, |
| "loss": 0.0162, |
| "step": 23450 |
| }, |
| { |
| "grad_norm": 0.20885102450847626, |
| "learning_rate": 9.097013813960298e-05, |
| "loss": 0.0165, |
| "step": 23460 |
| }, |
| { |
| "grad_norm": 0.2531566917896271, |
| "learning_rate": 9.09606579095245e-05, |
| "loss": 0.0157, |
| "step": 23470 |
| }, |
| { |
| "grad_norm": 0.17769184708595276, |
| "learning_rate": 9.095117320005008e-05, |
| "loss": 0.0137, |
| "step": 23480 |
| }, |
| { |
| "grad_norm": 0.19554953277111053, |
| "learning_rate": 9.094168401221691e-05, |
| "loss": 0.0119, |
| "step": 23490 |
| }, |
| { |
| "grad_norm": 0.22157321870326996, |
| "learning_rate": 9.093219034706273e-05, |
| "loss": 0.015, |
| "step": 23500 |
| }, |
| { |
| "grad_norm": 0.18708685040473938, |
| "learning_rate": 9.092269220562577e-05, |
| "loss": 0.0163, |
| "step": 23510 |
| }, |
| { |
| "grad_norm": 0.2525607943534851, |
| "learning_rate": 9.09131895889447e-05, |
| "loss": 0.0153, |
| "step": 23520 |
| }, |
| { |
| "grad_norm": 0.1881437450647354, |
| "learning_rate": 9.090368249805873e-05, |
| "loss": 0.0164, |
| "step": 23530 |
| }, |
| { |
| "grad_norm": 0.2073652744293213, |
| "learning_rate": 9.089417093400754e-05, |
| "loss": 0.0157, |
| "step": 23540 |
| }, |
| { |
| "grad_norm": 0.18113574385643005, |
| "learning_rate": 9.088465489783131e-05, |
| "loss": 0.014, |
| "step": 23550 |
| }, |
| { |
| "grad_norm": 0.2287263572216034, |
| "learning_rate": 9.087513439057068e-05, |
| "loss": 0.0116, |
| "step": 23560 |
| }, |
| { |
| "grad_norm": 0.23755474388599396, |
| "learning_rate": 9.08656094132668e-05, |
| "loss": 0.0138, |
| "step": 23570 |
| }, |
| { |
| "grad_norm": 0.24319809675216675, |
| "learning_rate": 9.085607996696134e-05, |
| "loss": 0.019, |
| "step": 23580 |
| }, |
| { |
| "grad_norm": 0.2438126504421234, |
| "learning_rate": 9.084654605269639e-05, |
| "loss": 0.0143, |
| "step": 23590 |
| }, |
| { |
| "grad_norm": 0.23479968309402466, |
| "learning_rate": 9.083700767151457e-05, |
| "loss": 0.014, |
| "step": 23600 |
| }, |
| { |
| "grad_norm": 0.19559985399246216, |
| "learning_rate": 9.082746482445898e-05, |
| "loss": 0.0138, |
| "step": 23610 |
| }, |
| { |
| "grad_norm": 0.1752758026123047, |
| "learning_rate": 9.081791751257325e-05, |
| "loss": 0.0132, |
| "step": 23620 |
| }, |
| { |
| "grad_norm": 0.2757525146007538, |
| "learning_rate": 9.080836573690142e-05, |
| "loss": 0.0144, |
| "step": 23630 |
| }, |
| { |
| "grad_norm": 0.23443222045898438, |
| "learning_rate": 9.079880949848805e-05, |
| "loss": 0.0145, |
| "step": 23640 |
| }, |
| { |
| "grad_norm": 0.21670211851596832, |
| "learning_rate": 9.078924879837822e-05, |
| "loss": 0.0123, |
| "step": 23650 |
| }, |
| { |
| "grad_norm": 0.21093955636024475, |
| "learning_rate": 9.077968363761747e-05, |
| "loss": 0.0133, |
| "step": 23660 |
| }, |
| { |
| "grad_norm": 0.22188355028629303, |
| "learning_rate": 9.077011401725182e-05, |
| "loss": 0.0149, |
| "step": 23670 |
| }, |
| { |
| "grad_norm": 0.1894949972629547, |
| "learning_rate": 9.07605399383278e-05, |
| "loss": 0.0124, |
| "step": 23680 |
| }, |
| { |
| "grad_norm": 0.22568151354789734, |
| "learning_rate": 9.075096140189243e-05, |
| "loss": 0.0126, |
| "step": 23690 |
| }, |
| { |
| "grad_norm": 0.20633463561534882, |
| "learning_rate": 9.074137840899318e-05, |
| "loss": 0.0143, |
| "step": 23700 |
| }, |
| { |
| "grad_norm": 0.20695094764232635, |
| "learning_rate": 9.073179096067804e-05, |
| "loss": 0.0128, |
| "step": 23710 |
| }, |
| { |
| "grad_norm": 0.177053764462471, |
| "learning_rate": 9.072219905799549e-05, |
| "loss": 0.0155, |
| "step": 23720 |
| }, |
| { |
| "grad_norm": 0.20552749931812286, |
| "learning_rate": 9.071260270199447e-05, |
| "loss": 0.0135, |
| "step": 23730 |
| }, |
| { |
| "grad_norm": 0.2141244113445282, |
| "learning_rate": 9.070300189372441e-05, |
| "loss": 0.0156, |
| "step": 23740 |
| }, |
| { |
| "grad_norm": 0.20599396526813507, |
| "learning_rate": 9.069339663423528e-05, |
| "loss": 0.0136, |
| "step": 23750 |
| }, |
| { |
| "grad_norm": 0.17279891669750214, |
| "learning_rate": 9.068378692457747e-05, |
| "loss": 0.0154, |
| "step": 23760 |
| }, |
| { |
| "grad_norm": 0.20797847211360931, |
| "learning_rate": 9.067417276580189e-05, |
| "loss": 0.014, |
| "step": 23770 |
| }, |
| { |
| "grad_norm": 0.17896415293216705, |
| "learning_rate": 9.066455415895993e-05, |
| "loss": 0.0144, |
| "step": 23780 |
| }, |
| { |
| "grad_norm": 0.18460604548454285, |
| "learning_rate": 9.065493110510346e-05, |
| "loss": 0.0137, |
| "step": 23790 |
| }, |
| { |
| "grad_norm": 0.17211906611919403, |
| "learning_rate": 9.064530360528484e-05, |
| "loss": 0.0142, |
| "step": 23800 |
| }, |
| { |
| "grad_norm": 0.27202412486076355, |
| "learning_rate": 9.063567166055695e-05, |
| "loss": 0.0139, |
| "step": 23810 |
| }, |
| { |
| "grad_norm": 0.2137269675731659, |
| "learning_rate": 9.062603527197308e-05, |
| "loss": 0.0172, |
| "step": 23820 |
| }, |
| { |
| "grad_norm": 0.21203388273715973, |
| "learning_rate": 9.06163944405871e-05, |
| "loss": 0.0124, |
| "step": 23830 |
| }, |
| { |
| "grad_norm": 0.20699001848697662, |
| "learning_rate": 9.060674916745327e-05, |
| "loss": 0.0143, |
| "step": 23840 |
| }, |
| { |
| "grad_norm": 0.18704131245613098, |
| "learning_rate": 9.05970994536264e-05, |
| "loss": 0.0133, |
| "step": 23850 |
| }, |
| { |
| "grad_norm": 0.18146492540836334, |
| "learning_rate": 9.05874453001618e-05, |
| "loss": 0.0129, |
| "step": 23860 |
| }, |
| { |
| "grad_norm": 0.17791873216629028, |
| "learning_rate": 9.057778670811517e-05, |
| "loss": 0.0135, |
| "step": 23870 |
| }, |
| { |
| "grad_norm": 0.16056740283966064, |
| "learning_rate": 9.056812367854281e-05, |
| "loss": 0.013, |
| "step": 23880 |
| }, |
| { |
| "grad_norm": 0.22617898881435394, |
| "learning_rate": 9.055845621250143e-05, |
| "loss": 0.0149, |
| "step": 23890 |
| }, |
| { |
| "grad_norm": 0.1643180102109909, |
| "learning_rate": 9.054878431104825e-05, |
| "loss": 0.0147, |
| "step": 23900 |
| }, |
| { |
| "grad_norm": 0.2128770351409912, |
| "learning_rate": 9.0539107975241e-05, |
| "loss": 0.0133, |
| "step": 23910 |
| }, |
| { |
| "grad_norm": 0.20491258800029755, |
| "learning_rate": 9.052942720613784e-05, |
| "loss": 0.0134, |
| "step": 23920 |
| }, |
| { |
| "grad_norm": 0.241663858294487, |
| "learning_rate": 9.051974200479745e-05, |
| "loss": 0.0122, |
| "step": 23930 |
| }, |
| { |
| "grad_norm": 0.23031777143478394, |
| "learning_rate": 9.051005237227901e-05, |
| "loss": 0.014, |
| "step": 23940 |
| }, |
| { |
| "grad_norm": 0.22506073117256165, |
| "learning_rate": 9.050035830964215e-05, |
| "loss": 0.014, |
| "step": 23950 |
| }, |
| { |
| "grad_norm": 0.19109565019607544, |
| "learning_rate": 9.049065981794698e-05, |
| "loss": 0.0157, |
| "step": 23960 |
| }, |
| { |
| "grad_norm": 0.15611767768859863, |
| "learning_rate": 9.048095689825414e-05, |
| "loss": 0.0114, |
| "step": 23970 |
| }, |
| { |
| "grad_norm": 0.1800951212644577, |
| "learning_rate": 9.047124955162472e-05, |
| "loss": 0.0122, |
| "step": 23980 |
| }, |
| { |
| "grad_norm": 0.1684034764766693, |
| "learning_rate": 9.046153777912028e-05, |
| "loss": 0.0136, |
| "step": 23990 |
| }, |
| { |
| "grad_norm": 0.20641621947288513, |
| "learning_rate": 9.045182158180292e-05, |
| "loss": 0.0139, |
| "step": 24000 |
| }, |
| { |
| "grad_norm": 0.23477306962013245, |
| "learning_rate": 9.044210096073516e-05, |
| "loss": 0.0131, |
| "step": 24010 |
| }, |
| { |
| "grad_norm": 0.21521511673927307, |
| "learning_rate": 9.043237591698004e-05, |
| "loss": 0.0154, |
| "step": 24020 |
| }, |
| { |
| "grad_norm": 0.19954660534858704, |
| "learning_rate": 9.04226464516011e-05, |
| "loss": 0.0134, |
| "step": 24030 |
| }, |
| { |
| "grad_norm": 0.27395713329315186, |
| "learning_rate": 9.041291256566229e-05, |
| "loss": 0.0164, |
| "step": 24040 |
| }, |
| { |
| "grad_norm": 0.23552460968494415, |
| "learning_rate": 9.040317426022814e-05, |
| "loss": 0.0159, |
| "step": 24050 |
| }, |
| { |
| "grad_norm": 0.17470824718475342, |
| "learning_rate": 9.03934315363636e-05, |
| "loss": 0.017, |
| "step": 24060 |
| }, |
| { |
| "grad_norm": 0.20106098055839539, |
| "learning_rate": 9.038368439513409e-05, |
| "loss": 0.0138, |
| "step": 24070 |
| }, |
| { |
| "grad_norm": 0.1621820032596588, |
| "learning_rate": 9.03739328376056e-05, |
| "loss": 0.0128, |
| "step": 24080 |
| }, |
| { |
| "grad_norm": 0.19426368176937103, |
| "learning_rate": 9.036417686484451e-05, |
| "loss": 0.0162, |
| "step": 24090 |
| }, |
| { |
| "grad_norm": 0.1972815990447998, |
| "learning_rate": 9.035441647791773e-05, |
| "loss": 0.0143, |
| "step": 24100 |
| }, |
| { |
| "grad_norm": 0.13212399184703827, |
| "learning_rate": 9.034465167789263e-05, |
| "loss": 0.0133, |
| "step": 24110 |
| }, |
| { |
| "grad_norm": 0.24731220304965973, |
| "learning_rate": 9.033488246583706e-05, |
| "loss": 0.0144, |
| "step": 24120 |
| }, |
| { |
| "grad_norm": 0.20434601604938507, |
| "learning_rate": 9.032510884281941e-05, |
| "loss": 0.0127, |
| "step": 24130 |
| }, |
| { |
| "grad_norm": 0.22800716757774353, |
| "learning_rate": 9.031533080990848e-05, |
| "loss": 0.0135, |
| "step": 24140 |
| }, |
| { |
| "grad_norm": 0.14135698974132538, |
| "learning_rate": 9.030554836817358e-05, |
| "loss": 0.0133, |
| "step": 24150 |
| }, |
| { |
| "grad_norm": 0.18815097212791443, |
| "learning_rate": 9.029576151868451e-05, |
| "loss": 0.014, |
| "step": 24160 |
| }, |
| { |
| "grad_norm": 0.1988460123538971, |
| "learning_rate": 9.028597026251155e-05, |
| "loss": 0.0121, |
| "step": 24170 |
| }, |
| { |
| "grad_norm": 0.22878068685531616, |
| "learning_rate": 9.027617460072547e-05, |
| "loss": 0.017, |
| "step": 24180 |
| }, |
| { |
| "grad_norm": 0.17404484748840332, |
| "learning_rate": 9.026637453439745e-05, |
| "loss": 0.0182, |
| "step": 24190 |
| }, |
| { |
| "grad_norm": 0.18336114287376404, |
| "learning_rate": 9.025657006459927e-05, |
| "loss": 0.0131, |
| "step": 24200 |
| }, |
| { |
| "grad_norm": 0.2783775329589844, |
| "learning_rate": 9.024676119240311e-05, |
| "loss": 0.0146, |
| "step": 24210 |
| }, |
| { |
| "grad_norm": 0.20794598758220673, |
| "learning_rate": 9.023694791888166e-05, |
| "loss": 0.017, |
| "step": 24220 |
| }, |
| { |
| "grad_norm": 0.17894412577152252, |
| "learning_rate": 9.022713024510808e-05, |
| "loss": 0.0145, |
| "step": 24230 |
| }, |
| { |
| "grad_norm": 0.2050257921218872, |
| "learning_rate": 9.021730817215601e-05, |
| "loss": 0.0136, |
| "step": 24240 |
| }, |
| { |
| "grad_norm": 0.20929528772830963, |
| "learning_rate": 9.02074817010996e-05, |
| "loss": 0.0151, |
| "step": 24250 |
| }, |
| { |
| "grad_norm": 0.2159581482410431, |
| "learning_rate": 9.019765083301342e-05, |
| "loss": 0.0139, |
| "step": 24260 |
| }, |
| { |
| "grad_norm": 0.18301215767860413, |
| "learning_rate": 9.01878155689726e-05, |
| "loss": 0.0122, |
| "step": 24270 |
| }, |
| { |
| "grad_norm": 0.2517687678337097, |
| "learning_rate": 9.017797591005268e-05, |
| "loss": 0.0137, |
| "step": 24280 |
| }, |
| { |
| "grad_norm": 0.23124970495700836, |
| "learning_rate": 9.016813185732972e-05, |
| "loss": 0.0178, |
| "step": 24290 |
| }, |
| { |
| "grad_norm": 0.14354126155376434, |
| "learning_rate": 9.015828341188027e-05, |
| "loss": 0.0143, |
| "step": 24300 |
| }, |
| { |
| "grad_norm": 0.18401013314723969, |
| "learning_rate": 9.01484305747813e-05, |
| "loss": 0.0153, |
| "step": 24310 |
| }, |
| { |
| "grad_norm": 0.20167894661426544, |
| "learning_rate": 9.013857334711033e-05, |
| "loss": 0.0132, |
| "step": 24320 |
| }, |
| { |
| "grad_norm": 0.15036405622959137, |
| "learning_rate": 9.012871172994534e-05, |
| "loss": 0.014, |
| "step": 24330 |
| }, |
| { |
| "grad_norm": 0.18220263719558716, |
| "learning_rate": 9.011884572436476e-05, |
| "loss": 0.0147, |
| "step": 24340 |
| }, |
| { |
| "grad_norm": 0.17381809651851654, |
| "learning_rate": 9.010897533144754e-05, |
| "loss": 0.013, |
| "step": 24350 |
| }, |
| { |
| "grad_norm": 0.1628270298242569, |
| "learning_rate": 9.009910055227306e-05, |
| "loss": 0.0151, |
| "step": 24360 |
| }, |
| { |
| "grad_norm": 0.20888406038284302, |
| "learning_rate": 9.008922138792124e-05, |
| "loss": 0.0154, |
| "step": 24370 |
| }, |
| { |
| "grad_norm": 0.1570347249507904, |
| "learning_rate": 9.007933783947244e-05, |
| "loss": 0.0137, |
| "step": 24380 |
| }, |
| { |
| "grad_norm": 0.1836668998003006, |
| "learning_rate": 9.006944990800752e-05, |
| "loss": 0.0151, |
| "step": 24390 |
| }, |
| { |
| "grad_norm": 0.20798632502555847, |
| "learning_rate": 9.005955759460779e-05, |
| "loss": 0.0139, |
| "step": 24400 |
| }, |
| { |
| "grad_norm": 0.17567555606365204, |
| "learning_rate": 9.004966090035508e-05, |
| "loss": 0.015, |
| "step": 24410 |
| }, |
| { |
| "grad_norm": 0.2556035816669464, |
| "learning_rate": 9.003975982633166e-05, |
| "loss": 0.0137, |
| "step": 24420 |
| }, |
| { |
| "grad_norm": 0.21839715540409088, |
| "learning_rate": 9.00298543736203e-05, |
| "loss": 0.0156, |
| "step": 24430 |
| }, |
| { |
| "grad_norm": 0.1876654326915741, |
| "learning_rate": 9.001994454330427e-05, |
| "loss": 0.0172, |
| "step": 24440 |
| }, |
| { |
| "grad_norm": 0.20417526364326477, |
| "learning_rate": 9.001003033646727e-05, |
| "loss": 0.0154, |
| "step": 24450 |
| }, |
| { |
| "grad_norm": 0.16093535721302032, |
| "learning_rate": 9.00001117541935e-05, |
| "loss": 0.0141, |
| "step": 24460 |
| }, |
| { |
| "grad_norm": 0.20546258985996246, |
| "learning_rate": 8.999018879756764e-05, |
| "loss": 0.0131, |
| "step": 24470 |
| }, |
| { |
| "grad_norm": 0.13915178179740906, |
| "learning_rate": 8.998026146767487e-05, |
| "loss": 0.0135, |
| "step": 24480 |
| }, |
| { |
| "grad_norm": 0.2658989429473877, |
| "learning_rate": 8.99703297656008e-05, |
| "loss": 0.0164, |
| "step": 24490 |
| }, |
| { |
| "grad_norm": 0.2229062020778656, |
| "learning_rate": 8.996039369243156e-05, |
| "loss": 0.0179, |
| "step": 24500 |
| }, |
| { |
| "grad_norm": 0.22348475456237793, |
| "learning_rate": 8.995045324925378e-05, |
| "loss": 0.0149, |
| "step": 24510 |
| }, |
| { |
| "grad_norm": 0.16096004843711853, |
| "learning_rate": 8.994050843715448e-05, |
| "loss": 0.0141, |
| "step": 24520 |
| }, |
| { |
| "grad_norm": 0.1520673632621765, |
| "learning_rate": 8.993055925722121e-05, |
| "loss": 0.0155, |
| "step": 24530 |
| }, |
| { |
| "grad_norm": 0.19288542866706848, |
| "learning_rate": 8.992060571054202e-05, |
| "loss": 0.0176, |
| "step": 24540 |
| }, |
| { |
| "grad_norm": 0.24025534093379974, |
| "learning_rate": 8.991064779820542e-05, |
| "loss": 0.0146, |
| "step": 24550 |
| }, |
| { |
| "grad_norm": 0.20292890071868896, |
| "learning_rate": 8.990068552130036e-05, |
| "loss": 0.0154, |
| "step": 24560 |
| }, |
| { |
| "grad_norm": 0.22602087259292603, |
| "learning_rate": 8.989071888091634e-05, |
| "loss": 0.0174, |
| "step": 24570 |
| }, |
| { |
| "grad_norm": 0.22065483033657074, |
| "learning_rate": 8.988074787814329e-05, |
| "loss": 0.013, |
| "step": 24580 |
| }, |
| { |
| "grad_norm": 0.22227178514003754, |
| "learning_rate": 8.987077251407158e-05, |
| "loss": 0.012, |
| "step": 24590 |
| }, |
| { |
| "grad_norm": 0.2217588573694229, |
| "learning_rate": 8.986079278979216e-05, |
| "loss": 0.0164, |
| "step": 24600 |
| }, |
| { |
| "grad_norm": 0.19181472063064575, |
| "learning_rate": 8.985080870639635e-05, |
| "loss": 0.0145, |
| "step": 24610 |
| }, |
| { |
| "grad_norm": 0.22570356726646423, |
| "learning_rate": 8.984082026497603e-05, |
| "loss": 0.015, |
| "step": 24620 |
| }, |
| { |
| "grad_norm": 0.17008230090141296, |
| "learning_rate": 8.98308274666235e-05, |
| "loss": 0.0126, |
| "step": 24630 |
| }, |
| { |
| "grad_norm": 0.20422600209712982, |
| "learning_rate": 8.982083031243155e-05, |
| "loss": 0.0127, |
| "step": 24640 |
| }, |
| { |
| "grad_norm": 0.1393459290266037, |
| "learning_rate": 8.98108288034935e-05, |
| "loss": 0.0128, |
| "step": 24650 |
| }, |
| { |
| "grad_norm": 0.24756769835948944, |
| "learning_rate": 8.980082294090305e-05, |
| "loss": 0.0148, |
| "step": 24660 |
| }, |
| { |
| "grad_norm": 0.170320063829422, |
| "learning_rate": 8.979081272575443e-05, |
| "loss": 0.0151, |
| "step": 24670 |
| }, |
| { |
| "grad_norm": 0.20775482058525085, |
| "learning_rate": 8.978079815914236e-05, |
| "loss": 0.0149, |
| "step": 24680 |
| }, |
| { |
| "grad_norm": 0.20452362298965454, |
| "learning_rate": 8.977077924216202e-05, |
| "loss": 0.0161, |
| "step": 24690 |
| }, |
| { |
| "grad_norm": 0.1973673403263092, |
| "learning_rate": 8.976075597590905e-05, |
| "loss": 0.0137, |
| "step": 24700 |
| }, |
| { |
| "grad_norm": 0.23274099826812744, |
| "learning_rate": 8.975072836147958e-05, |
| "loss": 0.0132, |
| "step": 24710 |
| }, |
| { |
| "grad_norm": 0.1778196394443512, |
| "learning_rate": 8.974069639997025e-05, |
| "loss": 0.0172, |
| "step": 24720 |
| }, |
| { |
| "grad_norm": 0.22104007005691528, |
| "learning_rate": 8.973066009247808e-05, |
| "loss": 0.0141, |
| "step": 24730 |
| }, |
| { |
| "grad_norm": 0.15248070657253265, |
| "learning_rate": 8.972061944010066e-05, |
| "loss": 0.0141, |
| "step": 24740 |
| }, |
| { |
| "grad_norm": 0.2708389163017273, |
| "learning_rate": 8.971057444393603e-05, |
| "loss": 0.0126, |
| "step": 24750 |
| }, |
| { |
| "grad_norm": 0.18148130178451538, |
| "learning_rate": 8.970052510508268e-05, |
| "loss": 0.0179, |
| "step": 24760 |
| }, |
| { |
| "grad_norm": 0.16653865575790405, |
| "learning_rate": 8.969047142463959e-05, |
| "loss": 0.0117, |
| "step": 24770 |
| }, |
| { |
| "grad_norm": 0.2350323647260666, |
| "learning_rate": 8.968041340370621e-05, |
| "loss": 0.0134, |
| "step": 24780 |
| }, |
| { |
| "grad_norm": 0.18603160977363586, |
| "learning_rate": 8.96703510433825e-05, |
| "loss": 0.016, |
| "step": 24790 |
| }, |
| { |
| "grad_norm": 0.2362491935491562, |
| "learning_rate": 8.966028434476883e-05, |
| "loss": 0.0146, |
| "step": 24800 |
| }, |
| { |
| "grad_norm": 0.2748473286628723, |
| "learning_rate": 8.96502133089661e-05, |
| "loss": 0.0163, |
| "step": 24810 |
| }, |
| { |
| "grad_norm": 0.216475710272789, |
| "learning_rate": 8.964013793707564e-05, |
| "loss": 0.0136, |
| "step": 24820 |
| }, |
| { |
| "grad_norm": 0.223602756857872, |
| "learning_rate": 8.963005823019932e-05, |
| "loss": 0.0135, |
| "step": 24830 |
| }, |
| { |
| "grad_norm": 0.21306408941745758, |
| "learning_rate": 8.961997418943939e-05, |
| "loss": 0.0124, |
| "step": 24840 |
| }, |
| { |
| "grad_norm": 0.159842848777771, |
| "learning_rate": 8.960988581589865e-05, |
| "loss": 0.0134, |
| "step": 24850 |
| }, |
| { |
| "grad_norm": 0.2029435783624649, |
| "learning_rate": 8.959979311068037e-05, |
| "loss": 0.0114, |
| "step": 24860 |
| }, |
| { |
| "grad_norm": 0.23242947459220886, |
| "learning_rate": 8.958969607488823e-05, |
| "loss": 0.0114, |
| "step": 24870 |
| }, |
| { |
| "grad_norm": 0.22794553637504578, |
| "learning_rate": 8.957959470962647e-05, |
| "loss": 0.0135, |
| "step": 24880 |
| }, |
| { |
| "grad_norm": 0.256059855222702, |
| "learning_rate": 8.956948901599971e-05, |
| "loss": 0.0117, |
| "step": 24890 |
| }, |
| { |
| "grad_norm": 0.20883019268512726, |
| "learning_rate": 8.955937899511315e-05, |
| "loss": 0.0143, |
| "step": 24900 |
| }, |
| { |
| "grad_norm": 0.15798425674438477, |
| "learning_rate": 8.954926464807238e-05, |
| "loss": 0.0135, |
| "step": 24910 |
| }, |
| { |
| "grad_norm": 0.19378982484340668, |
| "learning_rate": 8.953914597598347e-05, |
| "loss": 0.0165, |
| "step": 24920 |
| }, |
| { |
| "grad_norm": 0.22431515157222748, |
| "learning_rate": 8.952902297995303e-05, |
| "loss": 0.0145, |
| "step": 24930 |
| }, |
| { |
| "grad_norm": 0.20018118619918823, |
| "learning_rate": 8.951889566108804e-05, |
| "loss": 0.0126, |
| "step": 24940 |
| }, |
| { |
| "grad_norm": 0.1766684353351593, |
| "learning_rate": 8.950876402049606e-05, |
| "loss": 0.0136, |
| "step": 24950 |
| }, |
| { |
| "grad_norm": 0.17362891137599945, |
| "learning_rate": 8.949862805928504e-05, |
| "loss": 0.0143, |
| "step": 24960 |
| }, |
| { |
| "grad_norm": 0.2080824375152588, |
| "learning_rate": 8.948848777856343e-05, |
| "loss": 0.0123, |
| "step": 24970 |
| }, |
| { |
| "grad_norm": 0.25880157947540283, |
| "learning_rate": 8.947834317944017e-05, |
| "loss": 0.0168, |
| "step": 24980 |
| }, |
| { |
| "grad_norm": 0.14517757296562195, |
| "learning_rate": 8.946819426302466e-05, |
| "loss": 0.0147, |
| "step": 24990 |
| }, |
| { |
| "grad_norm": 0.23001737892627716, |
| "learning_rate": 8.945804103042676e-05, |
| "loss": 0.0145, |
| "step": 25000 |
| }, |
| { |
| "grad_norm": 0.16731934249401093, |
| "learning_rate": 8.944788348275681e-05, |
| "loss": 0.0138, |
| "step": 25010 |
| }, |
| { |
| "grad_norm": 0.2097262442111969, |
| "learning_rate": 8.943772162112565e-05, |
| "loss": 0.0133, |
| "step": 25020 |
| }, |
| { |
| "grad_norm": 0.21776002645492554, |
| "learning_rate": 8.942755544664454e-05, |
| "loss": 0.0144, |
| "step": 25030 |
| }, |
| { |
| "grad_norm": 0.21143537759780884, |
| "learning_rate": 8.941738496042525e-05, |
| "loss": 0.012, |
| "step": 25040 |
| }, |
| { |
| "grad_norm": 0.20147302746772766, |
| "learning_rate": 8.940721016357999e-05, |
| "loss": 0.014, |
| "step": 25050 |
| }, |
| { |
| "grad_norm": 0.17477922141551971, |
| "learning_rate": 8.939703105722148e-05, |
| "loss": 0.0149, |
| "step": 25060 |
| }, |
| { |
| "grad_norm": 0.23321568965911865, |
| "learning_rate": 8.93868476424629e-05, |
| "loss": 0.0179, |
| "step": 25070 |
| }, |
| { |
| "grad_norm": 0.19054847955703735, |
| "learning_rate": 8.937665992041786e-05, |
| "loss": 0.0132, |
| "step": 25080 |
| }, |
| { |
| "grad_norm": 0.19033202528953552, |
| "learning_rate": 8.93664678922005e-05, |
| "loss": 0.0138, |
| "step": 25090 |
| }, |
| { |
| "grad_norm": 0.21861092746257782, |
| "learning_rate": 8.93562715589254e-05, |
| "loss": 0.0152, |
| "step": 25100 |
| }, |
| { |
| "grad_norm": 0.1533270925283432, |
| "learning_rate": 8.934607092170762e-05, |
| "loss": 0.014, |
| "step": 25110 |
| }, |
| { |
| "grad_norm": 0.1720176637172699, |
| "learning_rate": 8.933586598166266e-05, |
| "loss": 0.0155, |
| "step": 25120 |
| }, |
| { |
| "grad_norm": 0.17896032333374023, |
| "learning_rate": 8.932565673990655e-05, |
| "loss": 0.0143, |
| "step": 25130 |
| }, |
| { |
| "grad_norm": 0.17787151038646698, |
| "learning_rate": 8.931544319755574e-05, |
| "loss": 0.0153, |
| "step": 25140 |
| }, |
| { |
| "grad_norm": 0.20071110129356384, |
| "learning_rate": 8.930522535572718e-05, |
| "loss": 0.0133, |
| "step": 25150 |
| }, |
| { |
| "grad_norm": 0.18091028928756714, |
| "learning_rate": 8.929500321553826e-05, |
| "loss": 0.0122, |
| "step": 25160 |
| }, |
| { |
| "grad_norm": 0.19485972821712494, |
| "learning_rate": 8.928477677810686e-05, |
| "loss": 0.0127, |
| "step": 25170 |
| }, |
| { |
| "grad_norm": 0.2231544554233551, |
| "learning_rate": 8.927454604455137e-05, |
| "loss": 0.0139, |
| "step": 25180 |
| }, |
| { |
| "grad_norm": 0.23672042787075043, |
| "learning_rate": 8.926431101599053e-05, |
| "loss": 0.014, |
| "step": 25190 |
| }, |
| { |
| "grad_norm": 0.18106228113174438, |
| "learning_rate": 8.925407169354369e-05, |
| "loss": 0.0116, |
| "step": 25200 |
| }, |
| { |
| "grad_norm": 0.16468533873558044, |
| "learning_rate": 8.92438280783306e-05, |
| "loss": 0.0113, |
| "step": 25210 |
| }, |
| { |
| "grad_norm": 0.2718585729598999, |
| "learning_rate": 8.923358017147146e-05, |
| "loss": 0.0163, |
| "step": 25220 |
| }, |
| { |
| "grad_norm": 0.2014102339744568, |
| "learning_rate": 8.922332797408697e-05, |
| "loss": 0.0144, |
| "step": 25230 |
| }, |
| { |
| "grad_norm": 0.2678363621234894, |
| "learning_rate": 8.921307148729831e-05, |
| "loss": 0.0137, |
| "step": 25240 |
| }, |
| { |
| "grad_norm": 0.17882150411605835, |
| "learning_rate": 8.920281071222712e-05, |
| "loss": 0.0121, |
| "step": 25250 |
| }, |
| { |
| "grad_norm": 0.24254170060157776, |
| "learning_rate": 8.919254564999548e-05, |
| "loss": 0.0122, |
| "step": 25260 |
| }, |
| { |
| "grad_norm": 0.22848983108997345, |
| "learning_rate": 8.918227630172598e-05, |
| "loss": 0.0148, |
| "step": 25270 |
| }, |
| { |
| "grad_norm": 0.2279837429523468, |
| "learning_rate": 8.917200266854165e-05, |
| "loss": 0.0133, |
| "step": 25280 |
| }, |
| { |
| "grad_norm": 0.2629374861717224, |
| "learning_rate": 8.9161724751566e-05, |
| "loss": 0.0159, |
| "step": 25290 |
| }, |
| { |
| "grad_norm": 0.18192367255687714, |
| "learning_rate": 8.915144255192302e-05, |
| "loss": 0.0143, |
| "step": 25300 |
| }, |
| { |
| "grad_norm": 0.2204187661409378, |
| "learning_rate": 8.914115607073714e-05, |
| "loss": 0.017, |
| "step": 25310 |
| }, |
| { |
| "grad_norm": 0.15653793513774872, |
| "learning_rate": 8.913086530913327e-05, |
| "loss": 0.0153, |
| "step": 25320 |
| }, |
| { |
| "grad_norm": 0.19547070562839508, |
| "learning_rate": 8.912057026823681e-05, |
| "loss": 0.0118, |
| "step": 25330 |
| }, |
| { |
| "grad_norm": 0.26632529497146606, |
| "learning_rate": 8.91102709491736e-05, |
| "loss": 0.013, |
| "step": 25340 |
| }, |
| { |
| "grad_norm": 0.23742526769638062, |
| "learning_rate": 8.909996735306996e-05, |
| "loss": 0.012, |
| "step": 25350 |
| }, |
| { |
| "grad_norm": 0.27239763736724854, |
| "learning_rate": 8.908965948105268e-05, |
| "loss": 0.0134, |
| "step": 25360 |
| }, |
| { |
| "grad_norm": 0.18642865121364594, |
| "learning_rate": 8.907934733424901e-05, |
| "loss": 0.0148, |
| "step": 25370 |
| }, |
| { |
| "grad_norm": 0.20425890386104584, |
| "learning_rate": 8.906903091378666e-05, |
| "loss": 0.0143, |
| "step": 25380 |
| }, |
| { |
| "grad_norm": 0.18161271512508392, |
| "learning_rate": 8.905871022079384e-05, |
| "loss": 0.0151, |
| "step": 25390 |
| }, |
| { |
| "grad_norm": 0.17329958081245422, |
| "learning_rate": 8.90483852563992e-05, |
| "loss": 0.0148, |
| "step": 25400 |
| }, |
| { |
| "grad_norm": 0.15716147422790527, |
| "learning_rate": 8.903805602173185e-05, |
| "loss": 0.0187, |
| "step": 25410 |
| }, |
| { |
| "grad_norm": 0.1766807734966278, |
| "learning_rate": 8.902772251792137e-05, |
| "loss": 0.0156, |
| "step": 25420 |
| }, |
| { |
| "grad_norm": 0.19636660814285278, |
| "learning_rate": 8.901738474609786e-05, |
| "loss": 0.0132, |
| "step": 25430 |
| }, |
| { |
| "grad_norm": 0.20554254949092865, |
| "learning_rate": 8.900704270739179e-05, |
| "loss": 0.016, |
| "step": 25440 |
| }, |
| { |
| "grad_norm": 0.14951704442501068, |
| "learning_rate": 8.89966964029342e-05, |
| "loss": 0.0111, |
| "step": 25450 |
| }, |
| { |
| "grad_norm": 0.21583913266658783, |
| "learning_rate": 8.898634583385652e-05, |
| "loss": 0.0156, |
| "step": 25460 |
| }, |
| { |
| "grad_norm": 0.2214123010635376, |
| "learning_rate": 8.897599100129065e-05, |
| "loss": 0.018, |
| "step": 25470 |
| }, |
| { |
| "grad_norm": 0.19157958030700684, |
| "learning_rate": 8.896563190636903e-05, |
| "loss": 0.0161, |
| "step": 25480 |
| }, |
| { |
| "grad_norm": 0.1713401824235916, |
| "learning_rate": 8.895526855022448e-05, |
| "loss": 0.0155, |
| "step": 25490 |
| }, |
| { |
| "grad_norm": 0.22314561903476715, |
| "learning_rate": 8.894490093399033e-05, |
| "loss": 0.014, |
| "step": 25500 |
| }, |
| { |
| "grad_norm": 0.1553162783384323, |
| "learning_rate": 8.893452905880035e-05, |
| "loss": 0.0143, |
| "step": 25510 |
| }, |
| { |
| "grad_norm": 0.19584983587265015, |
| "learning_rate": 8.892415292578883e-05, |
| "loss": 0.0177, |
| "step": 25520 |
| }, |
| { |
| "grad_norm": 0.20513120293617249, |
| "learning_rate": 8.891377253609046e-05, |
| "loss": 0.0163, |
| "step": 25530 |
| }, |
| { |
| "grad_norm": 0.1911878138780594, |
| "learning_rate": 8.890338789084043e-05, |
| "loss": 0.0161, |
| "step": 25540 |
| }, |
| { |
| "grad_norm": 0.18822386860847473, |
| "learning_rate": 8.88929989911744e-05, |
| "loss": 0.011, |
| "step": 25550 |
| }, |
| { |
| "grad_norm": 0.1524885892868042, |
| "learning_rate": 8.888260583822847e-05, |
| "loss": 0.0129, |
| "step": 25560 |
| }, |
| { |
| "grad_norm": 0.2081162929534912, |
| "learning_rate": 8.887220843313921e-05, |
| "loss": 0.0139, |
| "step": 25570 |
| }, |
| { |
| "grad_norm": 0.22650867700576782, |
| "learning_rate": 8.88618067770437e-05, |
| "loss": 0.0155, |
| "step": 25580 |
| }, |
| { |
| "grad_norm": 0.21249236166477203, |
| "learning_rate": 8.885140087107942e-05, |
| "loss": 0.0163, |
| "step": 25590 |
| }, |
| { |
| "grad_norm": 0.1845710575580597, |
| "learning_rate": 8.884099071638436e-05, |
| "loss": 0.0145, |
| "step": 25600 |
| }, |
| { |
| "grad_norm": 0.20314599573612213, |
| "learning_rate": 8.883057631409695e-05, |
| "loss": 0.012, |
| "step": 25610 |
| }, |
| { |
| "grad_norm": 0.22037655115127563, |
| "learning_rate": 8.882015766535608e-05, |
| "loss": 0.0141, |
| "step": 25620 |
| }, |
| { |
| "grad_norm": 0.22149570286273956, |
| "learning_rate": 8.880973477130115e-05, |
| "loss": 0.0156, |
| "step": 25630 |
| }, |
| { |
| "grad_norm": 0.19525587558746338, |
| "learning_rate": 8.879930763307197e-05, |
| "loss": 0.0133, |
| "step": 25640 |
| }, |
| { |
| "grad_norm": 0.17419621348381042, |
| "learning_rate": 8.878887625180884e-05, |
| "loss": 0.0137, |
| "step": 25650 |
| }, |
| { |
| "grad_norm": 0.25345706939697266, |
| "learning_rate": 8.877844062865253e-05, |
| "loss": 0.0144, |
| "step": 25660 |
| }, |
| { |
| "grad_norm": 0.2026941478252411, |
| "learning_rate": 8.876800076474424e-05, |
| "loss": 0.0191, |
| "step": 25670 |
| }, |
| { |
| "grad_norm": 0.19127890467643738, |
| "learning_rate": 8.875755666122568e-05, |
| "loss": 0.0156, |
| "step": 25680 |
| }, |
| { |
| "grad_norm": 0.22991883754730225, |
| "learning_rate": 8.8747108319239e-05, |
| "loss": 0.0115, |
| "step": 25690 |
| }, |
| { |
| "grad_norm": 0.18754807114601135, |
| "learning_rate": 8.87366557399268e-05, |
| "loss": 0.0151, |
| "step": 25700 |
| }, |
| { |
| "grad_norm": 0.18135304749011993, |
| "learning_rate": 8.872619892443217e-05, |
| "loss": 0.0151, |
| "step": 25710 |
| }, |
| { |
| "grad_norm": 0.16874587535858154, |
| "learning_rate": 8.871573787389865e-05, |
| "loss": 0.013, |
| "step": 25720 |
| }, |
| { |
| "grad_norm": 0.1952400803565979, |
| "learning_rate": 8.870527258947024e-05, |
| "loss": 0.0152, |
| "step": 25730 |
| }, |
| { |
| "grad_norm": 0.1622031182050705, |
| "learning_rate": 8.869480307229143e-05, |
| "loss": 0.0159, |
| "step": 25740 |
| }, |
| { |
| "grad_norm": 0.21519528329372406, |
| "learning_rate": 8.868432932350712e-05, |
| "loss": 0.0149, |
| "step": 25750 |
| }, |
| { |
| "grad_norm": 0.2549628019332886, |
| "learning_rate": 8.867385134426272e-05, |
| "loss": 0.0148, |
| "step": 25760 |
| }, |
| { |
| "grad_norm": 0.20081402361392975, |
| "learning_rate": 8.866336913570407e-05, |
| "loss": 0.0128, |
| "step": 25770 |
| }, |
| { |
| "grad_norm": 0.2072092890739441, |
| "learning_rate": 8.865288269897751e-05, |
| "loss": 0.0147, |
| "step": 25780 |
| }, |
| { |
| "grad_norm": 0.2012583464384079, |
| "learning_rate": 8.864239203522981e-05, |
| "loss": 0.015, |
| "step": 25790 |
| }, |
| { |
| "grad_norm": 0.2645418643951416, |
| "learning_rate": 8.863189714560822e-05, |
| "loss": 0.0119, |
| "step": 25800 |
| }, |
| { |
| "grad_norm": 0.1994146853685379, |
| "learning_rate": 8.862139803126043e-05, |
| "loss": 0.0136, |
| "step": 25810 |
| }, |
| { |
| "grad_norm": 0.16972285509109497, |
| "learning_rate": 8.861089469333463e-05, |
| "loss": 0.0137, |
| "step": 25820 |
| }, |
| { |
| "grad_norm": 0.18160787224769592, |
| "learning_rate": 8.860038713297944e-05, |
| "loss": 0.0157, |
| "step": 25830 |
| }, |
| { |
| "grad_norm": 0.15314006805419922, |
| "learning_rate": 8.858987535134394e-05, |
| "loss": 0.0137, |
| "step": 25840 |
| }, |
| { |
| "grad_norm": 0.12870992720127106, |
| "learning_rate": 8.857935934957769e-05, |
| "loss": 0.0137, |
| "step": 25850 |
| }, |
| { |
| "grad_norm": 0.19985873997211456, |
| "learning_rate": 8.856883912883071e-05, |
| "loss": 0.0175, |
| "step": 25860 |
| }, |
| { |
| "grad_norm": 0.1666407734155655, |
| "learning_rate": 8.855831469025346e-05, |
| "loss": 0.0122, |
| "step": 25870 |
| }, |
| { |
| "grad_norm": 0.14283858239650726, |
| "learning_rate": 8.854778603499689e-05, |
| "loss": 0.017, |
| "step": 25880 |
| }, |
| { |
| "grad_norm": 0.203786700963974, |
| "learning_rate": 8.85372531642124e-05, |
| "loss": 0.0135, |
| "step": 25890 |
| }, |
| { |
| "grad_norm": 0.20282699167728424, |
| "learning_rate": 8.852671607905185e-05, |
| "loss": 0.0134, |
| "step": 25900 |
| }, |
| { |
| "grad_norm": 0.22731496393680573, |
| "learning_rate": 8.851617478066754e-05, |
| "loss": 0.0158, |
| "step": 25910 |
| }, |
| { |
| "grad_norm": 0.19563531875610352, |
| "learning_rate": 8.850562927021227e-05, |
| "loss": 0.0149, |
| "step": 25920 |
| }, |
| { |
| "grad_norm": 0.1704259216785431, |
| "learning_rate": 8.849507954883928e-05, |
| "loss": 0.0113, |
| "step": 25930 |
| }, |
| { |
| "grad_norm": 0.1772235631942749, |
| "learning_rate": 8.848452561770226e-05, |
| "loss": 0.0113, |
| "step": 25940 |
| }, |
| { |
| "grad_norm": 0.19219110906124115, |
| "learning_rate": 8.847396747795538e-05, |
| "loss": 0.0138, |
| "step": 25950 |
| }, |
| { |
| "grad_norm": 0.16359460353851318, |
| "learning_rate": 8.846340513075327e-05, |
| "loss": 0.0125, |
| "step": 25960 |
| }, |
| { |
| "grad_norm": 0.15491141378879547, |
| "learning_rate": 8.845283857725099e-05, |
| "loss": 0.0109, |
| "step": 25970 |
| }, |
| { |
| "grad_norm": 0.1785901039838791, |
| "learning_rate": 8.844226781860409e-05, |
| "loss": 0.0134, |
| "step": 25980 |
| }, |
| { |
| "grad_norm": 0.21501228213310242, |
| "learning_rate": 8.84316928559686e-05, |
| "loss": 0.013, |
| "step": 25990 |
| }, |
| { |
| "grad_norm": 0.20008786022663116, |
| "learning_rate": 8.842111369050094e-05, |
| "loss": 0.012, |
| "step": 26000 |
| }, |
| { |
| "grad_norm": 0.21582910418510437, |
| "learning_rate": 8.841053032335808e-05, |
| "loss": 0.0189, |
| "step": 26010 |
| }, |
| { |
| "grad_norm": 0.24350406229496002, |
| "learning_rate": 8.839994275569735e-05, |
| "loss": 0.0133, |
| "step": 26020 |
| }, |
| { |
| "grad_norm": 0.20001371204853058, |
| "learning_rate": 8.838935098867662e-05, |
| "loss": 0.0166, |
| "step": 26030 |
| }, |
| { |
| "grad_norm": 0.27071765065193176, |
| "learning_rate": 8.837875502345418e-05, |
| "loss": 0.0133, |
| "step": 26040 |
| }, |
| { |
| "grad_norm": 0.22188922762870789, |
| "learning_rate": 8.83681548611888e-05, |
| "loss": 0.012, |
| "step": 26050 |
| }, |
| { |
| "grad_norm": 0.20551364123821259, |
| "learning_rate": 8.835755050303969e-05, |
| "loss": 0.0133, |
| "step": 26060 |
| }, |
| { |
| "grad_norm": 0.19251608848571777, |
| "learning_rate": 8.834694195016653e-05, |
| "loss": 0.0151, |
| "step": 26070 |
| }, |
| { |
| "grad_norm": 0.1723404973745346, |
| "learning_rate": 8.833632920372942e-05, |
| "loss": 0.0137, |
| "step": 26080 |
| }, |
| { |
| "grad_norm": 0.21637320518493652, |
| "learning_rate": 8.832571226488903e-05, |
| "loss": 0.0153, |
| "step": 26090 |
| }, |
| { |
| "grad_norm": 0.2538270056247711, |
| "learning_rate": 8.831509113480634e-05, |
| "loss": 0.0149, |
| "step": 26100 |
| }, |
| { |
| "grad_norm": 0.17992226779460907, |
| "learning_rate": 8.83044658146429e-05, |
| "loss": 0.0133, |
| "step": 26110 |
| }, |
| { |
| "grad_norm": 0.2082221955060959, |
| "learning_rate": 8.829383630556067e-05, |
| "loss": 0.0122, |
| "step": 26120 |
| }, |
| { |
| "grad_norm": 0.20343394577503204, |
| "learning_rate": 8.828320260872207e-05, |
| "loss": 0.0139, |
| "step": 26130 |
| }, |
| { |
| "grad_norm": 0.17990347743034363, |
| "learning_rate": 8.827256472529e-05, |
| "loss": 0.0128, |
| "step": 26140 |
| }, |
| { |
| "grad_norm": 0.2202722579240799, |
| "learning_rate": 8.826192265642778e-05, |
| "loss": 0.0157, |
| "step": 26150 |
| }, |
| { |
| "grad_norm": 0.18976671993732452, |
| "learning_rate": 8.825127640329923e-05, |
| "loss": 0.0119, |
| "step": 26160 |
| }, |
| { |
| "grad_norm": 0.21775297820568085, |
| "learning_rate": 8.824062596706861e-05, |
| "loss": 0.0164, |
| "step": 26170 |
| }, |
| { |
| "grad_norm": 0.19183680415153503, |
| "learning_rate": 8.822997134890062e-05, |
| "loss": 0.0122, |
| "step": 26180 |
| }, |
| { |
| "grad_norm": 0.1663697361946106, |
| "learning_rate": 8.821931254996044e-05, |
| "loss": 0.0133, |
| "step": 26190 |
| }, |
| { |
| "grad_norm": 0.1694713979959488, |
| "learning_rate": 8.82086495714137e-05, |
| "loss": 0.0154, |
| "step": 26200 |
| }, |
| { |
| "grad_norm": 0.18789827823638916, |
| "learning_rate": 8.81979824144265e-05, |
| "loss": 0.0162, |
| "step": 26210 |
| }, |
| { |
| "grad_norm": 0.21716323494911194, |
| "learning_rate": 8.818731108016536e-05, |
| "loss": 0.0142, |
| "step": 26220 |
| }, |
| { |
| "grad_norm": 0.20898616313934326, |
| "learning_rate": 8.81766355697973e-05, |
| "loss": 0.0176, |
| "step": 26230 |
| }, |
| { |
| "grad_norm": 0.20870767533779144, |
| "learning_rate": 8.816595588448977e-05, |
| "loss": 0.0129, |
| "step": 26240 |
| }, |
| { |
| "grad_norm": 0.18642646074295044, |
| "learning_rate": 8.81552720254107e-05, |
| "loss": 0.0113, |
| "step": 26250 |
| }, |
| { |
| "grad_norm": 0.20856960117816925, |
| "learning_rate": 8.814458399372842e-05, |
| "loss": 0.0135, |
| "step": 26260 |
| }, |
| { |
| "grad_norm": 0.16934159398078918, |
| "learning_rate": 8.813389179061181e-05, |
| "loss": 0.0123, |
| "step": 26270 |
| }, |
| { |
| "grad_norm": 0.19757625460624695, |
| "learning_rate": 8.812319541723012e-05, |
| "loss": 0.0105, |
| "step": 26280 |
| }, |
| { |
| "grad_norm": 0.16270165145397186, |
| "learning_rate": 8.811249487475309e-05, |
| "loss": 0.0122, |
| "step": 26290 |
| }, |
| { |
| "grad_norm": 0.2062361240386963, |
| "learning_rate": 8.810179016435092e-05, |
| "loss": 0.0152, |
| "step": 26300 |
| }, |
| { |
| "grad_norm": 0.19589756429195404, |
| "learning_rate": 8.809108128719428e-05, |
| "loss": 0.0121, |
| "step": 26310 |
| }, |
| { |
| "grad_norm": 0.12294069677591324, |
| "learning_rate": 8.808036824445424e-05, |
| "loss": 0.0146, |
| "step": 26320 |
| }, |
| { |
| "grad_norm": 0.17404891550540924, |
| "learning_rate": 8.806965103730238e-05, |
| "loss": 0.0153, |
| "step": 26330 |
| }, |
| { |
| "grad_norm": 0.2562452256679535, |
| "learning_rate": 8.805892966691074e-05, |
| "loss": 0.0143, |
| "step": 26340 |
| }, |
| { |
| "grad_norm": 0.23337846994400024, |
| "learning_rate": 8.804820413445175e-05, |
| "loss": 0.0164, |
| "step": 26350 |
| }, |
| { |
| "grad_norm": 0.2396707385778427, |
| "learning_rate": 8.803747444109837e-05, |
| "loss": 0.0145, |
| "step": 26360 |
| }, |
| { |
| "grad_norm": 0.1897064745426178, |
| "learning_rate": 8.802674058802399e-05, |
| "loss": 0.0126, |
| "step": 26370 |
| }, |
| { |
| "grad_norm": 0.19712260365486145, |
| "learning_rate": 8.801600257640241e-05, |
| "loss": 0.0157, |
| "step": 26380 |
| }, |
| { |
| "grad_norm": 0.17782928049564362, |
| "learning_rate": 8.800526040740795e-05, |
| "loss": 0.0155, |
| "step": 26390 |
| }, |
| { |
| "grad_norm": 0.1928723156452179, |
| "learning_rate": 8.799451408221535e-05, |
| "loss": 0.0127, |
| "step": 26400 |
| }, |
| { |
| "grad_norm": 0.17179611325263977, |
| "learning_rate": 8.798376360199982e-05, |
| "loss": 0.0134, |
| "step": 26410 |
| }, |
| { |
| "grad_norm": 0.17854979634284973, |
| "learning_rate": 8.797300896793701e-05, |
| "loss": 0.0144, |
| "step": 26420 |
| }, |
| { |
| "grad_norm": 0.18036052584648132, |
| "learning_rate": 8.796225018120302e-05, |
| "loss": 0.0126, |
| "step": 26430 |
| }, |
| { |
| "grad_norm": 0.15296590328216553, |
| "learning_rate": 8.795148724297444e-05, |
| "loss": 0.0144, |
| "step": 26440 |
| }, |
| { |
| "grad_norm": 0.14553052186965942, |
| "learning_rate": 8.794072015442825e-05, |
| "loss": 0.0122, |
| "step": 26450 |
| }, |
| { |
| "grad_norm": 0.2545054256916046, |
| "learning_rate": 8.792994891674198e-05, |
| "loss": 0.0134, |
| "step": 26460 |
| }, |
| { |
| "grad_norm": 0.2066628336906433, |
| "learning_rate": 8.79191735310935e-05, |
| "loss": 0.0135, |
| "step": 26470 |
| }, |
| { |
| "grad_norm": 0.20902188122272491, |
| "learning_rate": 8.790839399866122e-05, |
| "loss": 0.0142, |
| "step": 26480 |
| }, |
| { |
| "grad_norm": 0.1952153593301773, |
| "learning_rate": 8.789761032062397e-05, |
| "loss": 0.0151, |
| "step": 26490 |
| }, |
| { |
| "grad_norm": 0.1802971065044403, |
| "learning_rate": 8.788682249816103e-05, |
| "loss": 0.0128, |
| "step": 26500 |
| }, |
| { |
| "grad_norm": 0.16834264993667603, |
| "learning_rate": 8.787603053245215e-05, |
| "loss": 0.0138, |
| "step": 26510 |
| }, |
| { |
| "grad_norm": 0.22321510314941406, |
| "learning_rate": 8.78652344246775e-05, |
| "loss": 0.0133, |
| "step": 26520 |
| }, |
| { |
| "grad_norm": 0.18334518373012543, |
| "learning_rate": 8.785443417601776e-05, |
| "loss": 0.0137, |
| "step": 26530 |
| }, |
| { |
| "grad_norm": 0.18657852709293365, |
| "learning_rate": 8.784362978765401e-05, |
| "loss": 0.0159, |
| "step": 26540 |
| }, |
| { |
| "grad_norm": 0.2138308882713318, |
| "learning_rate": 8.783282126076779e-05, |
| "loss": 0.0142, |
| "step": 26550 |
| }, |
| { |
| "grad_norm": 0.154995858669281, |
| "learning_rate": 8.782200859654112e-05, |
| "loss": 0.0132, |
| "step": 26560 |
| }, |
| { |
| "grad_norm": 0.2004760503768921, |
| "learning_rate": 8.781119179615646e-05, |
| "loss": 0.0166, |
| "step": 26570 |
| }, |
| { |
| "grad_norm": 0.21278400719165802, |
| "learning_rate": 8.780037086079674e-05, |
| "loss": 0.016, |
| "step": 26580 |
| }, |
| { |
| "grad_norm": 0.18658854067325592, |
| "learning_rate": 8.778954579164527e-05, |
| "loss": 0.0118, |
| "step": 26590 |
| }, |
| { |
| "grad_norm": 0.17720405757427216, |
| "learning_rate": 8.777871658988588e-05, |
| "loss": 0.0127, |
| "step": 26600 |
| }, |
| { |
| "grad_norm": 0.2561233937740326, |
| "learning_rate": 8.776788325670285e-05, |
| "loss": 0.0164, |
| "step": 26610 |
| }, |
| { |
| "grad_norm": 0.17624244093894958, |
| "learning_rate": 8.775704579328089e-05, |
| "loss": 0.0156, |
| "step": 26620 |
| }, |
| { |
| "grad_norm": 0.3170694410800934, |
| "learning_rate": 8.774620420080517e-05, |
| "loss": 0.0142, |
| "step": 26630 |
| }, |
| { |
| "grad_norm": 0.1428108811378479, |
| "learning_rate": 8.773535848046131e-05, |
| "loss": 0.0129, |
| "step": 26640 |
| }, |
| { |
| "grad_norm": 0.15282820165157318, |
| "learning_rate": 8.772450863343538e-05, |
| "loss": 0.0122, |
| "step": 26650 |
| }, |
| { |
| "grad_norm": 0.1680191308259964, |
| "learning_rate": 8.77136546609139e-05, |
| "loss": 0.0136, |
| "step": 26660 |
| }, |
| { |
| "grad_norm": 0.1819058656692505, |
| "learning_rate": 8.770279656408385e-05, |
| "loss": 0.0131, |
| "step": 26670 |
| }, |
| { |
| "grad_norm": 0.15053746104240417, |
| "learning_rate": 8.769193434413265e-05, |
| "loss": 0.0121, |
| "step": 26680 |
| }, |
| { |
| "grad_norm": 0.15997923910617828, |
| "learning_rate": 8.76810680022482e-05, |
| "loss": 0.0128, |
| "step": 26690 |
| }, |
| { |
| "grad_norm": 0.19415059685707092, |
| "learning_rate": 8.767019753961878e-05, |
| "loss": 0.0122, |
| "step": 26700 |
| }, |
| { |
| "grad_norm": 0.17333634197711945, |
| "learning_rate": 8.765932295743321e-05, |
| "loss": 0.015, |
| "step": 26710 |
| }, |
| { |
| "grad_norm": 0.22342133522033691, |
| "learning_rate": 8.764844425688068e-05, |
| "loss": 0.0129, |
| "step": 26720 |
| }, |
| { |
| "grad_norm": 0.155137300491333, |
| "learning_rate": 8.763756143915092e-05, |
| "loss": 0.0121, |
| "step": 26730 |
| }, |
| { |
| "grad_norm": 0.16613472998142242, |
| "learning_rate": 8.7626674505434e-05, |
| "loss": 0.0107, |
| "step": 26740 |
| }, |
| { |
| "grad_norm": 0.16115880012512207, |
| "learning_rate": 8.761578345692053e-05, |
| "loss": 0.0126, |
| "step": 26750 |
| }, |
| { |
| "grad_norm": 0.21547505259513855, |
| "learning_rate": 8.760488829480156e-05, |
| "loss": 0.0128, |
| "step": 26760 |
| }, |
| { |
| "grad_norm": 0.23196235299110413, |
| "learning_rate": 8.759398902026854e-05, |
| "loss": 0.0133, |
| "step": 26770 |
| }, |
| { |
| "grad_norm": 0.21111854910850525, |
| "learning_rate": 8.758308563451339e-05, |
| "loss": 0.0149, |
| "step": 26780 |
| }, |
| { |
| "grad_norm": 0.19990575313568115, |
| "learning_rate": 8.75721781387285e-05, |
| "loss": 0.0101, |
| "step": 26790 |
| }, |
| { |
| "grad_norm": 0.1742956042289734, |
| "learning_rate": 8.75612665341067e-05, |
| "loss": 0.0134, |
| "step": 26800 |
| }, |
| { |
| "grad_norm": 0.17443598806858063, |
| "learning_rate": 8.755035082184126e-05, |
| "loss": 0.0131, |
| "step": 26810 |
| }, |
| { |
| "grad_norm": 0.1800273358821869, |
| "learning_rate": 8.753943100312592e-05, |
| "loss": 0.0126, |
| "step": 26820 |
| }, |
| { |
| "grad_norm": 0.16605105996131897, |
| "learning_rate": 8.752850707915484e-05, |
| "loss": 0.0148, |
| "step": 26830 |
| }, |
| { |
| "grad_norm": 0.18857042491436005, |
| "learning_rate": 8.751757905112264e-05, |
| "loss": 0.0121, |
| "step": 26840 |
| }, |
| { |
| "grad_norm": 0.1778421401977539, |
| "learning_rate": 8.75066469202244e-05, |
| "loss": 0.0129, |
| "step": 26850 |
| }, |
| { |
| "grad_norm": 0.1562901735305786, |
| "learning_rate": 8.749571068765567e-05, |
| "loss": 0.0114, |
| "step": 26860 |
| }, |
| { |
| "grad_norm": 0.22905878722667694, |
| "learning_rate": 8.748477035461238e-05, |
| "loss": 0.0145, |
| "step": 26870 |
| }, |
| { |
| "grad_norm": 0.3024975061416626, |
| "learning_rate": 8.747382592229095e-05, |
| "loss": 0.0147, |
| "step": 26880 |
| }, |
| { |
| "grad_norm": 0.16097016632556915, |
| "learning_rate": 8.746287739188828e-05, |
| "loss": 0.0167, |
| "step": 26890 |
| }, |
| { |
| "grad_norm": 0.13104328513145447, |
| "learning_rate": 8.745192476460165e-05, |
| "loss": 0.0127, |
| "step": 26900 |
| }, |
| { |
| "grad_norm": 0.15686337649822235, |
| "learning_rate": 8.744096804162882e-05, |
| "loss": 0.0132, |
| "step": 26910 |
| }, |
| { |
| "grad_norm": 0.17414389550685883, |
| "learning_rate": 8.743000722416804e-05, |
| "loss": 0.0131, |
| "step": 26920 |
| }, |
| { |
| "grad_norm": 0.2515980303287506, |
| "learning_rate": 8.741904231341793e-05, |
| "loss": 0.0135, |
| "step": 26930 |
| }, |
| { |
| "grad_norm": 0.1402544379234314, |
| "learning_rate": 8.740807331057762e-05, |
| "loss": 0.0142, |
| "step": 26940 |
| }, |
| { |
| "grad_norm": 0.20550718903541565, |
| "learning_rate": 8.739710021684667e-05, |
| "loss": 0.0169, |
| "step": 26950 |
| }, |
| { |
| "grad_norm": 0.21412119269371033, |
| "learning_rate": 8.738612303342503e-05, |
| "loss": 0.0153, |
| "step": 26960 |
| }, |
| { |
| "grad_norm": 0.23892386257648468, |
| "learning_rate": 8.73751417615132e-05, |
| "loss": 0.0136, |
| "step": 26970 |
| }, |
| { |
| "grad_norm": 0.20890675485134125, |
| "learning_rate": 8.736415640231208e-05, |
| "loss": 0.0131, |
| "step": 26980 |
| }, |
| { |
| "grad_norm": 0.1736547201871872, |
| "learning_rate": 8.735316695702297e-05, |
| "loss": 0.0144, |
| "step": 26990 |
| }, |
| { |
| "grad_norm": 0.17582859098911285, |
| "learning_rate": 8.734217342684769e-05, |
| "loss": 0.0126, |
| "step": 27000 |
| }, |
| { |
| "grad_norm": 0.2060476392507553, |
| "learning_rate": 8.733117581298847e-05, |
| "loss": 0.0145, |
| "step": 27010 |
| }, |
| { |
| "grad_norm": 0.20875637233257294, |
| "learning_rate": 8.732017411664796e-05, |
| "loss": 0.0127, |
| "step": 27020 |
| }, |
| { |
| "grad_norm": 0.16377364099025726, |
| "learning_rate": 8.730916833902936e-05, |
| "loss": 0.0132, |
| "step": 27030 |
| }, |
| { |
| "grad_norm": 0.21675199270248413, |
| "learning_rate": 8.729815848133618e-05, |
| "loss": 0.0117, |
| "step": 27040 |
| }, |
| { |
| "grad_norm": 0.172952339053154, |
| "learning_rate": 8.728714454477247e-05, |
| "loss": 0.0123, |
| "step": 27050 |
| }, |
| { |
| "grad_norm": 0.1896006464958191, |
| "learning_rate": 8.727612653054269e-05, |
| "loss": 0.0155, |
| "step": 27060 |
| }, |
| { |
| "grad_norm": 0.18309657275676727, |
| "learning_rate": 8.726510443985176e-05, |
| "loss": 0.0128, |
| "step": 27070 |
| }, |
| { |
| "grad_norm": 0.19412779808044434, |
| "learning_rate": 8.725407827390503e-05, |
| "loss": 0.0147, |
| "step": 27080 |
| }, |
| { |
| "grad_norm": 0.16986395418643951, |
| "learning_rate": 8.724304803390833e-05, |
| "loss": 0.0111, |
| "step": 27090 |
| }, |
| { |
| "grad_norm": 0.19317303597927094, |
| "learning_rate": 8.723201372106788e-05, |
| "loss": 0.0122, |
| "step": 27100 |
| }, |
| { |
| "grad_norm": 0.14348508417606354, |
| "learning_rate": 8.722097533659038e-05, |
| "loss": 0.012, |
| "step": 27110 |
| }, |
| { |
| "grad_norm": 0.18144506216049194, |
| "learning_rate": 8.720993288168299e-05, |
| "loss": 0.0138, |
| "step": 27120 |
| }, |
| { |
| "grad_norm": 0.1836540699005127, |
| "learning_rate": 8.719888635755327e-05, |
| "loss": 0.0131, |
| "step": 27130 |
| }, |
| { |
| "grad_norm": 0.1599224954843521, |
| "learning_rate": 8.718783576540928e-05, |
| "loss": 0.0116, |
| "step": 27140 |
| }, |
| { |
| "grad_norm": 0.21616582572460175, |
| "learning_rate": 8.717678110645948e-05, |
| "loss": 0.0151, |
| "step": 27150 |
| }, |
| { |
| "grad_norm": 0.17849023640155792, |
| "learning_rate": 8.716572238191279e-05, |
| "loss": 0.0159, |
| "step": 27160 |
| }, |
| { |
| "grad_norm": 0.2699224054813385, |
| "learning_rate": 8.715465959297857e-05, |
| "loss": 0.0168, |
| "step": 27170 |
| }, |
| { |
| "grad_norm": 0.19859661161899567, |
| "learning_rate": 8.714359274086665e-05, |
| "loss": 0.0131, |
| "step": 27180 |
| }, |
| { |
| "grad_norm": 0.19852979481220245, |
| "learning_rate": 8.713252182678726e-05, |
| "loss": 0.0157, |
| "step": 27190 |
| }, |
| { |
| "grad_norm": 0.17707674205303192, |
| "learning_rate": 8.712144685195112e-05, |
| "loss": 0.0126, |
| "step": 27200 |
| }, |
| { |
| "grad_norm": 0.21748629212379456, |
| "learning_rate": 8.711036781756936e-05, |
| "loss": 0.0141, |
| "step": 27210 |
| }, |
| { |
| "grad_norm": 0.17880921065807343, |
| "learning_rate": 8.709928472485357e-05, |
| "loss": 0.0111, |
| "step": 27220 |
| }, |
| { |
| "grad_norm": 0.24127905070781708, |
| "learning_rate": 8.708819757501579e-05, |
| "loss": 0.0127, |
| "step": 27230 |
| }, |
| { |
| "grad_norm": 0.22769764065742493, |
| "learning_rate": 8.707710636926846e-05, |
| "loss": 0.0131, |
| "step": 27240 |
| }, |
| { |
| "grad_norm": 0.24227939546108246, |
| "learning_rate": 8.706601110882455e-05, |
| "loss": 0.0149, |
| "step": 27250 |
| }, |
| { |
| "grad_norm": 0.19728736579418182, |
| "learning_rate": 8.705491179489738e-05, |
| "loss": 0.0129, |
| "step": 27260 |
| }, |
| { |
| "grad_norm": 0.17270460724830627, |
| "learning_rate": 8.704380842870077e-05, |
| "loss": 0.0122, |
| "step": 27270 |
| }, |
| { |
| "grad_norm": 0.1484808772802353, |
| "learning_rate": 8.703270101144895e-05, |
| "loss": 0.0117, |
| "step": 27280 |
| }, |
| { |
| "grad_norm": 0.19262616336345673, |
| "learning_rate": 8.702158954435664e-05, |
| "loss": 0.0135, |
| "step": 27290 |
| }, |
| { |
| "grad_norm": 0.23880822956562042, |
| "learning_rate": 8.701047402863896e-05, |
| "loss": 0.0159, |
| "step": 27300 |
| }, |
| { |
| "grad_norm": 0.18311969935894012, |
| "learning_rate": 8.699935446551148e-05, |
| "loss": 0.0144, |
| "step": 27310 |
| }, |
| { |
| "grad_norm": 0.18611888587474823, |
| "learning_rate": 8.698823085619022e-05, |
| "loss": 0.0138, |
| "step": 27320 |
| }, |
| { |
| "grad_norm": 0.17694741487503052, |
| "learning_rate": 8.697710320189166e-05, |
| "loss": 0.0112, |
| "step": 27330 |
| }, |
| { |
| "grad_norm": 0.1960524618625641, |
| "learning_rate": 8.696597150383268e-05, |
| "loss": 0.0118, |
| "step": 27340 |
| }, |
| { |
| "grad_norm": 0.2268739491701126, |
| "learning_rate": 8.695483576323063e-05, |
| "loss": 0.0126, |
| "step": 27350 |
| }, |
| { |
| "grad_norm": 0.14451560378074646, |
| "learning_rate": 8.69436959813033e-05, |
| "loss": 0.0112, |
| "step": 27360 |
| }, |
| { |
| "grad_norm": 0.1837204545736313, |
| "learning_rate": 8.693255215926892e-05, |
| "loss": 0.0135, |
| "step": 27370 |
| }, |
| { |
| "grad_norm": 0.22977836430072784, |
| "learning_rate": 8.692140429834617e-05, |
| "loss": 0.0137, |
| "step": 27380 |
| }, |
| { |
| "grad_norm": 0.18952079117298126, |
| "learning_rate": 8.691025239975415e-05, |
| "loss": 0.013, |
| "step": 27390 |
| }, |
| { |
| "grad_norm": 0.1871054768562317, |
| "learning_rate": 8.689909646471243e-05, |
| "loss": 0.0119, |
| "step": 27400 |
| }, |
| { |
| "grad_norm": 0.14948269724845886, |
| "learning_rate": 8.688793649444099e-05, |
| "loss": 0.0122, |
| "step": 27410 |
| }, |
| { |
| "grad_norm": 0.19388332962989807, |
| "learning_rate": 8.687677249016029e-05, |
| "loss": 0.0129, |
| "step": 27420 |
| }, |
| { |
| "grad_norm": 0.2243487536907196, |
| "learning_rate": 8.686560445309118e-05, |
| "loss": 0.0148, |
| "step": 27430 |
| }, |
| { |
| "grad_norm": 0.1868431121110916, |
| "learning_rate": 8.685443238445499e-05, |
| "loss": 0.0128, |
| "step": 27440 |
| }, |
| { |
| "grad_norm": 0.1388297975063324, |
| "learning_rate": 8.68432562854735e-05, |
| "loss": 0.0138, |
| "step": 27450 |
| }, |
| { |
| "grad_norm": 0.15557223558425903, |
| "learning_rate": 8.683207615736887e-05, |
| "loss": 0.0112, |
| "step": 27460 |
| }, |
| { |
| "grad_norm": 0.17421898245811462, |
| "learning_rate": 8.682089200136379e-05, |
| "loss": 0.0148, |
| "step": 27470 |
| }, |
| { |
| "grad_norm": 0.20171713829040527, |
| "learning_rate": 8.680970381868132e-05, |
| "loss": 0.0135, |
| "step": 27480 |
| }, |
| { |
| "grad_norm": 0.20627743005752563, |
| "learning_rate": 8.679851161054498e-05, |
| "loss": 0.0119, |
| "step": 27490 |
| }, |
| { |
| "grad_norm": 0.15651550889015198, |
| "learning_rate": 8.678731537817873e-05, |
| "loss": 0.0125, |
| "step": 27500 |
| }, |
| { |
| "grad_norm": 0.15714424848556519, |
| "learning_rate": 8.677611512280697e-05, |
| "loss": 0.0118, |
| "step": 27510 |
| }, |
| { |
| "grad_norm": 0.14530454576015472, |
| "learning_rate": 8.676491084565457e-05, |
| "loss": 0.0124, |
| "step": 27520 |
| }, |
| { |
| "grad_norm": 0.1886354237794876, |
| "learning_rate": 8.675370254794678e-05, |
| "loss": 0.0102, |
| "step": 27530 |
| }, |
| { |
| "grad_norm": 0.16549897193908691, |
| "learning_rate": 8.674249023090935e-05, |
| "loss": 0.0134, |
| "step": 27540 |
| }, |
| { |
| "grad_norm": 0.19222389161586761, |
| "learning_rate": 8.673127389576843e-05, |
| "loss": 0.0128, |
| "step": 27550 |
| }, |
| { |
| "grad_norm": 0.21641723811626434, |
| "learning_rate": 8.67200535437506e-05, |
| "loss": 0.0114, |
| "step": 27560 |
| }, |
| { |
| "grad_norm": 0.14683885872364044, |
| "learning_rate": 8.670882917608296e-05, |
| "loss": 0.0148, |
| "step": 27570 |
| }, |
| { |
| "grad_norm": 0.2236359715461731, |
| "learning_rate": 8.669760079399292e-05, |
| "loss": 0.0128, |
| "step": 27580 |
| }, |
| { |
| "grad_norm": 0.1656251698732376, |
| "learning_rate": 8.668636839870845e-05, |
| "loss": 0.0158, |
| "step": 27590 |
| }, |
| { |
| "grad_norm": 0.2022354155778885, |
| "learning_rate": 8.667513199145789e-05, |
| "loss": 0.0134, |
| "step": 27600 |
| }, |
| { |
| "grad_norm": 0.16965201497077942, |
| "learning_rate": 8.666389157347002e-05, |
| "loss": 0.0143, |
| "step": 27610 |
| }, |
| { |
| "grad_norm": 0.21600964665412903, |
| "learning_rate": 8.66526471459741e-05, |
| "loss": 0.0176, |
| "step": 27620 |
| }, |
| { |
| "grad_norm": 0.18543939292430878, |
| "learning_rate": 8.66413987101998e-05, |
| "loss": 0.0157, |
| "step": 27630 |
| }, |
| { |
| "grad_norm": 0.2895159125328064, |
| "learning_rate": 8.663014626737723e-05, |
| "loss": 0.0136, |
| "step": 27640 |
| }, |
| { |
| "grad_norm": 0.18034076690673828, |
| "learning_rate": 8.661888981873691e-05, |
| "loss": 0.0132, |
| "step": 27650 |
| }, |
| { |
| "grad_norm": 0.1948971003293991, |
| "learning_rate": 8.660762936550988e-05, |
| "loss": 0.0129, |
| "step": 27660 |
| }, |
| { |
| "grad_norm": 0.16037020087242126, |
| "learning_rate": 8.659636490892753e-05, |
| "loss": 0.0115, |
| "step": 27670 |
| }, |
| { |
| "grad_norm": 0.20323437452316284, |
| "learning_rate": 8.658509645022174e-05, |
| "loss": 0.011, |
| "step": 27680 |
| }, |
| { |
| "grad_norm": 0.2092437893152237, |
| "learning_rate": 8.657382399062481e-05, |
| "loss": 0.013, |
| "step": 27690 |
| }, |
| { |
| "grad_norm": 0.23704589903354645, |
| "learning_rate": 8.656254753136946e-05, |
| "loss": 0.0146, |
| "step": 27700 |
| }, |
| { |
| "grad_norm": 0.20448334515094757, |
| "learning_rate": 8.655126707368891e-05, |
| "loss": 0.0125, |
| "step": 27710 |
| }, |
| { |
| "grad_norm": 0.18248756229877472, |
| "learning_rate": 8.653998261881672e-05, |
| "loss": 0.0109, |
| "step": 27720 |
| }, |
| { |
| "grad_norm": 0.1941586583852768, |
| "learning_rate": 8.652869416798699e-05, |
| "loss": 0.0112, |
| "step": 27730 |
| }, |
| { |
| "grad_norm": 0.181466206908226, |
| "learning_rate": 8.651740172243417e-05, |
| "loss": 0.0128, |
| "step": 27740 |
| }, |
| { |
| "grad_norm": 0.18117640912532806, |
| "learning_rate": 8.65061052833932e-05, |
| "loss": 0.0137, |
| "step": 27750 |
| }, |
| { |
| "grad_norm": 0.15962450206279755, |
| "learning_rate": 8.649480485209945e-05, |
| "loss": 0.0118, |
| "step": 27760 |
| }, |
| { |
| "grad_norm": 0.17997871339321136, |
| "learning_rate": 8.64835004297887e-05, |
| "loss": 0.0143, |
| "step": 27770 |
| }, |
| { |
| "grad_norm": 0.20928233861923218, |
| "learning_rate": 8.64721920176972e-05, |
| "loss": 0.0106, |
| "step": 27780 |
| }, |
| { |
| "grad_norm": 0.18144932389259338, |
| "learning_rate": 8.646087961706164e-05, |
| "loss": 0.0135, |
| "step": 27790 |
| }, |
| { |
| "grad_norm": 0.17991112172603607, |
| "learning_rate": 8.644956322911908e-05, |
| "loss": 0.0116, |
| "step": 27800 |
| }, |
| { |
| "grad_norm": 0.17153599858283997, |
| "learning_rate": 8.643824285510709e-05, |
| "loss": 0.0122, |
| "step": 27810 |
| }, |
| { |
| "grad_norm": 0.23814387619495392, |
| "learning_rate": 8.642691849626364e-05, |
| "loss": 0.0153, |
| "step": 27820 |
| }, |
| { |
| "grad_norm": 0.18724940717220306, |
| "learning_rate": 8.641559015382717e-05, |
| "loss": 0.0118, |
| "step": 27830 |
| }, |
| { |
| "grad_norm": 0.15284952521324158, |
| "learning_rate": 8.640425782903649e-05, |
| "loss": 0.0116, |
| "step": 27840 |
| }, |
| { |
| "grad_norm": 0.18523651361465454, |
| "learning_rate": 8.639292152313091e-05, |
| "loss": 0.0126, |
| "step": 27850 |
| }, |
| { |
| "grad_norm": 0.17586593329906464, |
| "learning_rate": 8.638158123735015e-05, |
| "loss": 0.0134, |
| "step": 27860 |
| }, |
| { |
| "grad_norm": 0.17588144540786743, |
| "learning_rate": 8.637023697293436e-05, |
| "loss": 0.014, |
| "step": 27870 |
| }, |
| { |
| "grad_norm": 0.15913942456245422, |
| "learning_rate": 8.635888873112414e-05, |
| "loss": 0.0135, |
| "step": 27880 |
| }, |
| { |
| "grad_norm": 0.2409949004650116, |
| "learning_rate": 8.634753651316052e-05, |
| "loss": 0.0154, |
| "step": 27890 |
| }, |
| { |
| "grad_norm": 0.18579398095607758, |
| "learning_rate": 8.633618032028496e-05, |
| "loss": 0.0128, |
| "step": 27900 |
| }, |
| { |
| "grad_norm": 0.25115251541137695, |
| "learning_rate": 8.632482015373934e-05, |
| "loss": 0.0162, |
| "step": 27910 |
| }, |
| { |
| "grad_norm": 0.1500532329082489, |
| "learning_rate": 8.6313456014766e-05, |
| "loss": 0.0118, |
| "step": 27920 |
| }, |
| { |
| "grad_norm": 0.19730953872203827, |
| "learning_rate": 8.630208790460771e-05, |
| "loss": 0.0152, |
| "step": 27930 |
| }, |
| { |
| "grad_norm": 0.24407461285591125, |
| "learning_rate": 8.629071582450768e-05, |
| "loss": 0.0129, |
| "step": 27940 |
| }, |
| { |
| "grad_norm": 0.1834212690591812, |
| "learning_rate": 8.62793397757095e-05, |
| "loss": 0.0174, |
| "step": 27950 |
| }, |
| { |
| "grad_norm": 0.1906377226114273, |
| "learning_rate": 8.626795975945729e-05, |
| "loss": 0.0126, |
| "step": 27960 |
| }, |
| { |
| "grad_norm": 0.20611874759197235, |
| "learning_rate": 8.625657577699551e-05, |
| "loss": 0.0143, |
| "step": 27970 |
| }, |
| { |
| "grad_norm": 0.22802181541919708, |
| "learning_rate": 8.624518782956914e-05, |
| "loss": 0.0129, |
| "step": 27980 |
| }, |
| { |
| "grad_norm": 0.19341625273227692, |
| "learning_rate": 8.62337959184235e-05, |
| "loss": 0.0138, |
| "step": 27990 |
| }, |
| { |
| "grad_norm": 0.2044740617275238, |
| "learning_rate": 8.622240004480441e-05, |
| "loss": 0.0131, |
| "step": 28000 |
| }, |
| { |
| "grad_norm": 0.25316351652145386, |
| "learning_rate": 8.621100020995814e-05, |
| "loss": 0.0147, |
| "step": 28010 |
| }, |
| { |
| "grad_norm": 0.18751955032348633, |
| "learning_rate": 8.619959641513132e-05, |
| "loss": 0.0133, |
| "step": 28020 |
| }, |
| { |
| "grad_norm": 0.24041640758514404, |
| "learning_rate": 8.618818866157105e-05, |
| "loss": 0.0133, |
| "step": 28030 |
| }, |
| { |
| "grad_norm": 0.15245193243026733, |
| "learning_rate": 8.617677695052487e-05, |
| "loss": 0.0133, |
| "step": 28040 |
| }, |
| { |
| "grad_norm": 0.17806296050548553, |
| "learning_rate": 8.616536128324078e-05, |
| "loss": 0.0165, |
| "step": 28050 |
| }, |
| { |
| "grad_norm": 0.2666396200656891, |
| "learning_rate": 8.615394166096712e-05, |
| "loss": 0.0133, |
| "step": 28060 |
| }, |
| { |
| "grad_norm": 0.1795058250427246, |
| "learning_rate": 8.614251808495279e-05, |
| "loss": 0.012, |
| "step": 28070 |
| }, |
| { |
| "grad_norm": 0.1847299188375473, |
| "learning_rate": 8.6131090556447e-05, |
| "loss": 0.0121, |
| "step": 28080 |
| }, |
| { |
| "grad_norm": 0.16904352605342865, |
| "learning_rate": 8.611965907669947e-05, |
| "loss": 0.0101, |
| "step": 28090 |
| }, |
| { |
| "grad_norm": 0.2383386343717575, |
| "learning_rate": 8.610822364696034e-05, |
| "loss": 0.015, |
| "step": 28100 |
| }, |
| { |
| "grad_norm": 0.14433036744594574, |
| "learning_rate": 8.609678426848015e-05, |
| "loss": 0.0111, |
| "step": 28110 |
| }, |
| { |
| "grad_norm": 0.18644677102565765, |
| "learning_rate": 8.60853409425099e-05, |
| "loss": 0.0118, |
| "step": 28120 |
| }, |
| { |
| "grad_norm": 0.13823361694812775, |
| "learning_rate": 8.607389367030104e-05, |
| "loss": 0.0117, |
| "step": 28130 |
| }, |
| { |
| "grad_norm": 0.1907871514558792, |
| "learning_rate": 8.606244245310538e-05, |
| "loss": 0.013, |
| "step": 28140 |
| }, |
| { |
| "grad_norm": 0.18961817026138306, |
| "learning_rate": 8.605098729217525e-05, |
| "loss": 0.0102, |
| "step": 28150 |
| }, |
| { |
| "grad_norm": 0.1441236287355423, |
| "learning_rate": 8.603952818876335e-05, |
| "loss": 0.0134, |
| "step": 28160 |
| }, |
| { |
| "grad_norm": 0.16797539591789246, |
| "learning_rate": 8.602806514412281e-05, |
| "loss": 0.0152, |
| "step": 28170 |
| }, |
| { |
| "grad_norm": 0.18718941509723663, |
| "learning_rate": 8.601659815950726e-05, |
| "loss": 0.0121, |
| "step": 28180 |
| }, |
| { |
| "grad_norm": 0.19151407480239868, |
| "learning_rate": 8.600512723617067e-05, |
| "loss": 0.0144, |
| "step": 28190 |
| }, |
| { |
| "grad_norm": 0.1924273818731308, |
| "learning_rate": 8.59936523753675e-05, |
| "loss": 0.0126, |
| "step": 28200 |
| }, |
| { |
| "grad_norm": 0.12516771256923676, |
| "learning_rate": 8.598217357835264e-05, |
| "loss": 0.0115, |
| "step": 28210 |
| }, |
| { |
| "grad_norm": 0.1990862786769867, |
| "learning_rate": 8.597069084638135e-05, |
| "loss": 0.0136, |
| "step": 28220 |
| }, |
| { |
| "grad_norm": 0.1527869999408722, |
| "learning_rate": 8.595920418070939e-05, |
| "loss": 0.0115, |
| "step": 28230 |
| }, |
| { |
| "grad_norm": 0.147520512342453, |
| "learning_rate": 8.594771358259295e-05, |
| "loss": 0.0136, |
| "step": 28240 |
| }, |
| { |
| "grad_norm": 0.3083323836326599, |
| "learning_rate": 8.593621905328858e-05, |
| "loss": 0.0123, |
| "step": 28250 |
| }, |
| { |
| "grad_norm": 0.15598563849925995, |
| "learning_rate": 8.592472059405333e-05, |
| "loss": 0.0136, |
| "step": 28260 |
| }, |
| { |
| "grad_norm": 0.15811626613140106, |
| "learning_rate": 8.591321820614464e-05, |
| "loss": 0.0104, |
| "step": 28270 |
| }, |
| { |
| "grad_norm": 0.17856080830097198, |
| "learning_rate": 8.590171189082041e-05, |
| "loss": 0.0123, |
| "step": 28280 |
| }, |
| { |
| "grad_norm": 0.1795537769794464, |
| "learning_rate": 8.589020164933894e-05, |
| "loss": 0.0107, |
| "step": 28290 |
| }, |
| { |
| "grad_norm": 0.17818602919578552, |
| "learning_rate": 8.587868748295898e-05, |
| "loss": 0.0126, |
| "step": 28300 |
| }, |
| { |
| "grad_norm": 0.12199205905199051, |
| "learning_rate": 8.586716939293971e-05, |
| "loss": 0.0137, |
| "step": 28310 |
| }, |
| { |
| "grad_norm": 0.19500301778316498, |
| "learning_rate": 8.58556473805407e-05, |
| "loss": 0.0144, |
| "step": 28320 |
| }, |
| { |
| "grad_norm": 0.16873633861541748, |
| "learning_rate": 8.584412144702202e-05, |
| "loss": 0.0108, |
| "step": 28330 |
| }, |
| { |
| "grad_norm": 0.16769428551197052, |
| "learning_rate": 8.58325915936441e-05, |
| "loss": 0.0112, |
| "step": 28340 |
| }, |
| { |
| "grad_norm": 0.14065007865428925, |
| "learning_rate": 8.582105782166783e-05, |
| "loss": 0.0111, |
| "step": 28350 |
| }, |
| { |
| "grad_norm": 0.18229477107524872, |
| "learning_rate": 8.580952013235455e-05, |
| "loss": 0.0108, |
| "step": 28360 |
| }, |
| { |
| "grad_norm": 0.2582354247570038, |
| "learning_rate": 8.579797852696596e-05, |
| "loss": 0.0119, |
| "step": 28370 |
| }, |
| { |
| "grad_norm": 0.18101894855499268, |
| "learning_rate": 8.578643300676428e-05, |
| "loss": 0.0136, |
| "step": 28380 |
| }, |
| { |
| "grad_norm": 0.18537025153636932, |
| "learning_rate": 8.577488357301209e-05, |
| "loss": 0.0143, |
| "step": 28390 |
| }, |
| { |
| "grad_norm": 0.21140316128730774, |
| "learning_rate": 8.576333022697242e-05, |
| "loss": 0.0109, |
| "step": 28400 |
| }, |
| { |
| "grad_norm": 0.16614943742752075, |
| "learning_rate": 8.575177296990873e-05, |
| "loss": 0.0165, |
| "step": 28410 |
| }, |
| { |
| "grad_norm": 0.16962102055549622, |
| "learning_rate": 8.574021180308489e-05, |
| "loss": 0.0119, |
| "step": 28420 |
| }, |
| { |
| "grad_norm": 0.19473373889923096, |
| "learning_rate": 8.572864672776523e-05, |
| "loss": 0.013, |
| "step": 28430 |
| }, |
| { |
| "grad_norm": 0.19951479136943817, |
| "learning_rate": 8.571707774521447e-05, |
| "loss": 0.013, |
| "step": 28440 |
| }, |
| { |
| "grad_norm": 0.19348788261413574, |
| "learning_rate": 8.57055048566978e-05, |
| "loss": 0.0136, |
| "step": 28450 |
| }, |
| { |
| "grad_norm": 0.24966219067573547, |
| "learning_rate": 8.569392806348078e-05, |
| "loss": 0.0142, |
| "step": 28460 |
| }, |
| { |
| "grad_norm": 0.18688538670539856, |
| "learning_rate": 8.568234736682947e-05, |
| "loss": 0.0153, |
| "step": 28470 |
| }, |
| { |
| "grad_norm": 0.18647579848766327, |
| "learning_rate": 8.567076276801029e-05, |
| "loss": 0.0147, |
| "step": 28480 |
| }, |
| { |
| "grad_norm": 0.14750352501869202, |
| "learning_rate": 8.565917426829013e-05, |
| "loss": 0.0127, |
| "step": 28490 |
| }, |
| { |
| "grad_norm": 0.20846188068389893, |
| "learning_rate": 8.564758186893628e-05, |
| "loss": 0.0138, |
| "step": 28500 |
| }, |
| { |
| "grad_norm": 0.18948955833911896, |
| "learning_rate": 8.563598557121649e-05, |
| "loss": 0.0105, |
| "step": 28510 |
| }, |
| { |
| "grad_norm": 0.2050204575061798, |
| "learning_rate": 8.562438537639888e-05, |
| "loss": 0.0127, |
| "step": 28520 |
| }, |
| { |
| "grad_norm": 0.20523923635482788, |
| "learning_rate": 8.561278128575206e-05, |
| "loss": 0.0137, |
| "step": 28530 |
| }, |
| { |
| "grad_norm": 0.195975661277771, |
| "learning_rate": 8.5601173300545e-05, |
| "loss": 0.013, |
| "step": 28540 |
| }, |
| { |
| "grad_norm": 0.21493001282215118, |
| "learning_rate": 8.558956142204717e-05, |
| "loss": 0.0139, |
| "step": 28550 |
| }, |
| { |
| "grad_norm": 0.1399243324995041, |
| "learning_rate": 8.55779456515284e-05, |
| "loss": 0.0143, |
| "step": 28560 |
| }, |
| { |
| "grad_norm": 0.11191680282354355, |
| "learning_rate": 8.556632599025898e-05, |
| "loss": 0.0126, |
| "step": 28570 |
| }, |
| { |
| "grad_norm": 0.18659430742263794, |
| "learning_rate": 8.555470243950964e-05, |
| "loss": 0.0128, |
| "step": 28580 |
| }, |
| { |
| "grad_norm": 0.17905016243457794, |
| "learning_rate": 8.554307500055148e-05, |
| "loss": 0.0122, |
| "step": 28590 |
| }, |
| { |
| "grad_norm": 0.165314719080925, |
| "learning_rate": 8.553144367465609e-05, |
| "loss": 0.0117, |
| "step": 28600 |
| }, |
| { |
| "grad_norm": 0.1951138973236084, |
| "learning_rate": 8.551980846309544e-05, |
| "loss": 0.0119, |
| "step": 28610 |
| }, |
| { |
| "grad_norm": 0.17219603061676025, |
| "learning_rate": 8.550816936714193e-05, |
| "loss": 0.0122, |
| "step": 28620 |
| }, |
| { |
| "grad_norm": 0.18891964852809906, |
| "learning_rate": 8.549652638806841e-05, |
| "loss": 0.0155, |
| "step": 28630 |
| }, |
| { |
| "grad_norm": 0.15256862342357635, |
| "learning_rate": 8.548487952714812e-05, |
| "loss": 0.0119, |
| "step": 28640 |
| }, |
| { |
| "grad_norm": 0.18874719738960266, |
| "learning_rate": 8.547322878565478e-05, |
| "loss": 0.0142, |
| "step": 28650 |
| }, |
| { |
| "grad_norm": 0.2267162799835205, |
| "learning_rate": 8.546157416486245e-05, |
| "loss": 0.0111, |
| "step": 28660 |
| }, |
| { |
| "grad_norm": 0.18347524106502533, |
| "learning_rate": 8.54499156660457e-05, |
| "loss": 0.0123, |
| "step": 28670 |
| }, |
| { |
| "grad_norm": 0.2024136781692505, |
| "learning_rate": 8.543825329047947e-05, |
| "loss": 0.0141, |
| "step": 28680 |
| }, |
| { |
| "grad_norm": 0.19219805300235748, |
| "learning_rate": 8.542658703943913e-05, |
| "loss": 0.013, |
| "step": 28690 |
| }, |
| { |
| "grad_norm": 0.20163536071777344, |
| "learning_rate": 8.541491691420051e-05, |
| "loss": 0.0143, |
| "step": 28700 |
| }, |
| { |
| "grad_norm": 0.20172348618507385, |
| "learning_rate": 8.54032429160398e-05, |
| "loss": 0.0133, |
| "step": 28710 |
| }, |
| { |
| "grad_norm": 0.22644536197185516, |
| "learning_rate": 8.539156504623369e-05, |
| "loss": 0.0114, |
| "step": 28720 |
| }, |
| { |
| "grad_norm": 0.20980650186538696, |
| "learning_rate": 8.537988330605923e-05, |
| "loss": 0.014, |
| "step": 28730 |
| }, |
| { |
| "grad_norm": 0.18033821880817413, |
| "learning_rate": 8.536819769679393e-05, |
| "loss": 0.0115, |
| "step": 28740 |
| }, |
| { |
| "grad_norm": 0.15536117553710938, |
| "learning_rate": 8.53565082197157e-05, |
| "loss": 0.0141, |
| "step": 28750 |
| }, |
| { |
| "grad_norm": 0.21701377630233765, |
| "learning_rate": 8.534481487610289e-05, |
| "loss": 0.0132, |
| "step": 28760 |
| }, |
| { |
| "grad_norm": 0.19303607940673828, |
| "learning_rate": 8.533311766723428e-05, |
| "loss": 0.0143, |
| "step": 28770 |
| }, |
| { |
| "grad_norm": 0.16089658439159393, |
| "learning_rate": 8.532141659438901e-05, |
| "loss": 0.0136, |
| "step": 28780 |
| }, |
| { |
| "grad_norm": 0.16383571922779083, |
| "learning_rate": 8.530971165884675e-05, |
| "loss": 0.0113, |
| "step": 28790 |
| }, |
| { |
| "grad_norm": 0.175754114985466, |
| "learning_rate": 8.529800286188752e-05, |
| "loss": 0.0116, |
| "step": 28800 |
| }, |
| { |
| "grad_norm": 0.14809109270572662, |
| "learning_rate": 8.528629020479175e-05, |
| "loss": 0.0114, |
| "step": 28810 |
| }, |
| { |
| "grad_norm": 0.18954558670520782, |
| "learning_rate": 8.527457368884033e-05, |
| "loss": 0.0148, |
| "step": 28820 |
| }, |
| { |
| "grad_norm": 0.15618684887886047, |
| "learning_rate": 8.526285331531458e-05, |
| "loss": 0.0111, |
| "step": 28830 |
| }, |
| { |
| "grad_norm": 0.19574670493602753, |
| "learning_rate": 8.525112908549621e-05, |
| "loss": 0.0134, |
| "step": 28840 |
| }, |
| { |
| "grad_norm": 0.22770293056964874, |
| "learning_rate": 8.523940100066735e-05, |
| "loss": 0.0118, |
| "step": 28850 |
| }, |
| { |
| "grad_norm": 0.14471113681793213, |
| "learning_rate": 8.52276690621106e-05, |
| "loss": 0.0124, |
| "step": 28860 |
| }, |
| { |
| "grad_norm": 0.163731649518013, |
| "learning_rate": 8.521593327110889e-05, |
| "loss": 0.0125, |
| "step": 28870 |
| }, |
| { |
| "grad_norm": 0.20715269446372986, |
| "learning_rate": 8.520419362894569e-05, |
| "loss": 0.0117, |
| "step": 28880 |
| }, |
| { |
| "grad_norm": 0.2353220283985138, |
| "learning_rate": 8.51924501369048e-05, |
| "loss": 0.0112, |
| "step": 28890 |
| }, |
| { |
| "grad_norm": 0.16888241469860077, |
| "learning_rate": 8.518070279627047e-05, |
| "loss": 0.0131, |
| "step": 28900 |
| }, |
| { |
| "grad_norm": 0.18317660689353943, |
| "learning_rate": 8.516895160832737e-05, |
| "loss": 0.0113, |
| "step": 28910 |
| }, |
| { |
| "grad_norm": 0.1913095861673355, |
| "learning_rate": 8.515719657436061e-05, |
| "loss": 0.0106, |
| "step": 28920 |
| }, |
| { |
| "grad_norm": 0.16577529907226562, |
| "learning_rate": 8.514543769565568e-05, |
| "loss": 0.0152, |
| "step": 28930 |
| }, |
| { |
| "grad_norm": 0.13656282424926758, |
| "learning_rate": 8.513367497349853e-05, |
| "loss": 0.0114, |
| "step": 28940 |
| }, |
| { |
| "grad_norm": 0.19207116961479187, |
| "learning_rate": 8.51219084091755e-05, |
| "loss": 0.0126, |
| "step": 28950 |
| }, |
| { |
| "grad_norm": 0.1479605734348297, |
| "learning_rate": 8.511013800397338e-05, |
| "loss": 0.0153, |
| "step": 28960 |
| }, |
| { |
| "grad_norm": 0.17726215720176697, |
| "learning_rate": 8.509836375917937e-05, |
| "loss": 0.0126, |
| "step": 28970 |
| }, |
| { |
| "grad_norm": 0.24432331323623657, |
| "learning_rate": 8.508658567608104e-05, |
| "loss": 0.0125, |
| "step": 28980 |
| }, |
| { |
| "grad_norm": 0.1874416172504425, |
| "learning_rate": 8.507480375596647e-05, |
| "loss": 0.0148, |
| "step": 28990 |
| }, |
| { |
| "grad_norm": 0.23588506877422333, |
| "learning_rate": 8.506301800012408e-05, |
| "loss": 0.0134, |
| "step": 29000 |
| }, |
| { |
| "grad_norm": 0.19267885386943817, |
| "learning_rate": 8.505122840984278e-05, |
| "loss": 0.0123, |
| "step": 29010 |
| }, |
| { |
| "grad_norm": 0.14792896807193756, |
| "learning_rate": 8.503943498641182e-05, |
| "loss": 0.0133, |
| "step": 29020 |
| }, |
| { |
| "grad_norm": 0.21639281511306763, |
| "learning_rate": 8.502763773112095e-05, |
| "loss": 0.012, |
| "step": 29030 |
| }, |
| { |
| "grad_norm": 0.19488239288330078, |
| "learning_rate": 8.501583664526026e-05, |
| "loss": 0.0157, |
| "step": 29040 |
| }, |
| { |
| "grad_norm": 0.1856304556131363, |
| "learning_rate": 8.500403173012032e-05, |
| "loss": 0.0097, |
| "step": 29050 |
| }, |
| { |
| "grad_norm": 0.15881025791168213, |
| "learning_rate": 8.499222298699211e-05, |
| "loss": 0.0121, |
| "step": 29060 |
| }, |
| { |
| "grad_norm": 0.19815614819526672, |
| "learning_rate": 8.498041041716701e-05, |
| "loss": 0.0135, |
| "step": 29070 |
| }, |
| { |
| "grad_norm": 0.16614429652690887, |
| "learning_rate": 8.496859402193681e-05, |
| "loss": 0.013, |
| "step": 29080 |
| }, |
| { |
| "grad_norm": 0.2017555981874466, |
| "learning_rate": 8.495677380259374e-05, |
| "loss": 0.0121, |
| "step": 29090 |
| }, |
| { |
| "grad_norm": 0.21205055713653564, |
| "learning_rate": 8.494494976043045e-05, |
| "loss": 0.0129, |
| "step": 29100 |
| }, |
| { |
| "grad_norm": 0.15883712470531464, |
| "learning_rate": 8.493312189673998e-05, |
| "loss": 0.0117, |
| "step": 29110 |
| }, |
| { |
| "grad_norm": 0.22783473134040833, |
| "learning_rate": 8.492129021281584e-05, |
| "loss": 0.0133, |
| "step": 29120 |
| }, |
| { |
| "grad_norm": 0.2064107209444046, |
| "learning_rate": 8.490945470995188e-05, |
| "loss": 0.0136, |
| "step": 29130 |
| }, |
| { |
| "grad_norm": 0.14446167647838593, |
| "learning_rate": 8.489761538944247e-05, |
| "loss": 0.0147, |
| "step": 29140 |
| }, |
| { |
| "grad_norm": 0.2097829133272171, |
| "learning_rate": 8.48857722525823e-05, |
| "loss": 0.0138, |
| "step": 29150 |
| }, |
| { |
| "grad_norm": 0.19569893181324005, |
| "learning_rate": 8.487392530066652e-05, |
| "loss": 0.0148, |
| "step": 29160 |
| }, |
| { |
| "grad_norm": 0.20244553685188293, |
| "learning_rate": 8.486207453499069e-05, |
| "loss": 0.0129, |
| "step": 29170 |
| }, |
| { |
| "grad_norm": 0.1293058842420578, |
| "learning_rate": 8.485021995685082e-05, |
| "loss": 0.0132, |
| "step": 29180 |
| }, |
| { |
| "grad_norm": 0.19508272409439087, |
| "learning_rate": 8.483836156754328e-05, |
| "loss": 0.0129, |
| "step": 29190 |
| }, |
| { |
| "grad_norm": 0.13909363746643066, |
| "learning_rate": 8.482649936836491e-05, |
| "loss": 0.0104, |
| "step": 29200 |
| }, |
| { |
| "grad_norm": 0.16894637048244476, |
| "learning_rate": 8.481463336061293e-05, |
| "loss": 0.0118, |
| "step": 29210 |
| }, |
| { |
| "grad_norm": 0.17273752391338348, |
| "learning_rate": 8.480276354558496e-05, |
| "loss": 0.0119, |
| "step": 29220 |
| }, |
| { |
| "grad_norm": 0.21895958483219147, |
| "learning_rate": 8.479088992457913e-05, |
| "loss": 0.0139, |
| "step": 29230 |
| }, |
| { |
| "grad_norm": 0.17813675105571747, |
| "learning_rate": 8.477901249889387e-05, |
| "loss": 0.016, |
| "step": 29240 |
| }, |
| { |
| "grad_norm": 0.20593753457069397, |
| "learning_rate": 8.47671312698281e-05, |
| "loss": 0.0136, |
| "step": 29250 |
| }, |
| { |
| "grad_norm": 0.1907685250043869, |
| "learning_rate": 8.475524623868112e-05, |
| "loss": 0.0119, |
| "step": 29260 |
| }, |
| { |
| "grad_norm": 0.1305512636899948, |
| "learning_rate": 8.474335740675266e-05, |
| "loss": 0.0115, |
| "step": 29270 |
| }, |
| { |
| "grad_norm": 0.16224084794521332, |
| "learning_rate": 8.473146477534289e-05, |
| "loss": 0.0117, |
| "step": 29280 |
| }, |
| { |
| "grad_norm": 0.1430281549692154, |
| "learning_rate": 8.471956834575232e-05, |
| "loss": 0.0126, |
| "step": 29290 |
| }, |
| { |
| "grad_norm": 0.1667489856481552, |
| "learning_rate": 8.470766811928197e-05, |
| "loss": 0.0133, |
| "step": 29300 |
| }, |
| { |
| "grad_norm": 0.1585753858089447, |
| "learning_rate": 8.469576409723323e-05, |
| "loss": 0.0117, |
| "step": 29310 |
| }, |
| { |
| "grad_norm": 0.17134876549243927, |
| "learning_rate": 8.468385628090788e-05, |
| "loss": 0.0123, |
| "step": 29320 |
| }, |
| { |
| "grad_norm": 0.1318117082118988, |
| "learning_rate": 8.467194467160815e-05, |
| "loss": 0.0133, |
| "step": 29330 |
| }, |
| { |
| "grad_norm": 0.16312025487422943, |
| "learning_rate": 8.466002927063667e-05, |
| "loss": 0.0106, |
| "step": 29340 |
| }, |
| { |
| "grad_norm": 0.16416870057582855, |
| "learning_rate": 8.464811007929651e-05, |
| "loss": 0.013, |
| "step": 29350 |
| }, |
| { |
| "grad_norm": 0.19791357219219208, |
| "learning_rate": 8.463618709889114e-05, |
| "loss": 0.0133, |
| "step": 29360 |
| }, |
| { |
| "grad_norm": 0.14834952354431152, |
| "learning_rate": 8.462426033072442e-05, |
| "loss": 0.0131, |
| "step": 29370 |
| }, |
| { |
| "grad_norm": 0.20697247982025146, |
| "learning_rate": 8.461232977610061e-05, |
| "loss": 0.0122, |
| "step": 29380 |
| }, |
| { |
| "grad_norm": 0.187699556350708, |
| "learning_rate": 8.46003954363245e-05, |
| "loss": 0.0119, |
| "step": 29390 |
| }, |
| { |
| "grad_norm": 0.18760277330875397, |
| "learning_rate": 8.458845731270115e-05, |
| "loss": 0.012, |
| "step": 29400 |
| }, |
| { |
| "grad_norm": 0.17339487373828888, |
| "learning_rate": 8.45765154065361e-05, |
| "loss": 0.0099, |
| "step": 29410 |
| }, |
| { |
| "grad_norm": 0.1999693661928177, |
| "learning_rate": 8.456456971913532e-05, |
| "loss": 0.0098, |
| "step": 29420 |
| }, |
| { |
| "grad_norm": 0.15931078791618347, |
| "learning_rate": 8.455262025180517e-05, |
| "loss": 0.0121, |
| "step": 29430 |
| }, |
| { |
| "grad_norm": 0.15840600430965424, |
| "learning_rate": 8.454066700585242e-05, |
| "loss": 0.0114, |
| "step": 29440 |
| }, |
| { |
| "grad_norm": 0.17567718029022217, |
| "learning_rate": 8.452870998258423e-05, |
| "loss": 0.012, |
| "step": 29450 |
| }, |
| { |
| "grad_norm": 0.1787082403898239, |
| "learning_rate": 8.451674918330825e-05, |
| "loss": 0.0132, |
| "step": 29460 |
| }, |
| { |
| "grad_norm": 0.18018962442874908, |
| "learning_rate": 8.450478460933246e-05, |
| "loss": 0.0105, |
| "step": 29470 |
| }, |
| { |
| "grad_norm": 0.15651020407676697, |
| "learning_rate": 8.449281626196532e-05, |
| "loss": 0.0127, |
| "step": 29480 |
| }, |
| { |
| "grad_norm": 0.18134862184524536, |
| "learning_rate": 8.448084414251564e-05, |
| "loss": 0.01, |
| "step": 29490 |
| }, |
| { |
| "grad_norm": 0.2032192200422287, |
| "learning_rate": 8.446886825229271e-05, |
| "loss": 0.0121, |
| "step": 29500 |
| }, |
| { |
| "grad_norm": 0.14923639595508575, |
| "learning_rate": 8.445688859260615e-05, |
| "loss": 0.0132, |
| "step": 29510 |
| }, |
| { |
| "grad_norm": 0.23174427449703217, |
| "learning_rate": 8.444490516476606e-05, |
| "loss": 0.0137, |
| "step": 29520 |
| }, |
| { |
| "grad_norm": 0.1746220737695694, |
| "learning_rate": 8.443291797008293e-05, |
| "loss": 0.0108, |
| "step": 29530 |
| }, |
| { |
| "grad_norm": 0.177895188331604, |
| "learning_rate": 8.442092700986765e-05, |
| "loss": 0.0155, |
| "step": 29540 |
| }, |
| { |
| "grad_norm": 0.1803567260503769, |
| "learning_rate": 8.440893228543156e-05, |
| "loss": 0.0119, |
| "step": 29550 |
| }, |
| { |
| "grad_norm": 0.20342227816581726, |
| "learning_rate": 8.439693379808638e-05, |
| "loss": 0.0132, |
| "step": 29560 |
| }, |
| { |
| "grad_norm": 0.16396351158618927, |
| "learning_rate": 8.43849315491442e-05, |
| "loss": 0.0124, |
| "step": 29570 |
| }, |
| { |
| "grad_norm": 0.1352321207523346, |
| "learning_rate": 8.437292553991763e-05, |
| "loss": 0.0109, |
| "step": 29580 |
| }, |
| { |
| "grad_norm": 0.19388611614704132, |
| "learning_rate": 8.436091577171959e-05, |
| "loss": 0.0114, |
| "step": 29590 |
| }, |
| { |
| "grad_norm": 0.23504306375980377, |
| "learning_rate": 8.434890224586347e-05, |
| "loss": 0.0144, |
| "step": 29600 |
| }, |
| { |
| "grad_norm": 0.27649304270744324, |
| "learning_rate": 8.433688496366303e-05, |
| "loss": 0.0154, |
| "step": 29610 |
| }, |
| { |
| "grad_norm": 0.21790184080600739, |
| "learning_rate": 8.432486392643248e-05, |
| "loss": 0.0133, |
| "step": 29620 |
| }, |
| { |
| "grad_norm": 0.23398706316947937, |
| "learning_rate": 8.431283913548643e-05, |
| "loss": 0.0113, |
| "step": 29630 |
| }, |
| { |
| "grad_norm": 0.16118748486042023, |
| "learning_rate": 8.430081059213985e-05, |
| "loss": 0.0114, |
| "step": 29640 |
| }, |
| { |
| "grad_norm": 0.1283956617116928, |
| "learning_rate": 8.428877829770823e-05, |
| "loss": 0.0118, |
| "step": 29650 |
| }, |
| { |
| "grad_norm": 0.20732879638671875, |
| "learning_rate": 8.427674225350735e-05, |
| "loss": 0.0123, |
| "step": 29660 |
| }, |
| { |
| "grad_norm": 0.18760187923908234, |
| "learning_rate": 8.426470246085347e-05, |
| "loss": 0.011, |
| "step": 29670 |
| }, |
| { |
| "grad_norm": 0.14654579758644104, |
| "learning_rate": 8.425265892106324e-05, |
| "loss": 0.0108, |
| "step": 29680 |
| }, |
| { |
| "grad_norm": 0.15680843591690063, |
| "learning_rate": 8.424061163545374e-05, |
| "loss": 0.0117, |
| "step": 29690 |
| }, |
| { |
| "grad_norm": 0.1776723712682724, |
| "learning_rate": 8.422856060534243e-05, |
| "loss": 0.0121, |
| "step": 29700 |
| }, |
| { |
| "grad_norm": 0.1705937683582306, |
| "learning_rate": 8.421650583204718e-05, |
| "loss": 0.0122, |
| "step": 29710 |
| }, |
| { |
| "grad_norm": 0.17986483871936798, |
| "learning_rate": 8.420444731688633e-05, |
| "loss": 0.0116, |
| "step": 29720 |
| }, |
| { |
| "grad_norm": 0.17028284072875977, |
| "learning_rate": 8.419238506117852e-05, |
| "loss": 0.0155, |
| "step": 29730 |
| }, |
| { |
| "grad_norm": 0.18307262659072876, |
| "learning_rate": 8.418031906624289e-05, |
| "loss": 0.0107, |
| "step": 29740 |
| }, |
| { |
| "grad_norm": 0.18997417390346527, |
| "learning_rate": 8.416824933339898e-05, |
| "loss": 0.0136, |
| "step": 29750 |
| }, |
| { |
| "grad_norm": 0.14866510033607483, |
| "learning_rate": 8.415617586396667e-05, |
| "loss": 0.0131, |
| "step": 29760 |
| }, |
| { |
| "grad_norm": 0.1801193207502365, |
| "learning_rate": 8.414409865926632e-05, |
| "loss": 0.0149, |
| "step": 29770 |
| }, |
| { |
| "grad_norm": 0.18089812994003296, |
| "learning_rate": 8.413201772061867e-05, |
| "loss": 0.0122, |
| "step": 29780 |
| }, |
| { |
| "grad_norm": 0.20078693330287933, |
| "learning_rate": 8.411993304934488e-05, |
| "loss": 0.0129, |
| "step": 29790 |
| }, |
| { |
| "grad_norm": 0.2205827832221985, |
| "learning_rate": 8.410784464676654e-05, |
| "loss": 0.0155, |
| "step": 29800 |
| }, |
| { |
| "grad_norm": 0.16440054774284363, |
| "learning_rate": 8.409575251420556e-05, |
| "loss": 0.0104, |
| "step": 29810 |
| }, |
| { |
| "grad_norm": 0.14770565927028656, |
| "learning_rate": 8.408365665298435e-05, |
| "loss": 0.0112, |
| "step": 29820 |
| }, |
| { |
| "grad_norm": 0.1756041944026947, |
| "learning_rate": 8.40715570644257e-05, |
| "loss": 0.0138, |
| "step": 29830 |
| }, |
| { |
| "grad_norm": 0.19235102832317352, |
| "learning_rate": 8.40594537498528e-05, |
| "loss": 0.0115, |
| "step": 29840 |
| }, |
| { |
| "grad_norm": 0.24119681119918823, |
| "learning_rate": 8.404734671058924e-05, |
| "loss": 0.0149, |
| "step": 29850 |
| }, |
| { |
| "grad_norm": 0.19353336095809937, |
| "learning_rate": 8.403523594795902e-05, |
| "loss": 0.0138, |
| "step": 29860 |
| }, |
| { |
| "grad_norm": 0.16554079949855804, |
| "learning_rate": 8.402312146328659e-05, |
| "loss": 0.0137, |
| "step": 29870 |
| }, |
| { |
| "grad_norm": 0.18109650909900665, |
| "learning_rate": 8.401100325789675e-05, |
| "loss": 0.0143, |
| "step": 29880 |
| }, |
| { |
| "grad_norm": 0.22677451372146606, |
| "learning_rate": 8.399888133311472e-05, |
| "loss": 0.0156, |
| "step": 29890 |
| }, |
| { |
| "grad_norm": 0.14986349642276764, |
| "learning_rate": 8.398675569026613e-05, |
| "loss": 0.0118, |
| "step": 29900 |
| }, |
| { |
| "grad_norm": 0.18745896220207214, |
| "learning_rate": 8.397462633067705e-05, |
| "loss": 0.0133, |
| "step": 29910 |
| }, |
| { |
| "grad_norm": 0.16973541676998138, |
| "learning_rate": 8.396249325567392e-05, |
| "loss": 0.0125, |
| "step": 29920 |
| }, |
| { |
| "grad_norm": 0.16721414029598236, |
| "learning_rate": 8.395035646658357e-05, |
| "loss": 0.0151, |
| "step": 29930 |
| }, |
| { |
| "grad_norm": 0.15872403979301453, |
| "learning_rate": 8.39382159647333e-05, |
| "loss": 0.0137, |
| "step": 29940 |
| }, |
| { |
| "grad_norm": 0.20979169011116028, |
| "learning_rate": 8.392607175145075e-05, |
| "loss": 0.0135, |
| "step": 29950 |
| }, |
| { |
| "grad_norm": 0.1652858406305313, |
| "learning_rate": 8.3913923828064e-05, |
| "loss": 0.0106, |
| "step": 29960 |
| }, |
| { |
| "grad_norm": 0.179422065615654, |
| "learning_rate": 8.390177219590152e-05, |
| "loss": 0.0105, |
| "step": 29970 |
| }, |
| { |
| "grad_norm": 0.1856880486011505, |
| "learning_rate": 8.388961685629222e-05, |
| "loss": 0.011, |
| "step": 29980 |
| }, |
| { |
| "grad_norm": 0.2280905395746231, |
| "learning_rate": 8.387745781056536e-05, |
| "loss": 0.0114, |
| "step": 29990 |
| }, |
| { |
| "grad_norm": 0.16093143820762634, |
| "learning_rate": 8.386529506005065e-05, |
| "loss": 0.0122, |
| "step": 30000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 48, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|