diff --git "a/libero_10_ckpt/checkpoint-100000/trainer_state.json" "b/libero_10_ckpt/checkpoint-100000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/libero_10_ckpt/checkpoint-100000/trainer_state.json" @@ -0,0 +1,60034 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 100000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "grad_norm": 0.887859046459198, + "learning_rate": 1.8e-07, + "loss": 1.2386, + "step": 10 + }, + { + "grad_norm": 0.8838368058204651, + "learning_rate": 3.8e-07, + "loss": 1.2339, + "step": 20 + }, + { + "grad_norm": 0.8834822773933411, + "learning_rate": 5.8e-07, + "loss": 1.2206, + "step": 30 + }, + { + "grad_norm": 0.7918919324874878, + "learning_rate": 7.8e-07, + "loss": 1.2165, + "step": 40 + }, + { + "grad_norm": 0.5600261092185974, + "learning_rate": 9.8e-07, + "loss": 1.2214, + "step": 50 + }, + { + "grad_norm": 0.3669975996017456, + "learning_rate": 1.18e-06, + "loss": 1.223, + "step": 60 + }, + { + "grad_norm": 0.3037402033805847, + "learning_rate": 1.3800000000000001e-06, + "loss": 1.2178, + "step": 70 + }, + { + "grad_norm": 0.2610374987125397, + "learning_rate": 1.5800000000000003e-06, + "loss": 1.2181, + "step": 80 + }, + { + "grad_norm": 0.30342763662338257, + "learning_rate": 1.7800000000000001e-06, + "loss": 1.2202, + "step": 90 + }, + { + "grad_norm": 0.320873886346817, + "learning_rate": 1.98e-06, + "loss": 1.2111, + "step": 100 + }, + { + "grad_norm": 0.2444116324186325, + "learning_rate": 2.1800000000000003e-06, + "loss": 1.2142, + "step": 110 + }, + { + "grad_norm": 0.2923831641674042, + "learning_rate": 2.38e-06, + "loss": 1.2057, + "step": 120 + }, + { + "grad_norm": 0.4017699062824249, + "learning_rate": 2.5800000000000003e-06, + "loss": 1.1883, + "step": 130 + }, + { + "grad_norm": 0.3472406566143036, + "learning_rate": 2.78e-06, + "loss": 1.1521, + "step": 140 + }, + { + "grad_norm": 0.3293982148170471, + "learning_rate": 2.9800000000000003e-06, + "loss": 1.1178, + "step": 150 + }, + { + "grad_norm": 0.30647310614585876, + "learning_rate": 3.1800000000000005e-06, + "loss": 1.1095, + "step": 160 + }, + { + "grad_norm": 0.2220936268568039, + "learning_rate": 3.38e-06, + "loss": 1.1071, + "step": 170 + }, + { + "grad_norm": 0.309011310338974, + "learning_rate": 3.58e-06, + "loss": 1.0981, + "step": 180 + }, + { + "grad_norm": 0.26775071024894714, + "learning_rate": 3.7800000000000002e-06, + "loss": 1.0995, + "step": 190 + }, + { + "grad_norm": 0.3040660619735718, + "learning_rate": 3.98e-06, + "loss": 1.0814, + "step": 200 + }, + { + "grad_norm": 0.32198044657707214, + "learning_rate": 4.18e-06, + "loss": 1.0791, + "step": 210 + }, + { + "grad_norm": 0.2581990659236908, + "learning_rate": 4.38e-06, + "loss": 1.0733, + "step": 220 + }, + { + "grad_norm": 0.3079847991466522, + "learning_rate": 4.58e-06, + "loss": 1.0629, + "step": 230 + }, + { + "grad_norm": 0.3295849561691284, + "learning_rate": 4.780000000000001e-06, + "loss": 1.0591, + "step": 240 + }, + { + "grad_norm": 0.4684920310974121, + "learning_rate": 4.98e-06, + "loss": 1.0551, + "step": 250 + }, + { + "grad_norm": 0.3643186688423157, + "learning_rate": 5.18e-06, + "loss": 1.0484, + "step": 260 + }, + { + "grad_norm": 0.40556076169013977, + "learning_rate": 5.38e-06, + "loss": 1.0407, + "step": 270 + }, + { + "grad_norm": 0.8950938582420349, + "learning_rate": 5.580000000000001e-06, + "loss": 1.006, + "step": 280 + }, + { + "grad_norm": 1.1896557807922363, + "learning_rate": 5.78e-06, + "loss": 0.9703, + "step": 290 + }, + { + "grad_norm": 1.0755707025527954, + "learning_rate": 5.98e-06, + "loss": 0.9004, + "step": 300 + }, + { + "grad_norm": 1.0481752157211304, + "learning_rate": 6.18e-06, + "loss": 0.8497, + "step": 310 + }, + { + "grad_norm": 1.0609071254730225, + "learning_rate": 6.38e-06, + "loss": 0.8041, + "step": 320 + }, + { + "grad_norm": 1.446955680847168, + "learning_rate": 6.58e-06, + "loss": 0.7721, + "step": 330 + }, + { + "grad_norm": 1.0151890516281128, + "learning_rate": 6.78e-06, + "loss": 0.754, + "step": 340 + }, + { + "grad_norm": 0.8983801603317261, + "learning_rate": 6.98e-06, + "loss": 0.735, + "step": 350 + }, + { + "grad_norm": 1.2405860424041748, + "learning_rate": 7.180000000000001e-06, + "loss": 0.7113, + "step": 360 + }, + { + "grad_norm": 1.441552996635437, + "learning_rate": 7.3800000000000005e-06, + "loss": 0.6871, + "step": 370 + }, + { + "grad_norm": 1.440989375114441, + "learning_rate": 7.580000000000001e-06, + "loss": 0.6508, + "step": 380 + }, + { + "grad_norm": 1.8160384893417358, + "learning_rate": 7.78e-06, + "loss": 0.6076, + "step": 390 + }, + { + "grad_norm": 1.8520482778549194, + "learning_rate": 7.98e-06, + "loss": 0.5618, + "step": 400 + }, + { + "grad_norm": 1.6575309038162231, + "learning_rate": 8.18e-06, + "loss": 0.4971, + "step": 410 + }, + { + "grad_norm": 1.4107731580734253, + "learning_rate": 8.380000000000001e-06, + "loss": 0.4555, + "step": 420 + }, + { + "grad_norm": 1.6264456510543823, + "learning_rate": 8.580000000000001e-06, + "loss": 0.4183, + "step": 430 + }, + { + "grad_norm": 1.9247311353683472, + "learning_rate": 8.78e-06, + "loss": 0.3501, + "step": 440 + }, + { + "grad_norm": 1.674877405166626, + "learning_rate": 8.98e-06, + "loss": 0.3228, + "step": 450 + }, + { + "grad_norm": 1.677514910697937, + "learning_rate": 9.180000000000002e-06, + "loss": 0.2883, + "step": 460 + }, + { + "grad_norm": 1.2708207368850708, + "learning_rate": 9.38e-06, + "loss": 0.2523, + "step": 470 + }, + { + "grad_norm": 2.606647491455078, + "learning_rate": 9.58e-06, + "loss": 0.2409, + "step": 480 + }, + { + "grad_norm": 1.8302645683288574, + "learning_rate": 9.78e-06, + "loss": 0.2232, + "step": 490 + }, + { + "grad_norm": 1.465612530708313, + "learning_rate": 9.980000000000001e-06, + "loss": 0.2171, + "step": 500 + }, + { + "grad_norm": 1.6023286581039429, + "learning_rate": 1.018e-05, + "loss": 0.2111, + "step": 510 + }, + { + "grad_norm": 1.5220160484313965, + "learning_rate": 1.038e-05, + "loss": 0.21, + "step": 520 + }, + { + "grad_norm": 1.3357927799224854, + "learning_rate": 1.058e-05, + "loss": 0.1921, + "step": 530 + }, + { + "grad_norm": 1.312163233757019, + "learning_rate": 1.0780000000000002e-05, + "loss": 0.1813, + "step": 540 + }, + { + "grad_norm": 1.2907906770706177, + "learning_rate": 1.098e-05, + "loss": 0.1692, + "step": 550 + }, + { + "grad_norm": 1.0940006971359253, + "learning_rate": 1.118e-05, + "loss": 0.174, + "step": 560 + }, + { + "grad_norm": 2.1016087532043457, + "learning_rate": 1.1380000000000001e-05, + "loss": 0.1689, + "step": 570 + }, + { + "grad_norm": 1.2690412998199463, + "learning_rate": 1.1580000000000001e-05, + "loss": 0.1696, + "step": 580 + }, + { + "grad_norm": 1.5685679912567139, + "learning_rate": 1.178e-05, + "loss": 0.1651, + "step": 590 + }, + { + "grad_norm": 1.060952067375183, + "learning_rate": 1.198e-05, + "loss": 0.1635, + "step": 600 + }, + { + "grad_norm": 1.709365963935852, + "learning_rate": 1.2180000000000002e-05, + "loss": 0.1632, + "step": 610 + }, + { + "grad_norm": 1.1375125646591187, + "learning_rate": 1.238e-05, + "loss": 0.1578, + "step": 620 + }, + { + "grad_norm": 1.0781357288360596, + "learning_rate": 1.258e-05, + "loss": 0.1485, + "step": 630 + }, + { + "grad_norm": 1.1886907815933228, + "learning_rate": 1.278e-05, + "loss": 0.1518, + "step": 640 + }, + { + "grad_norm": 1.5670561790466309, + "learning_rate": 1.2980000000000001e-05, + "loss": 0.1403, + "step": 650 + }, + { + "grad_norm": 1.764258861541748, + "learning_rate": 1.3180000000000001e-05, + "loss": 0.1447, + "step": 660 + }, + { + "grad_norm": 2.070798635482788, + "learning_rate": 1.338e-05, + "loss": 0.1366, + "step": 670 + }, + { + "grad_norm": 1.1944119930267334, + "learning_rate": 1.358e-05, + "loss": 0.1439, + "step": 680 + }, + { + "grad_norm": 1.4056333303451538, + "learning_rate": 1.3780000000000002e-05, + "loss": 0.1445, + "step": 690 + }, + { + "grad_norm": 1.5682401657104492, + "learning_rate": 1.3980000000000002e-05, + "loss": 0.1395, + "step": 700 + }, + { + "grad_norm": 1.5870603322982788, + "learning_rate": 1.4180000000000001e-05, + "loss": 0.1364, + "step": 710 + }, + { + "grad_norm": 1.3146198987960815, + "learning_rate": 1.4380000000000001e-05, + "loss": 0.1389, + "step": 720 + }, + { + "grad_norm": 1.532326579093933, + "learning_rate": 1.4580000000000003e-05, + "loss": 0.13, + "step": 730 + }, + { + "grad_norm": 1.3333654403686523, + "learning_rate": 1.4779999999999999e-05, + "loss": 0.1287, + "step": 740 + }, + { + "grad_norm": 1.207822561264038, + "learning_rate": 1.4979999999999999e-05, + "loss": 0.137, + "step": 750 + }, + { + "grad_norm": 1.1548209190368652, + "learning_rate": 1.518e-05, + "loss": 0.1291, + "step": 760 + }, + { + "grad_norm": 1.2071747779846191, + "learning_rate": 1.538e-05, + "loss": 0.1277, + "step": 770 + }, + { + "grad_norm": 1.4934929609298706, + "learning_rate": 1.558e-05, + "loss": 0.1325, + "step": 780 + }, + { + "grad_norm": 1.293228268623352, + "learning_rate": 1.578e-05, + "loss": 0.1227, + "step": 790 + }, + { + "grad_norm": 1.2349001169204712, + "learning_rate": 1.598e-05, + "loss": 0.1224, + "step": 800 + }, + { + "grad_norm": 1.1482325792312622, + "learning_rate": 1.618e-05, + "loss": 0.1208, + "step": 810 + }, + { + "grad_norm": 0.9326032400131226, + "learning_rate": 1.6380000000000002e-05, + "loss": 0.1264, + "step": 820 + }, + { + "grad_norm": 1.5052955150604248, + "learning_rate": 1.658e-05, + "loss": 0.1155, + "step": 830 + }, + { + "grad_norm": 1.3440231084823608, + "learning_rate": 1.6780000000000002e-05, + "loss": 0.1144, + "step": 840 + }, + { + "grad_norm": 1.6285743713378906, + "learning_rate": 1.698e-05, + "loss": 0.116, + "step": 850 + }, + { + "grad_norm": 1.2971304655075073, + "learning_rate": 1.718e-05, + "loss": 0.1234, + "step": 860 + }, + { + "grad_norm": 1.070986032485962, + "learning_rate": 1.7380000000000003e-05, + "loss": 0.1127, + "step": 870 + }, + { + "grad_norm": 1.1398824453353882, + "learning_rate": 1.758e-05, + "loss": 0.114, + "step": 880 + }, + { + "grad_norm": 1.107651948928833, + "learning_rate": 1.7780000000000003e-05, + "loss": 0.1102, + "step": 890 + }, + { + "grad_norm": 1.0962471961975098, + "learning_rate": 1.798e-05, + "loss": 0.1135, + "step": 900 + }, + { + "grad_norm": 1.1993252038955688, + "learning_rate": 1.818e-05, + "loss": 0.1121, + "step": 910 + }, + { + "grad_norm": 1.3072483539581299, + "learning_rate": 1.838e-05, + "loss": 0.1114, + "step": 920 + }, + { + "grad_norm": 1.312279462814331, + "learning_rate": 1.858e-05, + "loss": 0.1147, + "step": 930 + }, + { + "grad_norm": 1.1000250577926636, + "learning_rate": 1.878e-05, + "loss": 0.1119, + "step": 940 + }, + { + "grad_norm": 1.4620685577392578, + "learning_rate": 1.898e-05, + "loss": 0.1051, + "step": 950 + }, + { + "grad_norm": 0.9386133551597595, + "learning_rate": 1.918e-05, + "loss": 0.1126, + "step": 960 + }, + { + "grad_norm": 1.26031494140625, + "learning_rate": 1.938e-05, + "loss": 0.1039, + "step": 970 + }, + { + "grad_norm": 1.0885252952575684, + "learning_rate": 1.9580000000000002e-05, + "loss": 0.1094, + "step": 980 + }, + { + "grad_norm": 1.437014102935791, + "learning_rate": 1.978e-05, + "loss": 0.1102, + "step": 990 + }, + { + "grad_norm": 1.1268717050552368, + "learning_rate": 1.9980000000000002e-05, + "loss": 0.1056, + "step": 1000 + }, + { + "grad_norm": 1.1384588479995728, + "learning_rate": 2.0180000000000003e-05, + "loss": 0.1045, + "step": 1010 + }, + { + "grad_norm": 1.066062331199646, + "learning_rate": 2.038e-05, + "loss": 0.1084, + "step": 1020 + }, + { + "grad_norm": 1.0102137327194214, + "learning_rate": 2.0580000000000003e-05, + "loss": 0.1045, + "step": 1030 + }, + { + "grad_norm": 1.0361474752426147, + "learning_rate": 2.078e-05, + "loss": 0.1043, + "step": 1040 + }, + { + "grad_norm": 0.8103893399238586, + "learning_rate": 2.098e-05, + "loss": 0.1008, + "step": 1050 + }, + { + "grad_norm": 1.3429416418075562, + "learning_rate": 2.118e-05, + "loss": 0.1033, + "step": 1060 + }, + { + "grad_norm": 0.8854686617851257, + "learning_rate": 2.138e-05, + "loss": 0.1095, + "step": 1070 + }, + { + "grad_norm": 1.438493251800537, + "learning_rate": 2.158e-05, + "loss": 0.1037, + "step": 1080 + }, + { + "grad_norm": 1.2051711082458496, + "learning_rate": 2.178e-05, + "loss": 0.1047, + "step": 1090 + }, + { + "grad_norm": 0.7368249893188477, + "learning_rate": 2.198e-05, + "loss": 0.0976, + "step": 1100 + }, + { + "grad_norm": 1.1620404720306396, + "learning_rate": 2.218e-05, + "loss": 0.0957, + "step": 1110 + }, + { + "grad_norm": 0.9190382957458496, + "learning_rate": 2.2380000000000003e-05, + "loss": 0.0967, + "step": 1120 + }, + { + "grad_norm": 0.9439403414726257, + "learning_rate": 2.258e-05, + "loss": 0.0959, + "step": 1130 + }, + { + "grad_norm": 0.9423888325691223, + "learning_rate": 2.2780000000000002e-05, + "loss": 0.0955, + "step": 1140 + }, + { + "grad_norm": 1.5919392108917236, + "learning_rate": 2.298e-05, + "loss": 0.0986, + "step": 1150 + }, + { + "grad_norm": 1.1985188722610474, + "learning_rate": 2.318e-05, + "loss": 0.0963, + "step": 1160 + }, + { + "grad_norm": 0.9483550786972046, + "learning_rate": 2.3380000000000003e-05, + "loss": 0.1033, + "step": 1170 + }, + { + "grad_norm": 0.8236895203590393, + "learning_rate": 2.358e-05, + "loss": 0.094, + "step": 1180 + }, + { + "grad_norm": 1.3157275915145874, + "learning_rate": 2.3780000000000003e-05, + "loss": 0.1011, + "step": 1190 + }, + { + "grad_norm": 0.9439451098442078, + "learning_rate": 2.398e-05, + "loss": 0.0956, + "step": 1200 + }, + { + "grad_norm": 0.8395298719406128, + "learning_rate": 2.418e-05, + "loss": 0.0991, + "step": 1210 + }, + { + "grad_norm": 1.132033348083496, + "learning_rate": 2.438e-05, + "loss": 0.0945, + "step": 1220 + }, + { + "grad_norm": 1.0645198822021484, + "learning_rate": 2.4580000000000002e-05, + "loss": 0.0977, + "step": 1230 + }, + { + "grad_norm": 1.2159433364868164, + "learning_rate": 2.478e-05, + "loss": 0.0977, + "step": 1240 + }, + { + "grad_norm": 1.3291488885879517, + "learning_rate": 2.498e-05, + "loss": 0.0959, + "step": 1250 + }, + { + "grad_norm": 1.1460973024368286, + "learning_rate": 2.5180000000000003e-05, + "loss": 0.0975, + "step": 1260 + }, + { + "grad_norm": 1.05596125125885, + "learning_rate": 2.5380000000000004e-05, + "loss": 0.0932, + "step": 1270 + }, + { + "grad_norm": 1.2657601833343506, + "learning_rate": 2.5580000000000002e-05, + "loss": 0.0986, + "step": 1280 + }, + { + "grad_norm": 0.9247602224349976, + "learning_rate": 2.5779999999999997e-05, + "loss": 0.0946, + "step": 1290 + }, + { + "grad_norm": 1.289202332496643, + "learning_rate": 2.598e-05, + "loss": 0.0859, + "step": 1300 + }, + { + "grad_norm": 1.1078996658325195, + "learning_rate": 2.618e-05, + "loss": 0.0982, + "step": 1310 + }, + { + "grad_norm": 0.9380692839622498, + "learning_rate": 2.6379999999999998e-05, + "loss": 0.0883, + "step": 1320 + }, + { + "grad_norm": 0.8545584082603455, + "learning_rate": 2.658e-05, + "loss": 0.0854, + "step": 1330 + }, + { + "grad_norm": 0.8042860627174377, + "learning_rate": 2.678e-05, + "loss": 0.0912, + "step": 1340 + }, + { + "grad_norm": 0.9274678230285645, + "learning_rate": 2.698e-05, + "loss": 0.0931, + "step": 1350 + }, + { + "grad_norm": 1.3973448276519775, + "learning_rate": 2.718e-05, + "loss": 0.1012, + "step": 1360 + }, + { + "grad_norm": 0.8301775455474854, + "learning_rate": 2.738e-05, + "loss": 0.0917, + "step": 1370 + }, + { + "grad_norm": 1.2012916803359985, + "learning_rate": 2.758e-05, + "loss": 0.0867, + "step": 1380 + }, + { + "grad_norm": 0.8488144874572754, + "learning_rate": 2.778e-05, + "loss": 0.089, + "step": 1390 + }, + { + "grad_norm": 0.8629277944564819, + "learning_rate": 2.798e-05, + "loss": 0.0954, + "step": 1400 + }, + { + "grad_norm": 0.8973829746246338, + "learning_rate": 2.818e-05, + "loss": 0.0883, + "step": 1410 + }, + { + "grad_norm": 0.9831207394599915, + "learning_rate": 2.8380000000000003e-05, + "loss": 0.0921, + "step": 1420 + }, + { + "grad_norm": 1.0982941389083862, + "learning_rate": 2.858e-05, + "loss": 0.0928, + "step": 1430 + }, + { + "grad_norm": 1.0521658658981323, + "learning_rate": 2.8780000000000002e-05, + "loss": 0.0892, + "step": 1440 + }, + { + "grad_norm": 0.7754012942314148, + "learning_rate": 2.898e-05, + "loss": 0.0893, + "step": 1450 + }, + { + "grad_norm": 1.028157353401184, + "learning_rate": 2.9180000000000002e-05, + "loss": 0.0892, + "step": 1460 + }, + { + "grad_norm": 0.9473941922187805, + "learning_rate": 2.9380000000000003e-05, + "loss": 0.0848, + "step": 1470 + }, + { + "grad_norm": 1.166499137878418, + "learning_rate": 2.958e-05, + "loss": 0.0882, + "step": 1480 + }, + { + "grad_norm": 0.9135540723800659, + "learning_rate": 2.9780000000000003e-05, + "loss": 0.085, + "step": 1490 + }, + { + "grad_norm": 0.8270283937454224, + "learning_rate": 2.998e-05, + "loss": 0.0855, + "step": 1500 + }, + { + "grad_norm": 0.9396687150001526, + "learning_rate": 3.0180000000000002e-05, + "loss": 0.0886, + "step": 1510 + }, + { + "grad_norm": 1.009047269821167, + "learning_rate": 3.0380000000000004e-05, + "loss": 0.0834, + "step": 1520 + }, + { + "grad_norm": 1.1448856592178345, + "learning_rate": 3.058e-05, + "loss": 0.0869, + "step": 1530 + }, + { + "grad_norm": 0.8587153553962708, + "learning_rate": 3.078e-05, + "loss": 0.0823, + "step": 1540 + }, + { + "grad_norm": 0.9685273170471191, + "learning_rate": 3.0980000000000005e-05, + "loss": 0.0867, + "step": 1550 + }, + { + "grad_norm": 0.6682008504867554, + "learning_rate": 3.118e-05, + "loss": 0.0837, + "step": 1560 + }, + { + "grad_norm": 0.5276884436607361, + "learning_rate": 3.138e-05, + "loss": 0.0847, + "step": 1570 + }, + { + "grad_norm": 1.131670355796814, + "learning_rate": 3.1580000000000006e-05, + "loss": 0.0882, + "step": 1580 + }, + { + "grad_norm": 0.6407247185707092, + "learning_rate": 3.1780000000000004e-05, + "loss": 0.0834, + "step": 1590 + }, + { + "grad_norm": 0.809824526309967, + "learning_rate": 3.198e-05, + "loss": 0.0856, + "step": 1600 + }, + { + "grad_norm": 0.8268172740936279, + "learning_rate": 3.218e-05, + "loss": 0.0843, + "step": 1610 + }, + { + "grad_norm": 0.5640199184417725, + "learning_rate": 3.238e-05, + "loss": 0.0852, + "step": 1620 + }, + { + "grad_norm": 0.5484631657600403, + "learning_rate": 3.2579999999999996e-05, + "loss": 0.0853, + "step": 1630 + }, + { + "grad_norm": 0.8151610493659973, + "learning_rate": 3.278e-05, + "loss": 0.084, + "step": 1640 + }, + { + "grad_norm": 0.9902693033218384, + "learning_rate": 3.298e-05, + "loss": 0.0836, + "step": 1650 + }, + { + "grad_norm": 1.077278971672058, + "learning_rate": 3.318e-05, + "loss": 0.0815, + "step": 1660 + }, + { + "grad_norm": 0.6865838170051575, + "learning_rate": 3.338e-05, + "loss": 0.0801, + "step": 1670 + }, + { + "grad_norm": 0.6826719641685486, + "learning_rate": 3.358e-05, + "loss": 0.0841, + "step": 1680 + }, + { + "grad_norm": 0.6357934474945068, + "learning_rate": 3.378e-05, + "loss": 0.0757, + "step": 1690 + }, + { + "grad_norm": 0.8530320525169373, + "learning_rate": 3.398e-05, + "loss": 0.0836, + "step": 1700 + }, + { + "grad_norm": 0.7172214984893799, + "learning_rate": 3.418e-05, + "loss": 0.0795, + "step": 1710 + }, + { + "grad_norm": 0.8882413506507874, + "learning_rate": 3.438e-05, + "loss": 0.0793, + "step": 1720 + }, + { + "grad_norm": 0.9875413775444031, + "learning_rate": 3.4580000000000004e-05, + "loss": 0.08, + "step": 1730 + }, + { + "grad_norm": 0.8255948424339294, + "learning_rate": 3.478e-05, + "loss": 0.0773, + "step": 1740 + }, + { + "grad_norm": 0.8142917156219482, + "learning_rate": 3.498e-05, + "loss": 0.0768, + "step": 1750 + }, + { + "grad_norm": 0.8827599287033081, + "learning_rate": 3.518e-05, + "loss": 0.0856, + "step": 1760 + }, + { + "grad_norm": 0.5484384298324585, + "learning_rate": 3.5380000000000003e-05, + "loss": 0.0795, + "step": 1770 + }, + { + "grad_norm": 0.43772321939468384, + "learning_rate": 3.558e-05, + "loss": 0.0796, + "step": 1780 + }, + { + "grad_norm": 0.4580235183238983, + "learning_rate": 3.578e-05, + "loss": 0.0772, + "step": 1790 + }, + { + "grad_norm": 0.7366774678230286, + "learning_rate": 3.5980000000000004e-05, + "loss": 0.0793, + "step": 1800 + }, + { + "grad_norm": 0.7630442380905151, + "learning_rate": 3.618e-05, + "loss": 0.0815, + "step": 1810 + }, + { + "grad_norm": 0.7698264718055725, + "learning_rate": 3.638e-05, + "loss": 0.0735, + "step": 1820 + }, + { + "grad_norm": 0.6987844109535217, + "learning_rate": 3.6580000000000006e-05, + "loss": 0.0759, + "step": 1830 + }, + { + "grad_norm": 0.6061340570449829, + "learning_rate": 3.6780000000000004e-05, + "loss": 0.077, + "step": 1840 + }, + { + "grad_norm": 1.0771291255950928, + "learning_rate": 3.698e-05, + "loss": 0.0791, + "step": 1850 + }, + { + "grad_norm": 0.6869987845420837, + "learning_rate": 3.7180000000000007e-05, + "loss": 0.0789, + "step": 1860 + }, + { + "grad_norm": 0.503115177154541, + "learning_rate": 3.7380000000000005e-05, + "loss": 0.0788, + "step": 1870 + }, + { + "grad_norm": 0.5843595862388611, + "learning_rate": 3.758e-05, + "loss": 0.0767, + "step": 1880 + }, + { + "grad_norm": 0.6130261421203613, + "learning_rate": 3.778000000000001e-05, + "loss": 0.0784, + "step": 1890 + }, + { + "grad_norm": 0.7057688236236572, + "learning_rate": 3.7980000000000006e-05, + "loss": 0.0752, + "step": 1900 + }, + { + "grad_norm": 0.854373574256897, + "learning_rate": 3.818e-05, + "loss": 0.0825, + "step": 1910 + }, + { + "grad_norm": 0.772139847278595, + "learning_rate": 3.838e-05, + "loss": 0.0778, + "step": 1920 + }, + { + "grad_norm": 0.47394874691963196, + "learning_rate": 3.858e-05, + "loss": 0.0733, + "step": 1930 + }, + { + "grad_norm": 0.6109016537666321, + "learning_rate": 3.878e-05, + "loss": 0.0792, + "step": 1940 + }, + { + "grad_norm": 0.553274929523468, + "learning_rate": 3.898e-05, + "loss": 0.074, + "step": 1950 + }, + { + "grad_norm": 0.6935548782348633, + "learning_rate": 3.918e-05, + "loss": 0.076, + "step": 1960 + }, + { + "grad_norm": 0.7194970846176147, + "learning_rate": 3.938e-05, + "loss": 0.0704, + "step": 1970 + }, + { + "grad_norm": 0.4381813704967499, + "learning_rate": 3.958e-05, + "loss": 0.0734, + "step": 1980 + }, + { + "grad_norm": 0.9503201842308044, + "learning_rate": 3.978e-05, + "loss": 0.077, + "step": 1990 + }, + { + "grad_norm": 0.7583017945289612, + "learning_rate": 3.998e-05, + "loss": 0.0732, + "step": 2000 + }, + { + "grad_norm": 0.834935188293457, + "learning_rate": 4.018e-05, + "loss": 0.0736, + "step": 2010 + }, + { + "grad_norm": 0.7883893251419067, + "learning_rate": 4.038e-05, + "loss": 0.0717, + "step": 2020 + }, + { + "grad_norm": 0.6994910836219788, + "learning_rate": 4.058e-05, + "loss": 0.0708, + "step": 2030 + }, + { + "grad_norm": 0.4112062454223633, + "learning_rate": 4.078e-05, + "loss": 0.0754, + "step": 2040 + }, + { + "grad_norm": 0.7285349369049072, + "learning_rate": 4.0980000000000004e-05, + "loss": 0.0773, + "step": 2050 + }, + { + "grad_norm": 0.5771617889404297, + "learning_rate": 4.118e-05, + "loss": 0.0725, + "step": 2060 + }, + { + "grad_norm": 0.620608925819397, + "learning_rate": 4.138e-05, + "loss": 0.0694, + "step": 2070 + }, + { + "grad_norm": 0.6632530093193054, + "learning_rate": 4.1580000000000005e-05, + "loss": 0.0764, + "step": 2080 + }, + { + "grad_norm": 0.7997716069221497, + "learning_rate": 4.178e-05, + "loss": 0.0756, + "step": 2090 + }, + { + "grad_norm": 0.5990532636642456, + "learning_rate": 4.198e-05, + "loss": 0.0679, + "step": 2100 + }, + { + "grad_norm": 0.6281260848045349, + "learning_rate": 4.2180000000000006e-05, + "loss": 0.0772, + "step": 2110 + }, + { + "grad_norm": 0.76509028673172, + "learning_rate": 4.2380000000000004e-05, + "loss": 0.0715, + "step": 2120 + }, + { + "grad_norm": 0.6485828757286072, + "learning_rate": 4.258e-05, + "loss": 0.074, + "step": 2130 + }, + { + "grad_norm": 0.7726843953132629, + "learning_rate": 4.278e-05, + "loss": 0.0748, + "step": 2140 + }, + { + "grad_norm": 0.5515874624252319, + "learning_rate": 4.2980000000000005e-05, + "loss": 0.0784, + "step": 2150 + }, + { + "grad_norm": 0.6219727396965027, + "learning_rate": 4.318e-05, + "loss": 0.0722, + "step": 2160 + }, + { + "grad_norm": 0.6551211476325989, + "learning_rate": 4.338e-05, + "loss": 0.0684, + "step": 2170 + }, + { + "grad_norm": 0.8495088219642639, + "learning_rate": 4.3580000000000006e-05, + "loss": 0.0738, + "step": 2180 + }, + { + "grad_norm": 0.9774518013000488, + "learning_rate": 4.3780000000000004e-05, + "loss": 0.0687, + "step": 2190 + }, + { + "grad_norm": 0.7352770566940308, + "learning_rate": 4.398e-05, + "loss": 0.0701, + "step": 2200 + }, + { + "grad_norm": 0.5702778697013855, + "learning_rate": 4.418000000000001e-05, + "loss": 0.0694, + "step": 2210 + }, + { + "grad_norm": 0.5630630850791931, + "learning_rate": 4.438e-05, + "loss": 0.0687, + "step": 2220 + }, + { + "grad_norm": 0.42525404691696167, + "learning_rate": 4.458e-05, + "loss": 0.0696, + "step": 2230 + }, + { + "grad_norm": 0.561542272567749, + "learning_rate": 4.478e-05, + "loss": 0.0706, + "step": 2240 + }, + { + "grad_norm": 0.8510808944702148, + "learning_rate": 4.498e-05, + "loss": 0.0723, + "step": 2250 + }, + { + "grad_norm": 0.6345523595809937, + "learning_rate": 4.518e-05, + "loss": 0.0665, + "step": 2260 + }, + { + "grad_norm": 0.5879749655723572, + "learning_rate": 4.538e-05, + "loss": 0.0684, + "step": 2270 + }, + { + "grad_norm": 0.5963588953018188, + "learning_rate": 4.558e-05, + "loss": 0.0666, + "step": 2280 + }, + { + "grad_norm": 0.6277705430984497, + "learning_rate": 4.578e-05, + "loss": 0.0738, + "step": 2290 + }, + { + "grad_norm": 0.7396746873855591, + "learning_rate": 4.5980000000000004e-05, + "loss": 0.0669, + "step": 2300 + }, + { + "grad_norm": 0.6703621745109558, + "learning_rate": 4.618e-05, + "loss": 0.0668, + "step": 2310 + }, + { + "grad_norm": 0.5077555775642395, + "learning_rate": 4.638e-05, + "loss": 0.0691, + "step": 2320 + }, + { + "grad_norm": 0.5597145557403564, + "learning_rate": 4.6580000000000005e-05, + "loss": 0.0663, + "step": 2330 + }, + { + "grad_norm": 0.7725422978401184, + "learning_rate": 4.678e-05, + "loss": 0.0738, + "step": 2340 + }, + { + "grad_norm": 0.7614355087280273, + "learning_rate": 4.698e-05, + "loss": 0.0678, + "step": 2350 + }, + { + "grad_norm": 0.7310566902160645, + "learning_rate": 4.718e-05, + "loss": 0.0713, + "step": 2360 + }, + { + "grad_norm": 0.42734700441360474, + "learning_rate": 4.7380000000000004e-05, + "loss": 0.0682, + "step": 2370 + }, + { + "grad_norm": 0.6056151390075684, + "learning_rate": 4.758e-05, + "loss": 0.0662, + "step": 2380 + }, + { + "grad_norm": 0.4743797779083252, + "learning_rate": 4.778e-05, + "loss": 0.0629, + "step": 2390 + }, + { + "grad_norm": 0.29339972138404846, + "learning_rate": 4.7980000000000005e-05, + "loss": 0.0673, + "step": 2400 + }, + { + "grad_norm": 0.6282046437263489, + "learning_rate": 4.818e-05, + "loss": 0.0632, + "step": 2410 + }, + { + "grad_norm": 0.4677516520023346, + "learning_rate": 4.838e-05, + "loss": 0.0626, + "step": 2420 + }, + { + "grad_norm": 0.6693249940872192, + "learning_rate": 4.8580000000000006e-05, + "loss": 0.0647, + "step": 2430 + }, + { + "grad_norm": 0.6636394262313843, + "learning_rate": 4.8780000000000004e-05, + "loss": 0.0624, + "step": 2440 + }, + { + "grad_norm": 0.4973454177379608, + "learning_rate": 4.898e-05, + "loss": 0.0663, + "step": 2450 + }, + { + "grad_norm": 0.37847772240638733, + "learning_rate": 4.918000000000001e-05, + "loss": 0.0594, + "step": 2460 + }, + { + "grad_norm": 0.5951005220413208, + "learning_rate": 4.9380000000000005e-05, + "loss": 0.0681, + "step": 2470 + }, + { + "grad_norm": 0.5232742428779602, + "learning_rate": 4.958e-05, + "loss": 0.0696, + "step": 2480 + }, + { + "grad_norm": 0.6293039917945862, + "learning_rate": 4.978e-05, + "loss": 0.0681, + "step": 2490 + }, + { + "grad_norm": 0.616435170173645, + "learning_rate": 4.9980000000000006e-05, + "loss": 0.0676, + "step": 2500 + }, + { + "grad_norm": 0.7929869890213013, + "learning_rate": 5.0180000000000004e-05, + "loss": 0.0694, + "step": 2510 + }, + { + "grad_norm": 0.436358243227005, + "learning_rate": 5.038e-05, + "loss": 0.066, + "step": 2520 + }, + { + "grad_norm": 0.49785980582237244, + "learning_rate": 5.058000000000001e-05, + "loss": 0.0634, + "step": 2530 + }, + { + "grad_norm": 0.4073878228664398, + "learning_rate": 5.0780000000000005e-05, + "loss": 0.0616, + "step": 2540 + }, + { + "grad_norm": 0.7101964950561523, + "learning_rate": 5.098e-05, + "loss": 0.0647, + "step": 2550 + }, + { + "grad_norm": 0.4145764410495758, + "learning_rate": 5.118000000000001e-05, + "loss": 0.0647, + "step": 2560 + }, + { + "grad_norm": 0.666660726070404, + "learning_rate": 5.1380000000000006e-05, + "loss": 0.0638, + "step": 2570 + }, + { + "grad_norm": 0.41386130452156067, + "learning_rate": 5.1580000000000004e-05, + "loss": 0.0592, + "step": 2580 + }, + { + "grad_norm": 0.5578649044036865, + "learning_rate": 5.178000000000001e-05, + "loss": 0.0609, + "step": 2590 + }, + { + "grad_norm": 0.44301366806030273, + "learning_rate": 5.198000000000001e-05, + "loss": 0.0609, + "step": 2600 + }, + { + "grad_norm": 0.6701443195343018, + "learning_rate": 5.2180000000000005e-05, + "loss": 0.067, + "step": 2610 + }, + { + "grad_norm": 0.5161815285682678, + "learning_rate": 5.238000000000001e-05, + "loss": 0.061, + "step": 2620 + }, + { + "grad_norm": 0.43941444158554077, + "learning_rate": 5.258000000000001e-05, + "loss": 0.0687, + "step": 2630 + }, + { + "grad_norm": 0.39227938652038574, + "learning_rate": 5.2780000000000006e-05, + "loss": 0.0648, + "step": 2640 + }, + { + "grad_norm": 0.47298720479011536, + "learning_rate": 5.2980000000000004e-05, + "loss": 0.063, + "step": 2650 + }, + { + "grad_norm": 0.36549681425094604, + "learning_rate": 5.318000000000001e-05, + "loss": 0.0629, + "step": 2660 + }, + { + "grad_norm": 0.7052419781684875, + "learning_rate": 5.338000000000001e-05, + "loss": 0.0634, + "step": 2670 + }, + { + "grad_norm": 0.3532015085220337, + "learning_rate": 5.3580000000000005e-05, + "loss": 0.0613, + "step": 2680 + }, + { + "grad_norm": 0.47537365555763245, + "learning_rate": 5.378e-05, + "loss": 0.0603, + "step": 2690 + }, + { + "grad_norm": 0.7598276138305664, + "learning_rate": 5.3979999999999995e-05, + "loss": 0.0636, + "step": 2700 + }, + { + "grad_norm": 0.49093112349510193, + "learning_rate": 5.418e-05, + "loss": 0.0558, + "step": 2710 + }, + { + "grad_norm": 0.4209236800670624, + "learning_rate": 5.438e-05, + "loss": 0.0671, + "step": 2720 + }, + { + "grad_norm": 0.621289849281311, + "learning_rate": 5.4579999999999996e-05, + "loss": 0.0551, + "step": 2730 + }, + { + "grad_norm": 0.32953131198883057, + "learning_rate": 5.478e-05, + "loss": 0.0573, + "step": 2740 + }, + { + "grad_norm": 0.6044805645942688, + "learning_rate": 5.498e-05, + "loss": 0.0654, + "step": 2750 + }, + { + "grad_norm": 0.5877718329429626, + "learning_rate": 5.518e-05, + "loss": 0.0635, + "step": 2760 + }, + { + "grad_norm": 0.5216749310493469, + "learning_rate": 5.538e-05, + "loss": 0.0651, + "step": 2770 + }, + { + "grad_norm": 0.41484737396240234, + "learning_rate": 5.558e-05, + "loss": 0.0596, + "step": 2780 + }, + { + "grad_norm": 0.4357282817363739, + "learning_rate": 5.578e-05, + "loss": 0.0592, + "step": 2790 + }, + { + "grad_norm": 0.5649386048316956, + "learning_rate": 5.5979999999999996e-05, + "loss": 0.0605, + "step": 2800 + }, + { + "grad_norm": 0.5894543528556824, + "learning_rate": 5.618e-05, + "loss": 0.0597, + "step": 2810 + }, + { + "grad_norm": 0.5362029075622559, + "learning_rate": 5.638e-05, + "loss": 0.0657, + "step": 2820 + }, + { + "grad_norm": 0.512429416179657, + "learning_rate": 5.658e-05, + "loss": 0.0599, + "step": 2830 + }, + { + "grad_norm": 0.4463641941547394, + "learning_rate": 5.678e-05, + "loss": 0.0581, + "step": 2840 + }, + { + "grad_norm": 0.5298840403556824, + "learning_rate": 5.698e-05, + "loss": 0.059, + "step": 2850 + }, + { + "grad_norm": 0.5238723754882812, + "learning_rate": 5.718e-05, + "loss": 0.0608, + "step": 2860 + }, + { + "grad_norm": 0.4166842997074127, + "learning_rate": 5.738e-05, + "loss": 0.0605, + "step": 2870 + }, + { + "grad_norm": 0.4618726074695587, + "learning_rate": 5.758e-05, + "loss": 0.0616, + "step": 2880 + }, + { + "grad_norm": 0.6359237432479858, + "learning_rate": 5.778e-05, + "loss": 0.0585, + "step": 2890 + }, + { + "grad_norm": 0.49833470582962036, + "learning_rate": 5.7980000000000004e-05, + "loss": 0.056, + "step": 2900 + }, + { + "grad_norm": 0.48706814646720886, + "learning_rate": 5.818e-05, + "loss": 0.0578, + "step": 2910 + }, + { + "grad_norm": 0.4886855483055115, + "learning_rate": 5.838e-05, + "loss": 0.0578, + "step": 2920 + }, + { + "grad_norm": 0.47901222109794617, + "learning_rate": 5.858e-05, + "loss": 0.0583, + "step": 2930 + }, + { + "grad_norm": 0.5193634033203125, + "learning_rate": 5.878e-05, + "loss": 0.0561, + "step": 2940 + }, + { + "grad_norm": 0.4615758955478668, + "learning_rate": 5.898e-05, + "loss": 0.0571, + "step": 2950 + }, + { + "grad_norm": 0.44494229555130005, + "learning_rate": 5.918e-05, + "loss": 0.057, + "step": 2960 + }, + { + "grad_norm": 0.4942503869533539, + "learning_rate": 5.9380000000000004e-05, + "loss": 0.0571, + "step": 2970 + }, + { + "grad_norm": 0.333330899477005, + "learning_rate": 5.958e-05, + "loss": 0.0579, + "step": 2980 + }, + { + "grad_norm": 0.3947157859802246, + "learning_rate": 5.978e-05, + "loss": 0.0592, + "step": 2990 + }, + { + "grad_norm": 0.33168426156044006, + "learning_rate": 5.9980000000000005e-05, + "loss": 0.059, + "step": 3000 + }, + { + "grad_norm": 0.473497211933136, + "learning_rate": 6.018e-05, + "loss": 0.0559, + "step": 3010 + }, + { + "grad_norm": 0.4030488431453705, + "learning_rate": 6.038e-05, + "loss": 0.0589, + "step": 3020 + }, + { + "grad_norm": 0.4054778814315796, + "learning_rate": 6.0580000000000006e-05, + "loss": 0.0577, + "step": 3030 + }, + { + "grad_norm": 0.4154609143733978, + "learning_rate": 6.0780000000000004e-05, + "loss": 0.0616, + "step": 3040 + }, + { + "grad_norm": 0.4764542579650879, + "learning_rate": 6.098e-05, + "loss": 0.0551, + "step": 3050 + }, + { + "grad_norm": 0.5344871878623962, + "learning_rate": 6.118000000000001e-05, + "loss": 0.0584, + "step": 3060 + }, + { + "grad_norm": 0.5409612655639648, + "learning_rate": 6.138e-05, + "loss": 0.0552, + "step": 3070 + }, + { + "grad_norm": 0.35256001353263855, + "learning_rate": 6.158e-05, + "loss": 0.0531, + "step": 3080 + }, + { + "grad_norm": 0.4489481449127197, + "learning_rate": 6.178000000000001e-05, + "loss": 0.056, + "step": 3090 + }, + { + "grad_norm": 0.4414198696613312, + "learning_rate": 6.198e-05, + "loss": 0.0555, + "step": 3100 + }, + { + "grad_norm": 0.3697075843811035, + "learning_rate": 6.218e-05, + "loss": 0.0525, + "step": 3110 + }, + { + "grad_norm": 0.36509448289871216, + "learning_rate": 6.238000000000001e-05, + "loss": 0.0543, + "step": 3120 + }, + { + "grad_norm": 0.6008139848709106, + "learning_rate": 6.258e-05, + "loss": 0.0595, + "step": 3130 + }, + { + "grad_norm": 0.35010260343551636, + "learning_rate": 6.278e-05, + "loss": 0.0571, + "step": 3140 + }, + { + "grad_norm": 0.42217549681663513, + "learning_rate": 6.298000000000001e-05, + "loss": 0.0531, + "step": 3150 + }, + { + "grad_norm": 0.4358842670917511, + "learning_rate": 6.318e-05, + "loss": 0.0551, + "step": 3160 + }, + { + "grad_norm": 0.5422085523605347, + "learning_rate": 6.338e-05, + "loss": 0.0582, + "step": 3170 + }, + { + "grad_norm": 0.4133646488189697, + "learning_rate": 6.358000000000001e-05, + "loss": 0.0603, + "step": 3180 + }, + { + "grad_norm": 0.3752889633178711, + "learning_rate": 6.378e-05, + "loss": 0.0603, + "step": 3190 + }, + { + "grad_norm": 0.38378527760505676, + "learning_rate": 6.398000000000001e-05, + "loss": 0.057, + "step": 3200 + }, + { + "grad_norm": 0.40851426124572754, + "learning_rate": 6.418000000000001e-05, + "loss": 0.0588, + "step": 3210 + }, + { + "grad_norm": 0.42088618874549866, + "learning_rate": 6.438e-05, + "loss": 0.0522, + "step": 3220 + }, + { + "grad_norm": 0.4962027072906494, + "learning_rate": 6.458000000000001e-05, + "loss": 0.0547, + "step": 3230 + }, + { + "grad_norm": 0.3258550465106964, + "learning_rate": 6.478000000000001e-05, + "loss": 0.0509, + "step": 3240 + }, + { + "grad_norm": 0.4915197491645813, + "learning_rate": 6.498e-05, + "loss": 0.0553, + "step": 3250 + }, + { + "grad_norm": 0.45102420449256897, + "learning_rate": 6.518000000000001e-05, + "loss": 0.0565, + "step": 3260 + }, + { + "grad_norm": 0.5395206809043884, + "learning_rate": 6.538000000000001e-05, + "loss": 0.0562, + "step": 3270 + }, + { + "grad_norm": 0.3776578903198242, + "learning_rate": 6.558e-05, + "loss": 0.056, + "step": 3280 + }, + { + "grad_norm": 0.45232003927230835, + "learning_rate": 6.578000000000001e-05, + "loss": 0.0564, + "step": 3290 + }, + { + "grad_norm": 0.3533763587474823, + "learning_rate": 6.598e-05, + "loss": 0.0532, + "step": 3300 + }, + { + "grad_norm": 0.4522567093372345, + "learning_rate": 6.618e-05, + "loss": 0.0588, + "step": 3310 + }, + { + "grad_norm": 0.4660104215145111, + "learning_rate": 6.638e-05, + "loss": 0.0527, + "step": 3320 + }, + { + "grad_norm": 0.41579964756965637, + "learning_rate": 6.658e-05, + "loss": 0.0542, + "step": 3330 + }, + { + "grad_norm": 0.4416622221469879, + "learning_rate": 6.678e-05, + "loss": 0.0535, + "step": 3340 + }, + { + "grad_norm": 0.29213327169418335, + "learning_rate": 6.698e-05, + "loss": 0.0484, + "step": 3350 + }, + { + "grad_norm": 0.2632782757282257, + "learning_rate": 6.718e-05, + "loss": 0.0498, + "step": 3360 + }, + { + "grad_norm": 0.34549370408058167, + "learning_rate": 6.738e-05, + "loss": 0.0502, + "step": 3370 + }, + { + "grad_norm": 0.435654878616333, + "learning_rate": 6.758e-05, + "loss": 0.0515, + "step": 3380 + }, + { + "grad_norm": 0.4589787423610687, + "learning_rate": 6.778e-05, + "loss": 0.0494, + "step": 3390 + }, + { + "grad_norm": 0.4124149680137634, + "learning_rate": 6.798e-05, + "loss": 0.0572, + "step": 3400 + }, + { + "grad_norm": 0.5631486773490906, + "learning_rate": 6.818e-05, + "loss": 0.0537, + "step": 3410 + }, + { + "grad_norm": 0.4126775860786438, + "learning_rate": 6.838e-05, + "loss": 0.0515, + "step": 3420 + }, + { + "grad_norm": 0.41366931796073914, + "learning_rate": 6.858e-05, + "loss": 0.0505, + "step": 3430 + }, + { + "grad_norm": 0.2984330654144287, + "learning_rate": 6.878e-05, + "loss": 0.0537, + "step": 3440 + }, + { + "grad_norm": 0.3571836054325104, + "learning_rate": 6.898e-05, + "loss": 0.0529, + "step": 3450 + }, + { + "grad_norm": 0.25202175974845886, + "learning_rate": 6.918e-05, + "loss": 0.0535, + "step": 3460 + }, + { + "grad_norm": 0.4546191692352295, + "learning_rate": 6.938e-05, + "loss": 0.0517, + "step": 3470 + }, + { + "grad_norm": 0.425432950258255, + "learning_rate": 6.958e-05, + "loss": 0.0533, + "step": 3480 + }, + { + "grad_norm": 0.45546233654022217, + "learning_rate": 6.978e-05, + "loss": 0.0516, + "step": 3490 + }, + { + "grad_norm": 0.42852041125297546, + "learning_rate": 6.998e-05, + "loss": 0.053, + "step": 3500 + }, + { + "grad_norm": 0.4947284758090973, + "learning_rate": 7.018e-05, + "loss": 0.0558, + "step": 3510 + }, + { + "grad_norm": 0.3243687152862549, + "learning_rate": 7.038e-05, + "loss": 0.0567, + "step": 3520 + }, + { + "grad_norm": 0.3285830616950989, + "learning_rate": 7.058e-05, + "loss": 0.0561, + "step": 3530 + }, + { + "grad_norm": 0.39936813712120056, + "learning_rate": 7.078e-05, + "loss": 0.0511, + "step": 3540 + }, + { + "grad_norm": 0.3161848485469818, + "learning_rate": 7.098e-05, + "loss": 0.05, + "step": 3550 + }, + { + "grad_norm": 0.5191971659660339, + "learning_rate": 7.118e-05, + "loss": 0.054, + "step": 3560 + }, + { + "grad_norm": 0.5381972789764404, + "learning_rate": 7.138e-05, + "loss": 0.0491, + "step": 3570 + }, + { + "grad_norm": 0.34964853525161743, + "learning_rate": 7.158e-05, + "loss": 0.0491, + "step": 3580 + }, + { + "grad_norm": 0.46988117694854736, + "learning_rate": 7.178000000000001e-05, + "loss": 0.0513, + "step": 3590 + }, + { + "grad_norm": 0.4249601662158966, + "learning_rate": 7.198e-05, + "loss": 0.0559, + "step": 3600 + }, + { + "grad_norm": 0.33850428462028503, + "learning_rate": 7.218e-05, + "loss": 0.0489, + "step": 3610 + }, + { + "grad_norm": 0.3701728284358978, + "learning_rate": 7.238000000000001e-05, + "loss": 0.0469, + "step": 3620 + }, + { + "grad_norm": 0.2906535267829895, + "learning_rate": 7.258e-05, + "loss": 0.0481, + "step": 3630 + }, + { + "grad_norm": 0.4270072281360626, + "learning_rate": 7.278e-05, + "loss": 0.0482, + "step": 3640 + }, + { + "grad_norm": 0.38514697551727295, + "learning_rate": 7.298000000000001e-05, + "loss": 0.0498, + "step": 3650 + }, + { + "grad_norm": 0.5302745699882507, + "learning_rate": 7.318e-05, + "loss": 0.0516, + "step": 3660 + }, + { + "grad_norm": 0.3949323892593384, + "learning_rate": 7.338e-05, + "loss": 0.0538, + "step": 3670 + }, + { + "grad_norm": 0.30689308047294617, + "learning_rate": 7.358000000000001e-05, + "loss": 0.0511, + "step": 3680 + }, + { + "grad_norm": 0.3946821093559265, + "learning_rate": 7.378e-05, + "loss": 0.0531, + "step": 3690 + }, + { + "grad_norm": 0.2896663248538971, + "learning_rate": 7.398e-05, + "loss": 0.0528, + "step": 3700 + }, + { + "grad_norm": 0.2610919773578644, + "learning_rate": 7.418000000000001e-05, + "loss": 0.0492, + "step": 3710 + }, + { + "grad_norm": 0.4678349792957306, + "learning_rate": 7.438e-05, + "loss": 0.0483, + "step": 3720 + }, + { + "grad_norm": 0.44914260506629944, + "learning_rate": 7.458000000000001e-05, + "loss": 0.0496, + "step": 3730 + }, + { + "grad_norm": 0.40144357085227966, + "learning_rate": 7.478e-05, + "loss": 0.0512, + "step": 3740 + }, + { + "grad_norm": 0.5745700597763062, + "learning_rate": 7.498e-05, + "loss": 0.0537, + "step": 3750 + }, + { + "grad_norm": 0.3442569077014923, + "learning_rate": 7.518000000000001e-05, + "loss": 0.0519, + "step": 3760 + }, + { + "grad_norm": 0.43658047914505005, + "learning_rate": 7.538e-05, + "loss": 0.0496, + "step": 3770 + }, + { + "grad_norm": 0.2623935639858246, + "learning_rate": 7.558e-05, + "loss": 0.0514, + "step": 3780 + }, + { + "grad_norm": 0.26179659366607666, + "learning_rate": 7.578000000000001e-05, + "loss": 0.0512, + "step": 3790 + }, + { + "grad_norm": 0.3111165165901184, + "learning_rate": 7.598e-05, + "loss": 0.053, + "step": 3800 + }, + { + "grad_norm": 0.27696117758750916, + "learning_rate": 7.618e-05, + "loss": 0.05, + "step": 3810 + }, + { + "grad_norm": 0.24919024109840393, + "learning_rate": 7.638000000000001e-05, + "loss": 0.054, + "step": 3820 + }, + { + "grad_norm": 0.3083406686782837, + "learning_rate": 7.658e-05, + "loss": 0.0517, + "step": 3830 + }, + { + "grad_norm": 0.5151512026786804, + "learning_rate": 7.678000000000001e-05, + "loss": 0.0483, + "step": 3840 + }, + { + "grad_norm": 0.7125098705291748, + "learning_rate": 7.698000000000001e-05, + "loss": 0.058, + "step": 3850 + }, + { + "grad_norm": 0.3545459806919098, + "learning_rate": 7.718e-05, + "loss": 0.0509, + "step": 3860 + }, + { + "grad_norm": 0.3257334530353546, + "learning_rate": 7.738000000000001e-05, + "loss": 0.0492, + "step": 3870 + }, + { + "grad_norm": 0.3800285756587982, + "learning_rate": 7.758000000000001e-05, + "loss": 0.0469, + "step": 3880 + }, + { + "grad_norm": 0.3816104233264923, + "learning_rate": 7.778e-05, + "loss": 0.0497, + "step": 3890 + }, + { + "grad_norm": 0.32740405201911926, + "learning_rate": 7.798000000000001e-05, + "loss": 0.0505, + "step": 3900 + }, + { + "grad_norm": 0.3985132575035095, + "learning_rate": 7.818000000000001e-05, + "loss": 0.0508, + "step": 3910 + }, + { + "grad_norm": 0.3178741931915283, + "learning_rate": 7.838e-05, + "loss": 0.0474, + "step": 3920 + }, + { + "grad_norm": 0.24734383821487427, + "learning_rate": 7.858000000000001e-05, + "loss": 0.0496, + "step": 3930 + }, + { + "grad_norm": 0.34386441111564636, + "learning_rate": 7.878e-05, + "loss": 0.0518, + "step": 3940 + }, + { + "grad_norm": 0.27462393045425415, + "learning_rate": 7.897999999999999e-05, + "loss": 0.0474, + "step": 3950 + }, + { + "grad_norm": 0.31337371468544006, + "learning_rate": 7.918e-05, + "loss": 0.0502, + "step": 3960 + }, + { + "grad_norm": 0.5135840177536011, + "learning_rate": 7.938e-05, + "loss": 0.048, + "step": 3970 + }, + { + "grad_norm": 0.3397790193557739, + "learning_rate": 7.958e-05, + "loss": 0.0513, + "step": 3980 + }, + { + "grad_norm": 0.2684673070907593, + "learning_rate": 7.978e-05, + "loss": 0.0475, + "step": 3990 + }, + { + "grad_norm": 0.3886200189590454, + "learning_rate": 7.998e-05, + "loss": 0.0516, + "step": 4000 + }, + { + "grad_norm": 0.29976579546928406, + "learning_rate": 8.018e-05, + "loss": 0.0491, + "step": 4010 + }, + { + "grad_norm": 0.2844051122665405, + "learning_rate": 8.038e-05, + "loss": 0.0488, + "step": 4020 + }, + { + "grad_norm": 0.29641762375831604, + "learning_rate": 8.058e-05, + "loss": 0.0484, + "step": 4030 + }, + { + "grad_norm": 0.3553694784641266, + "learning_rate": 8.078e-05, + "loss": 0.0445, + "step": 4040 + }, + { + "grad_norm": 0.3509349822998047, + "learning_rate": 8.098e-05, + "loss": 0.0468, + "step": 4050 + }, + { + "grad_norm": 0.39886170625686646, + "learning_rate": 8.118e-05, + "loss": 0.0459, + "step": 4060 + }, + { + "grad_norm": 0.28864529728889465, + "learning_rate": 8.138e-05, + "loss": 0.0468, + "step": 4070 + }, + { + "grad_norm": 0.424035906791687, + "learning_rate": 8.158e-05, + "loss": 0.0495, + "step": 4080 + }, + { + "grad_norm": 0.2661762237548828, + "learning_rate": 8.178e-05, + "loss": 0.0498, + "step": 4090 + }, + { + "grad_norm": 0.3365909159183502, + "learning_rate": 8.198e-05, + "loss": 0.0458, + "step": 4100 + }, + { + "grad_norm": 0.4436749517917633, + "learning_rate": 8.218e-05, + "loss": 0.0501, + "step": 4110 + }, + { + "grad_norm": 0.38972023129463196, + "learning_rate": 8.238000000000001e-05, + "loss": 0.0495, + "step": 4120 + }, + { + "grad_norm": 0.352256178855896, + "learning_rate": 8.258e-05, + "loss": 0.0452, + "step": 4130 + }, + { + "grad_norm": 0.3799952268600464, + "learning_rate": 8.278e-05, + "loss": 0.045, + "step": 4140 + }, + { + "grad_norm": 0.41920793056488037, + "learning_rate": 8.298000000000001e-05, + "loss": 0.0449, + "step": 4150 + }, + { + "grad_norm": 0.34260421991348267, + "learning_rate": 8.318e-05, + "loss": 0.0476, + "step": 4160 + }, + { + "grad_norm": 0.22982153296470642, + "learning_rate": 8.338e-05, + "loss": 0.0454, + "step": 4170 + }, + { + "grad_norm": 0.2280825525522232, + "learning_rate": 8.358e-05, + "loss": 0.0474, + "step": 4180 + }, + { + "grad_norm": 0.2698037624359131, + "learning_rate": 8.378e-05, + "loss": 0.0449, + "step": 4190 + }, + { + "grad_norm": 0.2514457106590271, + "learning_rate": 8.398e-05, + "loss": 0.0425, + "step": 4200 + }, + { + "grad_norm": 0.3674446642398834, + "learning_rate": 8.418e-05, + "loss": 0.0481, + "step": 4210 + }, + { + "grad_norm": 0.2206914871931076, + "learning_rate": 8.438e-05, + "loss": 0.0433, + "step": 4220 + }, + { + "grad_norm": 0.33014965057373047, + "learning_rate": 8.458e-05, + "loss": 0.0446, + "step": 4230 + }, + { + "grad_norm": 0.32250288128852844, + "learning_rate": 8.478e-05, + "loss": 0.0441, + "step": 4240 + }, + { + "grad_norm": 0.23812325298786163, + "learning_rate": 8.498e-05, + "loss": 0.0449, + "step": 4250 + }, + { + "grad_norm": 0.2779587507247925, + "learning_rate": 8.518000000000001e-05, + "loss": 0.0439, + "step": 4260 + }, + { + "grad_norm": 0.4090215563774109, + "learning_rate": 8.538e-05, + "loss": 0.0462, + "step": 4270 + }, + { + "grad_norm": 0.2885684370994568, + "learning_rate": 8.558e-05, + "loss": 0.0459, + "step": 4280 + }, + { + "grad_norm": 0.3297560513019562, + "learning_rate": 8.578000000000001e-05, + "loss": 0.0454, + "step": 4290 + }, + { + "grad_norm": 0.45233824849128723, + "learning_rate": 8.598e-05, + "loss": 0.0453, + "step": 4300 + }, + { + "grad_norm": 0.36038002371788025, + "learning_rate": 8.618e-05, + "loss": 0.0469, + "step": 4310 + }, + { + "grad_norm": 0.3213973045349121, + "learning_rate": 8.638000000000001e-05, + "loss": 0.0456, + "step": 4320 + }, + { + "grad_norm": 0.36170732975006104, + "learning_rate": 8.658e-05, + "loss": 0.0454, + "step": 4330 + }, + { + "grad_norm": 0.21560457348823547, + "learning_rate": 8.678e-05, + "loss": 0.0452, + "step": 4340 + }, + { + "grad_norm": 0.4805986285209656, + "learning_rate": 8.698000000000001e-05, + "loss": 0.0421, + "step": 4350 + }, + { + "grad_norm": 0.36630934476852417, + "learning_rate": 8.718e-05, + "loss": 0.0433, + "step": 4360 + }, + { + "grad_norm": 0.4369809925556183, + "learning_rate": 8.738000000000001e-05, + "loss": 0.0434, + "step": 4370 + }, + { + "grad_norm": 0.3558287024497986, + "learning_rate": 8.758000000000001e-05, + "loss": 0.0457, + "step": 4380 + }, + { + "grad_norm": 0.22896090149879456, + "learning_rate": 8.778e-05, + "loss": 0.0445, + "step": 4390 + }, + { + "grad_norm": 0.4297620952129364, + "learning_rate": 8.798000000000001e-05, + "loss": 0.0479, + "step": 4400 + }, + { + "grad_norm": 0.40545493364334106, + "learning_rate": 8.818000000000001e-05, + "loss": 0.048, + "step": 4410 + }, + { + "grad_norm": 0.2966093420982361, + "learning_rate": 8.838e-05, + "loss": 0.044, + "step": 4420 + }, + { + "grad_norm": 0.2219012826681137, + "learning_rate": 8.858000000000001e-05, + "loss": 0.0451, + "step": 4430 + }, + { + "grad_norm": 0.3496418297290802, + "learning_rate": 8.878000000000001e-05, + "loss": 0.046, + "step": 4440 + }, + { + "grad_norm": 0.3000999689102173, + "learning_rate": 8.898e-05, + "loss": 0.0468, + "step": 4450 + }, + { + "grad_norm": 0.3957916498184204, + "learning_rate": 8.918000000000001e-05, + "loss": 0.0489, + "step": 4460 + }, + { + "grad_norm": 0.32589191198349, + "learning_rate": 8.938e-05, + "loss": 0.0484, + "step": 4470 + }, + { + "grad_norm": 0.3164452612400055, + "learning_rate": 8.958e-05, + "loss": 0.0482, + "step": 4480 + }, + { + "grad_norm": 0.45082998275756836, + "learning_rate": 8.978000000000001e-05, + "loss": 0.0481, + "step": 4490 + }, + { + "grad_norm": 0.4291503429412842, + "learning_rate": 8.998e-05, + "loss": 0.0498, + "step": 4500 + }, + { + "grad_norm": 0.2157393991947174, + "learning_rate": 9.018000000000001e-05, + "loss": 0.0424, + "step": 4510 + }, + { + "grad_norm": 0.27393651008605957, + "learning_rate": 9.038000000000001e-05, + "loss": 0.0457, + "step": 4520 + }, + { + "grad_norm": 0.32539814710617065, + "learning_rate": 9.058e-05, + "loss": 0.0439, + "step": 4530 + }, + { + "grad_norm": 0.3991074860095978, + "learning_rate": 9.078000000000001e-05, + "loss": 0.0454, + "step": 4540 + }, + { + "grad_norm": 0.3990306854248047, + "learning_rate": 9.098000000000001e-05, + "loss": 0.0452, + "step": 4550 + }, + { + "grad_norm": 0.26367247104644775, + "learning_rate": 9.118e-05, + "loss": 0.0399, + "step": 4560 + }, + { + "grad_norm": 0.4659882187843323, + "learning_rate": 9.138e-05, + "loss": 0.0454, + "step": 4570 + }, + { + "grad_norm": 0.318401038646698, + "learning_rate": 9.158e-05, + "loss": 0.0459, + "step": 4580 + }, + { + "grad_norm": 0.4306316375732422, + "learning_rate": 9.178e-05, + "loss": 0.0507, + "step": 4590 + }, + { + "grad_norm": 0.34176039695739746, + "learning_rate": 9.198e-05, + "loss": 0.0423, + "step": 4600 + }, + { + "grad_norm": 0.2422589510679245, + "learning_rate": 9.218e-05, + "loss": 0.042, + "step": 4610 + }, + { + "grad_norm": 0.29336783289909363, + "learning_rate": 9.238e-05, + "loss": 0.0437, + "step": 4620 + }, + { + "grad_norm": 0.352510005235672, + "learning_rate": 9.258e-05, + "loss": 0.0422, + "step": 4630 + }, + { + "grad_norm": 0.2556855082511902, + "learning_rate": 9.278e-05, + "loss": 0.0434, + "step": 4640 + }, + { + "grad_norm": 0.2240312546491623, + "learning_rate": 9.298e-05, + "loss": 0.0435, + "step": 4650 + }, + { + "grad_norm": 0.25075870752334595, + "learning_rate": 9.318e-05, + "loss": 0.0433, + "step": 4660 + }, + { + "grad_norm": 0.3503510057926178, + "learning_rate": 9.338e-05, + "loss": 0.042, + "step": 4670 + }, + { + "grad_norm": 0.30280259251594543, + "learning_rate": 9.358e-05, + "loss": 0.0441, + "step": 4680 + }, + { + "grad_norm": 0.35998404026031494, + "learning_rate": 9.378e-05, + "loss": 0.0418, + "step": 4690 + }, + { + "grad_norm": 0.3646436333656311, + "learning_rate": 9.398e-05, + "loss": 0.0436, + "step": 4700 + }, + { + "grad_norm": 0.23352113366127014, + "learning_rate": 9.418e-05, + "loss": 0.041, + "step": 4710 + }, + { + "grad_norm": 0.3648340404033661, + "learning_rate": 9.438e-05, + "loss": 0.0416, + "step": 4720 + }, + { + "grad_norm": 0.21865098178386688, + "learning_rate": 9.458e-05, + "loss": 0.0425, + "step": 4730 + }, + { + "grad_norm": 0.34970855712890625, + "learning_rate": 9.478e-05, + "loss": 0.0413, + "step": 4740 + }, + { + "grad_norm": 0.28939327597618103, + "learning_rate": 9.498e-05, + "loss": 0.0462, + "step": 4750 + }, + { + "grad_norm": 0.2571461796760559, + "learning_rate": 9.518000000000001e-05, + "loss": 0.0429, + "step": 4760 + }, + { + "grad_norm": 0.2136971354484558, + "learning_rate": 9.538e-05, + "loss": 0.0407, + "step": 4770 + }, + { + "grad_norm": 0.2869543433189392, + "learning_rate": 9.558e-05, + "loss": 0.0419, + "step": 4780 + }, + { + "grad_norm": 0.3806733787059784, + "learning_rate": 9.578000000000001e-05, + "loss": 0.0435, + "step": 4790 + }, + { + "grad_norm": 0.2676893472671509, + "learning_rate": 9.598e-05, + "loss": 0.0396, + "step": 4800 + }, + { + "grad_norm": 0.38259613513946533, + "learning_rate": 9.618e-05, + "loss": 0.0437, + "step": 4810 + }, + { + "grad_norm": 0.3062846064567566, + "learning_rate": 9.638000000000001e-05, + "loss": 0.0416, + "step": 4820 + }, + { + "grad_norm": 0.23773372173309326, + "learning_rate": 9.658e-05, + "loss": 0.0425, + "step": 4830 + }, + { + "grad_norm": 0.21388767659664154, + "learning_rate": 9.678e-05, + "loss": 0.0406, + "step": 4840 + }, + { + "grad_norm": 0.24900363385677338, + "learning_rate": 9.698000000000001e-05, + "loss": 0.0416, + "step": 4850 + }, + { + "grad_norm": 0.23942746222019196, + "learning_rate": 9.718e-05, + "loss": 0.0402, + "step": 4860 + }, + { + "grad_norm": 0.3911798298358917, + "learning_rate": 9.738e-05, + "loss": 0.0456, + "step": 4870 + }, + { + "grad_norm": 0.20711258053779602, + "learning_rate": 9.758000000000001e-05, + "loss": 0.0418, + "step": 4880 + }, + { + "grad_norm": 0.46470123529434204, + "learning_rate": 9.778e-05, + "loss": 0.0445, + "step": 4890 + }, + { + "grad_norm": 0.22389590740203857, + "learning_rate": 9.798000000000001e-05, + "loss": 0.0435, + "step": 4900 + }, + { + "grad_norm": 0.30480411648750305, + "learning_rate": 9.818000000000001e-05, + "loss": 0.0441, + "step": 4910 + }, + { + "grad_norm": 0.43320390582084656, + "learning_rate": 9.838e-05, + "loss": 0.0418, + "step": 4920 + }, + { + "grad_norm": 0.24183569848537445, + "learning_rate": 9.858000000000001e-05, + "loss": 0.0402, + "step": 4930 + }, + { + "grad_norm": 0.20913515985012054, + "learning_rate": 9.878e-05, + "loss": 0.0409, + "step": 4940 + }, + { + "grad_norm": 0.2947292923927307, + "learning_rate": 9.898e-05, + "loss": 0.0388, + "step": 4950 + }, + { + "grad_norm": 0.2977288067340851, + "learning_rate": 9.918000000000001e-05, + "loss": 0.044, + "step": 4960 + }, + { + "grad_norm": 0.2913176417350769, + "learning_rate": 9.938e-05, + "loss": 0.0445, + "step": 4970 + }, + { + "grad_norm": 0.3403559923171997, + "learning_rate": 9.958e-05, + "loss": 0.0402, + "step": 4980 + }, + { + "grad_norm": 0.22246582806110382, + "learning_rate": 9.978000000000001e-05, + "loss": 0.0428, + "step": 4990 + }, + { + "grad_norm": 0.27238729596138, + "learning_rate": 9.998e-05, + "loss": 0.0395, + "step": 5000 + }, + { + "grad_norm": 0.1942736804485321, + "learning_rate": 9.999999778549045e-05, + "loss": 0.0384, + "step": 5010 + }, + { + "grad_norm": 0.23987223207950592, + "learning_rate": 9.999999013039593e-05, + "loss": 0.0383, + "step": 5020 + }, + { + "grad_norm": 0.22025597095489502, + "learning_rate": 9.999997700737766e-05, + "loss": 0.0396, + "step": 5030 + }, + { + "grad_norm": 0.3775672912597656, + "learning_rate": 9.999995841643709e-05, + "loss": 0.0411, + "step": 5040 + }, + { + "grad_norm": 0.18478576838970184, + "learning_rate": 9.999993435757623e-05, + "loss": 0.0388, + "step": 5050 + }, + { + "grad_norm": 0.33303654193878174, + "learning_rate": 9.999990483079773e-05, + "loss": 0.0401, + "step": 5060 + }, + { + "grad_norm": 0.18385417759418488, + "learning_rate": 9.999986983610481e-05, + "loss": 0.0419, + "step": 5070 + }, + { + "grad_norm": 0.22733381390571594, + "learning_rate": 9.99998293735013e-05, + "loss": 0.0449, + "step": 5080 + }, + { + "grad_norm": 0.1941104531288147, + "learning_rate": 9.999978344299161e-05, + "loss": 0.0381, + "step": 5090 + }, + { + "grad_norm": 0.22088196873664856, + "learning_rate": 9.99997320445808e-05, + "loss": 0.0382, + "step": 5100 + }, + { + "grad_norm": 0.3156355619430542, + "learning_rate": 9.999967517827444e-05, + "loss": 0.0402, + "step": 5110 + }, + { + "grad_norm": 0.38384589552879333, + "learning_rate": 9.999961284407879e-05, + "loss": 0.0426, + "step": 5120 + }, + { + "grad_norm": 0.34195733070373535, + "learning_rate": 9.999954504200067e-05, + "loss": 0.0441, + "step": 5130 + }, + { + "grad_norm": 0.25365501642227173, + "learning_rate": 9.999947177204744e-05, + "loss": 0.0416, + "step": 5140 + }, + { + "grad_norm": 0.2577289938926697, + "learning_rate": 9.999939303422718e-05, + "loss": 0.0406, + "step": 5150 + }, + { + "grad_norm": 0.21492202579975128, + "learning_rate": 9.999930882854847e-05, + "loss": 0.0392, + "step": 5160 + }, + { + "grad_norm": 0.22367168962955475, + "learning_rate": 9.999921915502051e-05, + "loss": 0.0416, + "step": 5170 + }, + { + "grad_norm": 0.33284175395965576, + "learning_rate": 9.99991240136531e-05, + "loss": 0.0378, + "step": 5180 + }, + { + "grad_norm": 0.2934053838253021, + "learning_rate": 9.999902340445668e-05, + "loss": 0.0401, + "step": 5190 + }, + { + "grad_norm": 0.2260569930076599, + "learning_rate": 9.999891732744224e-05, + "loss": 0.0402, + "step": 5200 + }, + { + "grad_norm": 0.30313557386398315, + "learning_rate": 9.999880578262135e-05, + "loss": 0.0434, + "step": 5210 + }, + { + "grad_norm": 0.30348771810531616, + "learning_rate": 9.999868877000624e-05, + "loss": 0.0371, + "step": 5220 + }, + { + "grad_norm": 0.28093504905700684, + "learning_rate": 9.99985662896097e-05, + "loss": 0.0369, + "step": 5230 + }, + { + "grad_norm": 0.3017555773258209, + "learning_rate": 9.999843834144513e-05, + "loss": 0.0404, + "step": 5240 + }, + { + "grad_norm": 0.27772057056427, + "learning_rate": 9.99983049255265e-05, + "loss": 0.0409, + "step": 5250 + }, + { + "grad_norm": 0.25949397683143616, + "learning_rate": 9.999816604186843e-05, + "loss": 0.0377, + "step": 5260 + }, + { + "grad_norm": 0.16661201417446136, + "learning_rate": 9.999802169048609e-05, + "loss": 0.0379, + "step": 5270 + }, + { + "grad_norm": 0.27486926317214966, + "learning_rate": 9.999787187139527e-05, + "loss": 0.0385, + "step": 5280 + }, + { + "grad_norm": 0.32676851749420166, + "learning_rate": 9.999771658461234e-05, + "loss": 0.0403, + "step": 5290 + }, + { + "grad_norm": 0.21389590203762054, + "learning_rate": 9.999755583015431e-05, + "loss": 0.0417, + "step": 5300 + }, + { + "grad_norm": 0.3162577152252197, + "learning_rate": 9.999738960803874e-05, + "loss": 0.0447, + "step": 5310 + }, + { + "grad_norm": 0.36167487502098083, + "learning_rate": 9.99972179182838e-05, + "loss": 0.0429, + "step": 5320 + }, + { + "grad_norm": 0.30087557435035706, + "learning_rate": 9.99970407609083e-05, + "loss": 0.0367, + "step": 5330 + }, + { + "grad_norm": 0.25154346227645874, + "learning_rate": 9.999685813593159e-05, + "loss": 0.0391, + "step": 5340 + }, + { + "grad_norm": 0.19980138540267944, + "learning_rate": 9.999667004337362e-05, + "loss": 0.0385, + "step": 5350 + }, + { + "grad_norm": 0.22533835470676422, + "learning_rate": 9.9996476483255e-05, + "loss": 0.0375, + "step": 5360 + }, + { + "grad_norm": 0.28390443325042725, + "learning_rate": 9.999627745559688e-05, + "loss": 0.0398, + "step": 5370 + }, + { + "grad_norm": 0.35866084694862366, + "learning_rate": 9.999607296042101e-05, + "loss": 0.042, + "step": 5380 + }, + { + "grad_norm": 0.38155660033226013, + "learning_rate": 9.99958629977498e-05, + "loss": 0.0389, + "step": 5390 + }, + { + "grad_norm": 0.25528281927108765, + "learning_rate": 9.999564756760615e-05, + "loss": 0.0439, + "step": 5400 + }, + { + "grad_norm": 0.2489691525697708, + "learning_rate": 9.999542667001366e-05, + "loss": 0.0385, + "step": 5410 + }, + { + "grad_norm": 0.190853551030159, + "learning_rate": 9.999520030499647e-05, + "loss": 0.0404, + "step": 5420 + }, + { + "grad_norm": 0.25139665603637695, + "learning_rate": 9.999496847257936e-05, + "loss": 0.038, + "step": 5430 + }, + { + "grad_norm": 0.2593582272529602, + "learning_rate": 9.999473117278764e-05, + "loss": 0.0386, + "step": 5440 + }, + { + "grad_norm": 0.3327621817588806, + "learning_rate": 9.999448840564731e-05, + "loss": 0.039, + "step": 5450 + }, + { + "grad_norm": 0.32085680961608887, + "learning_rate": 9.999424017118488e-05, + "loss": 0.0373, + "step": 5460 + }, + { + "grad_norm": 0.17589007318019867, + "learning_rate": 9.999398646942751e-05, + "loss": 0.0395, + "step": 5470 + }, + { + "grad_norm": 0.21082834899425507, + "learning_rate": 9.999372730040296e-05, + "loss": 0.0371, + "step": 5480 + }, + { + "grad_norm": 0.2091127187013626, + "learning_rate": 9.999346266413953e-05, + "loss": 0.0372, + "step": 5490 + }, + { + "grad_norm": 0.2219819277524948, + "learning_rate": 9.99931925606662e-05, + "loss": 0.0394, + "step": 5500 + }, + { + "grad_norm": 0.25080248713493347, + "learning_rate": 9.99929169900125e-05, + "loss": 0.0372, + "step": 5510 + }, + { + "grad_norm": 0.21684858202934265, + "learning_rate": 9.999263595220855e-05, + "loss": 0.0387, + "step": 5520 + }, + { + "grad_norm": 0.182122603058815, + "learning_rate": 9.99923494472851e-05, + "loss": 0.04, + "step": 5530 + }, + { + "grad_norm": 0.24153488874435425, + "learning_rate": 9.999205747527348e-05, + "loss": 0.0356, + "step": 5540 + }, + { + "grad_norm": 0.26444098353385925, + "learning_rate": 9.999176003620561e-05, + "loss": 0.0381, + "step": 5550 + }, + { + "grad_norm": 0.23309607803821564, + "learning_rate": 9.999145713011405e-05, + "loss": 0.039, + "step": 5560 + }, + { + "grad_norm": 0.33661502599716187, + "learning_rate": 9.999114875703186e-05, + "loss": 0.0375, + "step": 5570 + }, + { + "grad_norm": 0.2171265184879303, + "learning_rate": 9.999083491699281e-05, + "loss": 0.0372, + "step": 5580 + }, + { + "grad_norm": 0.2567727267742157, + "learning_rate": 9.999051561003123e-05, + "loss": 0.0338, + "step": 5590 + }, + { + "grad_norm": 0.2327042669057846, + "learning_rate": 9.999019083618202e-05, + "loss": 0.0336, + "step": 5600 + }, + { + "grad_norm": 0.24825477600097656, + "learning_rate": 9.99898605954807e-05, + "loss": 0.0361, + "step": 5610 + }, + { + "grad_norm": 0.22263135015964508, + "learning_rate": 9.998952488796338e-05, + "loss": 0.0353, + "step": 5620 + }, + { + "grad_norm": 0.21979659795761108, + "learning_rate": 9.998918371366676e-05, + "loss": 0.0374, + "step": 5630 + }, + { + "grad_norm": 0.19611229002475739, + "learning_rate": 9.99888370726282e-05, + "loss": 0.0344, + "step": 5640 + }, + { + "grad_norm": 0.17958128452301025, + "learning_rate": 9.998848496488556e-05, + "loss": 0.0322, + "step": 5650 + }, + { + "grad_norm": 0.1848185807466507, + "learning_rate": 9.998812739047736e-05, + "loss": 0.0369, + "step": 5660 + }, + { + "grad_norm": 0.1802225410938263, + "learning_rate": 9.99877643494427e-05, + "loss": 0.0376, + "step": 5670 + }, + { + "grad_norm": 0.251686692237854, + "learning_rate": 9.998739584182128e-05, + "loss": 0.0351, + "step": 5680 + }, + { + "grad_norm": 0.3315955102443695, + "learning_rate": 9.998702186765342e-05, + "loss": 0.0376, + "step": 5690 + }, + { + "grad_norm": 0.17571847140789032, + "learning_rate": 9.998664242698e-05, + "loss": 0.0352, + "step": 5700 + }, + { + "grad_norm": 0.19867119193077087, + "learning_rate": 9.998625751984251e-05, + "loss": 0.034, + "step": 5710 + }, + { + "grad_norm": 0.2491987943649292, + "learning_rate": 9.998586714628307e-05, + "loss": 0.0327, + "step": 5720 + }, + { + "grad_norm": 0.22270077466964722, + "learning_rate": 9.998547130634432e-05, + "loss": 0.0376, + "step": 5730 + }, + { + "grad_norm": 0.1913171261548996, + "learning_rate": 9.99850700000696e-05, + "loss": 0.0359, + "step": 5740 + }, + { + "grad_norm": 0.22182171046733856, + "learning_rate": 9.998466322750278e-05, + "loss": 0.0396, + "step": 5750 + }, + { + "grad_norm": 0.2724314033985138, + "learning_rate": 9.998425098868834e-05, + "loss": 0.038, + "step": 5760 + }, + { + "grad_norm": 0.21521678566932678, + "learning_rate": 9.998383328367136e-05, + "loss": 0.0369, + "step": 5770 + }, + { + "grad_norm": 0.21064412593841553, + "learning_rate": 9.99834101124975e-05, + "loss": 0.0363, + "step": 5780 + }, + { + "grad_norm": 0.23841209709644318, + "learning_rate": 9.998298147521309e-05, + "loss": 0.037, + "step": 5790 + }, + { + "grad_norm": 0.20650985836982727, + "learning_rate": 9.998254737186496e-05, + "loss": 0.0343, + "step": 5800 + }, + { + "grad_norm": 0.24462926387786865, + "learning_rate": 9.99821078025006e-05, + "loss": 0.0348, + "step": 5810 + }, + { + "grad_norm": 0.17325110733509064, + "learning_rate": 9.998166276716807e-05, + "loss": 0.0366, + "step": 5820 + }, + { + "grad_norm": 0.30905815958976746, + "learning_rate": 9.998121226591606e-05, + "loss": 0.0363, + "step": 5830 + }, + { + "grad_norm": 0.2216346710920334, + "learning_rate": 9.998075629879382e-05, + "loss": 0.0351, + "step": 5840 + }, + { + "grad_norm": 0.2192988097667694, + "learning_rate": 9.99802948658512e-05, + "loss": 0.0397, + "step": 5850 + }, + { + "grad_norm": 0.19919931888580322, + "learning_rate": 9.99798279671387e-05, + "loss": 0.0351, + "step": 5860 + }, + { + "grad_norm": 0.27472516894340515, + "learning_rate": 9.997935560270734e-05, + "loss": 0.0329, + "step": 5870 + }, + { + "grad_norm": 0.17475055158138275, + "learning_rate": 9.997887777260879e-05, + "loss": 0.0327, + "step": 5880 + }, + { + "grad_norm": 0.2529633641242981, + "learning_rate": 9.997839447689532e-05, + "loss": 0.0409, + "step": 5890 + }, + { + "grad_norm": 0.23397588729858398, + "learning_rate": 9.997790571561978e-05, + "loss": 0.0416, + "step": 5900 + }, + { + "grad_norm": 0.2022402435541153, + "learning_rate": 9.99774114888356e-05, + "loss": 0.0397, + "step": 5910 + }, + { + "grad_norm": 0.2603520154953003, + "learning_rate": 9.997691179659684e-05, + "loss": 0.0334, + "step": 5920 + }, + { + "grad_norm": 0.2793276906013489, + "learning_rate": 9.997640663895815e-05, + "loss": 0.0333, + "step": 5930 + }, + { + "grad_norm": 0.21222557127475739, + "learning_rate": 9.997589601597477e-05, + "loss": 0.0348, + "step": 5940 + }, + { + "grad_norm": 0.19338689744472504, + "learning_rate": 9.997537992770252e-05, + "loss": 0.0356, + "step": 5950 + }, + { + "grad_norm": 0.16201873123645782, + "learning_rate": 9.997485837419788e-05, + "loss": 0.0325, + "step": 5960 + }, + { + "grad_norm": 0.22541604936122894, + "learning_rate": 9.997433135551786e-05, + "loss": 0.0381, + "step": 5970 + }, + { + "grad_norm": 0.2036927491426468, + "learning_rate": 9.997379887172009e-05, + "loss": 0.0355, + "step": 5980 + }, + { + "grad_norm": 0.21954143047332764, + "learning_rate": 9.997326092286281e-05, + "loss": 0.0322, + "step": 5990 + }, + { + "grad_norm": 0.3290071487426758, + "learning_rate": 9.997271750900486e-05, + "loss": 0.0351, + "step": 6000 + }, + { + "grad_norm": 0.23220016062259674, + "learning_rate": 9.997216863020565e-05, + "loss": 0.0327, + "step": 6010 + }, + { + "grad_norm": 0.14995290338993073, + "learning_rate": 9.99716142865252e-05, + "loss": 0.0359, + "step": 6020 + }, + { + "grad_norm": 0.2336021363735199, + "learning_rate": 9.997105447802415e-05, + "loss": 0.0322, + "step": 6030 + }, + { + "grad_norm": 0.27834412455558777, + "learning_rate": 9.997048920476373e-05, + "loss": 0.0318, + "step": 6040 + }, + { + "grad_norm": 0.21318142116069794, + "learning_rate": 9.996991846680572e-05, + "loss": 0.0386, + "step": 6050 + }, + { + "grad_norm": 0.15503254532814026, + "learning_rate": 9.996934226421257e-05, + "loss": 0.0328, + "step": 6060 + }, + { + "grad_norm": 0.2921711504459381, + "learning_rate": 9.996876059704726e-05, + "loss": 0.0325, + "step": 6070 + }, + { + "grad_norm": 0.30437105894088745, + "learning_rate": 9.996817346537343e-05, + "loss": 0.0358, + "step": 6080 + }, + { + "grad_norm": 0.13411550223827362, + "learning_rate": 9.996758086925526e-05, + "loss": 0.033, + "step": 6090 + }, + { + "grad_norm": 0.2496754378080368, + "learning_rate": 9.996698280875759e-05, + "loss": 0.0371, + "step": 6100 + }, + { + "grad_norm": 0.2054837942123413, + "learning_rate": 9.99663792839458e-05, + "loss": 0.0356, + "step": 6110 + }, + { + "grad_norm": 0.25616130232810974, + "learning_rate": 9.99657702948859e-05, + "loss": 0.0356, + "step": 6120 + }, + { + "grad_norm": 0.3262997567653656, + "learning_rate": 9.996515584164448e-05, + "loss": 0.0402, + "step": 6130 + }, + { + "grad_norm": 0.20472665131092072, + "learning_rate": 9.996453592428873e-05, + "loss": 0.0348, + "step": 6140 + }, + { + "grad_norm": 0.19291138648986816, + "learning_rate": 9.996391054288646e-05, + "loss": 0.0318, + "step": 6150 + }, + { + "grad_norm": 0.1613888442516327, + "learning_rate": 9.996327969750605e-05, + "loss": 0.0364, + "step": 6160 + }, + { + "grad_norm": 0.22556284070014954, + "learning_rate": 9.996264338821649e-05, + "loss": 0.0337, + "step": 6170 + }, + { + "grad_norm": 0.17967262864112854, + "learning_rate": 9.996200161508735e-05, + "loss": 0.0337, + "step": 6180 + }, + { + "grad_norm": 0.19388766586780548, + "learning_rate": 9.996135437818885e-05, + "loss": 0.0325, + "step": 6190 + }, + { + "grad_norm": 0.16700346767902374, + "learning_rate": 9.996070167759175e-05, + "loss": 0.0335, + "step": 6200 + }, + { + "grad_norm": 0.2261561155319214, + "learning_rate": 9.996004351336743e-05, + "loss": 0.0345, + "step": 6210 + }, + { + "grad_norm": 0.26240289211273193, + "learning_rate": 9.995937988558785e-05, + "loss": 0.0352, + "step": 6220 + }, + { + "grad_norm": 0.18253673613071442, + "learning_rate": 9.995871079432561e-05, + "loss": 0.0317, + "step": 6230 + }, + { + "grad_norm": 0.1770331710577011, + "learning_rate": 9.995803623965389e-05, + "loss": 0.0317, + "step": 6240 + }, + { + "grad_norm": 0.2862870395183563, + "learning_rate": 9.995735622164641e-05, + "loss": 0.031, + "step": 6250 + }, + { + "grad_norm": 0.22489173710346222, + "learning_rate": 9.995667074037758e-05, + "loss": 0.0325, + "step": 6260 + }, + { + "grad_norm": 0.23317387700080872, + "learning_rate": 9.995597979592232e-05, + "loss": 0.032, + "step": 6270 + }, + { + "grad_norm": 0.2282499521970749, + "learning_rate": 9.995528338835625e-05, + "loss": 0.0328, + "step": 6280 + }, + { + "grad_norm": 0.1628732979297638, + "learning_rate": 9.995458151775547e-05, + "loss": 0.0343, + "step": 6290 + }, + { + "grad_norm": 0.2638527452945709, + "learning_rate": 9.995387418419677e-05, + "loss": 0.0322, + "step": 6300 + }, + { + "grad_norm": 0.265533447265625, + "learning_rate": 9.99531613877575e-05, + "loss": 0.0345, + "step": 6310 + }, + { + "grad_norm": 0.19551481306552887, + "learning_rate": 9.995244312851559e-05, + "loss": 0.0347, + "step": 6320 + }, + { + "grad_norm": 0.2561339735984802, + "learning_rate": 9.995171940654961e-05, + "loss": 0.0351, + "step": 6330 + }, + { + "grad_norm": 0.16764198243618011, + "learning_rate": 9.995099022193871e-05, + "loss": 0.0328, + "step": 6340 + }, + { + "grad_norm": 0.2190362960100174, + "learning_rate": 9.995025557476261e-05, + "loss": 0.0332, + "step": 6350 + }, + { + "grad_norm": 0.1858881413936615, + "learning_rate": 9.994951546510165e-05, + "loss": 0.0336, + "step": 6360 + }, + { + "grad_norm": 0.36151257157325745, + "learning_rate": 9.994876989303679e-05, + "loss": 0.0391, + "step": 6370 + }, + { + "grad_norm": 0.2080828696489334, + "learning_rate": 9.994801885864955e-05, + "loss": 0.0332, + "step": 6380 + }, + { + "grad_norm": 0.146501824259758, + "learning_rate": 9.994726236202205e-05, + "loss": 0.0335, + "step": 6390 + }, + { + "grad_norm": 0.18071219325065613, + "learning_rate": 9.994650040323704e-05, + "loss": 0.0321, + "step": 6400 + }, + { + "grad_norm": 0.1672569066286087, + "learning_rate": 9.994573298237784e-05, + "loss": 0.0284, + "step": 6410 + }, + { + "grad_norm": 0.2229665368795395, + "learning_rate": 9.994496009952837e-05, + "loss": 0.0333, + "step": 6420 + }, + { + "grad_norm": 0.18981540203094482, + "learning_rate": 9.994418175477316e-05, + "loss": 0.0327, + "step": 6430 + }, + { + "grad_norm": 0.14861337840557098, + "learning_rate": 9.994339794819733e-05, + "loss": 0.0307, + "step": 6440 + }, + { + "grad_norm": 0.19116857647895813, + "learning_rate": 9.994260867988658e-05, + "loss": 0.0311, + "step": 6450 + }, + { + "grad_norm": 0.23270951211452484, + "learning_rate": 9.994181394992723e-05, + "loss": 0.0312, + "step": 6460 + }, + { + "grad_norm": 0.19573582708835602, + "learning_rate": 9.994101375840618e-05, + "loss": 0.0341, + "step": 6470 + }, + { + "grad_norm": 0.18730996549129486, + "learning_rate": 9.994020810541098e-05, + "loss": 0.0348, + "step": 6480 + }, + { + "grad_norm": 0.15340380370616913, + "learning_rate": 9.99393969910297e-05, + "loss": 0.0286, + "step": 6490 + }, + { + "grad_norm": 0.2002669721841812, + "learning_rate": 9.993858041535104e-05, + "loss": 0.0336, + "step": 6500 + }, + { + "grad_norm": 0.17635585367679596, + "learning_rate": 9.99377583784643e-05, + "loss": 0.0307, + "step": 6510 + }, + { + "grad_norm": 0.20118558406829834, + "learning_rate": 9.993693088045939e-05, + "loss": 0.0338, + "step": 6520 + }, + { + "grad_norm": 0.23127947747707367, + "learning_rate": 9.99360979214268e-05, + "loss": 0.0315, + "step": 6530 + }, + { + "grad_norm": 0.2097419798374176, + "learning_rate": 9.99352595014576e-05, + "loss": 0.0327, + "step": 6540 + }, + { + "grad_norm": 0.22037020325660706, + "learning_rate": 9.993441562064354e-05, + "loss": 0.0342, + "step": 6550 + }, + { + "grad_norm": 0.13925804197788239, + "learning_rate": 9.993356627907685e-05, + "loss": 0.0331, + "step": 6560 + }, + { + "grad_norm": 0.18792033195495605, + "learning_rate": 9.99327114768504e-05, + "loss": 0.0302, + "step": 6570 + }, + { + "grad_norm": 0.1822575330734253, + "learning_rate": 9.99318512140577e-05, + "loss": 0.0284, + "step": 6580 + }, + { + "grad_norm": 0.16886144876480103, + "learning_rate": 9.993098549079284e-05, + "loss": 0.0333, + "step": 6590 + }, + { + "grad_norm": 0.16944506764411926, + "learning_rate": 9.993011430715047e-05, + "loss": 0.0343, + "step": 6600 + }, + { + "grad_norm": 0.22519199550151825, + "learning_rate": 9.992923766322586e-05, + "loss": 0.0325, + "step": 6610 + }, + { + "grad_norm": 0.23796287178993225, + "learning_rate": 9.99283555591149e-05, + "loss": 0.0314, + "step": 6620 + }, + { + "grad_norm": 0.20532706379890442, + "learning_rate": 9.992746799491404e-05, + "loss": 0.033, + "step": 6630 + }, + { + "grad_norm": 0.2360721081495285, + "learning_rate": 9.992657497072033e-05, + "loss": 0.0315, + "step": 6640 + }, + { + "grad_norm": 0.16264893114566803, + "learning_rate": 9.992567648663147e-05, + "loss": 0.0327, + "step": 6650 + }, + { + "grad_norm": 0.2652653753757477, + "learning_rate": 9.992477254274568e-05, + "loss": 0.0311, + "step": 6660 + }, + { + "grad_norm": 0.2615054249763489, + "learning_rate": 9.992386313916183e-05, + "loss": 0.0315, + "step": 6670 + }, + { + "grad_norm": 0.20902769267559052, + "learning_rate": 9.992294827597934e-05, + "loss": 0.0339, + "step": 6680 + }, + { + "grad_norm": 0.15806248784065247, + "learning_rate": 9.992202795329831e-05, + "loss": 0.0317, + "step": 6690 + }, + { + "grad_norm": 0.20595693588256836, + "learning_rate": 9.992110217121936e-05, + "loss": 0.0282, + "step": 6700 + }, + { + "grad_norm": 0.14836330711841583, + "learning_rate": 9.992017092984372e-05, + "loss": 0.032, + "step": 6710 + }, + { + "grad_norm": 0.18595246970653534, + "learning_rate": 9.991923422927326e-05, + "loss": 0.0318, + "step": 6720 + }, + { + "grad_norm": 0.286554753780365, + "learning_rate": 9.991829206961037e-05, + "loss": 0.0348, + "step": 6730 + }, + { + "grad_norm": 0.16324231028556824, + "learning_rate": 9.991734445095813e-05, + "loss": 0.0305, + "step": 6740 + }, + { + "grad_norm": 0.17347069084644318, + "learning_rate": 9.991639137342015e-05, + "loss": 0.0306, + "step": 6750 + }, + { + "grad_norm": 0.21487559378147125, + "learning_rate": 9.991543283710064e-05, + "loss": 0.0387, + "step": 6760 + }, + { + "grad_norm": 0.17933903634548187, + "learning_rate": 9.991446884210445e-05, + "loss": 0.0299, + "step": 6770 + }, + { + "grad_norm": 0.23860688507556915, + "learning_rate": 9.9913499388537e-05, + "loss": 0.0305, + "step": 6780 + }, + { + "grad_norm": 0.18094094097614288, + "learning_rate": 9.99125244765043e-05, + "loss": 0.0342, + "step": 6790 + }, + { + "grad_norm": 0.24189533293247223, + "learning_rate": 9.991154410611296e-05, + "loss": 0.0355, + "step": 6800 + }, + { + "grad_norm": 0.20464807748794556, + "learning_rate": 9.99105582774702e-05, + "loss": 0.0328, + "step": 6810 + }, + { + "grad_norm": 0.23017235100269318, + "learning_rate": 9.990956699068384e-05, + "loss": 0.0289, + "step": 6820 + }, + { + "grad_norm": 0.20303837954998016, + "learning_rate": 9.990857024586224e-05, + "loss": 0.0322, + "step": 6830 + }, + { + "grad_norm": 0.13697852194309235, + "learning_rate": 9.990756804311446e-05, + "loss": 0.0295, + "step": 6840 + }, + { + "grad_norm": 0.20161789655685425, + "learning_rate": 9.990656038255006e-05, + "loss": 0.0332, + "step": 6850 + }, + { + "grad_norm": 0.16595321893692017, + "learning_rate": 9.990554726427926e-05, + "loss": 0.0352, + "step": 6860 + }, + { + "grad_norm": 0.17169463634490967, + "learning_rate": 9.990452868841284e-05, + "loss": 0.0293, + "step": 6870 + }, + { + "grad_norm": 0.1427028328180313, + "learning_rate": 9.99035046550622e-05, + "loss": 0.0299, + "step": 6880 + }, + { + "grad_norm": 0.212447851896286, + "learning_rate": 9.99024751643393e-05, + "loss": 0.0314, + "step": 6890 + }, + { + "grad_norm": 0.23710960149765015, + "learning_rate": 9.990144021635677e-05, + "loss": 0.0313, + "step": 6900 + }, + { + "grad_norm": 0.16380059719085693, + "learning_rate": 9.990039981122775e-05, + "loss": 0.0309, + "step": 6910 + }, + { + "grad_norm": 0.2079963982105255, + "learning_rate": 9.989935394906602e-05, + "loss": 0.0334, + "step": 6920 + }, + { + "grad_norm": 0.24272094666957855, + "learning_rate": 9.989830262998598e-05, + "loss": 0.0294, + "step": 6930 + }, + { + "grad_norm": 0.22272495925426483, + "learning_rate": 9.989724585410259e-05, + "loss": 0.0335, + "step": 6940 + }, + { + "grad_norm": 0.18978382647037506, + "learning_rate": 9.989618362153139e-05, + "loss": 0.03, + "step": 6950 + }, + { + "grad_norm": 0.1547386199235916, + "learning_rate": 9.989511593238859e-05, + "loss": 0.0271, + "step": 6960 + }, + { + "grad_norm": 0.2653886079788208, + "learning_rate": 9.98940427867909e-05, + "loss": 0.0318, + "step": 6970 + }, + { + "grad_norm": 0.20789600908756256, + "learning_rate": 9.989296418485573e-05, + "loss": 0.0298, + "step": 6980 + }, + { + "grad_norm": 0.1918748915195465, + "learning_rate": 9.989188012670101e-05, + "loss": 0.0303, + "step": 6990 + }, + { + "grad_norm": 0.1783900409936905, + "learning_rate": 9.989079061244528e-05, + "loss": 0.0274, + "step": 7000 + }, + { + "grad_norm": 0.22401897609233856, + "learning_rate": 9.988969564220769e-05, + "loss": 0.0292, + "step": 7010 + }, + { + "grad_norm": 0.22406475245952606, + "learning_rate": 9.988859521610801e-05, + "loss": 0.0342, + "step": 7020 + }, + { + "grad_norm": 0.19223256409168243, + "learning_rate": 9.988748933426656e-05, + "loss": 0.0317, + "step": 7030 + }, + { + "grad_norm": 0.20647002756595612, + "learning_rate": 9.988637799680428e-05, + "loss": 0.0272, + "step": 7040 + }, + { + "grad_norm": 0.16920864582061768, + "learning_rate": 9.98852612038427e-05, + "loss": 0.0274, + "step": 7050 + }, + { + "grad_norm": 0.17921346426010132, + "learning_rate": 9.988413895550397e-05, + "loss": 0.0298, + "step": 7060 + }, + { + "grad_norm": 0.21491295099258423, + "learning_rate": 9.98830112519108e-05, + "loss": 0.0316, + "step": 7070 + }, + { + "grad_norm": 0.2768172323703766, + "learning_rate": 9.98818780931865e-05, + "loss": 0.0322, + "step": 7080 + }, + { + "grad_norm": 0.19876210391521454, + "learning_rate": 9.988073947945502e-05, + "loss": 0.0298, + "step": 7090 + }, + { + "grad_norm": 0.16212648153305054, + "learning_rate": 9.987959541084087e-05, + "loss": 0.0311, + "step": 7100 + }, + { + "grad_norm": 0.19158676266670227, + "learning_rate": 9.987844588746915e-05, + "loss": 0.0304, + "step": 7110 + }, + { + "grad_norm": 0.34177350997924805, + "learning_rate": 9.987729090946558e-05, + "loss": 0.0334, + "step": 7120 + }, + { + "grad_norm": 0.1311742663383484, + "learning_rate": 9.987613047695647e-05, + "loss": 0.0311, + "step": 7130 + }, + { + "grad_norm": 0.17719070613384247, + "learning_rate": 9.987496459006871e-05, + "loss": 0.0372, + "step": 7140 + }, + { + "grad_norm": 0.24357640743255615, + "learning_rate": 9.987379324892982e-05, + "loss": 0.0291, + "step": 7150 + }, + { + "grad_norm": 0.17287778854370117, + "learning_rate": 9.987261645366788e-05, + "loss": 0.0316, + "step": 7160 + }, + { + "grad_norm": 0.1399814784526825, + "learning_rate": 9.987143420441158e-05, + "loss": 0.0328, + "step": 7170 + }, + { + "grad_norm": 0.1757952719926834, + "learning_rate": 9.987024650129022e-05, + "loss": 0.0309, + "step": 7180 + }, + { + "grad_norm": 0.14032907783985138, + "learning_rate": 9.986905334443368e-05, + "loss": 0.0327, + "step": 7190 + }, + { + "grad_norm": 0.1773601472377777, + "learning_rate": 9.986785473397245e-05, + "loss": 0.03, + "step": 7200 + }, + { + "grad_norm": 0.19041909277439117, + "learning_rate": 9.98666506700376e-05, + "loss": 0.0328, + "step": 7210 + }, + { + "grad_norm": 0.16437478363513947, + "learning_rate": 9.986544115276081e-05, + "loss": 0.032, + "step": 7220 + }, + { + "grad_norm": 0.17368559539318085, + "learning_rate": 9.986422618227433e-05, + "loss": 0.0279, + "step": 7230 + }, + { + "grad_norm": 0.22915691137313843, + "learning_rate": 9.986300575871106e-05, + "loss": 0.0294, + "step": 7240 + }, + { + "grad_norm": 0.18923626840114594, + "learning_rate": 9.986177988220444e-05, + "loss": 0.0292, + "step": 7250 + }, + { + "grad_norm": 0.15416090190410614, + "learning_rate": 9.986054855288856e-05, + "loss": 0.0299, + "step": 7260 + }, + { + "grad_norm": 0.21726670861244202, + "learning_rate": 9.985931177089802e-05, + "loss": 0.0289, + "step": 7270 + }, + { + "grad_norm": 0.15424323081970215, + "learning_rate": 9.985806953636814e-05, + "loss": 0.0292, + "step": 7280 + }, + { + "grad_norm": 0.16834352910518646, + "learning_rate": 9.985682184943471e-05, + "loss": 0.027, + "step": 7290 + }, + { + "grad_norm": 0.18289043009281158, + "learning_rate": 9.98555687102342e-05, + "loss": 0.0318, + "step": 7300 + }, + { + "grad_norm": 0.17295777797698975, + "learning_rate": 9.985431011890367e-05, + "loss": 0.0277, + "step": 7310 + }, + { + "grad_norm": 0.1822638064622879, + "learning_rate": 9.985304607558075e-05, + "loss": 0.0303, + "step": 7320 + }, + { + "grad_norm": 0.1303212195634842, + "learning_rate": 9.985177658040364e-05, + "loss": 0.0265, + "step": 7330 + }, + { + "grad_norm": 0.14535638689994812, + "learning_rate": 9.985050163351119e-05, + "loss": 0.027, + "step": 7340 + }, + { + "grad_norm": 0.13688494265079498, + "learning_rate": 9.984922123504286e-05, + "loss": 0.0301, + "step": 7350 + }, + { + "grad_norm": 0.19832707941532135, + "learning_rate": 9.984793538513862e-05, + "loss": 0.0306, + "step": 7360 + }, + { + "grad_norm": 0.21907919645309448, + "learning_rate": 9.984664408393912e-05, + "loss": 0.0306, + "step": 7370 + }, + { + "grad_norm": 0.16624656319618225, + "learning_rate": 9.984534733158556e-05, + "loss": 0.0283, + "step": 7380 + }, + { + "grad_norm": 0.18858885765075684, + "learning_rate": 9.984404512821977e-05, + "loss": 0.0286, + "step": 7390 + }, + { + "grad_norm": 0.17665143311023712, + "learning_rate": 9.984273747398411e-05, + "loss": 0.0297, + "step": 7400 + }, + { + "grad_norm": 0.1615772843360901, + "learning_rate": 9.984142436902165e-05, + "loss": 0.0308, + "step": 7410 + }, + { + "grad_norm": 0.1729613095521927, + "learning_rate": 9.984010581347596e-05, + "loss": 0.033, + "step": 7420 + }, + { + "grad_norm": 0.17431111633777618, + "learning_rate": 9.983878180749121e-05, + "loss": 0.0291, + "step": 7430 + }, + { + "grad_norm": 0.22719500958919525, + "learning_rate": 9.983745235121222e-05, + "loss": 0.0341, + "step": 7440 + }, + { + "grad_norm": 0.1798018217086792, + "learning_rate": 9.983611744478438e-05, + "loss": 0.028, + "step": 7450 + }, + { + "grad_norm": 0.17133843898773193, + "learning_rate": 9.983477708835365e-05, + "loss": 0.0261, + "step": 7460 + }, + { + "grad_norm": 0.17771805822849274, + "learning_rate": 9.983343128206664e-05, + "loss": 0.0341, + "step": 7470 + }, + { + "grad_norm": 0.17559008300304413, + "learning_rate": 9.983208002607049e-05, + "loss": 0.0306, + "step": 7480 + }, + { + "grad_norm": 0.21553251147270203, + "learning_rate": 9.9830723320513e-05, + "loss": 0.0283, + "step": 7490 + }, + { + "grad_norm": 0.19332420825958252, + "learning_rate": 9.982936116554254e-05, + "loss": 0.0277, + "step": 7500 + }, + { + "grad_norm": 0.1704283207654953, + "learning_rate": 9.982799356130803e-05, + "loss": 0.0274, + "step": 7510 + }, + { + "grad_norm": 0.191847026348114, + "learning_rate": 9.982662050795908e-05, + "loss": 0.0299, + "step": 7520 + }, + { + "grad_norm": 0.1728695183992386, + "learning_rate": 9.982524200564583e-05, + "loss": 0.0306, + "step": 7530 + }, + { + "grad_norm": 0.14135895669460297, + "learning_rate": 9.982385805451901e-05, + "loss": 0.0275, + "step": 7540 + }, + { + "grad_norm": 0.16340233385562897, + "learning_rate": 9.982246865472998e-05, + "loss": 0.0257, + "step": 7550 + }, + { + "grad_norm": 0.1617847979068756, + "learning_rate": 9.982107380643069e-05, + "loss": 0.0288, + "step": 7560 + }, + { + "grad_norm": 0.17385059595108032, + "learning_rate": 9.981967350977368e-05, + "loss": 0.031, + "step": 7570 + }, + { + "grad_norm": 0.1581083983182907, + "learning_rate": 9.981826776491208e-05, + "loss": 0.026, + "step": 7580 + }, + { + "grad_norm": 0.2705019414424896, + "learning_rate": 9.98168565719996e-05, + "loss": 0.0289, + "step": 7590 + }, + { + "grad_norm": 0.22305966913700104, + "learning_rate": 9.98154399311906e-05, + "loss": 0.0245, + "step": 7600 + }, + { + "grad_norm": 0.19052311778068542, + "learning_rate": 9.981401784263997e-05, + "loss": 0.0306, + "step": 7610 + }, + { + "grad_norm": 0.20875051617622375, + "learning_rate": 9.981259030650326e-05, + "loss": 0.0272, + "step": 7620 + }, + { + "grad_norm": 0.2579907178878784, + "learning_rate": 9.981115732293655e-05, + "loss": 0.0269, + "step": 7630 + }, + { + "grad_norm": 0.17638257145881653, + "learning_rate": 9.980971889209659e-05, + "loss": 0.0283, + "step": 7640 + }, + { + "grad_norm": 0.13616618514060974, + "learning_rate": 9.980827501414064e-05, + "loss": 0.0257, + "step": 7650 + }, + { + "grad_norm": 0.1626826971769333, + "learning_rate": 9.980682568922663e-05, + "loss": 0.0288, + "step": 7660 + }, + { + "grad_norm": 0.11292412132024765, + "learning_rate": 9.980537091751304e-05, + "loss": 0.0296, + "step": 7670 + }, + { + "grad_norm": 0.1259966343641281, + "learning_rate": 9.980391069915897e-05, + "loss": 0.027, + "step": 7680 + }, + { + "grad_norm": 0.14290998876094818, + "learning_rate": 9.98024450343241e-05, + "loss": 0.0265, + "step": 7690 + }, + { + "grad_norm": 0.1619204878807068, + "learning_rate": 9.980097392316872e-05, + "loss": 0.0285, + "step": 7700 + }, + { + "grad_norm": 0.19668516516685486, + "learning_rate": 9.97994973658537e-05, + "loss": 0.026, + "step": 7710 + }, + { + "grad_norm": 0.18191011250019073, + "learning_rate": 9.979801536254054e-05, + "loss": 0.0282, + "step": 7720 + }, + { + "grad_norm": 0.13088515400886536, + "learning_rate": 9.979652791339127e-05, + "loss": 0.0257, + "step": 7730 + }, + { + "grad_norm": 0.19851389527320862, + "learning_rate": 9.97950350185686e-05, + "loss": 0.0281, + "step": 7740 + }, + { + "grad_norm": 0.1456284373998642, + "learning_rate": 9.979353667823574e-05, + "loss": 0.027, + "step": 7750 + }, + { + "grad_norm": 0.1574028581380844, + "learning_rate": 9.979203289255658e-05, + "loss": 0.0273, + "step": 7760 + }, + { + "grad_norm": 0.17340019345283508, + "learning_rate": 9.979052366169557e-05, + "loss": 0.0275, + "step": 7770 + }, + { + "grad_norm": 0.2250707596540451, + "learning_rate": 9.978900898581775e-05, + "loss": 0.0277, + "step": 7780 + }, + { + "grad_norm": 0.275399386882782, + "learning_rate": 9.978748886508875e-05, + "loss": 0.0296, + "step": 7790 + }, + { + "grad_norm": 0.1917940378189087, + "learning_rate": 9.978596329967484e-05, + "loss": 0.0307, + "step": 7800 + }, + { + "grad_norm": 0.20076577365398407, + "learning_rate": 9.978443228974284e-05, + "loss": 0.0271, + "step": 7810 + }, + { + "grad_norm": 0.19194139540195465, + "learning_rate": 9.978289583546015e-05, + "loss": 0.0291, + "step": 7820 + }, + { + "grad_norm": 0.17558225989341736, + "learning_rate": 9.978135393699484e-05, + "loss": 0.0302, + "step": 7830 + }, + { + "grad_norm": 0.20239335298538208, + "learning_rate": 9.977980659451548e-05, + "loss": 0.0265, + "step": 7840 + }, + { + "grad_norm": 0.20412544906139374, + "learning_rate": 9.977825380819135e-05, + "loss": 0.0272, + "step": 7850 + }, + { + "grad_norm": 0.14919361472129822, + "learning_rate": 9.97766955781922e-05, + "loss": 0.0251, + "step": 7860 + }, + { + "grad_norm": 0.20562846958637238, + "learning_rate": 9.977513190468848e-05, + "loss": 0.0296, + "step": 7870 + }, + { + "grad_norm": 0.12740463018417358, + "learning_rate": 9.977356278785116e-05, + "loss": 0.0302, + "step": 7880 + }, + { + "grad_norm": 0.2289516180753708, + "learning_rate": 9.977198822785184e-05, + "loss": 0.0291, + "step": 7890 + }, + { + "grad_norm": 0.18138830363750458, + "learning_rate": 9.977040822486273e-05, + "loss": 0.0266, + "step": 7900 + }, + { + "grad_norm": 0.1271064430475235, + "learning_rate": 9.97688227790566e-05, + "loss": 0.0287, + "step": 7910 + }, + { + "grad_norm": 0.1468590348958969, + "learning_rate": 9.976723189060684e-05, + "loss": 0.0268, + "step": 7920 + }, + { + "grad_norm": 0.22657889127731323, + "learning_rate": 9.976563555968742e-05, + "loss": 0.03, + "step": 7930 + }, + { + "grad_norm": 0.1558162271976471, + "learning_rate": 9.976403378647292e-05, + "loss": 0.031, + "step": 7940 + }, + { + "grad_norm": 0.10844185203313828, + "learning_rate": 9.97624265711385e-05, + "loss": 0.0287, + "step": 7950 + }, + { + "grad_norm": 0.2008112221956253, + "learning_rate": 9.976081391385993e-05, + "loss": 0.0277, + "step": 7960 + }, + { + "grad_norm": 0.13389372825622559, + "learning_rate": 9.975919581481356e-05, + "loss": 0.0246, + "step": 7970 + }, + { + "grad_norm": 0.1621895283460617, + "learning_rate": 9.975757227417634e-05, + "loss": 0.0279, + "step": 7980 + }, + { + "grad_norm": 0.164812833070755, + "learning_rate": 9.975594329212586e-05, + "loss": 0.0276, + "step": 7990 + }, + { + "grad_norm": 0.11784564703702927, + "learning_rate": 9.97543088688402e-05, + "loss": 0.0263, + "step": 8000 + }, + { + "grad_norm": 0.14874611794948578, + "learning_rate": 9.975266900449814e-05, + "loss": 0.0256, + "step": 8010 + }, + { + "grad_norm": 0.16082262992858887, + "learning_rate": 9.975102369927898e-05, + "loss": 0.0266, + "step": 8020 + }, + { + "grad_norm": 0.16412702202796936, + "learning_rate": 9.974937295336269e-05, + "loss": 0.0259, + "step": 8030 + }, + { + "grad_norm": 0.11318705976009369, + "learning_rate": 9.974771676692975e-05, + "loss": 0.0249, + "step": 8040 + }, + { + "grad_norm": 0.11539804935455322, + "learning_rate": 9.974605514016131e-05, + "loss": 0.0239, + "step": 8050 + }, + { + "grad_norm": 0.15975069999694824, + "learning_rate": 9.974438807323907e-05, + "loss": 0.0279, + "step": 8060 + }, + { + "grad_norm": 0.13917766511440277, + "learning_rate": 9.974271556634535e-05, + "loss": 0.0271, + "step": 8070 + }, + { + "grad_norm": 0.20182426273822784, + "learning_rate": 9.974103761966302e-05, + "loss": 0.0295, + "step": 8080 + }, + { + "grad_norm": 0.14557673037052155, + "learning_rate": 9.973935423337563e-05, + "loss": 0.0282, + "step": 8090 + }, + { + "grad_norm": 0.11023422330617905, + "learning_rate": 9.973766540766722e-05, + "loss": 0.0246, + "step": 8100 + }, + { + "grad_norm": 0.12580999732017517, + "learning_rate": 9.97359711427225e-05, + "loss": 0.0266, + "step": 8110 + }, + { + "grad_norm": 0.16455386579036713, + "learning_rate": 9.973427143872677e-05, + "loss": 0.0243, + "step": 8120 + }, + { + "grad_norm": 0.22847501933574677, + "learning_rate": 9.973256629586589e-05, + "loss": 0.0257, + "step": 8130 + }, + { + "grad_norm": 0.17985880374908447, + "learning_rate": 9.973085571432632e-05, + "loss": 0.0309, + "step": 8140 + }, + { + "grad_norm": 0.19408516585826874, + "learning_rate": 9.972913969429513e-05, + "loss": 0.026, + "step": 8150 + }, + { + "grad_norm": 0.13128694891929626, + "learning_rate": 9.972741823596e-05, + "loss": 0.0247, + "step": 8160 + }, + { + "grad_norm": 0.1474815309047699, + "learning_rate": 9.972569133950917e-05, + "loss": 0.0259, + "step": 8170 + }, + { + "grad_norm": 0.13521228730678558, + "learning_rate": 9.972395900513151e-05, + "loss": 0.0261, + "step": 8180 + }, + { + "grad_norm": 0.11593032628297806, + "learning_rate": 9.972222123301645e-05, + "loss": 0.0258, + "step": 8190 + }, + { + "grad_norm": 0.16711068153381348, + "learning_rate": 9.972047802335403e-05, + "loss": 0.0235, + "step": 8200 + }, + { + "grad_norm": 0.16960230469703674, + "learning_rate": 9.971872937633488e-05, + "loss": 0.028, + "step": 8210 + }, + { + "grad_norm": 0.1604234129190445, + "learning_rate": 9.971697529215024e-05, + "loss": 0.0255, + "step": 8220 + }, + { + "grad_norm": 0.11691098660230637, + "learning_rate": 9.971521577099192e-05, + "loss": 0.0281, + "step": 8230 + }, + { + "grad_norm": 0.11912453174591064, + "learning_rate": 9.971345081305236e-05, + "loss": 0.0275, + "step": 8240 + }, + { + "grad_norm": 0.11770563572645187, + "learning_rate": 9.971168041852456e-05, + "loss": 0.0249, + "step": 8250 + }, + { + "grad_norm": 0.14540135860443115, + "learning_rate": 9.970990458760215e-05, + "loss": 0.0249, + "step": 8260 + }, + { + "grad_norm": 0.16588033735752106, + "learning_rate": 9.970812332047929e-05, + "loss": 0.0271, + "step": 8270 + }, + { + "grad_norm": 0.2020530253648758, + "learning_rate": 9.97063366173508e-05, + "loss": 0.0258, + "step": 8280 + }, + { + "grad_norm": 0.08881165832281113, + "learning_rate": 9.970454447841207e-05, + "loss": 0.0244, + "step": 8290 + }, + { + "grad_norm": 0.1579853892326355, + "learning_rate": 9.970274690385909e-05, + "loss": 0.0248, + "step": 8300 + }, + { + "grad_norm": 0.18947359919548035, + "learning_rate": 9.970094389388844e-05, + "loss": 0.0263, + "step": 8310 + }, + { + "grad_norm": 0.13466961681842804, + "learning_rate": 9.969913544869728e-05, + "loss": 0.0287, + "step": 8320 + }, + { + "grad_norm": 0.22102345526218414, + "learning_rate": 9.96973215684834e-05, + "loss": 0.0262, + "step": 8330 + }, + { + "grad_norm": 0.15573221445083618, + "learning_rate": 9.969550225344513e-05, + "loss": 0.0264, + "step": 8340 + }, + { + "grad_norm": 0.15057486295700073, + "learning_rate": 9.969367750378147e-05, + "loss": 0.0221, + "step": 8350 + }, + { + "grad_norm": 0.10567989945411682, + "learning_rate": 9.969184731969194e-05, + "loss": 0.0247, + "step": 8360 + }, + { + "grad_norm": 0.12744295597076416, + "learning_rate": 9.96900117013767e-05, + "loss": 0.0262, + "step": 8370 + }, + { + "grad_norm": 0.1572844535112381, + "learning_rate": 9.96881706490365e-05, + "loss": 0.0243, + "step": 8380 + }, + { + "grad_norm": 0.22161415219306946, + "learning_rate": 9.968632416287265e-05, + "loss": 0.0277, + "step": 8390 + }, + { + "grad_norm": 0.20931731164455414, + "learning_rate": 9.96844722430871e-05, + "loss": 0.0295, + "step": 8400 + }, + { + "grad_norm": 0.11495661735534668, + "learning_rate": 9.968261488988235e-05, + "loss": 0.0238, + "step": 8410 + }, + { + "grad_norm": 0.1654428392648697, + "learning_rate": 9.968075210346155e-05, + "loss": 0.0237, + "step": 8420 + }, + { + "grad_norm": 0.14979666471481323, + "learning_rate": 9.967888388402839e-05, + "loss": 0.0263, + "step": 8430 + }, + { + "grad_norm": 0.16380833089351654, + "learning_rate": 9.967701023178717e-05, + "loss": 0.0254, + "step": 8440 + }, + { + "grad_norm": 0.11650239676237106, + "learning_rate": 9.967513114694282e-05, + "loss": 0.0238, + "step": 8450 + }, + { + "grad_norm": 0.15148864686489105, + "learning_rate": 9.967324662970079e-05, + "loss": 0.026, + "step": 8460 + }, + { + "grad_norm": 0.1352785974740982, + "learning_rate": 9.96713566802672e-05, + "loss": 0.024, + "step": 8470 + }, + { + "grad_norm": 0.1626315414905548, + "learning_rate": 9.966946129884873e-05, + "loss": 0.0247, + "step": 8480 + }, + { + "grad_norm": 0.21498878300189972, + "learning_rate": 9.966756048565265e-05, + "loss": 0.0249, + "step": 8490 + }, + { + "grad_norm": 0.11640583723783493, + "learning_rate": 9.966565424088681e-05, + "loss": 0.0267, + "step": 8500 + }, + { + "grad_norm": 0.21127887070178986, + "learning_rate": 9.96637425647597e-05, + "loss": 0.0296, + "step": 8510 + }, + { + "grad_norm": 0.14297668635845184, + "learning_rate": 9.966182545748038e-05, + "loss": 0.0282, + "step": 8520 + }, + { + "grad_norm": 0.18890608847141266, + "learning_rate": 9.96599029192585e-05, + "loss": 0.0241, + "step": 8530 + }, + { + "grad_norm": 0.11609553545713425, + "learning_rate": 9.965797495030428e-05, + "loss": 0.0256, + "step": 8540 + }, + { + "grad_norm": 0.22640280425548553, + "learning_rate": 9.96560415508286e-05, + "loss": 0.0304, + "step": 8550 + }, + { + "grad_norm": 0.18448376655578613, + "learning_rate": 9.965410272104286e-05, + "loss": 0.0269, + "step": 8560 + }, + { + "grad_norm": 0.2522740960121155, + "learning_rate": 9.96521584611591e-05, + "loss": 0.0271, + "step": 8570 + }, + { + "grad_norm": 0.17407649755477905, + "learning_rate": 9.965020877138994e-05, + "loss": 0.0296, + "step": 8580 + }, + { + "grad_norm": 0.12948955595493317, + "learning_rate": 9.964825365194861e-05, + "loss": 0.026, + "step": 8590 + }, + { + "grad_norm": 0.13800622522830963, + "learning_rate": 9.96462931030489e-05, + "loss": 0.0261, + "step": 8600 + }, + { + "grad_norm": 0.1714741289615631, + "learning_rate": 9.96443271249052e-05, + "loss": 0.0253, + "step": 8610 + }, + { + "grad_norm": 0.1265614926815033, + "learning_rate": 9.964235571773255e-05, + "loss": 0.0234, + "step": 8620 + }, + { + "grad_norm": 0.10119137912988663, + "learning_rate": 9.96403788817465e-05, + "loss": 0.0237, + "step": 8630 + }, + { + "grad_norm": 0.12252849340438843, + "learning_rate": 9.963839661716325e-05, + "loss": 0.0245, + "step": 8640 + }, + { + "grad_norm": 0.16964396834373474, + "learning_rate": 9.963640892419958e-05, + "loss": 0.0266, + "step": 8650 + }, + { + "grad_norm": 0.13728967308998108, + "learning_rate": 9.963441580307286e-05, + "loss": 0.0228, + "step": 8660 + }, + { + "grad_norm": 0.12883734703063965, + "learning_rate": 9.963241725400104e-05, + "loss": 0.0233, + "step": 8670 + }, + { + "grad_norm": 0.18656067550182343, + "learning_rate": 9.963041327720271e-05, + "loss": 0.0261, + "step": 8680 + }, + { + "grad_norm": 0.1957779824733734, + "learning_rate": 9.962840387289697e-05, + "loss": 0.0246, + "step": 8690 + }, + { + "grad_norm": 0.11867353320121765, + "learning_rate": 9.962638904130363e-05, + "loss": 0.0243, + "step": 8700 + }, + { + "grad_norm": 0.12276717275381088, + "learning_rate": 9.962436878264298e-05, + "loss": 0.0231, + "step": 8710 + }, + { + "grad_norm": 0.13062988221645355, + "learning_rate": 9.962234309713598e-05, + "loss": 0.0259, + "step": 8720 + }, + { + "grad_norm": 0.10883663594722748, + "learning_rate": 9.962031198500414e-05, + "loss": 0.0251, + "step": 8730 + }, + { + "grad_norm": 0.1776639223098755, + "learning_rate": 9.961827544646958e-05, + "loss": 0.025, + "step": 8740 + }, + { + "grad_norm": 0.1399870216846466, + "learning_rate": 9.961623348175501e-05, + "loss": 0.0248, + "step": 8750 + }, + { + "grad_norm": 0.22545203566551208, + "learning_rate": 9.961418609108377e-05, + "loss": 0.0236, + "step": 8760 + }, + { + "grad_norm": 0.12217805534601212, + "learning_rate": 9.961213327467971e-05, + "loss": 0.0253, + "step": 8770 + }, + { + "grad_norm": 0.10473377257585526, + "learning_rate": 9.961007503276736e-05, + "loss": 0.027, + "step": 8780 + }, + { + "grad_norm": 0.17841805517673492, + "learning_rate": 9.960801136557179e-05, + "loss": 0.0229, + "step": 8790 + }, + { + "grad_norm": 0.10361885279417038, + "learning_rate": 9.960594227331866e-05, + "loss": 0.0273, + "step": 8800 + }, + { + "grad_norm": 0.13598845899105072, + "learning_rate": 9.960386775623429e-05, + "loss": 0.0249, + "step": 8810 + }, + { + "grad_norm": 0.12972748279571533, + "learning_rate": 9.96017878145455e-05, + "loss": 0.0243, + "step": 8820 + }, + { + "grad_norm": 0.11496877670288086, + "learning_rate": 9.959970244847977e-05, + "loss": 0.0225, + "step": 8830 + }, + { + "grad_norm": 0.19380147755146027, + "learning_rate": 9.959761165826518e-05, + "loss": 0.0245, + "step": 8840 + }, + { + "grad_norm": 0.13206307590007782, + "learning_rate": 9.959551544413033e-05, + "loss": 0.0263, + "step": 8850 + }, + { + "grad_norm": 0.17544177174568176, + "learning_rate": 9.959341380630448e-05, + "loss": 0.0232, + "step": 8860 + }, + { + "grad_norm": 0.09528627246618271, + "learning_rate": 9.959130674501746e-05, + "loss": 0.0233, + "step": 8870 + }, + { + "grad_norm": 0.1639525592327118, + "learning_rate": 9.958919426049968e-05, + "loss": 0.025, + "step": 8880 + }, + { + "grad_norm": 0.16601234674453735, + "learning_rate": 9.958707635298219e-05, + "loss": 0.0254, + "step": 8890 + }, + { + "grad_norm": 0.17776130139827728, + "learning_rate": 9.958495302269657e-05, + "loss": 0.0251, + "step": 8900 + }, + { + "grad_norm": 0.15484941005706787, + "learning_rate": 9.958282426987503e-05, + "loss": 0.0258, + "step": 8910 + }, + { + "grad_norm": 0.13786917924880981, + "learning_rate": 9.95806900947504e-05, + "loss": 0.0221, + "step": 8920 + }, + { + "grad_norm": 0.1301954835653305, + "learning_rate": 9.957855049755604e-05, + "loss": 0.0273, + "step": 8930 + }, + { + "grad_norm": 0.09511040151119232, + "learning_rate": 9.957640547852593e-05, + "loss": 0.0263, + "step": 8940 + }, + { + "grad_norm": 0.18696966767311096, + "learning_rate": 9.957425503789466e-05, + "loss": 0.0266, + "step": 8950 + }, + { + "grad_norm": 0.20102407038211823, + "learning_rate": 9.957209917589738e-05, + "loss": 0.0258, + "step": 8960 + }, + { + "grad_norm": 0.18750327825546265, + "learning_rate": 9.956993789276987e-05, + "loss": 0.0257, + "step": 8970 + }, + { + "grad_norm": 0.11981291323900223, + "learning_rate": 9.956777118874847e-05, + "loss": 0.0263, + "step": 8980 + }, + { + "grad_norm": 0.1594652384519577, + "learning_rate": 9.956559906407016e-05, + "loss": 0.0254, + "step": 8990 + }, + { + "grad_norm": 0.16209600865840912, + "learning_rate": 9.956342151897245e-05, + "loss": 0.0261, + "step": 9000 + }, + { + "grad_norm": 0.1579834371805191, + "learning_rate": 9.956123855369346e-05, + "loss": 0.025, + "step": 9010 + }, + { + "grad_norm": 0.10833077877759933, + "learning_rate": 9.955905016847196e-05, + "loss": 0.0214, + "step": 9020 + }, + { + "grad_norm": 0.14651493728160858, + "learning_rate": 9.955685636354723e-05, + "loss": 0.0233, + "step": 9030 + }, + { + "grad_norm": 0.14848026633262634, + "learning_rate": 9.95546571391592e-05, + "loss": 0.0217, + "step": 9040 + }, + { + "grad_norm": 0.11094438284635544, + "learning_rate": 9.955245249554837e-05, + "loss": 0.0238, + "step": 9050 + }, + { + "grad_norm": 0.16227422654628754, + "learning_rate": 9.955024243295582e-05, + "loss": 0.023, + "step": 9060 + }, + { + "grad_norm": 0.14059175550937653, + "learning_rate": 9.954802695162328e-05, + "loss": 0.0239, + "step": 9070 + }, + { + "grad_norm": 0.13934741914272308, + "learning_rate": 9.954580605179302e-05, + "loss": 0.0275, + "step": 9080 + }, + { + "grad_norm": 0.10446570813655853, + "learning_rate": 9.954357973370788e-05, + "loss": 0.0257, + "step": 9090 + }, + { + "grad_norm": 0.12971989810466766, + "learning_rate": 9.954134799761135e-05, + "loss": 0.0244, + "step": 9100 + }, + { + "grad_norm": 0.17994427680969238, + "learning_rate": 9.953911084374748e-05, + "loss": 0.0244, + "step": 9110 + }, + { + "grad_norm": 0.11135292053222656, + "learning_rate": 9.953686827236093e-05, + "loss": 0.027, + "step": 9120 + }, + { + "grad_norm": 0.1393471658229828, + "learning_rate": 9.953462028369695e-05, + "loss": 0.0247, + "step": 9130 + }, + { + "grad_norm": 0.1056198999285698, + "learning_rate": 9.953236687800136e-05, + "loss": 0.0223, + "step": 9140 + }, + { + "grad_norm": 0.17250175774097443, + "learning_rate": 9.95301080555206e-05, + "loss": 0.0231, + "step": 9150 + }, + { + "grad_norm": 0.15285474061965942, + "learning_rate": 9.952784381650171e-05, + "loss": 0.025, + "step": 9160 + }, + { + "grad_norm": 0.15354588627815247, + "learning_rate": 9.952557416119226e-05, + "loss": 0.0256, + "step": 9170 + }, + { + "grad_norm": 0.1349913477897644, + "learning_rate": 9.95232990898405e-05, + "loss": 0.0225, + "step": 9180 + }, + { + "grad_norm": 0.11906614899635315, + "learning_rate": 9.95210186026952e-05, + "loss": 0.0226, + "step": 9190 + }, + { + "grad_norm": 0.1172308474779129, + "learning_rate": 9.951873270000576e-05, + "loss": 0.0239, + "step": 9200 + }, + { + "grad_norm": 0.19220265746116638, + "learning_rate": 9.951644138202216e-05, + "loss": 0.0257, + "step": 9210 + }, + { + "grad_norm": 0.1514112949371338, + "learning_rate": 9.951414464899498e-05, + "loss": 0.0276, + "step": 9220 + }, + { + "grad_norm": 0.14441697299480438, + "learning_rate": 9.951184250117538e-05, + "loss": 0.0247, + "step": 9230 + }, + { + "grad_norm": 0.16247938573360443, + "learning_rate": 9.950953493881513e-05, + "loss": 0.0265, + "step": 9240 + }, + { + "grad_norm": 0.17582081258296967, + "learning_rate": 9.950722196216658e-05, + "loss": 0.0242, + "step": 9250 + }, + { + "grad_norm": 0.15075579285621643, + "learning_rate": 9.950490357148265e-05, + "loss": 0.0241, + "step": 9260 + }, + { + "grad_norm": 0.11693905293941498, + "learning_rate": 9.950257976701692e-05, + "loss": 0.0243, + "step": 9270 + }, + { + "grad_norm": 0.10572965443134308, + "learning_rate": 9.950025054902348e-05, + "loss": 0.0241, + "step": 9280 + }, + { + "grad_norm": 0.17651818692684174, + "learning_rate": 9.949791591775706e-05, + "loss": 0.0294, + "step": 9290 + }, + { + "grad_norm": 0.1406458020210266, + "learning_rate": 9.949557587347298e-05, + "loss": 0.0274, + "step": 9300 + }, + { + "grad_norm": 0.1346500664949417, + "learning_rate": 9.949323041642713e-05, + "loss": 0.0252, + "step": 9310 + }, + { + "grad_norm": 0.12250657379627228, + "learning_rate": 9.949087954687602e-05, + "loss": 0.0249, + "step": 9320 + }, + { + "grad_norm": 0.16565348207950592, + "learning_rate": 9.948852326507672e-05, + "loss": 0.0241, + "step": 9330 + }, + { + "grad_norm": 0.15738365054130554, + "learning_rate": 9.948616157128694e-05, + "loss": 0.0233, + "step": 9340 + }, + { + "grad_norm": 0.11947453022003174, + "learning_rate": 9.948379446576493e-05, + "loss": 0.0221, + "step": 9350 + }, + { + "grad_norm": 0.1058434247970581, + "learning_rate": 9.948142194876952e-05, + "loss": 0.0245, + "step": 9360 + }, + { + "grad_norm": 0.11502014845609665, + "learning_rate": 9.947904402056024e-05, + "loss": 0.0238, + "step": 9370 + }, + { + "grad_norm": 0.1770431101322174, + "learning_rate": 9.947666068139708e-05, + "loss": 0.0218, + "step": 9380 + }, + { + "grad_norm": 0.15386459231376648, + "learning_rate": 9.947427193154071e-05, + "loss": 0.025, + "step": 9390 + }, + { + "grad_norm": 0.14441047608852386, + "learning_rate": 9.947187777125233e-05, + "loss": 0.025, + "step": 9400 + }, + { + "grad_norm": 0.1526997983455658, + "learning_rate": 9.946947820079377e-05, + "loss": 0.0235, + "step": 9410 + }, + { + "grad_norm": 0.0885559692978859, + "learning_rate": 9.946707322042747e-05, + "loss": 0.0209, + "step": 9420 + }, + { + "grad_norm": 0.14175209403038025, + "learning_rate": 9.94646628304164e-05, + "loss": 0.0239, + "step": 9430 + }, + { + "grad_norm": 0.12171471863985062, + "learning_rate": 9.946224703102418e-05, + "loss": 0.0252, + "step": 9440 + }, + { + "grad_norm": 0.16582556068897247, + "learning_rate": 9.945982582251498e-05, + "loss": 0.024, + "step": 9450 + }, + { + "grad_norm": 0.12112084776163101, + "learning_rate": 9.94573992051536e-05, + "loss": 0.0265, + "step": 9460 + }, + { + "grad_norm": 0.1673687845468521, + "learning_rate": 9.94549671792054e-05, + "loss": 0.0247, + "step": 9470 + }, + { + "grad_norm": 0.12104880809783936, + "learning_rate": 9.945252974493635e-05, + "loss": 0.025, + "step": 9480 + }, + { + "grad_norm": 0.1857769936323166, + "learning_rate": 9.9450086902613e-05, + "loss": 0.0248, + "step": 9490 + }, + { + "grad_norm": 0.12937363982200623, + "learning_rate": 9.944763865250248e-05, + "loss": 0.0237, + "step": 9500 + }, + { + "grad_norm": 0.1303400844335556, + "learning_rate": 9.944518499487254e-05, + "loss": 0.0226, + "step": 9510 + }, + { + "grad_norm": 0.13551320135593414, + "learning_rate": 9.944272592999151e-05, + "loss": 0.024, + "step": 9520 + }, + { + "grad_norm": 0.12407840043306351, + "learning_rate": 9.94402614581283e-05, + "loss": 0.0243, + "step": 9530 + }, + { + "grad_norm": 0.19571803510189056, + "learning_rate": 9.943779157955244e-05, + "loss": 0.0266, + "step": 9540 + }, + { + "grad_norm": 0.18422141671180725, + "learning_rate": 9.943531629453403e-05, + "loss": 0.0271, + "step": 9550 + }, + { + "grad_norm": 0.1153038963675499, + "learning_rate": 9.943283560334375e-05, + "loss": 0.0235, + "step": 9560 + }, + { + "grad_norm": 0.12982219457626343, + "learning_rate": 9.943034950625288e-05, + "loss": 0.0261, + "step": 9570 + }, + { + "grad_norm": 0.16142192482948303, + "learning_rate": 9.942785800353332e-05, + "loss": 0.0238, + "step": 9580 + }, + { + "grad_norm": 0.15784525871276855, + "learning_rate": 9.942536109545751e-05, + "loss": 0.0219, + "step": 9590 + }, + { + "grad_norm": 0.14936943352222443, + "learning_rate": 9.942285878229853e-05, + "loss": 0.0256, + "step": 9600 + }, + { + "grad_norm": 0.12934520840644836, + "learning_rate": 9.942035106433001e-05, + "loss": 0.0262, + "step": 9610 + }, + { + "grad_norm": 0.15121611952781677, + "learning_rate": 9.94178379418262e-05, + "loss": 0.0217, + "step": 9620 + }, + { + "grad_norm": 0.2659941017627716, + "learning_rate": 9.941531941506194e-05, + "loss": 0.0242, + "step": 9630 + }, + { + "grad_norm": 0.09359481930732727, + "learning_rate": 9.941279548431263e-05, + "loss": 0.0237, + "step": 9640 + }, + { + "grad_norm": 0.13586777448654175, + "learning_rate": 9.941026614985431e-05, + "loss": 0.0248, + "step": 9650 + }, + { + "grad_norm": 0.1573447287082672, + "learning_rate": 9.940773141196357e-05, + "loss": 0.0224, + "step": 9660 + }, + { + "grad_norm": 0.09598958492279053, + "learning_rate": 9.94051912709176e-05, + "loss": 0.0221, + "step": 9670 + }, + { + "grad_norm": 0.11014977097511292, + "learning_rate": 9.940264572699421e-05, + "loss": 0.0233, + "step": 9680 + }, + { + "grad_norm": 0.12950769066810608, + "learning_rate": 9.940009478047174e-05, + "loss": 0.0236, + "step": 9690 + }, + { + "grad_norm": 0.17299066483974457, + "learning_rate": 9.939753843162918e-05, + "loss": 0.0229, + "step": 9700 + }, + { + "grad_norm": 0.12348958849906921, + "learning_rate": 9.939497668074609e-05, + "loss": 0.022, + "step": 9710 + }, + { + "grad_norm": 0.1434316337108612, + "learning_rate": 9.93924095281026e-05, + "loss": 0.0226, + "step": 9720 + }, + { + "grad_norm": 0.13191094994544983, + "learning_rate": 9.938983697397948e-05, + "loss": 0.0229, + "step": 9730 + }, + { + "grad_norm": 0.09415646642446518, + "learning_rate": 9.938725901865805e-05, + "loss": 0.0214, + "step": 9740 + }, + { + "grad_norm": 0.14485302567481995, + "learning_rate": 9.93846756624202e-05, + "loss": 0.0213, + "step": 9750 + }, + { + "grad_norm": 0.0772198736667633, + "learning_rate": 9.938208690554849e-05, + "loss": 0.024, + "step": 9760 + }, + { + "grad_norm": 0.1275121122598648, + "learning_rate": 9.9379492748326e-05, + "loss": 0.0251, + "step": 9770 + }, + { + "grad_norm": 0.09929405897855759, + "learning_rate": 9.937689319103641e-05, + "loss": 0.0214, + "step": 9780 + }, + { + "grad_norm": 0.15368066728115082, + "learning_rate": 9.937428823396404e-05, + "loss": 0.0244, + "step": 9790 + }, + { + "grad_norm": 0.09524477273225784, + "learning_rate": 9.937167787739372e-05, + "loss": 0.0197, + "step": 9800 + }, + { + "grad_norm": 0.13655465841293335, + "learning_rate": 9.936906212161095e-05, + "loss": 0.0223, + "step": 9810 + }, + { + "grad_norm": 0.14055487513542175, + "learning_rate": 9.936644096690176e-05, + "loss": 0.0246, + "step": 9820 + }, + { + "grad_norm": 0.1737348884344101, + "learning_rate": 9.936381441355282e-05, + "loss": 0.0194, + "step": 9830 + }, + { + "grad_norm": 0.20234528183937073, + "learning_rate": 9.936118246185136e-05, + "loss": 0.0249, + "step": 9840 + }, + { + "grad_norm": 0.13557766377925873, + "learning_rate": 9.935854511208518e-05, + "loss": 0.0243, + "step": 9850 + }, + { + "grad_norm": 0.08751602470874786, + "learning_rate": 9.935590236454272e-05, + "loss": 0.0206, + "step": 9860 + }, + { + "grad_norm": 0.11708709597587585, + "learning_rate": 9.935325421951298e-05, + "loss": 0.023, + "step": 9870 + }, + { + "grad_norm": 0.11453195661306381, + "learning_rate": 9.935060067728557e-05, + "loss": 0.0238, + "step": 9880 + }, + { + "grad_norm": 0.16880561411380768, + "learning_rate": 9.934794173815067e-05, + "loss": 0.0225, + "step": 9890 + }, + { + "grad_norm": 0.203630730509758, + "learning_rate": 9.934527740239906e-05, + "loss": 0.0203, + "step": 9900 + }, + { + "grad_norm": 0.17656277120113373, + "learning_rate": 9.934260767032209e-05, + "loss": 0.0192, + "step": 9910 + }, + { + "grad_norm": 0.1608324497938156, + "learning_rate": 9.933993254221172e-05, + "loss": 0.0228, + "step": 9920 + }, + { + "grad_norm": 0.08388954401016235, + "learning_rate": 9.933725201836053e-05, + "loss": 0.0188, + "step": 9930 + }, + { + "grad_norm": 0.12987518310546875, + "learning_rate": 9.933456609906162e-05, + "loss": 0.0239, + "step": 9940 + }, + { + "grad_norm": 0.14508096873760223, + "learning_rate": 9.933187478460875e-05, + "loss": 0.0215, + "step": 9950 + }, + { + "grad_norm": 0.13526523113250732, + "learning_rate": 9.93291780752962e-05, + "loss": 0.0207, + "step": 9960 + }, + { + "grad_norm": 0.1139252558350563, + "learning_rate": 9.932647597141893e-05, + "loss": 0.0194, + "step": 9970 + }, + { + "grad_norm": 0.11842416971921921, + "learning_rate": 9.932376847327239e-05, + "loss": 0.0245, + "step": 9980 + }, + { + "grad_norm": 0.12512986361980438, + "learning_rate": 9.932105558115268e-05, + "loss": 0.024, + "step": 9990 + }, + { + "grad_norm": 0.1664276123046875, + "learning_rate": 9.931833729535651e-05, + "loss": 0.02, + "step": 10000 + }, + { + "grad_norm": 0.10673431307077408, + "learning_rate": 9.931561361618111e-05, + "loss": 0.0233, + "step": 10010 + }, + { + "grad_norm": 0.1305750459432602, + "learning_rate": 9.931288454392435e-05, + "loss": 0.0205, + "step": 10020 + }, + { + "grad_norm": 0.1553530991077423, + "learning_rate": 9.931015007888467e-05, + "loss": 0.0219, + "step": 10030 + }, + { + "grad_norm": 0.1351170390844345, + "learning_rate": 9.930741022136112e-05, + "loss": 0.0212, + "step": 10040 + }, + { + "grad_norm": 0.14922448992729187, + "learning_rate": 9.930466497165333e-05, + "loss": 0.025, + "step": 10050 + }, + { + "grad_norm": 0.1253233551979065, + "learning_rate": 9.93019143300615e-05, + "loss": 0.0236, + "step": 10060 + }, + { + "grad_norm": 0.11356404423713684, + "learning_rate": 9.929915829688644e-05, + "loss": 0.0206, + "step": 10070 + }, + { + "grad_norm": 0.12487953901290894, + "learning_rate": 9.929639687242955e-05, + "loss": 0.0217, + "step": 10080 + }, + { + "grad_norm": 0.12445942312479019, + "learning_rate": 9.929363005699281e-05, + "loss": 0.0182, + "step": 10090 + }, + { + "grad_norm": 0.1228921040892601, + "learning_rate": 9.92908578508788e-05, + "loss": 0.0211, + "step": 10100 + }, + { + "grad_norm": 0.1131834164261818, + "learning_rate": 9.928808025439069e-05, + "loss": 0.0227, + "step": 10110 + }, + { + "grad_norm": 0.11593768745660782, + "learning_rate": 9.928529726783223e-05, + "loss": 0.0214, + "step": 10120 + }, + { + "grad_norm": 0.14002355933189392, + "learning_rate": 9.928250889150774e-05, + "loss": 0.0228, + "step": 10130 + }, + { + "grad_norm": 0.1002628430724144, + "learning_rate": 9.92797151257222e-05, + "loss": 0.0212, + "step": 10140 + }, + { + "grad_norm": 0.14436431229114532, + "learning_rate": 9.927691597078108e-05, + "loss": 0.0227, + "step": 10150 + }, + { + "grad_norm": 0.10643894225358963, + "learning_rate": 9.927411142699053e-05, + "loss": 0.021, + "step": 10160 + }, + { + "grad_norm": 0.10424897074699402, + "learning_rate": 9.927130149465725e-05, + "loss": 0.0222, + "step": 10170 + }, + { + "grad_norm": 0.12039266526699066, + "learning_rate": 9.92684861740885e-05, + "loss": 0.024, + "step": 10180 + }, + { + "grad_norm": 0.1570591777563095, + "learning_rate": 9.926566546559217e-05, + "loss": 0.0224, + "step": 10190 + }, + { + "grad_norm": 0.10663271695375443, + "learning_rate": 9.926283936947673e-05, + "loss": 0.0224, + "step": 10200 + }, + { + "grad_norm": 0.09974405914545059, + "learning_rate": 9.926000788605126e-05, + "loss": 0.0257, + "step": 10210 + }, + { + "grad_norm": 0.11634522676467896, + "learning_rate": 9.92571710156254e-05, + "loss": 0.021, + "step": 10220 + }, + { + "grad_norm": 0.1604422926902771, + "learning_rate": 9.925432875850936e-05, + "loss": 0.0248, + "step": 10230 + }, + { + "grad_norm": 0.16627256572246552, + "learning_rate": 9.925148111501396e-05, + "loss": 0.0221, + "step": 10240 + }, + { + "grad_norm": 0.11862243711948395, + "learning_rate": 9.924862808545066e-05, + "loss": 0.023, + "step": 10250 + }, + { + "grad_norm": 0.10307101160287857, + "learning_rate": 9.924576967013141e-05, + "loss": 0.0217, + "step": 10260 + }, + { + "grad_norm": 0.14203718304634094, + "learning_rate": 9.924290586936887e-05, + "loss": 0.0207, + "step": 10270 + }, + { + "grad_norm": 0.11466484516859055, + "learning_rate": 9.924003668347614e-05, + "loss": 0.0248, + "step": 10280 + }, + { + "grad_norm": 0.12790977954864502, + "learning_rate": 9.923716211276704e-05, + "loss": 0.0218, + "step": 10290 + }, + { + "grad_norm": 0.13151828944683075, + "learning_rate": 9.923428215755594e-05, + "loss": 0.0217, + "step": 10300 + }, + { + "grad_norm": 0.12300531566143036, + "learning_rate": 9.923139681815775e-05, + "loss": 0.0204, + "step": 10310 + }, + { + "grad_norm": 0.1359768509864807, + "learning_rate": 9.922850609488801e-05, + "loss": 0.0225, + "step": 10320 + }, + { + "grad_norm": 0.13186870515346527, + "learning_rate": 9.922560998806287e-05, + "loss": 0.0187, + "step": 10330 + }, + { + "grad_norm": 0.14656752347946167, + "learning_rate": 9.922270849799905e-05, + "loss": 0.0245, + "step": 10340 + }, + { + "grad_norm": 0.11943362653255463, + "learning_rate": 9.92198016250138e-05, + "loss": 0.0216, + "step": 10350 + }, + { + "grad_norm": 0.12179002165794373, + "learning_rate": 9.921688936942506e-05, + "loss": 0.0235, + "step": 10360 + }, + { + "grad_norm": 0.15333302319049835, + "learning_rate": 9.921397173155129e-05, + "loss": 0.0204, + "step": 10370 + }, + { + "grad_norm": 0.17273977398872375, + "learning_rate": 9.921104871171157e-05, + "loss": 0.0217, + "step": 10380 + }, + { + "grad_norm": 0.16344094276428223, + "learning_rate": 9.920812031022554e-05, + "loss": 0.0225, + "step": 10390 + }, + { + "grad_norm": 0.19978109002113342, + "learning_rate": 9.920518652741348e-05, + "loss": 0.0236, + "step": 10400 + }, + { + "grad_norm": 0.14070387184619904, + "learning_rate": 9.920224736359618e-05, + "loss": 0.0232, + "step": 10410 + }, + { + "grad_norm": 0.11902616918087006, + "learning_rate": 9.91993028190951e-05, + "loss": 0.0198, + "step": 10420 + }, + { + "grad_norm": 0.1139720156788826, + "learning_rate": 9.919635289423222e-05, + "loss": 0.0204, + "step": 10430 + }, + { + "grad_norm": 0.11003448814153671, + "learning_rate": 9.919339758933015e-05, + "loss": 0.0231, + "step": 10440 + }, + { + "grad_norm": 0.0800674557685852, + "learning_rate": 9.919043690471209e-05, + "loss": 0.0208, + "step": 10450 + }, + { + "grad_norm": 0.10463476181030273, + "learning_rate": 9.91874708407018e-05, + "loss": 0.0205, + "step": 10460 + }, + { + "grad_norm": 0.11197946220636368, + "learning_rate": 9.918449939762367e-05, + "loss": 0.022, + "step": 10470 + }, + { + "grad_norm": 0.14770038425922394, + "learning_rate": 9.91815225758026e-05, + "loss": 0.0216, + "step": 10480 + }, + { + "grad_norm": 0.11716348677873611, + "learning_rate": 9.917854037556419e-05, + "loss": 0.022, + "step": 10490 + }, + { + "grad_norm": 0.10649168491363525, + "learning_rate": 9.917555279723454e-05, + "loss": 0.0208, + "step": 10500 + }, + { + "grad_norm": 0.10381058603525162, + "learning_rate": 9.917255984114036e-05, + "loss": 0.021, + "step": 10510 + }, + { + "grad_norm": 0.15530072152614594, + "learning_rate": 9.916956150760896e-05, + "loss": 0.0208, + "step": 10520 + }, + { + "grad_norm": 0.12792445719242096, + "learning_rate": 9.916655779696826e-05, + "loss": 0.0202, + "step": 10530 + }, + { + "grad_norm": 0.1383054107427597, + "learning_rate": 9.916354870954671e-05, + "loss": 0.0227, + "step": 10540 + }, + { + "grad_norm": 0.14764830470085144, + "learning_rate": 9.91605342456734e-05, + "loss": 0.0222, + "step": 10550 + }, + { + "grad_norm": 0.12677894532680511, + "learning_rate": 9.915751440567795e-05, + "loss": 0.0209, + "step": 10560 + }, + { + "grad_norm": 0.1268938183784485, + "learning_rate": 9.915448918989066e-05, + "loss": 0.0226, + "step": 10570 + }, + { + "grad_norm": 0.1265874207019806, + "learning_rate": 9.915145859864232e-05, + "loss": 0.0221, + "step": 10580 + }, + { + "grad_norm": 0.1203022301197052, + "learning_rate": 9.914842263226437e-05, + "loss": 0.0197, + "step": 10590 + }, + { + "grad_norm": 0.0795116126537323, + "learning_rate": 9.914538129108882e-05, + "loss": 0.0214, + "step": 10600 + }, + { + "grad_norm": 0.10579085350036621, + "learning_rate": 9.914233457544825e-05, + "loss": 0.0238, + "step": 10610 + }, + { + "grad_norm": 0.12006451934576035, + "learning_rate": 9.913928248567586e-05, + "loss": 0.023, + "step": 10620 + }, + { + "grad_norm": 0.09657932817935944, + "learning_rate": 9.913622502210542e-05, + "loss": 0.0223, + "step": 10630 + }, + { + "grad_norm": 0.10639280825853348, + "learning_rate": 9.913316218507128e-05, + "loss": 0.0213, + "step": 10640 + }, + { + "grad_norm": 0.13762274384498596, + "learning_rate": 9.91300939749084e-05, + "loss": 0.0195, + "step": 10650 + }, + { + "grad_norm": 0.14038395881652832, + "learning_rate": 9.91270203919523e-05, + "loss": 0.0249, + "step": 10660 + }, + { + "grad_norm": 0.11768355220556259, + "learning_rate": 9.912394143653912e-05, + "loss": 0.0184, + "step": 10670 + }, + { + "grad_norm": 0.122138611972332, + "learning_rate": 9.912085710900555e-05, + "loss": 0.0217, + "step": 10680 + }, + { + "grad_norm": 0.1180528849363327, + "learning_rate": 9.911776740968892e-05, + "loss": 0.0228, + "step": 10690 + }, + { + "grad_norm": 0.11318353563547134, + "learning_rate": 9.911467233892709e-05, + "loss": 0.0202, + "step": 10700 + }, + { + "grad_norm": 0.11388374865055084, + "learning_rate": 9.911157189705853e-05, + "loss": 0.0197, + "step": 10710 + }, + { + "grad_norm": 0.18323886394500732, + "learning_rate": 9.910846608442229e-05, + "loss": 0.0226, + "step": 10720 + }, + { + "grad_norm": 0.1650165468454361, + "learning_rate": 9.910535490135805e-05, + "loss": 0.0225, + "step": 10730 + }, + { + "grad_norm": 0.1762736588716507, + "learning_rate": 9.910223834820603e-05, + "loss": 0.0234, + "step": 10740 + }, + { + "grad_norm": 0.10477007180452347, + "learning_rate": 9.909911642530703e-05, + "loss": 0.0239, + "step": 10750 + }, + { + "grad_norm": 0.08356012403964996, + "learning_rate": 9.909598913300249e-05, + "loss": 0.0208, + "step": 10760 + }, + { + "grad_norm": 0.11125002056360245, + "learning_rate": 9.909285647163438e-05, + "loss": 0.0198, + "step": 10770 + }, + { + "grad_norm": 0.09904006123542786, + "learning_rate": 9.908971844154531e-05, + "loss": 0.0227, + "step": 10780 + }, + { + "grad_norm": 0.15102224051952362, + "learning_rate": 9.908657504307843e-05, + "loss": 0.0217, + "step": 10790 + }, + { + "grad_norm": 0.13168089091777802, + "learning_rate": 9.908342627657751e-05, + "loss": 0.021, + "step": 10800 + }, + { + "grad_norm": 0.19224101305007935, + "learning_rate": 9.908027214238689e-05, + "loss": 0.0229, + "step": 10810 + }, + { + "grad_norm": 0.15523391962051392, + "learning_rate": 9.90771126408515e-05, + "loss": 0.0203, + "step": 10820 + }, + { + "grad_norm": 0.1200302466750145, + "learning_rate": 9.907394777231685e-05, + "loss": 0.021, + "step": 10830 + }, + { + "grad_norm": 0.11374656111001968, + "learning_rate": 9.907077753712905e-05, + "loss": 0.0233, + "step": 10840 + }, + { + "grad_norm": 0.10197438299655914, + "learning_rate": 9.906760193563482e-05, + "loss": 0.0211, + "step": 10850 + }, + { + "grad_norm": 0.0931377112865448, + "learning_rate": 9.906442096818139e-05, + "loss": 0.023, + "step": 10860 + }, + { + "grad_norm": 0.15956498682498932, + "learning_rate": 9.906123463511665e-05, + "loss": 0.0226, + "step": 10870 + }, + { + "grad_norm": 0.12862536311149597, + "learning_rate": 9.905804293678907e-05, + "loss": 0.0221, + "step": 10880 + }, + { + "grad_norm": 0.09745146334171295, + "learning_rate": 9.905484587354766e-05, + "loss": 0.0183, + "step": 10890 + }, + { + "grad_norm": 0.11357780545949936, + "learning_rate": 9.905164344574205e-05, + "loss": 0.0198, + "step": 10900 + }, + { + "grad_norm": 0.13641878962516785, + "learning_rate": 9.904843565372248e-05, + "loss": 0.02, + "step": 10910 + }, + { + "grad_norm": 0.11867544054985046, + "learning_rate": 9.904522249783972e-05, + "loss": 0.0229, + "step": 10920 + }, + { + "grad_norm": 0.08847283571958542, + "learning_rate": 9.904200397844517e-05, + "loss": 0.0206, + "step": 10930 + }, + { + "grad_norm": 0.1016751229763031, + "learning_rate": 9.903878009589078e-05, + "loss": 0.021, + "step": 10940 + }, + { + "grad_norm": 0.09040387719869614, + "learning_rate": 9.903555085052915e-05, + "loss": 0.0219, + "step": 10950 + }, + { + "grad_norm": 0.09382423758506775, + "learning_rate": 9.903231624271338e-05, + "loss": 0.0218, + "step": 10960 + }, + { + "grad_norm": 0.15218591690063477, + "learning_rate": 9.902907627279724e-05, + "loss": 0.0212, + "step": 10970 + }, + { + "grad_norm": 0.09057074785232544, + "learning_rate": 9.902583094113504e-05, + "loss": 0.0199, + "step": 10980 + }, + { + "grad_norm": 0.08328817784786224, + "learning_rate": 9.902258024808168e-05, + "loss": 0.0219, + "step": 10990 + }, + { + "grad_norm": 0.09239186346530914, + "learning_rate": 9.901932419399264e-05, + "loss": 0.0233, + "step": 11000 + }, + { + "grad_norm": 0.13061200082302094, + "learning_rate": 9.9016062779224e-05, + "loss": 0.0209, + "step": 11010 + }, + { + "grad_norm": 0.1136040911078453, + "learning_rate": 9.901279600413242e-05, + "loss": 0.0196, + "step": 11020 + }, + { + "grad_norm": 0.2035122662782669, + "learning_rate": 9.900952386907518e-05, + "loss": 0.0212, + "step": 11030 + }, + { + "grad_norm": 0.1309998333454132, + "learning_rate": 9.90062463744101e-05, + "loss": 0.0197, + "step": 11040 + }, + { + "grad_norm": 0.13750676810741425, + "learning_rate": 9.900296352049558e-05, + "loss": 0.0195, + "step": 11050 + }, + { + "grad_norm": 0.12164152413606644, + "learning_rate": 9.899967530769065e-05, + "loss": 0.0238, + "step": 11060 + }, + { + "grad_norm": 0.10340868681669235, + "learning_rate": 9.899638173635489e-05, + "loss": 0.0204, + "step": 11070 + }, + { + "grad_norm": 0.12157773226499557, + "learning_rate": 9.899308280684849e-05, + "loss": 0.0186, + "step": 11080 + }, + { + "grad_norm": 0.12674227356910706, + "learning_rate": 9.898977851953222e-05, + "loss": 0.0181, + "step": 11090 + }, + { + "grad_norm": 0.10025876760482788, + "learning_rate": 9.898646887476741e-05, + "loss": 0.0214, + "step": 11100 + }, + { + "grad_norm": 0.07498561590909958, + "learning_rate": 9.898315387291603e-05, + "loss": 0.0202, + "step": 11110 + }, + { + "grad_norm": 0.13848835229873657, + "learning_rate": 9.89798335143406e-05, + "loss": 0.0186, + "step": 11120 + }, + { + "grad_norm": 0.15635430812835693, + "learning_rate": 9.897650779940419e-05, + "loss": 0.0224, + "step": 11130 + }, + { + "grad_norm": 0.15009944140911102, + "learning_rate": 9.897317672847054e-05, + "loss": 0.021, + "step": 11140 + }, + { + "grad_norm": 0.09135011583566666, + "learning_rate": 9.89698403019039e-05, + "loss": 0.0182, + "step": 11150 + }, + { + "grad_norm": 0.09327132254838943, + "learning_rate": 9.896649852006917e-05, + "loss": 0.0201, + "step": 11160 + }, + { + "grad_norm": 0.09337292611598969, + "learning_rate": 9.896315138333177e-05, + "loss": 0.0218, + "step": 11170 + }, + { + "grad_norm": 0.12572547793388367, + "learning_rate": 9.895979889205774e-05, + "loss": 0.0221, + "step": 11180 + }, + { + "grad_norm": 0.11508753895759583, + "learning_rate": 9.895644104661372e-05, + "loss": 0.018, + "step": 11190 + }, + { + "grad_norm": 0.09043750166893005, + "learning_rate": 9.895307784736691e-05, + "loss": 0.0188, + "step": 11200 + }, + { + "grad_norm": 0.10829249769449234, + "learning_rate": 9.894970929468512e-05, + "loss": 0.0215, + "step": 11210 + }, + { + "grad_norm": 0.08424866944551468, + "learning_rate": 9.89463353889367e-05, + "loss": 0.0189, + "step": 11220 + }, + { + "grad_norm": 0.09133709222078323, + "learning_rate": 9.894295613049065e-05, + "loss": 0.0192, + "step": 11230 + }, + { + "grad_norm": 0.17868784070014954, + "learning_rate": 9.893957151971649e-05, + "loss": 0.0208, + "step": 11240 + }, + { + "grad_norm": 0.09498214721679688, + "learning_rate": 9.893618155698436e-05, + "loss": 0.018, + "step": 11250 + }, + { + "grad_norm": 0.08039020001888275, + "learning_rate": 9.8932786242665e-05, + "loss": 0.0166, + "step": 11260 + }, + { + "grad_norm": 0.07393798977136612, + "learning_rate": 9.89293855771297e-05, + "loss": 0.0173, + "step": 11270 + }, + { + "grad_norm": 0.11388009041547775, + "learning_rate": 9.892597956075036e-05, + "loss": 0.0192, + "step": 11280 + }, + { + "grad_norm": 0.09853452444076538, + "learning_rate": 9.892256819389947e-05, + "loss": 0.0194, + "step": 11290 + }, + { + "grad_norm": 0.13407835364341736, + "learning_rate": 9.891915147695006e-05, + "loss": 0.0214, + "step": 11300 + }, + { + "grad_norm": 0.13082093000411987, + "learning_rate": 9.891572941027577e-05, + "loss": 0.0199, + "step": 11310 + }, + { + "grad_norm": 0.08540236204862595, + "learning_rate": 9.89123019942509e-05, + "loss": 0.0174, + "step": 11320 + }, + { + "grad_norm": 0.14373044669628143, + "learning_rate": 9.89088692292502e-05, + "loss": 0.0227, + "step": 11330 + }, + { + "grad_norm": 0.09848491847515106, + "learning_rate": 9.89054311156491e-05, + "loss": 0.0183, + "step": 11340 + }, + { + "grad_norm": 0.10322052240371704, + "learning_rate": 9.890198765382357e-05, + "loss": 0.0197, + "step": 11350 + }, + { + "grad_norm": 0.09606979042291641, + "learning_rate": 9.889853884415021e-05, + "loss": 0.0193, + "step": 11360 + }, + { + "grad_norm": 0.11560884118080139, + "learning_rate": 9.889508468700614e-05, + "loss": 0.0214, + "step": 11370 + }, + { + "grad_norm": 0.16283945739269257, + "learning_rate": 9.889162518276915e-05, + "loss": 0.0211, + "step": 11380 + }, + { + "grad_norm": 0.11689665913581848, + "learning_rate": 9.888816033181752e-05, + "loss": 0.0222, + "step": 11390 + }, + { + "grad_norm": 0.11007826775312424, + "learning_rate": 9.888469013453018e-05, + "loss": 0.0216, + "step": 11400 + }, + { + "grad_norm": 0.1319931596517563, + "learning_rate": 9.888121459128663e-05, + "loss": 0.0205, + "step": 11410 + }, + { + "grad_norm": 0.14042186737060547, + "learning_rate": 9.887773370246693e-05, + "loss": 0.0215, + "step": 11420 + }, + { + "grad_norm": 0.10286351293325424, + "learning_rate": 9.887424746845177e-05, + "loss": 0.0176, + "step": 11430 + }, + { + "grad_norm": 0.10235398262739182, + "learning_rate": 9.887075588962239e-05, + "loss": 0.0186, + "step": 11440 + }, + { + "grad_norm": 0.12303323298692703, + "learning_rate": 9.88672589663606e-05, + "loss": 0.0223, + "step": 11450 + }, + { + "grad_norm": 0.09070851653814316, + "learning_rate": 9.886375669904886e-05, + "loss": 0.0193, + "step": 11460 + }, + { + "grad_norm": 0.12476318329572678, + "learning_rate": 9.886024908807014e-05, + "loss": 0.0192, + "step": 11470 + }, + { + "grad_norm": 0.1196754202246666, + "learning_rate": 9.885673613380806e-05, + "loss": 0.0176, + "step": 11480 + }, + { + "grad_norm": 0.0946112722158432, + "learning_rate": 9.885321783664676e-05, + "loss": 0.0184, + "step": 11490 + }, + { + "grad_norm": 0.07786524295806885, + "learning_rate": 9.884969419697101e-05, + "loss": 0.0176, + "step": 11500 + }, + { + "grad_norm": 0.1152561604976654, + "learning_rate": 9.884616521516614e-05, + "loss": 0.0221, + "step": 11510 + }, + { + "grad_norm": 0.14086756110191345, + "learning_rate": 9.88426308916181e-05, + "loss": 0.0189, + "step": 11520 + }, + { + "grad_norm": 0.12005932629108429, + "learning_rate": 9.883909122671335e-05, + "loss": 0.0156, + "step": 11530 + }, + { + "grad_norm": 0.12720711529254913, + "learning_rate": 9.883554622083904e-05, + "loss": 0.018, + "step": 11540 + }, + { + "grad_norm": 0.12153167277574539, + "learning_rate": 9.88319958743828e-05, + "loss": 0.0208, + "step": 11550 + }, + { + "grad_norm": 0.13079753518104553, + "learning_rate": 9.882844018773291e-05, + "loss": 0.0189, + "step": 11560 + }, + { + "grad_norm": 0.1598237305879593, + "learning_rate": 9.882487916127823e-05, + "loss": 0.0216, + "step": 11570 + }, + { + "grad_norm": 0.07229523360729218, + "learning_rate": 9.882131279540815e-05, + "loss": 0.0166, + "step": 11580 + }, + { + "grad_norm": 0.09569326788187027, + "learning_rate": 9.881774109051271e-05, + "loss": 0.0183, + "step": 11590 + }, + { + "grad_norm": 0.09865188598632812, + "learning_rate": 9.881416404698252e-05, + "loss": 0.0202, + "step": 11600 + }, + { + "grad_norm": 0.08559571206569672, + "learning_rate": 9.881058166520873e-05, + "loss": 0.0196, + "step": 11610 + }, + { + "grad_norm": 0.1027107909321785, + "learning_rate": 9.880699394558311e-05, + "loss": 0.0186, + "step": 11620 + }, + { + "grad_norm": 0.12083937972784042, + "learning_rate": 9.880340088849801e-05, + "loss": 0.0198, + "step": 11630 + }, + { + "grad_norm": 0.09742484241724014, + "learning_rate": 9.879980249434637e-05, + "loss": 0.0224, + "step": 11640 + }, + { + "grad_norm": 0.09829592704772949, + "learning_rate": 9.879619876352168e-05, + "loss": 0.0215, + "step": 11650 + }, + { + "grad_norm": 0.14753198623657227, + "learning_rate": 9.879258969641809e-05, + "loss": 0.0174, + "step": 11660 + }, + { + "grad_norm": 0.15937405824661255, + "learning_rate": 9.878897529343023e-05, + "loss": 0.019, + "step": 11670 + }, + { + "grad_norm": 0.11583585292100906, + "learning_rate": 9.878535555495338e-05, + "loss": 0.0198, + "step": 11680 + }, + { + "grad_norm": 0.08946330845355988, + "learning_rate": 9.87817304813834e-05, + "loss": 0.0216, + "step": 11690 + }, + { + "grad_norm": 0.1246834248304367, + "learning_rate": 9.877810007311671e-05, + "loss": 0.0174, + "step": 11700 + }, + { + "grad_norm": 0.20715773105621338, + "learning_rate": 9.877446433055035e-05, + "loss": 0.0217, + "step": 11710 + }, + { + "grad_norm": 0.10733963549137115, + "learning_rate": 9.877082325408191e-05, + "loss": 0.0173, + "step": 11720 + }, + { + "grad_norm": 0.15510877966880798, + "learning_rate": 9.876717684410954e-05, + "loss": 0.0206, + "step": 11730 + }, + { + "grad_norm": 0.10463812202215195, + "learning_rate": 9.876352510103204e-05, + "loss": 0.02, + "step": 11740 + }, + { + "grad_norm": 0.07919870316982269, + "learning_rate": 9.875986802524875e-05, + "loss": 0.0157, + "step": 11750 + }, + { + "grad_norm": 0.08622200042009354, + "learning_rate": 9.87562056171596e-05, + "loss": 0.0213, + "step": 11760 + }, + { + "grad_norm": 0.09116686135530472, + "learning_rate": 9.875253787716511e-05, + "loss": 0.0174, + "step": 11770 + }, + { + "grad_norm": 0.07351832836866379, + "learning_rate": 9.874886480566637e-05, + "loss": 0.0187, + "step": 11780 + }, + { + "grad_norm": 0.09157246351242065, + "learning_rate": 9.874518640306507e-05, + "loss": 0.0201, + "step": 11790 + }, + { + "grad_norm": 0.09250602126121521, + "learning_rate": 9.874150266976347e-05, + "loss": 0.0169, + "step": 11800 + }, + { + "grad_norm": 0.13948170840740204, + "learning_rate": 9.873781360616443e-05, + "loss": 0.0225, + "step": 11810 + }, + { + "grad_norm": 0.08863574266433716, + "learning_rate": 9.873411921267137e-05, + "loss": 0.016, + "step": 11820 + }, + { + "grad_norm": 0.1281464695930481, + "learning_rate": 9.873041948968829e-05, + "loss": 0.0196, + "step": 11830 + }, + { + "grad_norm": 0.08626741170883179, + "learning_rate": 9.872671443761981e-05, + "loss": 0.0197, + "step": 11840 + }, + { + "grad_norm": 0.11754148453474045, + "learning_rate": 9.872300405687109e-05, + "loss": 0.0187, + "step": 11850 + }, + { + "grad_norm": 0.13984814286231995, + "learning_rate": 9.871928834784792e-05, + "loss": 0.0182, + "step": 11860 + }, + { + "grad_norm": 0.1153414323925972, + "learning_rate": 9.871556731095661e-05, + "loss": 0.0208, + "step": 11870 + }, + { + "grad_norm": 0.09604552388191223, + "learning_rate": 9.871184094660411e-05, + "loss": 0.0182, + "step": 11880 + }, + { + "grad_norm": 0.10850073397159576, + "learning_rate": 9.870810925519791e-05, + "loss": 0.0192, + "step": 11890 + }, + { + "grad_norm": 0.12843932211399078, + "learning_rate": 9.870437223714612e-05, + "loss": 0.0196, + "step": 11900 + }, + { + "grad_norm": 0.08195850253105164, + "learning_rate": 9.87006298928574e-05, + "loss": 0.019, + "step": 11910 + }, + { + "grad_norm": 0.1476384401321411, + "learning_rate": 9.869688222274103e-05, + "loss": 0.0206, + "step": 11920 + }, + { + "grad_norm": 0.08777712285518646, + "learning_rate": 9.869312922720681e-05, + "loss": 0.02, + "step": 11930 + }, + { + "grad_norm": 0.20618024468421936, + "learning_rate": 9.868937090666521e-05, + "loss": 0.0216, + "step": 11940 + }, + { + "grad_norm": 0.13168221712112427, + "learning_rate": 9.86856072615272e-05, + "loss": 0.0228, + "step": 11950 + }, + { + "grad_norm": 0.10804498940706253, + "learning_rate": 9.868183829220438e-05, + "loss": 0.0185, + "step": 11960 + }, + { + "grad_norm": 0.06811357289552689, + "learning_rate": 9.867806399910893e-05, + "loss": 0.0167, + "step": 11970 + }, + { + "grad_norm": 0.08083131909370422, + "learning_rate": 9.867428438265356e-05, + "loss": 0.0196, + "step": 11980 + }, + { + "grad_norm": 0.14089320600032806, + "learning_rate": 9.867049944325165e-05, + "loss": 0.0193, + "step": 11990 + }, + { + "grad_norm": 0.0786733627319336, + "learning_rate": 9.86667091813171e-05, + "loss": 0.0185, + "step": 12000 + }, + { + "grad_norm": 0.07819323241710663, + "learning_rate": 9.866291359726438e-05, + "loss": 0.0222, + "step": 12010 + }, + { + "grad_norm": 0.0755460113286972, + "learning_rate": 9.865911269150861e-05, + "loss": 0.0186, + "step": 12020 + }, + { + "grad_norm": 0.11038484424352646, + "learning_rate": 9.865530646446544e-05, + "loss": 0.0178, + "step": 12030 + }, + { + "grad_norm": 0.07199062407016754, + "learning_rate": 9.86514949165511e-05, + "loss": 0.02, + "step": 12040 + }, + { + "grad_norm": 0.09368589520454407, + "learning_rate": 9.864767804818243e-05, + "loss": 0.0204, + "step": 12050 + }, + { + "grad_norm": 0.13598944246768951, + "learning_rate": 9.86438558597768e-05, + "loss": 0.0227, + "step": 12060 + }, + { + "grad_norm": 0.11829891055822372, + "learning_rate": 9.864002835175225e-05, + "loss": 0.0212, + "step": 12070 + }, + { + "grad_norm": 0.11646050214767456, + "learning_rate": 9.863619552452734e-05, + "loss": 0.0218, + "step": 12080 + }, + { + "grad_norm": 0.15102319419384003, + "learning_rate": 9.863235737852119e-05, + "loss": 0.0221, + "step": 12090 + }, + { + "grad_norm": 0.1501859724521637, + "learning_rate": 9.862851391415356e-05, + "loss": 0.0176, + "step": 12100 + }, + { + "grad_norm": 0.10747445374727249, + "learning_rate": 9.862466513184477e-05, + "loss": 0.02, + "step": 12110 + }, + { + "grad_norm": 0.10117769986391068, + "learning_rate": 9.86208110320157e-05, + "loss": 0.0183, + "step": 12120 + }, + { + "grad_norm": 0.143532395362854, + "learning_rate": 9.861695161508784e-05, + "loss": 0.0191, + "step": 12130 + }, + { + "grad_norm": 0.11663344502449036, + "learning_rate": 9.861308688148324e-05, + "loss": 0.0181, + "step": 12140 + }, + { + "grad_norm": 0.07059823721647263, + "learning_rate": 9.860921683162455e-05, + "loss": 0.0203, + "step": 12150 + }, + { + "grad_norm": 0.08052023500204086, + "learning_rate": 9.860534146593499e-05, + "loss": 0.02, + "step": 12160 + }, + { + "grad_norm": 0.09444302320480347, + "learning_rate": 9.860146078483836e-05, + "loss": 0.018, + "step": 12170 + }, + { + "grad_norm": 0.09100353717803955, + "learning_rate": 9.859757478875905e-05, + "loss": 0.0175, + "step": 12180 + }, + { + "grad_norm": 0.11720895767211914, + "learning_rate": 9.859368347812204e-05, + "loss": 0.0198, + "step": 12190 + }, + { + "grad_norm": 0.14124836027622223, + "learning_rate": 9.858978685335285e-05, + "loss": 0.0191, + "step": 12200 + }, + { + "grad_norm": 0.09017921984195709, + "learning_rate": 9.858588491487763e-05, + "loss": 0.0203, + "step": 12210 + }, + { + "grad_norm": 0.10629937797784805, + "learning_rate": 9.858197766312308e-05, + "loss": 0.0205, + "step": 12220 + }, + { + "grad_norm": 0.10510770976543427, + "learning_rate": 9.857806509851649e-05, + "loss": 0.0217, + "step": 12230 + }, + { + "grad_norm": 0.16974951326847076, + "learning_rate": 9.857414722148574e-05, + "loss": 0.0204, + "step": 12240 + }, + { + "grad_norm": 0.11237230151891708, + "learning_rate": 9.857022403245928e-05, + "loss": 0.0178, + "step": 12250 + }, + { + "grad_norm": 0.09872031956911087, + "learning_rate": 9.856629553186615e-05, + "loss": 0.0149, + "step": 12260 + }, + { + "grad_norm": 0.11979834735393524, + "learning_rate": 9.856236172013595e-05, + "loss": 0.0182, + "step": 12270 + }, + { + "grad_norm": 0.07640117406845093, + "learning_rate": 9.85584225976989e-05, + "loss": 0.0191, + "step": 12280 + }, + { + "grad_norm": 0.12731312215328217, + "learning_rate": 9.855447816498575e-05, + "loss": 0.0217, + "step": 12290 + }, + { + "grad_norm": 0.1324252337217331, + "learning_rate": 9.855052842242787e-05, + "loss": 0.0192, + "step": 12300 + }, + { + "grad_norm": 0.08743160218000412, + "learning_rate": 9.85465733704572e-05, + "loss": 0.0203, + "step": 12310 + }, + { + "grad_norm": 0.07232674211263657, + "learning_rate": 9.854261300950624e-05, + "loss": 0.0197, + "step": 12320 + }, + { + "grad_norm": 0.08698372542858124, + "learning_rate": 9.853864734000813e-05, + "loss": 0.0174, + "step": 12330 + }, + { + "grad_norm": 0.08428923040628433, + "learning_rate": 9.85346763623965e-05, + "loss": 0.0202, + "step": 12340 + }, + { + "grad_norm": 0.13523244857788086, + "learning_rate": 9.853070007710564e-05, + "loss": 0.0203, + "step": 12350 + }, + { + "grad_norm": 0.06215342506766319, + "learning_rate": 9.85267184845704e-05, + "loss": 0.0166, + "step": 12360 + }, + { + "grad_norm": 0.0990382730960846, + "learning_rate": 9.852273158522616e-05, + "loss": 0.0172, + "step": 12370 + }, + { + "grad_norm": 0.08788575977087021, + "learning_rate": 9.851873937950896e-05, + "loss": 0.0197, + "step": 12380 + }, + { + "grad_norm": 0.11520876735448837, + "learning_rate": 9.851474186785537e-05, + "loss": 0.0199, + "step": 12390 + }, + { + "grad_norm": 0.10402555763721466, + "learning_rate": 9.851073905070254e-05, + "loss": 0.021, + "step": 12400 + }, + { + "grad_norm": 0.1127438098192215, + "learning_rate": 9.850673092848824e-05, + "loss": 0.0187, + "step": 12410 + }, + { + "grad_norm": 0.1106114611029625, + "learning_rate": 9.850271750165077e-05, + "loss": 0.0184, + "step": 12420 + }, + { + "grad_norm": 0.1021503284573555, + "learning_rate": 9.849869877062902e-05, + "loss": 0.019, + "step": 12430 + }, + { + "grad_norm": 0.1442083865404129, + "learning_rate": 9.849467473586252e-05, + "loss": 0.02, + "step": 12440 + }, + { + "grad_norm": 0.11252303421497345, + "learning_rate": 9.849064539779127e-05, + "loss": 0.021, + "step": 12450 + }, + { + "grad_norm": 0.07130570709705353, + "learning_rate": 9.848661075685594e-05, + "loss": 0.021, + "step": 12460 + }, + { + "grad_norm": 0.13267651200294495, + "learning_rate": 9.848257081349778e-05, + "loss": 0.0168, + "step": 12470 + }, + { + "grad_norm": 0.10983357578516006, + "learning_rate": 9.847852556815856e-05, + "loss": 0.0184, + "step": 12480 + }, + { + "grad_norm": 0.0857045128941536, + "learning_rate": 9.847447502128067e-05, + "loss": 0.0171, + "step": 12490 + }, + { + "grad_norm": 0.11983425170183182, + "learning_rate": 9.847041917330708e-05, + "loss": 0.0183, + "step": 12500 + }, + { + "grad_norm": 0.11188527196645737, + "learning_rate": 9.846635802468132e-05, + "loss": 0.0172, + "step": 12510 + }, + { + "grad_norm": 0.11227057874202728, + "learning_rate": 9.84622915758475e-05, + "loss": 0.0183, + "step": 12520 + }, + { + "grad_norm": 0.1042749285697937, + "learning_rate": 9.845821982725034e-05, + "loss": 0.0178, + "step": 12530 + }, + { + "grad_norm": 0.10752243548631668, + "learning_rate": 9.845414277933514e-05, + "loss": 0.0163, + "step": 12540 + }, + { + "grad_norm": 0.1364673376083374, + "learning_rate": 9.845006043254771e-05, + "loss": 0.0195, + "step": 12550 + }, + { + "grad_norm": 0.11632810533046722, + "learning_rate": 9.844597278733451e-05, + "loss": 0.0168, + "step": 12560 + }, + { + "grad_norm": 0.08821900933980942, + "learning_rate": 9.844187984414259e-05, + "loss": 0.0189, + "step": 12570 + }, + { + "grad_norm": 0.09241683036088943, + "learning_rate": 9.84377816034195e-05, + "loss": 0.0167, + "step": 12580 + }, + { + "grad_norm": 0.06567702442407608, + "learning_rate": 9.843367806561345e-05, + "loss": 0.017, + "step": 12590 + }, + { + "grad_norm": 0.1358315795660019, + "learning_rate": 9.842956923117317e-05, + "loss": 0.0202, + "step": 12600 + }, + { + "grad_norm": 0.10001810640096664, + "learning_rate": 9.842545510054802e-05, + "loss": 0.0186, + "step": 12610 + }, + { + "grad_norm": 0.08340951055288315, + "learning_rate": 9.842133567418792e-05, + "loss": 0.0189, + "step": 12620 + }, + { + "grad_norm": 0.12579146027565002, + "learning_rate": 9.841721095254333e-05, + "loss": 0.0218, + "step": 12630 + }, + { + "grad_norm": 0.10321997851133347, + "learning_rate": 9.841308093606537e-05, + "loss": 0.0208, + "step": 12640 + }, + { + "grad_norm": 0.10321027040481567, + "learning_rate": 9.840894562520565e-05, + "loss": 0.019, + "step": 12650 + }, + { + "grad_norm": 0.08962968736886978, + "learning_rate": 9.840480502041642e-05, + "loss": 0.0174, + "step": 12660 + }, + { + "grad_norm": 0.10034719109535217, + "learning_rate": 9.840065912215049e-05, + "loss": 0.019, + "step": 12670 + }, + { + "grad_norm": 0.10972726345062256, + "learning_rate": 9.839650793086124e-05, + "loss": 0.0191, + "step": 12680 + }, + { + "grad_norm": 0.13006097078323364, + "learning_rate": 9.839235144700265e-05, + "loss": 0.0208, + "step": 12690 + }, + { + "grad_norm": 0.12322720140218735, + "learning_rate": 9.838818967102926e-05, + "loss": 0.0182, + "step": 12700 + }, + { + "grad_norm": 0.0951308086514473, + "learning_rate": 9.83840226033962e-05, + "loss": 0.0193, + "step": 12710 + }, + { + "grad_norm": 0.10740872472524643, + "learning_rate": 9.837985024455918e-05, + "loss": 0.0196, + "step": 12720 + }, + { + "grad_norm": 0.1245117038488388, + "learning_rate": 9.837567259497447e-05, + "loss": 0.0178, + "step": 12730 + }, + { + "grad_norm": 0.11362750828266144, + "learning_rate": 9.837148965509894e-05, + "loss": 0.018, + "step": 12740 + }, + { + "grad_norm": 0.10389836877584457, + "learning_rate": 9.836730142539001e-05, + "loss": 0.0171, + "step": 12750 + }, + { + "grad_norm": 0.10672479122877121, + "learning_rate": 9.836310790630574e-05, + "loss": 0.0196, + "step": 12760 + }, + { + "grad_norm": 0.16736862063407898, + "learning_rate": 9.83589090983047e-05, + "loss": 0.0164, + "step": 12770 + }, + { + "grad_norm": 0.1613023728132248, + "learning_rate": 9.835470500184605e-05, + "loss": 0.0197, + "step": 12780 + }, + { + "grad_norm": 0.07236884534358978, + "learning_rate": 9.835049561738957e-05, + "loss": 0.0216, + "step": 12790 + }, + { + "grad_norm": 0.13719627261161804, + "learning_rate": 9.834628094539558e-05, + "loss": 0.0189, + "step": 12800 + }, + { + "grad_norm": 0.09305033087730408, + "learning_rate": 9.834206098632499e-05, + "loss": 0.018, + "step": 12810 + }, + { + "grad_norm": 0.06769400835037231, + "learning_rate": 9.833783574063931e-05, + "loss": 0.0157, + "step": 12820 + }, + { + "grad_norm": 0.08803427219390869, + "learning_rate": 9.833360520880058e-05, + "loss": 0.018, + "step": 12830 + }, + { + "grad_norm": 0.09445924311876297, + "learning_rate": 9.832936939127144e-05, + "loss": 0.0178, + "step": 12840 + }, + { + "grad_norm": 0.10560660809278488, + "learning_rate": 9.832512828851515e-05, + "loss": 0.02, + "step": 12850 + }, + { + "grad_norm": 0.07455317676067352, + "learning_rate": 9.832088190099546e-05, + "loss": 0.0179, + "step": 12860 + }, + { + "grad_norm": 0.0914570763707161, + "learning_rate": 9.831663022917679e-05, + "loss": 0.0225, + "step": 12870 + }, + { + "grad_norm": 0.08151020854711533, + "learning_rate": 9.831237327352407e-05, + "loss": 0.0155, + "step": 12880 + }, + { + "grad_norm": 0.0833992213010788, + "learning_rate": 9.830811103450286e-05, + "loss": 0.0172, + "step": 12890 + }, + { + "grad_norm": 0.06790637969970703, + "learning_rate": 9.830384351257924e-05, + "loss": 0.0185, + "step": 12900 + }, + { + "grad_norm": 0.09268930554389954, + "learning_rate": 9.829957070821993e-05, + "loss": 0.0161, + "step": 12910 + }, + { + "grad_norm": 0.09075707197189331, + "learning_rate": 9.829529262189218e-05, + "loss": 0.0187, + "step": 12920 + }, + { + "grad_norm": 0.08926078677177429, + "learning_rate": 9.829100925406385e-05, + "loss": 0.0184, + "step": 12930 + }, + { + "grad_norm": 0.0865911915898323, + "learning_rate": 9.828672060520333e-05, + "loss": 0.018, + "step": 12940 + }, + { + "grad_norm": 0.07953394204378128, + "learning_rate": 9.828242667577966e-05, + "loss": 0.0191, + "step": 12950 + }, + { + "grad_norm": 0.10773986577987671, + "learning_rate": 9.82781274662624e-05, + "loss": 0.0212, + "step": 12960 + }, + { + "grad_norm": 0.11151953786611557, + "learning_rate": 9.82738229771217e-05, + "loss": 0.0186, + "step": 12970 + }, + { + "grad_norm": 0.10347878187894821, + "learning_rate": 9.826951320882829e-05, + "loss": 0.0186, + "step": 12980 + }, + { + "grad_norm": 0.08884608000516891, + "learning_rate": 9.826519816185351e-05, + "loss": 0.0168, + "step": 12990 + }, + { + "grad_norm": 0.13110148906707764, + "learning_rate": 9.826087783666921e-05, + "loss": 0.0191, + "step": 13000 + }, + { + "grad_norm": 0.10310567915439606, + "learning_rate": 9.825655223374787e-05, + "loss": 0.019, + "step": 13010 + }, + { + "grad_norm": 0.10602068156003952, + "learning_rate": 9.825222135356253e-05, + "loss": 0.0203, + "step": 13020 + }, + { + "grad_norm": 0.08576307445764542, + "learning_rate": 9.82478851965868e-05, + "loss": 0.022, + "step": 13030 + }, + { + "grad_norm": 0.08160389959812164, + "learning_rate": 9.82435437632949e-05, + "loss": 0.0211, + "step": 13040 + }, + { + "grad_norm": 0.08550047129392624, + "learning_rate": 9.823919705416158e-05, + "loss": 0.0167, + "step": 13050 + }, + { + "grad_norm": 0.08277249336242676, + "learning_rate": 9.82348450696622e-05, + "loss": 0.0189, + "step": 13060 + }, + { + "grad_norm": 0.12474393099546432, + "learning_rate": 9.823048781027268e-05, + "loss": 0.018, + "step": 13070 + }, + { + "grad_norm": 0.08101331442594528, + "learning_rate": 9.822612527646953e-05, + "loss": 0.016, + "step": 13080 + }, + { + "grad_norm": 0.0653521865606308, + "learning_rate": 9.822175746872984e-05, + "loss": 0.0178, + "step": 13090 + }, + { + "grad_norm": 0.1021999716758728, + "learning_rate": 9.821738438753123e-05, + "loss": 0.0169, + "step": 13100 + }, + { + "grad_norm": 0.07699805498123169, + "learning_rate": 9.821300603335196e-05, + "loss": 0.018, + "step": 13110 + }, + { + "grad_norm": 0.10376700758934021, + "learning_rate": 9.820862240667085e-05, + "loss": 0.0173, + "step": 13120 + }, + { + "grad_norm": 0.07694051414728165, + "learning_rate": 9.820423350796726e-05, + "loss": 0.0171, + "step": 13130 + }, + { + "grad_norm": 0.08121339976787567, + "learning_rate": 9.819983933772118e-05, + "loss": 0.0189, + "step": 13140 + }, + { + "grad_norm": 0.09006659686565399, + "learning_rate": 9.819543989641314e-05, + "loss": 0.0166, + "step": 13150 + }, + { + "grad_norm": 0.06666059792041779, + "learning_rate": 9.819103518452423e-05, + "loss": 0.0178, + "step": 13160 + }, + { + "grad_norm": 0.07819269597530365, + "learning_rate": 9.818662520253618e-05, + "loss": 0.0155, + "step": 13170 + }, + { + "grad_norm": 0.0821094736456871, + "learning_rate": 9.818220995093126e-05, + "loss": 0.0183, + "step": 13180 + }, + { + "grad_norm": 0.11115472763776779, + "learning_rate": 9.817778943019228e-05, + "loss": 0.0198, + "step": 13190 + }, + { + "grad_norm": 0.11258429288864136, + "learning_rate": 9.81733636408027e-05, + "loss": 0.0164, + "step": 13200 + }, + { + "grad_norm": 0.11227858811616898, + "learning_rate": 9.816893258324649e-05, + "loss": 0.0177, + "step": 13210 + }, + { + "grad_norm": 0.15088166296482086, + "learning_rate": 9.816449625800823e-05, + "loss": 0.0167, + "step": 13220 + }, + { + "grad_norm": 0.0977974683046341, + "learning_rate": 9.816005466557308e-05, + "loss": 0.0171, + "step": 13230 + }, + { + "grad_norm": 0.11447764188051224, + "learning_rate": 9.815560780642674e-05, + "loss": 0.017, + "step": 13240 + }, + { + "grad_norm": 0.11135975271463394, + "learning_rate": 9.815115568105555e-05, + "loss": 0.0165, + "step": 13250 + }, + { + "grad_norm": 0.09328409284353256, + "learning_rate": 9.814669828994638e-05, + "loss": 0.0191, + "step": 13260 + }, + { + "grad_norm": 0.09972452372312546, + "learning_rate": 9.814223563358665e-05, + "loss": 0.0168, + "step": 13270 + }, + { + "grad_norm": 0.10036281496286392, + "learning_rate": 9.813776771246443e-05, + "loss": 0.021, + "step": 13280 + }, + { + "grad_norm": 0.10818080604076385, + "learning_rate": 9.813329452706829e-05, + "loss": 0.0179, + "step": 13290 + }, + { + "grad_norm": 0.12042199820280075, + "learning_rate": 9.812881607788744e-05, + "loss": 0.0166, + "step": 13300 + }, + { + "grad_norm": 0.10639107972383499, + "learning_rate": 9.812433236541163e-05, + "loss": 0.0189, + "step": 13310 + }, + { + "grad_norm": 0.1919373720884323, + "learning_rate": 9.811984339013116e-05, + "loss": 0.0184, + "step": 13320 + }, + { + "grad_norm": 0.13824321329593658, + "learning_rate": 9.811534915253698e-05, + "loss": 0.0221, + "step": 13330 + }, + { + "grad_norm": 0.13592737913131714, + "learning_rate": 9.811084965312056e-05, + "loss": 0.0206, + "step": 13340 + }, + { + "grad_norm": 0.1139478087425232, + "learning_rate": 9.810634489237396e-05, + "loss": 0.0241, + "step": 13350 + }, + { + "grad_norm": 0.09412574768066406, + "learning_rate": 9.81018348707898e-05, + "loss": 0.0201, + "step": 13360 + }, + { + "grad_norm": 0.11508378386497498, + "learning_rate": 9.809731958886131e-05, + "loss": 0.0175, + "step": 13370 + }, + { + "grad_norm": 0.08511648327112198, + "learning_rate": 9.809279904708224e-05, + "loss": 0.0165, + "step": 13380 + }, + { + "grad_norm": 0.09043066203594208, + "learning_rate": 9.808827324594699e-05, + "loss": 0.0171, + "step": 13390 + }, + { + "grad_norm": 0.0856509879231453, + "learning_rate": 9.808374218595046e-05, + "loss": 0.0195, + "step": 13400 + }, + { + "grad_norm": 0.08653421700000763, + "learning_rate": 9.80792058675882e-05, + "loss": 0.0176, + "step": 13410 + }, + { + "grad_norm": 0.10022665560245514, + "learning_rate": 9.807466429135627e-05, + "loss": 0.0179, + "step": 13420 + }, + { + "grad_norm": 0.07237831503152847, + "learning_rate": 9.807011745775132e-05, + "loss": 0.0175, + "step": 13430 + }, + { + "grad_norm": 0.10197737067937851, + "learning_rate": 9.806556536727061e-05, + "loss": 0.0199, + "step": 13440 + }, + { + "grad_norm": 0.07180045545101166, + "learning_rate": 9.806100802041193e-05, + "loss": 0.0193, + "step": 13450 + }, + { + "grad_norm": 0.10971759259700775, + "learning_rate": 9.805644541767368e-05, + "loss": 0.0196, + "step": 13460 + }, + { + "grad_norm": 0.10484638810157776, + "learning_rate": 9.805187755955478e-05, + "loss": 0.0172, + "step": 13470 + }, + { + "grad_norm": 0.08137377351522446, + "learning_rate": 9.804730444655483e-05, + "loss": 0.0168, + "step": 13480 + }, + { + "grad_norm": 0.09620700031518936, + "learning_rate": 9.804272607917388e-05, + "loss": 0.0167, + "step": 13490 + }, + { + "grad_norm": 0.07494360953569412, + "learning_rate": 9.803814245791265e-05, + "loss": 0.0166, + "step": 13500 + }, + { + "grad_norm": 0.10569518059492111, + "learning_rate": 9.803355358327239e-05, + "loss": 0.018, + "step": 13510 + }, + { + "grad_norm": 0.07735764980316162, + "learning_rate": 9.802895945575492e-05, + "loss": 0.0188, + "step": 13520 + }, + { + "grad_norm": 0.07722298800945282, + "learning_rate": 9.802436007586266e-05, + "loss": 0.0158, + "step": 13530 + }, + { + "grad_norm": 0.1297617256641388, + "learning_rate": 9.801975544409858e-05, + "loss": 0.0182, + "step": 13540 + }, + { + "grad_norm": 0.08792968839406967, + "learning_rate": 9.801514556096625e-05, + "loss": 0.0152, + "step": 13550 + }, + { + "grad_norm": 0.11359523236751556, + "learning_rate": 9.801053042696977e-05, + "loss": 0.0162, + "step": 13560 + }, + { + "grad_norm": 0.07449427992105484, + "learning_rate": 9.800591004261388e-05, + "loss": 0.0161, + "step": 13570 + }, + { + "grad_norm": 0.085602767765522, + "learning_rate": 9.800128440840385e-05, + "loss": 0.0162, + "step": 13580 + }, + { + "grad_norm": 0.08882226794958115, + "learning_rate": 9.799665352484552e-05, + "loss": 0.0178, + "step": 13590 + }, + { + "grad_norm": 0.09505950659513474, + "learning_rate": 9.799201739244532e-05, + "loss": 0.0188, + "step": 13600 + }, + { + "grad_norm": 0.06977617740631104, + "learning_rate": 9.798737601171025e-05, + "loss": 0.0194, + "step": 13610 + }, + { + "grad_norm": 0.09178666025400162, + "learning_rate": 9.79827293831479e-05, + "loss": 0.0167, + "step": 13620 + }, + { + "grad_norm": 0.09067119657993317, + "learning_rate": 9.797807750726638e-05, + "loss": 0.0186, + "step": 13630 + }, + { + "grad_norm": 0.10018664598464966, + "learning_rate": 9.797342038457446e-05, + "loss": 0.0153, + "step": 13640 + }, + { + "grad_norm": 0.08932913839817047, + "learning_rate": 9.796875801558141e-05, + "loss": 0.0189, + "step": 13650 + }, + { + "grad_norm": 0.10163521021604538, + "learning_rate": 9.79640904007971e-05, + "loss": 0.0182, + "step": 13660 + }, + { + "grad_norm": 0.0844612717628479, + "learning_rate": 9.795941754073199e-05, + "loss": 0.016, + "step": 13670 + }, + { + "grad_norm": 0.10908888280391693, + "learning_rate": 9.795473943589705e-05, + "loss": 0.0174, + "step": 13680 + }, + { + "grad_norm": 0.12093853205442429, + "learning_rate": 9.795005608680394e-05, + "loss": 0.0193, + "step": 13690 + }, + { + "grad_norm": 0.11356997489929199, + "learning_rate": 9.794536749396477e-05, + "loss": 0.0167, + "step": 13700 + }, + { + "grad_norm": 0.08572718501091003, + "learning_rate": 9.79406736578923e-05, + "loss": 0.0179, + "step": 13710 + }, + { + "grad_norm": 0.10243862867355347, + "learning_rate": 9.793597457909984e-05, + "loss": 0.0162, + "step": 13720 + }, + { + "grad_norm": 0.11627528816461563, + "learning_rate": 9.793127025810127e-05, + "loss": 0.0179, + "step": 13730 + }, + { + "grad_norm": 0.10127932578325272, + "learning_rate": 9.792656069541104e-05, + "loss": 0.0177, + "step": 13740 + }, + { + "grad_norm": 0.13543546199798584, + "learning_rate": 9.79218458915442e-05, + "loss": 0.0175, + "step": 13750 + }, + { + "grad_norm": 0.11244973540306091, + "learning_rate": 9.791712584701634e-05, + "loss": 0.0175, + "step": 13760 + }, + { + "grad_norm": 0.09574391692876816, + "learning_rate": 9.791240056234364e-05, + "loss": 0.0184, + "step": 13770 + }, + { + "grad_norm": 0.12345273047685623, + "learning_rate": 9.790767003804283e-05, + "loss": 0.0178, + "step": 13780 + }, + { + "grad_norm": 0.10375077277421951, + "learning_rate": 9.790293427463126e-05, + "loss": 0.0168, + "step": 13790 + }, + { + "grad_norm": 0.10086861252784729, + "learning_rate": 9.789819327262684e-05, + "loss": 0.0169, + "step": 13800 + }, + { + "grad_norm": 0.10715437680482864, + "learning_rate": 9.7893447032548e-05, + "loss": 0.017, + "step": 13810 + }, + { + "grad_norm": 0.15422889590263367, + "learning_rate": 9.78886955549138e-05, + "loss": 0.0158, + "step": 13820 + }, + { + "grad_norm": 0.1230480968952179, + "learning_rate": 9.788393884024387e-05, + "loss": 0.0166, + "step": 13830 + }, + { + "grad_norm": 0.11342737078666687, + "learning_rate": 9.787917688905836e-05, + "loss": 0.0176, + "step": 13840 + }, + { + "grad_norm": 0.14177191257476807, + "learning_rate": 9.787440970187807e-05, + "loss": 0.0196, + "step": 13850 + }, + { + "grad_norm": 0.08954007923603058, + "learning_rate": 9.786963727922429e-05, + "loss": 0.0159, + "step": 13860 + }, + { + "grad_norm": 0.10712911933660507, + "learning_rate": 9.786485962161897e-05, + "loss": 0.0166, + "step": 13870 + }, + { + "grad_norm": 0.09306254982948303, + "learning_rate": 9.786007672958455e-05, + "loss": 0.016, + "step": 13880 + }, + { + "grad_norm": 0.09073283523321152, + "learning_rate": 9.78552886036441e-05, + "loss": 0.016, + "step": 13890 + }, + { + "grad_norm": 0.07888103276491165, + "learning_rate": 9.785049524432124e-05, + "loss": 0.0175, + "step": 13900 + }, + { + "grad_norm": 0.07760605216026306, + "learning_rate": 9.784569665214016e-05, + "loss": 0.0165, + "step": 13910 + }, + { + "grad_norm": 0.11728457361459732, + "learning_rate": 9.784089282762563e-05, + "loss": 0.0201, + "step": 13920 + }, + { + "grad_norm": 0.0689106360077858, + "learning_rate": 9.7836083771303e-05, + "loss": 0.0152, + "step": 13930 + }, + { + "grad_norm": 0.07257848978042603, + "learning_rate": 9.783126948369817e-05, + "loss": 0.0161, + "step": 13940 + }, + { + "grad_norm": 0.0645076110959053, + "learning_rate": 9.78264499653376e-05, + "loss": 0.0139, + "step": 13950 + }, + { + "grad_norm": 0.07381048053503036, + "learning_rate": 9.782162521674838e-05, + "loss": 0.0148, + "step": 13960 + }, + { + "grad_norm": 0.08939614146947861, + "learning_rate": 9.781679523845812e-05, + "loss": 0.0189, + "step": 13970 + }, + { + "grad_norm": 0.09444253146648407, + "learning_rate": 9.781196003099502e-05, + "loss": 0.017, + "step": 13980 + }, + { + "grad_norm": 0.09652118384838104, + "learning_rate": 9.780711959488786e-05, + "loss": 0.0182, + "step": 13990 + }, + { + "grad_norm": 0.09903587400913239, + "learning_rate": 9.780227393066599e-05, + "loss": 0.0158, + "step": 14000 + }, + { + "grad_norm": 0.09939441829919815, + "learning_rate": 9.77974230388593e-05, + "loss": 0.017, + "step": 14010 + }, + { + "grad_norm": 0.10484892874956131, + "learning_rate": 9.779256691999829e-05, + "loss": 0.0192, + "step": 14020 + }, + { + "grad_norm": 0.10489719361066818, + "learning_rate": 9.778770557461403e-05, + "loss": 0.0185, + "step": 14030 + }, + { + "grad_norm": 0.09830044209957123, + "learning_rate": 9.778283900323812e-05, + "loss": 0.0176, + "step": 14040 + }, + { + "grad_norm": 0.08115673810243607, + "learning_rate": 9.777796720640277e-05, + "loss": 0.0197, + "step": 14050 + }, + { + "grad_norm": 0.10459916293621063, + "learning_rate": 9.777309018464078e-05, + "loss": 0.0189, + "step": 14060 + }, + { + "grad_norm": 0.15139609575271606, + "learning_rate": 9.776820793848547e-05, + "loss": 0.0191, + "step": 14070 + }, + { + "grad_norm": 0.08998500555753708, + "learning_rate": 9.776332046847075e-05, + "loss": 0.0187, + "step": 14080 + }, + { + "grad_norm": 0.1251755654811859, + "learning_rate": 9.775842777513111e-05, + "loss": 0.0161, + "step": 14090 + }, + { + "grad_norm": 0.10435283929109573, + "learning_rate": 9.775352985900163e-05, + "loss": 0.0167, + "step": 14100 + }, + { + "grad_norm": 0.07186121493577957, + "learning_rate": 9.774862672061791e-05, + "loss": 0.0179, + "step": 14110 + }, + { + "grad_norm": 0.09268464148044586, + "learning_rate": 9.774371836051616e-05, + "loss": 0.0196, + "step": 14120 + }, + { + "grad_norm": 0.10039036720991135, + "learning_rate": 9.773880477923315e-05, + "loss": 0.0183, + "step": 14130 + }, + { + "grad_norm": 0.12944737076759338, + "learning_rate": 9.773388597730623e-05, + "loss": 0.015, + "step": 14140 + }, + { + "grad_norm": 0.08704682439565659, + "learning_rate": 9.77289619552733e-05, + "loss": 0.0185, + "step": 14150 + }, + { + "grad_norm": 0.09097624570131302, + "learning_rate": 9.772403271367285e-05, + "loss": 0.0169, + "step": 14160 + }, + { + "grad_norm": 0.07190629839897156, + "learning_rate": 9.771909825304396e-05, + "loss": 0.0148, + "step": 14170 + }, + { + "grad_norm": 0.1318836212158203, + "learning_rate": 9.771415857392619e-05, + "loss": 0.0158, + "step": 14180 + }, + { + "grad_norm": 0.08844310790300369, + "learning_rate": 9.770921367685978e-05, + "loss": 0.0154, + "step": 14190 + }, + { + "grad_norm": 0.13034069538116455, + "learning_rate": 9.770426356238551e-05, + "loss": 0.0166, + "step": 14200 + }, + { + "grad_norm": 0.12518489360809326, + "learning_rate": 9.769930823104469e-05, + "loss": 0.0165, + "step": 14210 + }, + { + "grad_norm": 0.08605556935071945, + "learning_rate": 9.769434768337926e-05, + "loss": 0.0159, + "step": 14220 + }, + { + "grad_norm": 0.0840495303273201, + "learning_rate": 9.768938191993164e-05, + "loss": 0.017, + "step": 14230 + }, + { + "grad_norm": 0.08828173577785492, + "learning_rate": 9.768441094124494e-05, + "loss": 0.0164, + "step": 14240 + }, + { + "grad_norm": 0.09520884603261948, + "learning_rate": 9.767943474786275e-05, + "loss": 0.0171, + "step": 14250 + }, + { + "grad_norm": 0.07384520024061203, + "learning_rate": 9.767445334032923e-05, + "loss": 0.0175, + "step": 14260 + }, + { + "grad_norm": 0.09110711514949799, + "learning_rate": 9.766946671918919e-05, + "loss": 0.0174, + "step": 14270 + }, + { + "grad_norm": 0.09260065853595734, + "learning_rate": 9.766447488498796e-05, + "loss": 0.0177, + "step": 14280 + }, + { + "grad_norm": 0.11251272261142731, + "learning_rate": 9.765947783827139e-05, + "loss": 0.017, + "step": 14290 + }, + { + "grad_norm": 0.10018701106309891, + "learning_rate": 9.765447557958599e-05, + "loss": 0.0164, + "step": 14300 + }, + { + "grad_norm": 0.08109252899885178, + "learning_rate": 9.764946810947879e-05, + "loss": 0.0165, + "step": 14310 + }, + { + "grad_norm": 0.08935870975255966, + "learning_rate": 9.764445542849738e-05, + "loss": 0.0156, + "step": 14320 + }, + { + "grad_norm": 0.08285413682460785, + "learning_rate": 9.763943753718998e-05, + "loss": 0.0162, + "step": 14330 + }, + { + "grad_norm": 0.07460112869739532, + "learning_rate": 9.76344144361053e-05, + "loss": 0.0178, + "step": 14340 + }, + { + "grad_norm": 0.07177694886922836, + "learning_rate": 9.762938612579269e-05, + "loss": 0.0154, + "step": 14350 + }, + { + "grad_norm": 0.08516347408294678, + "learning_rate": 9.762435260680202e-05, + "loss": 0.0157, + "step": 14360 + }, + { + "grad_norm": 0.13087491691112518, + "learning_rate": 9.761931387968373e-05, + "loss": 0.0163, + "step": 14370 + }, + { + "grad_norm": 0.0940103605389595, + "learning_rate": 9.76142699449889e-05, + "loss": 0.0169, + "step": 14380 + }, + { + "grad_norm": 0.10885131359100342, + "learning_rate": 9.760922080326908e-05, + "loss": 0.0145, + "step": 14390 + }, + { + "grad_norm": 0.07411511242389679, + "learning_rate": 9.760416645507644e-05, + "loss": 0.0155, + "step": 14400 + }, + { + "grad_norm": 0.11501516401767731, + "learning_rate": 9.759910690096375e-05, + "loss": 0.0169, + "step": 14410 + }, + { + "grad_norm": 0.08929752558469772, + "learning_rate": 9.759404214148429e-05, + "loss": 0.0164, + "step": 14420 + }, + { + "grad_norm": 0.07792402803897858, + "learning_rate": 9.758897217719191e-05, + "loss": 0.0173, + "step": 14430 + }, + { + "grad_norm": 0.1320357620716095, + "learning_rate": 9.758389700864113e-05, + "loss": 0.0173, + "step": 14440 + }, + { + "grad_norm": 0.08461495488882065, + "learning_rate": 9.757881663638688e-05, + "loss": 0.0158, + "step": 14450 + }, + { + "grad_norm": 0.12581460177898407, + "learning_rate": 9.757373106098478e-05, + "loss": 0.0171, + "step": 14460 + }, + { + "grad_norm": 0.13393379747867584, + "learning_rate": 9.756864028299097e-05, + "loss": 0.0167, + "step": 14470 + }, + { + "grad_norm": 0.0894089937210083, + "learning_rate": 9.75635443029622e-05, + "loss": 0.0153, + "step": 14480 + }, + { + "grad_norm": 0.08401979506015778, + "learning_rate": 9.755844312145572e-05, + "loss": 0.0157, + "step": 14490 + }, + { + "grad_norm": 0.06852132827043533, + "learning_rate": 9.755333673902941e-05, + "loss": 0.0147, + "step": 14500 + }, + { + "grad_norm": 0.10065612941980362, + "learning_rate": 9.75482251562417e-05, + "loss": 0.0189, + "step": 14510 + }, + { + "grad_norm": 0.07461901009082794, + "learning_rate": 9.754310837365155e-05, + "loss": 0.0171, + "step": 14520 + }, + { + "grad_norm": 0.07376010715961456, + "learning_rate": 9.753798639181856e-05, + "loss": 0.0171, + "step": 14530 + }, + { + "grad_norm": 0.06547222286462784, + "learning_rate": 9.753285921130286e-05, + "loss": 0.0186, + "step": 14540 + }, + { + "grad_norm": 0.08302805572748184, + "learning_rate": 9.752772683266512e-05, + "loss": 0.0162, + "step": 14550 + }, + { + "grad_norm": 0.08911304175853729, + "learning_rate": 9.752258925646665e-05, + "loss": 0.0151, + "step": 14560 + }, + { + "grad_norm": 0.0730442926287651, + "learning_rate": 9.751744648326926e-05, + "loss": 0.0171, + "step": 14570 + }, + { + "grad_norm": 0.07701433449983597, + "learning_rate": 9.751229851363536e-05, + "loss": 0.0165, + "step": 14580 + }, + { + "grad_norm": 0.08609162271022797, + "learning_rate": 9.750714534812793e-05, + "loss": 0.017, + "step": 14590 + }, + { + "grad_norm": 0.10212894529104233, + "learning_rate": 9.750198698731053e-05, + "loss": 0.0162, + "step": 14600 + }, + { + "grad_norm": 0.1006547287106514, + "learning_rate": 9.749682343174722e-05, + "loss": 0.0163, + "step": 14610 + }, + { + "grad_norm": 0.10319578647613525, + "learning_rate": 9.749165468200272e-05, + "loss": 0.0184, + "step": 14620 + }, + { + "grad_norm": 0.08450404554605484, + "learning_rate": 9.748648073864229e-05, + "loss": 0.0159, + "step": 14630 + }, + { + "grad_norm": 0.06702975183725357, + "learning_rate": 9.748130160223168e-05, + "loss": 0.0171, + "step": 14640 + }, + { + "grad_norm": 0.0719652846455574, + "learning_rate": 9.747611727333734e-05, + "loss": 0.0165, + "step": 14650 + }, + { + "grad_norm": 0.09140424430370331, + "learning_rate": 9.74709277525262e-05, + "loss": 0.0176, + "step": 14660 + }, + { + "grad_norm": 0.131885826587677, + "learning_rate": 9.746573304036576e-05, + "loss": 0.0182, + "step": 14670 + }, + { + "grad_norm": 0.11780253052711487, + "learning_rate": 9.746053313742412e-05, + "loss": 0.0151, + "step": 14680 + }, + { + "grad_norm": 0.07324707508087158, + "learning_rate": 9.745532804426994e-05, + "loss": 0.016, + "step": 14690 + }, + { + "grad_norm": 0.14315040409564972, + "learning_rate": 9.745011776147242e-05, + "loss": 0.0189, + "step": 14700 + }, + { + "grad_norm": 0.09653580188751221, + "learning_rate": 9.744490228960138e-05, + "loss": 0.017, + "step": 14710 + }, + { + "grad_norm": 0.10789943486452103, + "learning_rate": 9.743968162922713e-05, + "loss": 0.0158, + "step": 14720 + }, + { + "grad_norm": 0.08904899656772614, + "learning_rate": 9.743445578092064e-05, + "loss": 0.0189, + "step": 14730 + }, + { + "grad_norm": 0.08925770223140717, + "learning_rate": 9.742922474525338e-05, + "loss": 0.0188, + "step": 14740 + }, + { + "grad_norm": 0.09378491342067719, + "learning_rate": 9.742398852279741e-05, + "loss": 0.0156, + "step": 14750 + }, + { + "grad_norm": 0.08003883808851242, + "learning_rate": 9.741874711412535e-05, + "loss": 0.0158, + "step": 14760 + }, + { + "grad_norm": 0.06381060183048248, + "learning_rate": 9.741350051981042e-05, + "loss": 0.0168, + "step": 14770 + }, + { + "grad_norm": 0.07680083811283112, + "learning_rate": 9.740824874042633e-05, + "loss": 0.0146, + "step": 14780 + }, + { + "grad_norm": 0.07818491756916046, + "learning_rate": 9.740299177654746e-05, + "loss": 0.0149, + "step": 14790 + }, + { + "grad_norm": 0.10514160245656967, + "learning_rate": 9.739772962874867e-05, + "loss": 0.0161, + "step": 14800 + }, + { + "grad_norm": 0.09378231316804886, + "learning_rate": 9.739246229760541e-05, + "loss": 0.0192, + "step": 14810 + }, + { + "grad_norm": 0.13337372243404388, + "learning_rate": 9.738718978369376e-05, + "loss": 0.0164, + "step": 14820 + }, + { + "grad_norm": 0.14459730684757233, + "learning_rate": 9.738191208759025e-05, + "loss": 0.0173, + "step": 14830 + }, + { + "grad_norm": 0.1437181830406189, + "learning_rate": 9.73766292098721e-05, + "loss": 0.0177, + "step": 14840 + }, + { + "grad_norm": 0.130435973405838, + "learning_rate": 9.737134115111699e-05, + "loss": 0.0174, + "step": 14850 + }, + { + "grad_norm": 0.09422898292541504, + "learning_rate": 9.736604791190323e-05, + "loss": 0.0185, + "step": 14860 + }, + { + "grad_norm": 0.11645254492759705, + "learning_rate": 9.73607494928097e-05, + "loss": 0.0157, + "step": 14870 + }, + { + "grad_norm": 0.14172562956809998, + "learning_rate": 9.735544589441581e-05, + "loss": 0.0177, + "step": 14880 + }, + { + "grad_norm": 0.07672831416130066, + "learning_rate": 9.735013711730154e-05, + "loss": 0.0173, + "step": 14890 + }, + { + "grad_norm": 0.10704755783081055, + "learning_rate": 9.734482316204747e-05, + "loss": 0.0148, + "step": 14900 + }, + { + "grad_norm": 0.11713682115077972, + "learning_rate": 9.733950402923473e-05, + "loss": 0.016, + "step": 14910 + }, + { + "grad_norm": 0.08579736948013306, + "learning_rate": 9.7334179719445e-05, + "loss": 0.0144, + "step": 14920 + }, + { + "grad_norm": 0.09758981317281723, + "learning_rate": 9.732885023326053e-05, + "loss": 0.0146, + "step": 14930 + }, + { + "grad_norm": 0.07719141989946365, + "learning_rate": 9.732351557126418e-05, + "loss": 0.0147, + "step": 14940 + }, + { + "grad_norm": 0.08501316606998444, + "learning_rate": 9.731817573403929e-05, + "loss": 0.0169, + "step": 14950 + }, + { + "grad_norm": 0.1039375439286232, + "learning_rate": 9.731283072216985e-05, + "loss": 0.0157, + "step": 14960 + }, + { + "grad_norm": 0.12252656370401382, + "learning_rate": 9.730748053624039e-05, + "loss": 0.0154, + "step": 14970 + }, + { + "grad_norm": 0.11885801702737808, + "learning_rate": 9.730212517683598e-05, + "loss": 0.0164, + "step": 14980 + }, + { + "grad_norm": 0.10320528596639633, + "learning_rate": 9.729676464454228e-05, + "loss": 0.0173, + "step": 14990 + }, + { + "grad_norm": 0.09687267988920212, + "learning_rate": 9.72913989399455e-05, + "loss": 0.0158, + "step": 15000 + }, + { + "grad_norm": 0.08257591724395752, + "learning_rate": 9.728602806363242e-05, + "loss": 0.015, + "step": 15010 + }, + { + "grad_norm": 0.09711789339780807, + "learning_rate": 9.728065201619043e-05, + "loss": 0.0165, + "step": 15020 + }, + { + "grad_norm": 0.054562848061323166, + "learning_rate": 9.727527079820742e-05, + "loss": 0.0149, + "step": 15030 + }, + { + "grad_norm": 0.07123826444149017, + "learning_rate": 9.726988441027186e-05, + "loss": 0.0168, + "step": 15040 + }, + { + "grad_norm": 0.0791480764746666, + "learning_rate": 9.726449285297281e-05, + "loss": 0.0158, + "step": 15050 + }, + { + "grad_norm": 0.08413182199001312, + "learning_rate": 9.72590961268999e-05, + "loss": 0.0155, + "step": 15060 + }, + { + "grad_norm": 0.08988498896360397, + "learning_rate": 9.725369423264328e-05, + "loss": 0.0172, + "step": 15070 + }, + { + "grad_norm": 0.08739021420478821, + "learning_rate": 9.72482871707937e-05, + "loss": 0.0174, + "step": 15080 + }, + { + "grad_norm": 0.07049430161714554, + "learning_rate": 9.724287494194247e-05, + "loss": 0.0156, + "step": 15090 + }, + { + "grad_norm": 0.11356295645236969, + "learning_rate": 9.723745754668147e-05, + "loss": 0.0151, + "step": 15100 + }, + { + "grad_norm": 0.09718220680952072, + "learning_rate": 9.723203498560313e-05, + "loss": 0.0164, + "step": 15110 + }, + { + "grad_norm": 0.0677257850766182, + "learning_rate": 9.722660725930046e-05, + "loss": 0.0139, + "step": 15120 + }, + { + "grad_norm": 0.08850738406181335, + "learning_rate": 9.722117436836702e-05, + "loss": 0.0152, + "step": 15130 + }, + { + "grad_norm": 0.07196758687496185, + "learning_rate": 9.721573631339696e-05, + "loss": 0.0175, + "step": 15140 + }, + { + "grad_norm": 0.10719230771064758, + "learning_rate": 9.721029309498494e-05, + "loss": 0.0147, + "step": 15150 + }, + { + "grad_norm": 0.10271183401346207, + "learning_rate": 9.720484471372627e-05, + "loss": 0.0161, + "step": 15160 + }, + { + "grad_norm": 0.12114246189594269, + "learning_rate": 9.719939117021673e-05, + "loss": 0.0162, + "step": 15170 + }, + { + "grad_norm": 0.06938464194536209, + "learning_rate": 9.719393246505275e-05, + "loss": 0.0158, + "step": 15180 + }, + { + "grad_norm": 0.10491663217544556, + "learning_rate": 9.718846859883128e-05, + "loss": 0.0176, + "step": 15190 + }, + { + "grad_norm": 0.12062524259090424, + "learning_rate": 9.718299957214982e-05, + "loss": 0.0166, + "step": 15200 + }, + { + "grad_norm": 0.10185704380273819, + "learning_rate": 9.717752538560646e-05, + "loss": 0.0138, + "step": 15210 + }, + { + "grad_norm": 0.12051507830619812, + "learning_rate": 9.717204603979986e-05, + "loss": 0.0157, + "step": 15220 + }, + { + "grad_norm": 0.14958688616752625, + "learning_rate": 9.716656153532922e-05, + "loss": 0.0195, + "step": 15230 + }, + { + "grad_norm": 0.11221020668745041, + "learning_rate": 9.716107187279434e-05, + "loss": 0.0177, + "step": 15240 + }, + { + "grad_norm": 0.11555545032024384, + "learning_rate": 9.715557705279555e-05, + "loss": 0.0195, + "step": 15250 + }, + { + "grad_norm": 0.11231406778097153, + "learning_rate": 9.715007707593372e-05, + "loss": 0.0179, + "step": 15260 + }, + { + "grad_norm": 0.10567612200975418, + "learning_rate": 9.714457194281036e-05, + "loss": 0.0149, + "step": 15270 + }, + { + "grad_norm": 0.097401924431324, + "learning_rate": 9.713906165402751e-05, + "loss": 0.0205, + "step": 15280 + }, + { + "grad_norm": 0.1009175181388855, + "learning_rate": 9.713354621018774e-05, + "loss": 0.0215, + "step": 15290 + }, + { + "grad_norm": 0.10960385948419571, + "learning_rate": 9.712802561189422e-05, + "loss": 0.017, + "step": 15300 + }, + { + "grad_norm": 0.10981173813343048, + "learning_rate": 9.712249985975069e-05, + "loss": 0.0167, + "step": 15310 + }, + { + "grad_norm": 0.10036725550889969, + "learning_rate": 9.71169689543614e-05, + "loss": 0.0186, + "step": 15320 + }, + { + "grad_norm": 0.07979282736778259, + "learning_rate": 9.711143289633123e-05, + "loss": 0.0148, + "step": 15330 + }, + { + "grad_norm": 0.10820917040109634, + "learning_rate": 9.710589168626561e-05, + "loss": 0.0163, + "step": 15340 + }, + { + "grad_norm": 0.09420433640480042, + "learning_rate": 9.710034532477048e-05, + "loss": 0.0154, + "step": 15350 + }, + { + "grad_norm": 0.08343526721000671, + "learning_rate": 9.709479381245239e-05, + "loss": 0.0196, + "step": 15360 + }, + { + "grad_norm": 0.10640036314725876, + "learning_rate": 9.708923714991847e-05, + "loss": 0.0156, + "step": 15370 + }, + { + "grad_norm": 0.08248300105333328, + "learning_rate": 9.708367533777638e-05, + "loss": 0.0167, + "step": 15380 + }, + { + "grad_norm": 0.06888309121131897, + "learning_rate": 9.707810837663431e-05, + "loss": 0.0153, + "step": 15390 + }, + { + "grad_norm": 0.11827143281698227, + "learning_rate": 9.707253626710113e-05, + "loss": 0.0192, + "step": 15400 + }, + { + "grad_norm": 0.06229855492711067, + "learning_rate": 9.706695900978613e-05, + "loss": 0.0168, + "step": 15410 + }, + { + "grad_norm": 0.11822646111249924, + "learning_rate": 9.706137660529926e-05, + "loss": 0.0171, + "step": 15420 + }, + { + "grad_norm": 0.08231879025697708, + "learning_rate": 9.705578905425101e-05, + "loss": 0.0175, + "step": 15430 + }, + { + "grad_norm": 0.07486173510551453, + "learning_rate": 9.705019635725241e-05, + "loss": 0.0173, + "step": 15440 + }, + { + "grad_norm": 0.09344454109668732, + "learning_rate": 9.704459851491508e-05, + "loss": 0.0188, + "step": 15450 + }, + { + "grad_norm": 0.07669525593519211, + "learning_rate": 9.703899552785118e-05, + "loss": 0.0168, + "step": 15460 + }, + { + "grad_norm": 0.09893082827329636, + "learning_rate": 9.703338739667346e-05, + "loss": 0.0159, + "step": 15470 + }, + { + "grad_norm": 0.08456466346979141, + "learning_rate": 9.70277741219952e-05, + "loss": 0.0148, + "step": 15480 + }, + { + "grad_norm": 0.07999996095895767, + "learning_rate": 9.702215570443027e-05, + "loss": 0.0147, + "step": 15490 + }, + { + "grad_norm": 0.10323282331228256, + "learning_rate": 9.701653214459309e-05, + "loss": 0.0191, + "step": 15500 + }, + { + "grad_norm": 0.06973984837532043, + "learning_rate": 9.701090344309865e-05, + "loss": 0.0158, + "step": 15510 + }, + { + "grad_norm": 0.07170428335666656, + "learning_rate": 9.700526960056247e-05, + "loss": 0.0177, + "step": 15520 + }, + { + "grad_norm": 0.07798376679420471, + "learning_rate": 9.699963061760068e-05, + "loss": 0.0173, + "step": 15530 + }, + { + "grad_norm": 0.11200594156980515, + "learning_rate": 9.699398649482997e-05, + "loss": 0.0151, + "step": 15540 + }, + { + "grad_norm": 0.0885353609919548, + "learning_rate": 9.698833723286753e-05, + "loss": 0.0181, + "step": 15550 + }, + { + "grad_norm": 0.06633448600769043, + "learning_rate": 9.698268283233118e-05, + "loss": 0.0133, + "step": 15560 + }, + { + "grad_norm": 0.09471970051527023, + "learning_rate": 9.697702329383929e-05, + "loss": 0.0144, + "step": 15570 + }, + { + "grad_norm": 0.08287052065134048, + "learning_rate": 9.697135861801074e-05, + "loss": 0.013, + "step": 15580 + }, + { + "grad_norm": 0.10036738216876984, + "learning_rate": 9.696568880546505e-05, + "loss": 0.014, + "step": 15590 + }, + { + "grad_norm": 0.06394003331661224, + "learning_rate": 9.696001385682223e-05, + "loss": 0.014, + "step": 15600 + }, + { + "grad_norm": 0.07913387566804886, + "learning_rate": 9.695433377270291e-05, + "loss": 0.018, + "step": 15610 + }, + { + "grad_norm": 0.11789686232805252, + "learning_rate": 9.694864855372824e-05, + "loss": 0.0174, + "step": 15620 + }, + { + "grad_norm": 0.08406290411949158, + "learning_rate": 9.694295820051995e-05, + "loss": 0.0145, + "step": 15630 + }, + { + "grad_norm": 0.08161713927984238, + "learning_rate": 9.693726271370032e-05, + "loss": 0.0149, + "step": 15640 + }, + { + "grad_norm": 0.08280576020479202, + "learning_rate": 9.693156209389221e-05, + "loss": 0.0172, + "step": 15650 + }, + { + "grad_norm": 0.09011637419462204, + "learning_rate": 9.692585634171905e-05, + "loss": 0.0171, + "step": 15660 + }, + { + "grad_norm": 0.06488237529993057, + "learning_rate": 9.692014545780476e-05, + "loss": 0.0141, + "step": 15670 + }, + { + "grad_norm": 0.11465026438236237, + "learning_rate": 9.691442944277393e-05, + "loss": 0.0156, + "step": 15680 + }, + { + "grad_norm": 0.08677785843610764, + "learning_rate": 9.690870829725162e-05, + "loss": 0.016, + "step": 15690 + }, + { + "grad_norm": 0.07640250772237778, + "learning_rate": 9.69029820218635e-05, + "loss": 0.0158, + "step": 15700 + }, + { + "grad_norm": 0.09303756803274155, + "learning_rate": 9.689725061723579e-05, + "loss": 0.0135, + "step": 15710 + }, + { + "grad_norm": 0.07624910026788712, + "learning_rate": 9.689151408399527e-05, + "loss": 0.0156, + "step": 15720 + }, + { + "grad_norm": 0.10515431314706802, + "learning_rate": 9.688577242276924e-05, + "loss": 0.0178, + "step": 15730 + }, + { + "grad_norm": 0.11413893848657608, + "learning_rate": 9.688002563418566e-05, + "loss": 0.0176, + "step": 15740 + }, + { + "grad_norm": 0.1187121719121933, + "learning_rate": 9.687427371887293e-05, + "loss": 0.0133, + "step": 15750 + }, + { + "grad_norm": 0.07212452590465546, + "learning_rate": 9.686851667746012e-05, + "loss": 0.0151, + "step": 15760 + }, + { + "grad_norm": 0.060764193534851074, + "learning_rate": 9.686275451057677e-05, + "loss": 0.0128, + "step": 15770 + }, + { + "grad_norm": 0.08409739285707474, + "learning_rate": 9.685698721885308e-05, + "loss": 0.015, + "step": 15780 + }, + { + "grad_norm": 0.11450190097093582, + "learning_rate": 9.68512148029197e-05, + "loss": 0.0153, + "step": 15790 + }, + { + "grad_norm": 0.0802425816655159, + "learning_rate": 9.684543726340791e-05, + "loss": 0.0155, + "step": 15800 + }, + { + "grad_norm": 0.06813767552375793, + "learning_rate": 9.683965460094952e-05, + "loss": 0.0187, + "step": 15810 + }, + { + "grad_norm": 0.09139250218868256, + "learning_rate": 9.683386681617694e-05, + "loss": 0.0143, + "step": 15820 + }, + { + "grad_norm": 0.11045898497104645, + "learning_rate": 9.68280739097231e-05, + "loss": 0.017, + "step": 15830 + }, + { + "grad_norm": 0.10634029656648636, + "learning_rate": 9.682227588222148e-05, + "loss": 0.0159, + "step": 15840 + }, + { + "grad_norm": 0.10278807580471039, + "learning_rate": 9.681647273430618e-05, + "loss": 0.0152, + "step": 15850 + }, + { + "grad_norm": 0.12374964356422424, + "learning_rate": 9.681066446661182e-05, + "loss": 0.0153, + "step": 15860 + }, + { + "grad_norm": 0.07994350790977478, + "learning_rate": 9.680485107977357e-05, + "loss": 0.0152, + "step": 15870 + }, + { + "grad_norm": 0.07463517785072327, + "learning_rate": 9.679903257442716e-05, + "loss": 0.0157, + "step": 15880 + }, + { + "grad_norm": 0.0844612643122673, + "learning_rate": 9.679320895120891e-05, + "loss": 0.0162, + "step": 15890 + }, + { + "grad_norm": 0.0758095309138298, + "learning_rate": 9.67873802107557e-05, + "loss": 0.0154, + "step": 15900 + }, + { + "grad_norm": 0.08728672564029694, + "learning_rate": 9.67815463537049e-05, + "loss": 0.0148, + "step": 15910 + }, + { + "grad_norm": 0.06486783921718597, + "learning_rate": 9.677570738069457e-05, + "loss": 0.015, + "step": 15920 + }, + { + "grad_norm": 0.06772690266370773, + "learning_rate": 9.676986329236318e-05, + "loss": 0.0175, + "step": 15930 + }, + { + "grad_norm": 0.09137888252735138, + "learning_rate": 9.676401408934987e-05, + "loss": 0.0173, + "step": 15940 + }, + { + "grad_norm": 0.059741515666246414, + "learning_rate": 9.675815977229428e-05, + "loss": 0.0163, + "step": 15950 + }, + { + "grad_norm": 0.09688283503055573, + "learning_rate": 9.675230034183664e-05, + "loss": 0.0144, + "step": 15960 + }, + { + "grad_norm": 0.06893589347600937, + "learning_rate": 9.674643579861773e-05, + "loss": 0.0188, + "step": 15970 + }, + { + "grad_norm": 0.054266732186079025, + "learning_rate": 9.674056614327886e-05, + "loss": 0.0144, + "step": 15980 + }, + { + "grad_norm": 0.07728904485702515, + "learning_rate": 9.673469137646198e-05, + "loss": 0.0142, + "step": 15990 + }, + { + "grad_norm": 0.0784553587436676, + "learning_rate": 9.67288114988095e-05, + "loss": 0.0158, + "step": 16000 + }, + { + "grad_norm": 0.07563012838363647, + "learning_rate": 9.672292651096447e-05, + "loss": 0.0146, + "step": 16010 + }, + { + "grad_norm": 0.07696602493524551, + "learning_rate": 9.671703641357042e-05, + "loss": 0.0141, + "step": 16020 + }, + { + "grad_norm": 0.07319343090057373, + "learning_rate": 9.67111412072715e-05, + "loss": 0.0157, + "step": 16030 + }, + { + "grad_norm": 0.07819941639900208, + "learning_rate": 9.670524089271242e-05, + "loss": 0.0138, + "step": 16040 + }, + { + "grad_norm": 0.09770367294549942, + "learning_rate": 9.669933547053842e-05, + "loss": 0.014, + "step": 16050 + }, + { + "grad_norm": 0.11456435173749924, + "learning_rate": 9.669342494139531e-05, + "loss": 0.0149, + "step": 16060 + }, + { + "grad_norm": 0.08932524174451828, + "learning_rate": 9.668750930592943e-05, + "loss": 0.0161, + "step": 16070 + }, + { + "grad_norm": 0.0807054191827774, + "learning_rate": 9.668158856478775e-05, + "loss": 0.0142, + "step": 16080 + }, + { + "grad_norm": 0.08076685667037964, + "learning_rate": 9.66756627186177e-05, + "loss": 0.0154, + "step": 16090 + }, + { + "grad_norm": 0.06102403625845909, + "learning_rate": 9.666973176806737e-05, + "loss": 0.0161, + "step": 16100 + }, + { + "grad_norm": 0.09888605028390884, + "learning_rate": 9.666379571378534e-05, + "loss": 0.0148, + "step": 16110 + }, + { + "grad_norm": 0.0706026703119278, + "learning_rate": 9.665785455642076e-05, + "loss": 0.0147, + "step": 16120 + }, + { + "grad_norm": 0.10230395942926407, + "learning_rate": 9.665190829662337e-05, + "loss": 0.0169, + "step": 16130 + }, + { + "grad_norm": 0.05651211366057396, + "learning_rate": 9.664595693504342e-05, + "loss": 0.0179, + "step": 16140 + }, + { + "grad_norm": 0.06709986180067062, + "learning_rate": 9.664000047233175e-05, + "loss": 0.0156, + "step": 16150 + }, + { + "grad_norm": 0.0796661376953125, + "learning_rate": 9.663403890913976e-05, + "loss": 0.0143, + "step": 16160 + }, + { + "grad_norm": 0.1229308694601059, + "learning_rate": 9.662807224611938e-05, + "loss": 0.0176, + "step": 16170 + }, + { + "grad_norm": 0.17252281308174133, + "learning_rate": 9.662210048392311e-05, + "loss": 0.0163, + "step": 16180 + }, + { + "grad_norm": 0.13472424447536469, + "learning_rate": 9.661612362320405e-05, + "loss": 0.017, + "step": 16190 + }, + { + "grad_norm": 0.1490597426891327, + "learning_rate": 9.661014166461579e-05, + "loss": 0.0158, + "step": 16200 + }, + { + "grad_norm": 0.12157272547483444, + "learning_rate": 9.66041546088125e-05, + "loss": 0.0164, + "step": 16210 + }, + { + "grad_norm": 0.09972437471151352, + "learning_rate": 9.659816245644895e-05, + "loss": 0.0152, + "step": 16220 + }, + { + "grad_norm": 0.1134122759103775, + "learning_rate": 9.65921652081804e-05, + "loss": 0.0157, + "step": 16230 + }, + { + "grad_norm": 0.09732352197170258, + "learning_rate": 9.658616286466271e-05, + "loss": 0.0177, + "step": 16240 + }, + { + "grad_norm": 0.1263282746076584, + "learning_rate": 9.65801554265523e-05, + "loss": 0.0161, + "step": 16250 + }, + { + "grad_norm": 0.07828904688358307, + "learning_rate": 9.657414289450612e-05, + "loss": 0.0137, + "step": 16260 + }, + { + "grad_norm": 0.07488004863262177, + "learning_rate": 9.656812526918171e-05, + "loss": 0.0143, + "step": 16270 + }, + { + "grad_norm": 0.06580894440412521, + "learning_rate": 9.656210255123712e-05, + "loss": 0.0144, + "step": 16280 + }, + { + "grad_norm": 0.10997074097394943, + "learning_rate": 9.6556074741331e-05, + "loss": 0.0157, + "step": 16290 + }, + { + "grad_norm": 0.09868430346250534, + "learning_rate": 9.655004184012256e-05, + "loss": 0.0186, + "step": 16300 + }, + { + "grad_norm": 0.07080147415399551, + "learning_rate": 9.654400384827152e-05, + "loss": 0.0153, + "step": 16310 + }, + { + "grad_norm": 0.0949472039937973, + "learning_rate": 9.653796076643818e-05, + "loss": 0.0155, + "step": 16320 + }, + { + "grad_norm": 0.10242494195699692, + "learning_rate": 9.653191259528344e-05, + "loss": 0.0144, + "step": 16330 + }, + { + "grad_norm": 0.12022335082292557, + "learning_rate": 9.65258593354687e-05, + "loss": 0.0152, + "step": 16340 + }, + { + "grad_norm": 0.07606828957796097, + "learning_rate": 9.651980098765591e-05, + "loss": 0.0151, + "step": 16350 + }, + { + "grad_norm": 0.10068518668413162, + "learning_rate": 9.651373755250765e-05, + "loss": 0.0161, + "step": 16360 + }, + { + "grad_norm": 0.06121862679719925, + "learning_rate": 9.650766903068697e-05, + "loss": 0.0152, + "step": 16370 + }, + { + "grad_norm": 0.11296346038579941, + "learning_rate": 9.650159542285753e-05, + "loss": 0.0168, + "step": 16380 + }, + { + "grad_norm": 0.08875925838947296, + "learning_rate": 9.649551672968353e-05, + "loss": 0.0157, + "step": 16390 + }, + { + "grad_norm": 0.06682731956243515, + "learning_rate": 9.648943295182973e-05, + "loss": 0.0133, + "step": 16400 + }, + { + "grad_norm": 0.08220618218183517, + "learning_rate": 9.648334408996144e-05, + "loss": 0.0137, + "step": 16410 + }, + { + "grad_norm": 0.0887591540813446, + "learning_rate": 9.647725014474452e-05, + "loss": 0.0136, + "step": 16420 + }, + { + "grad_norm": 0.10933201760053635, + "learning_rate": 9.64711511168454e-05, + "loss": 0.0156, + "step": 16430 + }, + { + "grad_norm": 0.07845265418291092, + "learning_rate": 9.646504700693108e-05, + "loss": 0.0149, + "step": 16440 + }, + { + "grad_norm": 0.09577114880084991, + "learning_rate": 9.645893781566907e-05, + "loss": 0.0149, + "step": 16450 + }, + { + "grad_norm": 0.08652720600366592, + "learning_rate": 9.645282354372744e-05, + "loss": 0.0156, + "step": 16460 + }, + { + "grad_norm": 0.10020699352025986, + "learning_rate": 9.644670419177491e-05, + "loss": 0.0168, + "step": 16470 + }, + { + "grad_norm": 0.06952742487192154, + "learning_rate": 9.644057976048062e-05, + "loss": 0.0146, + "step": 16480 + }, + { + "grad_norm": 0.08463500440120697, + "learning_rate": 9.643445025051435e-05, + "loss": 0.0158, + "step": 16490 + }, + { + "grad_norm": 0.06964442133903503, + "learning_rate": 9.642831566254641e-05, + "loss": 0.0141, + "step": 16500 + }, + { + "grad_norm": 0.08211473375558853, + "learning_rate": 9.642217599724769e-05, + "loss": 0.0147, + "step": 16510 + }, + { + "grad_norm": 0.07701071351766586, + "learning_rate": 9.64160312552896e-05, + "loss": 0.0141, + "step": 16520 + }, + { + "grad_norm": 0.0658341646194458, + "learning_rate": 9.64098814373441e-05, + "loss": 0.0154, + "step": 16530 + }, + { + "grad_norm": 0.09133567661046982, + "learning_rate": 9.640372654408374e-05, + "loss": 0.0144, + "step": 16540 + }, + { + "grad_norm": 0.0907941460609436, + "learning_rate": 9.639756657618162e-05, + "loss": 0.0162, + "step": 16550 + }, + { + "grad_norm": 0.07993725687265396, + "learning_rate": 9.639140153431138e-05, + "loss": 0.0161, + "step": 16560 + }, + { + "grad_norm": 0.0926126018166542, + "learning_rate": 9.638523141914721e-05, + "loss": 0.0182, + "step": 16570 + }, + { + "grad_norm": 0.07088207453489304, + "learning_rate": 9.637905623136388e-05, + "loss": 0.0171, + "step": 16580 + }, + { + "grad_norm": 0.059745337814092636, + "learning_rate": 9.637287597163669e-05, + "loss": 0.0143, + "step": 16590 + }, + { + "grad_norm": 0.07741400599479675, + "learning_rate": 9.63666906406415e-05, + "loss": 0.0133, + "step": 16600 + }, + { + "grad_norm": 0.09870545566082001, + "learning_rate": 9.636050023905473e-05, + "loss": 0.0138, + "step": 16610 + }, + { + "grad_norm": 0.10599103569984436, + "learning_rate": 9.635430476755336e-05, + "loss": 0.0153, + "step": 16620 + }, + { + "grad_norm": 0.10407677292823792, + "learning_rate": 9.63481042268149e-05, + "loss": 0.0134, + "step": 16630 + }, + { + "grad_norm": 0.07230226695537567, + "learning_rate": 9.634189861751745e-05, + "loss": 0.0132, + "step": 16640 + }, + { + "grad_norm": 0.09474929422140121, + "learning_rate": 9.633568794033967e-05, + "loss": 0.0149, + "step": 16650 + }, + { + "grad_norm": 0.07154323160648346, + "learning_rate": 9.63294721959607e-05, + "loss": 0.0145, + "step": 16660 + }, + { + "grad_norm": 0.06603164970874786, + "learning_rate": 9.63232513850603e-05, + "loss": 0.0141, + "step": 16670 + }, + { + "grad_norm": 0.09233855456113815, + "learning_rate": 9.631702550831878e-05, + "loss": 0.0151, + "step": 16680 + }, + { + "grad_norm": 0.08157889544963837, + "learning_rate": 9.631079456641698e-05, + "loss": 0.016, + "step": 16690 + }, + { + "grad_norm": 0.08817217499017715, + "learning_rate": 9.630455856003632e-05, + "loss": 0.0147, + "step": 16700 + }, + { + "grad_norm": 0.13340365886688232, + "learning_rate": 9.629831748985876e-05, + "loss": 0.0136, + "step": 16710 + }, + { + "grad_norm": 0.08786139637231827, + "learning_rate": 9.629207135656679e-05, + "loss": 0.0161, + "step": 16720 + }, + { + "grad_norm": 0.08116839826107025, + "learning_rate": 9.628582016084353e-05, + "loss": 0.0168, + "step": 16730 + }, + { + "grad_norm": 0.09536854922771454, + "learning_rate": 9.627956390337254e-05, + "loss": 0.0153, + "step": 16740 + }, + { + "grad_norm": 0.06872587651014328, + "learning_rate": 9.627330258483802e-05, + "loss": 0.0156, + "step": 16750 + }, + { + "grad_norm": 0.06573814898729324, + "learning_rate": 9.62670362059247e-05, + "loss": 0.0149, + "step": 16760 + }, + { + "grad_norm": 0.11442092806100845, + "learning_rate": 9.626076476731786e-05, + "loss": 0.017, + "step": 16770 + }, + { + "grad_norm": 0.09616407006978989, + "learning_rate": 9.625448826970336e-05, + "loss": 0.0149, + "step": 16780 + }, + { + "grad_norm": 0.10353627055883408, + "learning_rate": 9.624820671376755e-05, + "loss": 0.015, + "step": 16790 + }, + { + "grad_norm": 0.11529350280761719, + "learning_rate": 9.62419201001974e-05, + "loss": 0.0152, + "step": 16800 + }, + { + "grad_norm": 0.1026981994509697, + "learning_rate": 9.623562842968037e-05, + "loss": 0.0146, + "step": 16810 + }, + { + "grad_norm": 0.056202925741672516, + "learning_rate": 9.622933170290454e-05, + "loss": 0.0148, + "step": 16820 + }, + { + "grad_norm": 0.1315266489982605, + "learning_rate": 9.622302992055849e-05, + "loss": 0.0161, + "step": 16830 + }, + { + "grad_norm": 0.10440204292535782, + "learning_rate": 9.62167230833314e-05, + "loss": 0.0138, + "step": 16840 + }, + { + "grad_norm": 0.07405200600624084, + "learning_rate": 9.621041119191295e-05, + "loss": 0.0143, + "step": 16850 + }, + { + "grad_norm": 0.07865498214960098, + "learning_rate": 9.620409424699342e-05, + "loss": 0.0151, + "step": 16860 + }, + { + "grad_norm": 0.06433415412902832, + "learning_rate": 9.619777224926359e-05, + "loss": 0.0134, + "step": 16870 + }, + { + "grad_norm": 0.09821578860282898, + "learning_rate": 9.619144519941485e-05, + "loss": 0.0143, + "step": 16880 + }, + { + "grad_norm": 0.055784981697797775, + "learning_rate": 9.618511309813912e-05, + "loss": 0.0122, + "step": 16890 + }, + { + "grad_norm": 0.08560451865196228, + "learning_rate": 9.617877594612886e-05, + "loss": 0.0167, + "step": 16900 + }, + { + "grad_norm": 0.06443429738283157, + "learning_rate": 9.617243374407707e-05, + "loss": 0.014, + "step": 16910 + }, + { + "grad_norm": 0.07342159003019333, + "learning_rate": 9.616608649267736e-05, + "loss": 0.015, + "step": 16920 + }, + { + "grad_norm": 0.08671201765537262, + "learning_rate": 9.615973419262385e-05, + "loss": 0.0146, + "step": 16930 + }, + { + "grad_norm": 0.058947570621967316, + "learning_rate": 9.615337684461119e-05, + "loss": 0.0137, + "step": 16940 + }, + { + "grad_norm": 0.12124082446098328, + "learning_rate": 9.614701444933465e-05, + "loss": 0.0158, + "step": 16950 + }, + { + "grad_norm": 0.103487029671669, + "learning_rate": 9.614064700748997e-05, + "loss": 0.0141, + "step": 16960 + }, + { + "grad_norm": 0.08242224901914597, + "learning_rate": 9.613427451977352e-05, + "loss": 0.0138, + "step": 16970 + }, + { + "grad_norm": 0.09494166076183319, + "learning_rate": 9.612789698688216e-05, + "loss": 0.014, + "step": 16980 + }, + { + "grad_norm": 0.0858401358127594, + "learning_rate": 9.612151440951334e-05, + "loss": 0.015, + "step": 16990 + }, + { + "grad_norm": 0.06852635741233826, + "learning_rate": 9.611512678836506e-05, + "loss": 0.0154, + "step": 17000 + }, + { + "grad_norm": 0.05770751088857651, + "learning_rate": 9.610873412413584e-05, + "loss": 0.0133, + "step": 17010 + }, + { + "grad_norm": 0.07286989688873291, + "learning_rate": 9.610233641752476e-05, + "loss": 0.0121, + "step": 17020 + }, + { + "grad_norm": 0.09897786378860474, + "learning_rate": 9.609593366923151e-05, + "loss": 0.0135, + "step": 17030 + }, + { + "grad_norm": 0.09225397557020187, + "learning_rate": 9.608952587995625e-05, + "loss": 0.0137, + "step": 17040 + }, + { + "grad_norm": 0.08916440606117249, + "learning_rate": 9.608311305039972e-05, + "loss": 0.0166, + "step": 17050 + }, + { + "grad_norm": 0.05813876539468765, + "learning_rate": 9.607669518126326e-05, + "loss": 0.014, + "step": 17060 + }, + { + "grad_norm": 0.08226367086172104, + "learning_rate": 9.607027227324866e-05, + "loss": 0.0138, + "step": 17070 + }, + { + "grad_norm": 0.08185189962387085, + "learning_rate": 9.606384432705837e-05, + "loss": 0.016, + "step": 17080 + }, + { + "grad_norm": 0.09063917398452759, + "learning_rate": 9.60574113433953e-05, + "loss": 0.0146, + "step": 17090 + }, + { + "grad_norm": 0.06948176771402359, + "learning_rate": 9.6050973322963e-05, + "loss": 0.0151, + "step": 17100 + }, + { + "grad_norm": 0.09132290631532669, + "learning_rate": 9.604453026646547e-05, + "loss": 0.0128, + "step": 17110 + }, + { + "grad_norm": 0.08672737330198288, + "learning_rate": 9.603808217460735e-05, + "loss": 0.0149, + "step": 17120 + }, + { + "grad_norm": 0.08769980818033218, + "learning_rate": 9.603162904809377e-05, + "loss": 0.0142, + "step": 17130 + }, + { + "grad_norm": 0.09206046164035797, + "learning_rate": 9.602517088763045e-05, + "loss": 0.015, + "step": 17140 + }, + { + "grad_norm": 0.10058386623859406, + "learning_rate": 9.601870769392365e-05, + "loss": 0.015, + "step": 17150 + }, + { + "grad_norm": 0.07961054891347885, + "learning_rate": 9.601223946768017e-05, + "loss": 0.0129, + "step": 17160 + }, + { + "grad_norm": 0.09147070348262787, + "learning_rate": 9.600576620960734e-05, + "loss": 0.0143, + "step": 17170 + }, + { + "grad_norm": 0.12036542594432831, + "learning_rate": 9.599928792041308e-05, + "loss": 0.0155, + "step": 17180 + }, + { + "grad_norm": 0.12649127840995789, + "learning_rate": 9.599280460080587e-05, + "loss": 0.0152, + "step": 17190 + }, + { + "grad_norm": 0.11146551370620728, + "learning_rate": 9.59863162514947e-05, + "loss": 0.0135, + "step": 17200 + }, + { + "grad_norm": 0.06289748847484589, + "learning_rate": 9.597982287318911e-05, + "loss": 0.0152, + "step": 17210 + }, + { + "grad_norm": 0.13149340450763702, + "learning_rate": 9.597332446659923e-05, + "loss": 0.015, + "step": 17220 + }, + { + "grad_norm": 0.07139015197753906, + "learning_rate": 9.59668210324357e-05, + "loss": 0.0136, + "step": 17230 + }, + { + "grad_norm": 0.07976996153593063, + "learning_rate": 9.596031257140974e-05, + "loss": 0.0137, + "step": 17240 + }, + { + "grad_norm": 0.07831709086894989, + "learning_rate": 9.59537990842331e-05, + "loss": 0.0139, + "step": 17250 + }, + { + "grad_norm": 0.05571134015917778, + "learning_rate": 9.594728057161806e-05, + "loss": 0.0143, + "step": 17260 + }, + { + "grad_norm": 0.07858608663082123, + "learning_rate": 9.594075703427752e-05, + "loss": 0.0141, + "step": 17270 + }, + { + "grad_norm": 0.1109570637345314, + "learning_rate": 9.593422847292486e-05, + "loss": 0.0136, + "step": 17280 + }, + { + "grad_norm": 0.09226933866739273, + "learning_rate": 9.592769488827402e-05, + "loss": 0.016, + "step": 17290 + }, + { + "grad_norm": 0.09829147160053253, + "learning_rate": 9.592115628103952e-05, + "loss": 0.0139, + "step": 17300 + }, + { + "grad_norm": 0.06682165712118149, + "learning_rate": 9.591461265193643e-05, + "loss": 0.0135, + "step": 17310 + }, + { + "grad_norm": 0.08840740472078323, + "learning_rate": 9.590806400168032e-05, + "loss": 0.0134, + "step": 17320 + }, + { + "grad_norm": 0.05979165434837341, + "learning_rate": 9.590151033098735e-05, + "loss": 0.0127, + "step": 17330 + }, + { + "grad_norm": 0.11604401469230652, + "learning_rate": 9.589495164057423e-05, + "loss": 0.0149, + "step": 17340 + }, + { + "grad_norm": 0.0765567347407341, + "learning_rate": 9.58883879311582e-05, + "loss": 0.012, + "step": 17350 + }, + { + "grad_norm": 0.07373659312725067, + "learning_rate": 9.588181920345705e-05, + "loss": 0.0145, + "step": 17360 + }, + { + "grad_norm": 0.0796244665980339, + "learning_rate": 9.587524545818913e-05, + "loss": 0.0133, + "step": 17370 + }, + { + "grad_norm": 0.06170288845896721, + "learning_rate": 9.586866669607335e-05, + "loss": 0.0153, + "step": 17380 + }, + { + "grad_norm": 0.060983654111623764, + "learning_rate": 9.586208291782915e-05, + "loss": 0.0135, + "step": 17390 + }, + { + "grad_norm": 0.06183053180575371, + "learning_rate": 9.58554941241765e-05, + "loss": 0.0149, + "step": 17400 + }, + { + "grad_norm": 0.07890139520168304, + "learning_rate": 9.584890031583596e-05, + "loss": 0.014, + "step": 17410 + }, + { + "grad_norm": 0.11198510229587555, + "learning_rate": 9.584230149352861e-05, + "loss": 0.0142, + "step": 17420 + }, + { + "grad_norm": 0.0936039611697197, + "learning_rate": 9.58356976579761e-05, + "loss": 0.0134, + "step": 17430 + }, + { + "grad_norm": 0.08402911573648453, + "learning_rate": 9.58290888099006e-05, + "loss": 0.0146, + "step": 17440 + }, + { + "grad_norm": 0.07385579496622086, + "learning_rate": 9.582247495002486e-05, + "loss": 0.0149, + "step": 17450 + }, + { + "grad_norm": 0.07259340584278107, + "learning_rate": 9.581585607907214e-05, + "loss": 0.0158, + "step": 17460 + }, + { + "grad_norm": 0.08728817850351334, + "learning_rate": 9.580923219776628e-05, + "loss": 0.0139, + "step": 17470 + }, + { + "grad_norm": 0.11429502815008163, + "learning_rate": 9.580260330683167e-05, + "loss": 0.0168, + "step": 17480 + }, + { + "grad_norm": 0.07179030030965805, + "learning_rate": 9.579596940699322e-05, + "loss": 0.0153, + "step": 17490 + }, + { + "grad_norm": 0.11820162087678909, + "learning_rate": 9.578933049897643e-05, + "loss": 0.015, + "step": 17500 + }, + { + "grad_norm": 0.07894546538591385, + "learning_rate": 9.578268658350728e-05, + "loss": 0.0135, + "step": 17510 + }, + { + "grad_norm": 0.057363107800483704, + "learning_rate": 9.577603766131235e-05, + "loss": 0.0134, + "step": 17520 + }, + { + "grad_norm": 0.09922187030315399, + "learning_rate": 9.576938373311878e-05, + "loss": 0.0143, + "step": 17530 + }, + { + "grad_norm": 0.10140219330787659, + "learning_rate": 9.576272479965421e-05, + "loss": 0.0149, + "step": 17540 + }, + { + "grad_norm": 0.0936661809682846, + "learning_rate": 9.575606086164687e-05, + "loss": 0.0161, + "step": 17550 + }, + { + "grad_norm": 0.08426675200462341, + "learning_rate": 9.57493919198255e-05, + "loss": 0.0138, + "step": 17560 + }, + { + "grad_norm": 0.07028456777334213, + "learning_rate": 9.57427179749194e-05, + "loss": 0.0127, + "step": 17570 + }, + { + "grad_norm": 0.1072024255990982, + "learning_rate": 9.573603902765846e-05, + "loss": 0.0158, + "step": 17580 + }, + { + "grad_norm": 0.10248829424381256, + "learning_rate": 9.572935507877304e-05, + "loss": 0.0143, + "step": 17590 + }, + { + "grad_norm": 0.0803036019206047, + "learning_rate": 9.57226661289941e-05, + "loss": 0.0156, + "step": 17600 + }, + { + "grad_norm": 0.06381505727767944, + "learning_rate": 9.571597217905315e-05, + "loss": 0.0134, + "step": 17610 + }, + { + "grad_norm": 0.11357130855321884, + "learning_rate": 9.57092732296822e-05, + "loss": 0.0154, + "step": 17620 + }, + { + "grad_norm": 0.08334162831306458, + "learning_rate": 9.570256928161385e-05, + "loss": 0.0132, + "step": 17630 + }, + { + "grad_norm": 0.06991543620824814, + "learning_rate": 9.569586033558126e-05, + "loss": 0.0131, + "step": 17640 + }, + { + "grad_norm": 0.11800023168325424, + "learning_rate": 9.568914639231807e-05, + "loss": 0.0118, + "step": 17650 + }, + { + "grad_norm": 0.06595084816217422, + "learning_rate": 9.568242745255852e-05, + "loss": 0.0143, + "step": 17660 + }, + { + "grad_norm": 0.07482575625181198, + "learning_rate": 9.567570351703739e-05, + "loss": 0.0141, + "step": 17670 + }, + { + "grad_norm": 0.10461737215518951, + "learning_rate": 9.566897458649001e-05, + "loss": 0.0137, + "step": 17680 + }, + { + "grad_norm": 0.07876875251531601, + "learning_rate": 9.566224066165221e-05, + "loss": 0.0142, + "step": 17690 + }, + { + "grad_norm": 0.0840437114238739, + "learning_rate": 9.565550174326043e-05, + "loss": 0.0134, + "step": 17700 + }, + { + "grad_norm": 0.09259117394685745, + "learning_rate": 9.564875783205162e-05, + "loss": 0.0156, + "step": 17710 + }, + { + "grad_norm": 0.09890114516019821, + "learning_rate": 9.564200892876328e-05, + "loss": 0.0147, + "step": 17720 + }, + { + "grad_norm": 0.0745483934879303, + "learning_rate": 9.563525503413348e-05, + "loss": 0.0165, + "step": 17730 + }, + { + "grad_norm": 0.08738379925489426, + "learning_rate": 9.562849614890079e-05, + "loss": 0.0135, + "step": 17740 + }, + { + "grad_norm": 0.07433148473501205, + "learning_rate": 9.562173227380436e-05, + "loss": 0.0146, + "step": 17750 + }, + { + "grad_norm": 0.08379042893648148, + "learning_rate": 9.561496340958389e-05, + "loss": 0.0139, + "step": 17760 + }, + { + "grad_norm": 0.11231797188520432, + "learning_rate": 9.560818955697959e-05, + "loss": 0.0163, + "step": 17770 + }, + { + "grad_norm": 0.04844020679593086, + "learning_rate": 9.560141071673228e-05, + "loss": 0.0125, + "step": 17780 + }, + { + "grad_norm": 0.06508524715900421, + "learning_rate": 9.559462688958323e-05, + "loss": 0.0141, + "step": 17790 + }, + { + "grad_norm": 0.08349772542715073, + "learning_rate": 9.558783807627434e-05, + "loss": 0.0136, + "step": 17800 + }, + { + "grad_norm": 0.07922055572271347, + "learning_rate": 9.558104427754801e-05, + "loss": 0.0122, + "step": 17810 + }, + { + "grad_norm": 0.08691585808992386, + "learning_rate": 9.557424549414722e-05, + "loss": 0.0143, + "step": 17820 + }, + { + "grad_norm": 0.08551903069019318, + "learning_rate": 9.556744172681546e-05, + "loss": 0.0132, + "step": 17830 + }, + { + "grad_norm": 0.058136481791734695, + "learning_rate": 9.556063297629677e-05, + "loss": 0.016, + "step": 17840 + }, + { + "grad_norm": 0.06415168195962906, + "learning_rate": 9.555381924333578e-05, + "loss": 0.0142, + "step": 17850 + }, + { + "grad_norm": 0.09098028391599655, + "learning_rate": 9.554700052867758e-05, + "loss": 0.0159, + "step": 17860 + }, + { + "grad_norm": 0.1098448783159256, + "learning_rate": 9.554017683306789e-05, + "loss": 0.0147, + "step": 17870 + }, + { + "grad_norm": 0.1209409162402153, + "learning_rate": 9.553334815725294e-05, + "loss": 0.0157, + "step": 17880 + }, + { + "grad_norm": 0.056746382266283035, + "learning_rate": 9.552651450197949e-05, + "loss": 0.0129, + "step": 17890 + }, + { + "grad_norm": 0.0783831924200058, + "learning_rate": 9.551967586799486e-05, + "loss": 0.0141, + "step": 17900 + }, + { + "grad_norm": 0.08266005665063858, + "learning_rate": 9.551283225604692e-05, + "loss": 0.0144, + "step": 17910 + }, + { + "grad_norm": 0.07726728916168213, + "learning_rate": 9.550598366688406e-05, + "loss": 0.0154, + "step": 17920 + }, + { + "grad_norm": 0.11709296703338623, + "learning_rate": 9.549913010125526e-05, + "loss": 0.0171, + "step": 17930 + }, + { + "grad_norm": 0.12902632355690002, + "learning_rate": 9.549227155990999e-05, + "loss": 0.0144, + "step": 17940 + }, + { + "grad_norm": 0.10918963700532913, + "learning_rate": 9.548540804359828e-05, + "loss": 0.0159, + "step": 17950 + }, + { + "grad_norm": 0.11854313313961029, + "learning_rate": 9.547853955307077e-05, + "loss": 0.0164, + "step": 17960 + }, + { + "grad_norm": 0.10053683072328568, + "learning_rate": 9.547166608907853e-05, + "loss": 0.0162, + "step": 17970 + }, + { + "grad_norm": 0.0883626788854599, + "learning_rate": 9.546478765237326e-05, + "loss": 0.0126, + "step": 17980 + }, + { + "grad_norm": 0.07817288488149643, + "learning_rate": 9.545790424370715e-05, + "loss": 0.0118, + "step": 17990 + }, + { + "grad_norm": 0.10344041883945465, + "learning_rate": 9.5451015863833e-05, + "loss": 0.014, + "step": 18000 + }, + { + "grad_norm": 0.09559693932533264, + "learning_rate": 9.544412251350408e-05, + "loss": 0.0157, + "step": 18010 + }, + { + "grad_norm": 0.06738005578517914, + "learning_rate": 9.543722419347422e-05, + "loss": 0.014, + "step": 18020 + }, + { + "grad_norm": 0.09466677904129028, + "learning_rate": 9.543032090449788e-05, + "loss": 0.0143, + "step": 18030 + }, + { + "grad_norm": 0.07298137247562408, + "learning_rate": 9.542341264732992e-05, + "loss": 0.0153, + "step": 18040 + }, + { + "grad_norm": 0.07173354178667068, + "learning_rate": 9.541649942272585e-05, + "loss": 0.0146, + "step": 18050 + }, + { + "grad_norm": 0.07637643814086914, + "learning_rate": 9.54095812314417e-05, + "loss": 0.0139, + "step": 18060 + }, + { + "grad_norm": 0.08192616701126099, + "learning_rate": 9.540265807423401e-05, + "loss": 0.0144, + "step": 18070 + }, + { + "grad_norm": 0.06720715016126633, + "learning_rate": 9.53957299518599e-05, + "loss": 0.0125, + "step": 18080 + }, + { + "grad_norm": 0.06264913827180862, + "learning_rate": 9.5388796865077e-05, + "loss": 0.0178, + "step": 18090 + }, + { + "grad_norm": 0.09007018059492111, + "learning_rate": 9.538185881464353e-05, + "loss": 0.0147, + "step": 18100 + }, + { + "grad_norm": 0.06374156475067139, + "learning_rate": 9.537491580131821e-05, + "loss": 0.0125, + "step": 18110 + }, + { + "grad_norm": 0.11187387257814407, + "learning_rate": 9.53679678258603e-05, + "loss": 0.0158, + "step": 18120 + }, + { + "grad_norm": 0.10211978107690811, + "learning_rate": 9.536101488902966e-05, + "loss": 0.0166, + "step": 18130 + }, + { + "grad_norm": 0.08266670256853104, + "learning_rate": 9.535405699158663e-05, + "loss": 0.0131, + "step": 18140 + }, + { + "grad_norm": 0.10628144443035126, + "learning_rate": 9.53470941342921e-05, + "loss": 0.0168, + "step": 18150 + }, + { + "grad_norm": 0.08147669583559036, + "learning_rate": 9.534012631790756e-05, + "loss": 0.0147, + "step": 18160 + }, + { + "grad_norm": 0.06648498773574829, + "learning_rate": 9.533315354319494e-05, + "loss": 0.0171, + "step": 18170 + }, + { + "grad_norm": 0.08529240638017654, + "learning_rate": 9.532617581091682e-05, + "loss": 0.0137, + "step": 18180 + }, + { + "grad_norm": 0.10848747193813324, + "learning_rate": 9.531919312183629e-05, + "loss": 0.0145, + "step": 18190 + }, + { + "grad_norm": 0.08168429136276245, + "learning_rate": 9.531220547671688e-05, + "loss": 0.0142, + "step": 18200 + }, + { + "grad_norm": 0.09681801497936249, + "learning_rate": 9.530521287632285e-05, + "loss": 0.0143, + "step": 18210 + }, + { + "grad_norm": 0.07834851741790771, + "learning_rate": 9.529821532141884e-05, + "loss": 0.0156, + "step": 18220 + }, + { + "grad_norm": 0.08704425394535065, + "learning_rate": 9.52912128127701e-05, + "loss": 0.0129, + "step": 18230 + }, + { + "grad_norm": 0.05975700914859772, + "learning_rate": 9.528420535114244e-05, + "loss": 0.0128, + "step": 18240 + }, + { + "grad_norm": 0.08338546752929688, + "learning_rate": 9.527719293730215e-05, + "loss": 0.0141, + "step": 18250 + }, + { + "grad_norm": 0.0703260600566864, + "learning_rate": 9.527017557201611e-05, + "loss": 0.0098, + "step": 18260 + }, + { + "grad_norm": 0.06943580508232117, + "learning_rate": 9.526315325605176e-05, + "loss": 0.0127, + "step": 18270 + }, + { + "grad_norm": 0.10754788666963577, + "learning_rate": 9.525612599017699e-05, + "loss": 0.0124, + "step": 18280 + }, + { + "grad_norm": 0.09164774417877197, + "learning_rate": 9.524909377516033e-05, + "loss": 0.0134, + "step": 18290 + }, + { + "grad_norm": 0.09173892438411713, + "learning_rate": 9.524205661177081e-05, + "loss": 0.0149, + "step": 18300 + }, + { + "grad_norm": 0.08338182419538498, + "learning_rate": 9.523501450077801e-05, + "loss": 0.0137, + "step": 18310 + }, + { + "grad_norm": 0.0748651772737503, + "learning_rate": 9.522796744295202e-05, + "loss": 0.0137, + "step": 18320 + }, + { + "grad_norm": 0.07363210618495941, + "learning_rate": 9.522091543906352e-05, + "loss": 0.0137, + "step": 18330 + }, + { + "grad_norm": 0.1012755036354065, + "learning_rate": 9.521385848988369e-05, + "loss": 0.0159, + "step": 18340 + }, + { + "grad_norm": 0.0875842422246933, + "learning_rate": 9.520679659618428e-05, + "loss": 0.0169, + "step": 18350 + }, + { + "grad_norm": 0.09187383949756622, + "learning_rate": 9.519972975873754e-05, + "loss": 0.0122, + "step": 18360 + }, + { + "grad_norm": 0.08638172596693039, + "learning_rate": 9.519265797831633e-05, + "loss": 0.0135, + "step": 18370 + }, + { + "grad_norm": 0.07230407744646072, + "learning_rate": 9.518558125569399e-05, + "loss": 0.014, + "step": 18380 + }, + { + "grad_norm": 0.09850700199604034, + "learning_rate": 9.517849959164442e-05, + "loss": 0.0143, + "step": 18390 + }, + { + "grad_norm": 0.05845101550221443, + "learning_rate": 9.517141298694205e-05, + "loss": 0.0148, + "step": 18400 + }, + { + "grad_norm": 0.06605890393257141, + "learning_rate": 9.516432144236188e-05, + "loss": 0.0146, + "step": 18410 + }, + { + "grad_norm": 0.06766490638256073, + "learning_rate": 9.515722495867941e-05, + "loss": 0.0134, + "step": 18420 + }, + { + "grad_norm": 0.07237444818019867, + "learning_rate": 9.515012353667072e-05, + "loss": 0.0146, + "step": 18430 + }, + { + "grad_norm": 0.08279552310705185, + "learning_rate": 9.51430171771124e-05, + "loss": 0.0156, + "step": 18440 + }, + { + "grad_norm": 0.0818285122513771, + "learning_rate": 9.513590588078159e-05, + "loss": 0.0124, + "step": 18450 + }, + { + "grad_norm": 0.09229769557714462, + "learning_rate": 9.512878964845597e-05, + "loss": 0.0138, + "step": 18460 + }, + { + "grad_norm": 0.06954493373632431, + "learning_rate": 9.512166848091377e-05, + "loss": 0.0145, + "step": 18470 + }, + { + "grad_norm": 0.08836394548416138, + "learning_rate": 9.511454237893376e-05, + "loss": 0.0154, + "step": 18480 + }, + { + "grad_norm": 0.09192389249801636, + "learning_rate": 9.51074113432952e-05, + "loss": 0.0153, + "step": 18490 + }, + { + "grad_norm": 0.10274040699005127, + "learning_rate": 9.510027537477797e-05, + "loss": 0.0154, + "step": 18500 + }, + { + "grad_norm": 0.09377382695674896, + "learning_rate": 9.509313447416242e-05, + "loss": 0.0123, + "step": 18510 + }, + { + "grad_norm": 0.06850511580705643, + "learning_rate": 9.508598864222949e-05, + "loss": 0.0162, + "step": 18520 + }, + { + "grad_norm": 0.05019276216626167, + "learning_rate": 9.507883787976062e-05, + "loss": 0.0152, + "step": 18530 + }, + { + "grad_norm": 0.09652246534824371, + "learning_rate": 9.507168218753781e-05, + "loss": 0.0134, + "step": 18540 + }, + { + "grad_norm": 0.08060775697231293, + "learning_rate": 9.506452156634362e-05, + "loss": 0.0134, + "step": 18550 + }, + { + "grad_norm": 0.0784023329615593, + "learning_rate": 9.505735601696109e-05, + "loss": 0.0146, + "step": 18560 + }, + { + "grad_norm": 0.08776912838220596, + "learning_rate": 9.505018554017385e-05, + "loss": 0.0123, + "step": 18570 + }, + { + "grad_norm": 0.0687154158949852, + "learning_rate": 9.504301013676604e-05, + "loss": 0.0131, + "step": 18580 + }, + { + "grad_norm": 0.08245193213224411, + "learning_rate": 9.503582980752238e-05, + "loss": 0.0134, + "step": 18590 + }, + { + "grad_norm": 0.07100142538547516, + "learning_rate": 9.502864455322809e-05, + "loss": 0.0143, + "step": 18600 + }, + { + "grad_norm": 0.05608610436320305, + "learning_rate": 9.502145437466891e-05, + "loss": 0.0145, + "step": 18610 + }, + { + "grad_norm": 0.11763744056224823, + "learning_rate": 9.501425927263116e-05, + "loss": 0.0148, + "step": 18620 + }, + { + "grad_norm": 0.09918577969074249, + "learning_rate": 9.500705924790172e-05, + "loss": 0.0146, + "step": 18630 + }, + { + "grad_norm": 0.054086603224277496, + "learning_rate": 9.499985430126794e-05, + "loss": 0.0129, + "step": 18640 + }, + { + "grad_norm": 0.062239523977041245, + "learning_rate": 9.499264443351775e-05, + "loss": 0.0144, + "step": 18650 + }, + { + "grad_norm": 0.06911284476518631, + "learning_rate": 9.498542964543961e-05, + "loss": 0.0132, + "step": 18660 + }, + { + "grad_norm": 0.0946943610906601, + "learning_rate": 9.497820993782252e-05, + "loss": 0.0138, + "step": 18670 + }, + { + "grad_norm": 0.05856192857027054, + "learning_rate": 9.497098531145601e-05, + "loss": 0.0138, + "step": 18680 + }, + { + "grad_norm": 0.11220404505729675, + "learning_rate": 9.496375576713017e-05, + "loss": 0.0146, + "step": 18690 + }, + { + "grad_norm": 0.09795906394720078, + "learning_rate": 9.49565213056356e-05, + "loss": 0.0157, + "step": 18700 + }, + { + "grad_norm": 0.08247089385986328, + "learning_rate": 9.494928192776342e-05, + "loss": 0.014, + "step": 18710 + }, + { + "grad_norm": 0.08385693281888962, + "learning_rate": 9.494203763430538e-05, + "loss": 0.0126, + "step": 18720 + }, + { + "grad_norm": 0.08289770036935806, + "learning_rate": 9.493478842605366e-05, + "loss": 0.0134, + "step": 18730 + }, + { + "grad_norm": 0.06921159476041794, + "learning_rate": 9.492753430380105e-05, + "loss": 0.0172, + "step": 18740 + }, + { + "grad_norm": 0.06056634709239006, + "learning_rate": 9.492027526834083e-05, + "loss": 0.0129, + "step": 18750 + }, + { + "grad_norm": 0.08746670186519623, + "learning_rate": 9.491301132046684e-05, + "loss": 0.0128, + "step": 18760 + }, + { + "grad_norm": 0.07960443943738937, + "learning_rate": 9.490574246097345e-05, + "loss": 0.0146, + "step": 18770 + }, + { + "grad_norm": 0.07863933593034744, + "learning_rate": 9.48984686906556e-05, + "loss": 0.0162, + "step": 18780 + }, + { + "grad_norm": 0.08855835348367691, + "learning_rate": 9.489119001030871e-05, + "loss": 0.0124, + "step": 18790 + }, + { + "grad_norm": 0.058552440255880356, + "learning_rate": 9.488390642072878e-05, + "loss": 0.0137, + "step": 18800 + }, + { + "grad_norm": 0.11179336905479431, + "learning_rate": 9.48766179227123e-05, + "loss": 0.0148, + "step": 18810 + }, + { + "grad_norm": 0.10709404200315475, + "learning_rate": 9.486932451705636e-05, + "loss": 0.0158, + "step": 18820 + }, + { + "grad_norm": 0.14052796363830566, + "learning_rate": 9.486202620455857e-05, + "loss": 0.0141, + "step": 18830 + }, + { + "grad_norm": 0.07007721066474915, + "learning_rate": 9.485472298601704e-05, + "loss": 0.0165, + "step": 18840 + }, + { + "grad_norm": 0.09688038378953934, + "learning_rate": 9.484741486223043e-05, + "loss": 0.0172, + "step": 18850 + }, + { + "grad_norm": 0.07264384627342224, + "learning_rate": 9.484010183399797e-05, + "loss": 0.0132, + "step": 18860 + }, + { + "grad_norm": 0.08929206430912018, + "learning_rate": 9.483278390211938e-05, + "loss": 0.0146, + "step": 18870 + }, + { + "grad_norm": 0.0987863838672638, + "learning_rate": 9.482546106739496e-05, + "loss": 0.0143, + "step": 18880 + }, + { + "grad_norm": 0.09424898028373718, + "learning_rate": 9.48181333306255e-05, + "loss": 0.0141, + "step": 18890 + }, + { + "grad_norm": 0.07332047820091248, + "learning_rate": 9.481080069261237e-05, + "loss": 0.0139, + "step": 18900 + }, + { + "grad_norm": 0.08100694417953491, + "learning_rate": 9.480346315415745e-05, + "loss": 0.0133, + "step": 18910 + }, + { + "grad_norm": 0.10714302957057953, + "learning_rate": 9.479612071606314e-05, + "loss": 0.0136, + "step": 18920 + }, + { + "grad_norm": 0.08478699624538422, + "learning_rate": 9.478877337913244e-05, + "loss": 0.0184, + "step": 18930 + }, + { + "grad_norm": 0.09989957511425018, + "learning_rate": 9.478142114416881e-05, + "loss": 0.0121, + "step": 18940 + }, + { + "grad_norm": 0.10024051368236542, + "learning_rate": 9.47740640119763e-05, + "loss": 0.0126, + "step": 18950 + }, + { + "grad_norm": 0.06904249638319016, + "learning_rate": 9.476670198335947e-05, + "loss": 0.0137, + "step": 18960 + }, + { + "grad_norm": 0.07664097845554352, + "learning_rate": 9.47593350591234e-05, + "loss": 0.0145, + "step": 18970 + }, + { + "grad_norm": 0.07216230779886246, + "learning_rate": 9.475196324007376e-05, + "loss": 0.0119, + "step": 18980 + }, + { + "grad_norm": 0.0867597684264183, + "learning_rate": 9.474458652701669e-05, + "loss": 0.016, + "step": 18990 + }, + { + "grad_norm": 0.07769118249416351, + "learning_rate": 9.473720492075892e-05, + "loss": 0.0147, + "step": 19000 + }, + { + "grad_norm": 0.06542930752038956, + "learning_rate": 9.472981842210768e-05, + "loss": 0.0141, + "step": 19010 + }, + { + "grad_norm": 0.076763816177845, + "learning_rate": 9.472242703187074e-05, + "loss": 0.0136, + "step": 19020 + }, + { + "grad_norm": 0.07648476958274841, + "learning_rate": 9.471503075085643e-05, + "loss": 0.0122, + "step": 19030 + }, + { + "grad_norm": 0.1096569374203682, + "learning_rate": 9.470762957987359e-05, + "loss": 0.0135, + "step": 19040 + }, + { + "grad_norm": 0.099770188331604, + "learning_rate": 9.470022351973158e-05, + "loss": 0.0142, + "step": 19050 + }, + { + "grad_norm": 0.09721003472805023, + "learning_rate": 9.469281257124034e-05, + "loss": 0.0141, + "step": 19060 + }, + { + "grad_norm": 0.0920524075627327, + "learning_rate": 9.46853967352103e-05, + "loss": 0.0136, + "step": 19070 + }, + { + "grad_norm": 0.09366965293884277, + "learning_rate": 9.467797601245246e-05, + "loss": 0.0148, + "step": 19080 + }, + { + "grad_norm": 0.0897781178355217, + "learning_rate": 9.467055040377834e-05, + "loss": 0.0139, + "step": 19090 + }, + { + "grad_norm": 0.07821401208639145, + "learning_rate": 9.466311990999999e-05, + "loss": 0.0124, + "step": 19100 + }, + { + "grad_norm": 0.08268683403730392, + "learning_rate": 9.465568453193e-05, + "loss": 0.0125, + "step": 19110 + }, + { + "grad_norm": 0.07603219896554947, + "learning_rate": 9.464824427038148e-05, + "loss": 0.0134, + "step": 19120 + }, + { + "grad_norm": 0.07840317487716675, + "learning_rate": 9.46407991261681e-05, + "loss": 0.0125, + "step": 19130 + }, + { + "grad_norm": 0.06912463158369064, + "learning_rate": 9.463334910010404e-05, + "loss": 0.0119, + "step": 19140 + }, + { + "grad_norm": 0.06614715605974197, + "learning_rate": 9.462589419300403e-05, + "loss": 0.0135, + "step": 19150 + }, + { + "grad_norm": 0.0804128423333168, + "learning_rate": 9.461843440568333e-05, + "loss": 0.0133, + "step": 19160 + }, + { + "grad_norm": 0.1121562048792839, + "learning_rate": 9.461096973895773e-05, + "loss": 0.014, + "step": 19170 + }, + { + "grad_norm": 0.1213621273636818, + "learning_rate": 9.460350019364355e-05, + "loss": 0.0156, + "step": 19180 + }, + { + "grad_norm": 0.06590765714645386, + "learning_rate": 9.459602577055764e-05, + "loss": 0.0138, + "step": 19190 + }, + { + "grad_norm": 0.08071877062320709, + "learning_rate": 9.45885464705174e-05, + "loss": 0.0124, + "step": 19200 + }, + { + "grad_norm": 0.0863441675901413, + "learning_rate": 9.458106229434076e-05, + "loss": 0.0147, + "step": 19210 + }, + { + "grad_norm": 0.10960695147514343, + "learning_rate": 9.457357324284617e-05, + "loss": 0.0121, + "step": 19220 + }, + { + "grad_norm": 0.10359258949756622, + "learning_rate": 9.456607931685262e-05, + "loss": 0.0118, + "step": 19230 + }, + { + "grad_norm": 0.11748063564300537, + "learning_rate": 9.455858051717965e-05, + "loss": 0.0128, + "step": 19240 + }, + { + "grad_norm": 0.11046987026929855, + "learning_rate": 9.45510768446473e-05, + "loss": 0.0148, + "step": 19250 + }, + { + "grad_norm": 0.06776459515094757, + "learning_rate": 9.454356830007618e-05, + "loss": 0.0121, + "step": 19260 + }, + { + "grad_norm": 0.07218404859304428, + "learning_rate": 9.45360548842874e-05, + "loss": 0.0125, + "step": 19270 + }, + { + "grad_norm": 0.09077366441488266, + "learning_rate": 9.452853659810261e-05, + "loss": 0.0144, + "step": 19280 + }, + { + "grad_norm": 0.08097659051418304, + "learning_rate": 9.452101344234401e-05, + "loss": 0.0133, + "step": 19290 + }, + { + "grad_norm": 0.07660925388336182, + "learning_rate": 9.451348541783431e-05, + "loss": 0.0116, + "step": 19300 + }, + { + "grad_norm": 0.09826795011758804, + "learning_rate": 9.450595252539678e-05, + "loss": 0.0137, + "step": 19310 + }, + { + "grad_norm": 0.07150454074144363, + "learning_rate": 9.449841476585518e-05, + "loss": 0.0119, + "step": 19320 + }, + { + "grad_norm": 0.059455279260873795, + "learning_rate": 9.449087214003384e-05, + "loss": 0.0141, + "step": 19330 + }, + { + "grad_norm": 0.08595329523086548, + "learning_rate": 9.448332464875765e-05, + "loss": 0.0138, + "step": 19340 + }, + { + "grad_norm": 0.07092422991991043, + "learning_rate": 9.447577229285192e-05, + "loss": 0.0129, + "step": 19350 + }, + { + "grad_norm": 0.0905522033572197, + "learning_rate": 9.446821507314261e-05, + "loss": 0.0151, + "step": 19360 + }, + { + "grad_norm": 0.07671350240707397, + "learning_rate": 9.446065299045617e-05, + "loss": 0.0131, + "step": 19370 + }, + { + "grad_norm": 0.06867796927690506, + "learning_rate": 9.445308604561955e-05, + "loss": 0.0144, + "step": 19380 + }, + { + "grad_norm": 0.07932105660438538, + "learning_rate": 9.444551423946028e-05, + "loss": 0.0134, + "step": 19390 + }, + { + "grad_norm": 0.08485456556081772, + "learning_rate": 9.443793757280638e-05, + "loss": 0.0137, + "step": 19400 + }, + { + "grad_norm": 0.07384902983903885, + "learning_rate": 9.443035604648646e-05, + "loss": 0.0134, + "step": 19410 + }, + { + "grad_norm": 0.08990788459777832, + "learning_rate": 9.44227696613296e-05, + "loss": 0.0146, + "step": 19420 + }, + { + "grad_norm": 0.1072549894452095, + "learning_rate": 9.441517841816542e-05, + "loss": 0.0138, + "step": 19430 + }, + { + "grad_norm": 0.07334071397781372, + "learning_rate": 9.440758231782413e-05, + "loss": 0.0168, + "step": 19440 + }, + { + "grad_norm": 0.10824315249919891, + "learning_rate": 9.439998136113639e-05, + "loss": 0.0159, + "step": 19450 + }, + { + "grad_norm": 0.09810400754213333, + "learning_rate": 9.439237554893344e-05, + "loss": 0.014, + "step": 19460 + }, + { + "grad_norm": 0.08003492653369904, + "learning_rate": 9.438476488204705e-05, + "loss": 0.0145, + "step": 19470 + }, + { + "grad_norm": 0.1028994619846344, + "learning_rate": 9.43771493613095e-05, + "loss": 0.016, + "step": 19480 + }, + { + "grad_norm": 0.12324433773756027, + "learning_rate": 9.436952898755362e-05, + "loss": 0.0146, + "step": 19490 + }, + { + "grad_norm": 0.0743984505534172, + "learning_rate": 9.436190376161276e-05, + "loss": 0.0123, + "step": 19500 + }, + { + "grad_norm": 0.06576721370220184, + "learning_rate": 9.43542736843208e-05, + "loss": 0.0119, + "step": 19510 + }, + { + "grad_norm": 0.08010228723287582, + "learning_rate": 9.434663875651216e-05, + "loss": 0.0155, + "step": 19520 + }, + { + "grad_norm": 0.0735834389925003, + "learning_rate": 9.433899897902177e-05, + "loss": 0.0157, + "step": 19530 + }, + { + "grad_norm": 0.0791843980550766, + "learning_rate": 9.433135435268511e-05, + "loss": 0.0128, + "step": 19540 + }, + { + "grad_norm": 0.08810634166002274, + "learning_rate": 9.432370487833819e-05, + "loss": 0.0156, + "step": 19550 + }, + { + "grad_norm": 0.07548737525939941, + "learning_rate": 9.431605055681756e-05, + "loss": 0.0182, + "step": 19560 + }, + { + "grad_norm": 0.09143898636102676, + "learning_rate": 9.430839138896026e-05, + "loss": 0.0115, + "step": 19570 + }, + { + "grad_norm": 0.06965810060501099, + "learning_rate": 9.43007273756039e-05, + "loss": 0.0137, + "step": 19580 + }, + { + "grad_norm": 0.05784895643591881, + "learning_rate": 9.429305851758658e-05, + "loss": 0.0116, + "step": 19590 + }, + { + "grad_norm": 0.05656879395246506, + "learning_rate": 9.428538481574699e-05, + "loss": 0.0135, + "step": 19600 + }, + { + "grad_norm": 0.09148076176643372, + "learning_rate": 9.42777062709243e-05, + "loss": 0.0137, + "step": 19610 + }, + { + "grad_norm": 0.06974953413009644, + "learning_rate": 9.427002288395821e-05, + "loss": 0.0128, + "step": 19620 + }, + { + "grad_norm": 0.09356562048196793, + "learning_rate": 9.426233465568898e-05, + "loss": 0.0111, + "step": 19630 + }, + { + "grad_norm": 0.08627453446388245, + "learning_rate": 9.42546415869574e-05, + "loss": 0.0126, + "step": 19640 + }, + { + "grad_norm": 0.08811312168836594, + "learning_rate": 9.424694367860473e-05, + "loss": 0.013, + "step": 19650 + }, + { + "grad_norm": 0.08358759433031082, + "learning_rate": 9.423924093147284e-05, + "loss": 0.0137, + "step": 19660 + }, + { + "grad_norm": 0.0640799030661583, + "learning_rate": 9.423153334640407e-05, + "loss": 0.0131, + "step": 19670 + }, + { + "grad_norm": 0.07135758548974991, + "learning_rate": 9.42238209242413e-05, + "loss": 0.0126, + "step": 19680 + }, + { + "grad_norm": 0.06959839165210724, + "learning_rate": 9.421610366582798e-05, + "loss": 0.0125, + "step": 19690 + }, + { + "grad_norm": 0.10442892462015152, + "learning_rate": 9.420838157200803e-05, + "loss": 0.0154, + "step": 19700 + }, + { + "grad_norm": 0.08280587941408157, + "learning_rate": 9.420065464362594e-05, + "loss": 0.0121, + "step": 19710 + }, + { + "grad_norm": 0.08015097677707672, + "learning_rate": 9.419292288152673e-05, + "loss": 0.0123, + "step": 19720 + }, + { + "grad_norm": 0.06107442080974579, + "learning_rate": 9.418518628655588e-05, + "loss": 0.0151, + "step": 19730 + }, + { + "grad_norm": 0.09253711998462677, + "learning_rate": 9.417744485955951e-05, + "loss": 0.011, + "step": 19740 + }, + { + "grad_norm": 0.06730136275291443, + "learning_rate": 9.41696986013842e-05, + "loss": 0.0115, + "step": 19750 + }, + { + "grad_norm": 0.08267882466316223, + "learning_rate": 9.416194751287705e-05, + "loss": 0.0126, + "step": 19760 + }, + { + "grad_norm": 0.06379757821559906, + "learning_rate": 9.415419159488572e-05, + "loss": 0.0151, + "step": 19770 + }, + { + "grad_norm": 0.07248469442129135, + "learning_rate": 9.414643084825837e-05, + "loss": 0.0106, + "step": 19780 + }, + { + "grad_norm": 0.0742906928062439, + "learning_rate": 9.413866527384372e-05, + "loss": 0.0133, + "step": 19790 + }, + { + "grad_norm": 0.07378097623586655, + "learning_rate": 9.4130894872491e-05, + "loss": 0.0129, + "step": 19800 + }, + { + "grad_norm": 0.08199550956487656, + "learning_rate": 9.412311964504998e-05, + "loss": 0.0124, + "step": 19810 + }, + { + "grad_norm": 0.09566925466060638, + "learning_rate": 9.411533959237091e-05, + "loss": 0.0146, + "step": 19820 + }, + { + "grad_norm": 0.05821158364415169, + "learning_rate": 9.410755471530464e-05, + "loss": 0.0137, + "step": 19830 + }, + { + "grad_norm": 0.07587173581123352, + "learning_rate": 9.40997650147025e-05, + "loss": 0.0132, + "step": 19840 + }, + { + "grad_norm": 0.10128951072692871, + "learning_rate": 9.409197049141637e-05, + "loss": 0.0143, + "step": 19850 + }, + { + "grad_norm": 0.08851223438978195, + "learning_rate": 9.408417114629863e-05, + "loss": 0.0142, + "step": 19860 + }, + { + "grad_norm": 0.09777208417654037, + "learning_rate": 9.40763669802022e-05, + "loss": 0.0128, + "step": 19870 + }, + { + "grad_norm": 0.09111947566270828, + "learning_rate": 9.406855799398056e-05, + "loss": 0.0127, + "step": 19880 + }, + { + "grad_norm": 0.051958080381155014, + "learning_rate": 9.406074418848767e-05, + "loss": 0.0169, + "step": 19890 + }, + { + "grad_norm": 0.0567096583545208, + "learning_rate": 9.405292556457805e-05, + "loss": 0.0132, + "step": 19900 + }, + { + "grad_norm": 0.11889825761318207, + "learning_rate": 9.404510212310671e-05, + "loss": 0.0158, + "step": 19910 + }, + { + "grad_norm": 0.09546094387769699, + "learning_rate": 9.403727386492924e-05, + "loss": 0.0125, + "step": 19920 + }, + { + "grad_norm": 0.09540724009275436, + "learning_rate": 9.40294407909017e-05, + "loss": 0.0164, + "step": 19930 + }, + { + "grad_norm": 0.10127728432416916, + "learning_rate": 9.40216029018807e-05, + "loss": 0.014, + "step": 19940 + }, + { + "grad_norm": 0.09209833294153214, + "learning_rate": 9.401376019872338e-05, + "loss": 0.0134, + "step": 19950 + }, + { + "grad_norm": 0.09694864600896835, + "learning_rate": 9.400591268228746e-05, + "loss": 0.0146, + "step": 19960 + }, + { + "grad_norm": 0.07331608235836029, + "learning_rate": 9.399806035343106e-05, + "loss": 0.016, + "step": 19970 + }, + { + "grad_norm": 0.06529773026704788, + "learning_rate": 9.399020321301294e-05, + "loss": 0.0131, + "step": 19980 + }, + { + "grad_norm": 0.06701574474573135, + "learning_rate": 9.398234126189234e-05, + "loss": 0.0134, + "step": 19990 + }, + { + "grad_norm": 0.06096507981419563, + "learning_rate": 9.397447450092902e-05, + "loss": 0.0144, + "step": 20000 + }, + { + "grad_norm": 0.08914698660373688, + "learning_rate": 9.39666029309833e-05, + "loss": 0.0123, + "step": 20010 + }, + { + "grad_norm": 0.05012337490916252, + "learning_rate": 9.395872655291596e-05, + "loss": 0.0127, + "step": 20020 + }, + { + "grad_norm": 0.07258706539869308, + "learning_rate": 9.395084536758838e-05, + "loss": 0.0153, + "step": 20030 + }, + { + "grad_norm": 0.07409591972827911, + "learning_rate": 9.394295937586243e-05, + "loss": 0.0124, + "step": 20040 + }, + { + "grad_norm": 0.07680035382509232, + "learning_rate": 9.393506857860052e-05, + "loss": 0.0138, + "step": 20050 + }, + { + "grad_norm": 0.07891662418842316, + "learning_rate": 9.392717297666555e-05, + "loss": 0.0143, + "step": 20060 + }, + { + "grad_norm": 0.06096508353948593, + "learning_rate": 9.391927257092101e-05, + "loss": 0.0133, + "step": 20070 + }, + { + "grad_norm": 0.0617886520922184, + "learning_rate": 9.391136736223085e-05, + "loss": 0.0133, + "step": 20080 + }, + { + "grad_norm": 0.0531410276889801, + "learning_rate": 9.390345735145956e-05, + "loss": 0.0145, + "step": 20090 + }, + { + "grad_norm": 0.06942968815565109, + "learning_rate": 9.389554253947219e-05, + "loss": 0.0113, + "step": 20100 + }, + { + "grad_norm": 0.09904040396213531, + "learning_rate": 9.388762292713428e-05, + "loss": 0.0138, + "step": 20110 + }, + { + "grad_norm": 0.06503242254257202, + "learning_rate": 9.38796985153119e-05, + "loss": 0.0136, + "step": 20120 + }, + { + "grad_norm": 0.1204347014427185, + "learning_rate": 9.387176930487169e-05, + "loss": 0.0136, + "step": 20130 + }, + { + "grad_norm": 0.09170888364315033, + "learning_rate": 9.386383529668072e-05, + "loss": 0.0118, + "step": 20140 + }, + { + "grad_norm": 0.08338720351457596, + "learning_rate": 9.385589649160669e-05, + "loss": 0.0145, + "step": 20150 + }, + { + "grad_norm": 0.05380641669034958, + "learning_rate": 9.384795289051775e-05, + "loss": 0.0121, + "step": 20160 + }, + { + "grad_norm": 0.07151124626398087, + "learning_rate": 9.384000449428261e-05, + "loss": 0.0135, + "step": 20170 + }, + { + "grad_norm": 0.09023123979568481, + "learning_rate": 9.383205130377048e-05, + "loss": 0.013, + "step": 20180 + }, + { + "grad_norm": 0.0758901834487915, + "learning_rate": 9.382409331985114e-05, + "loss": 0.0143, + "step": 20190 + }, + { + "grad_norm": 0.06448604166507721, + "learning_rate": 9.381613054339482e-05, + "loss": 0.0126, + "step": 20200 + }, + { + "grad_norm": 0.07841980457305908, + "learning_rate": 9.380816297527235e-05, + "loss": 0.0123, + "step": 20210 + }, + { + "grad_norm": 0.07671146839857101, + "learning_rate": 9.380019061635506e-05, + "loss": 0.0121, + "step": 20220 + }, + { + "grad_norm": 0.07748616486787796, + "learning_rate": 9.379221346751474e-05, + "loss": 0.0136, + "step": 20230 + }, + { + "grad_norm": 0.05040748417377472, + "learning_rate": 9.378423152962382e-05, + "loss": 0.0117, + "step": 20240 + }, + { + "grad_norm": 0.09455285966396332, + "learning_rate": 9.377624480355517e-05, + "loss": 0.0125, + "step": 20250 + }, + { + "grad_norm": 0.06462828814983368, + "learning_rate": 9.376825329018219e-05, + "loss": 0.012, + "step": 20260 + }, + { + "grad_norm": 0.06674428284168243, + "learning_rate": 9.376025699037884e-05, + "loss": 0.0124, + "step": 20270 + }, + { + "grad_norm": 0.07018990814685822, + "learning_rate": 9.37522559050196e-05, + "loss": 0.0133, + "step": 20280 + }, + { + "grad_norm": 0.05756533145904541, + "learning_rate": 9.37442500349794e-05, + "loss": 0.0118, + "step": 20290 + }, + { + "grad_norm": 0.10032577812671661, + "learning_rate": 9.373623938113381e-05, + "loss": 0.0137, + "step": 20300 + }, + { + "grad_norm": 0.08078505098819733, + "learning_rate": 9.372822394435883e-05, + "loss": 0.0135, + "step": 20310 + }, + { + "grad_norm": 0.07041949778795242, + "learning_rate": 9.372020372553102e-05, + "loss": 0.0115, + "step": 20320 + }, + { + "grad_norm": 0.08046631515026093, + "learning_rate": 9.371217872552746e-05, + "loss": 0.0131, + "step": 20330 + }, + { + "grad_norm": 0.11528025567531586, + "learning_rate": 9.370414894522576e-05, + "loss": 0.0146, + "step": 20340 + }, + { + "grad_norm": 0.0851670503616333, + "learning_rate": 9.369611438550406e-05, + "loss": 0.0113, + "step": 20350 + }, + { + "grad_norm": 0.07662615925073624, + "learning_rate": 9.368807504724095e-05, + "loss": 0.0149, + "step": 20360 + }, + { + "grad_norm": 0.06230173632502556, + "learning_rate": 9.368003093131565e-05, + "loss": 0.0115, + "step": 20370 + }, + { + "grad_norm": 0.08554113656282425, + "learning_rate": 9.367198203860785e-05, + "loss": 0.0141, + "step": 20380 + }, + { + "grad_norm": 0.06193159520626068, + "learning_rate": 9.366392836999774e-05, + "loss": 0.0139, + "step": 20390 + }, + { + "grad_norm": 0.0759027972817421, + "learning_rate": 9.365586992636607e-05, + "loss": 0.0148, + "step": 20400 + }, + { + "grad_norm": 0.09795445948839188, + "learning_rate": 9.364780670859412e-05, + "loss": 0.0115, + "step": 20410 + }, + { + "grad_norm": 0.08659220486879349, + "learning_rate": 9.363973871756364e-05, + "loss": 0.014, + "step": 20420 + }, + { + "grad_norm": 0.08297713100910187, + "learning_rate": 9.363166595415696e-05, + "loss": 0.0124, + "step": 20430 + }, + { + "grad_norm": 0.10349799692630768, + "learning_rate": 9.362358841925686e-05, + "loss": 0.0148, + "step": 20440 + }, + { + "grad_norm": 0.06494539976119995, + "learning_rate": 9.361550611374674e-05, + "loss": 0.0138, + "step": 20450 + }, + { + "grad_norm": 0.07551504671573639, + "learning_rate": 9.360741903851043e-05, + "loss": 0.0124, + "step": 20460 + }, + { + "grad_norm": 0.06664519011974335, + "learning_rate": 9.359932719443236e-05, + "loss": 0.0151, + "step": 20470 + }, + { + "grad_norm": 0.11213123053312302, + "learning_rate": 9.35912305823974e-05, + "loss": 0.014, + "step": 20480 + }, + { + "grad_norm": 0.08102381974458694, + "learning_rate": 9.358312920329101e-05, + "loss": 0.0128, + "step": 20490 + }, + { + "grad_norm": 0.07035671919584274, + "learning_rate": 9.357502305799914e-05, + "loss": 0.0117, + "step": 20500 + }, + { + "grad_norm": 0.06950099766254425, + "learning_rate": 9.356691214740824e-05, + "loss": 0.0124, + "step": 20510 + }, + { + "grad_norm": 0.07187695801258087, + "learning_rate": 9.355879647240535e-05, + "loss": 0.0117, + "step": 20520 + }, + { + "grad_norm": 0.065907321870327, + "learning_rate": 9.355067603387798e-05, + "loss": 0.0114, + "step": 20530 + }, + { + "grad_norm": 0.1000409722328186, + "learning_rate": 9.354255083271412e-05, + "loss": 0.0121, + "step": 20540 + }, + { + "grad_norm": 0.06835688650608063, + "learning_rate": 9.353442086980239e-05, + "loss": 0.0146, + "step": 20550 + }, + { + "grad_norm": 0.06694292277097702, + "learning_rate": 9.352628614603185e-05, + "loss": 0.0141, + "step": 20560 + }, + { + "grad_norm": 0.0730302631855011, + "learning_rate": 9.351814666229209e-05, + "loss": 0.0136, + "step": 20570 + }, + { + "grad_norm": 0.07835564762353897, + "learning_rate": 9.351000241947324e-05, + "loss": 0.0114, + "step": 20580 + }, + { + "grad_norm": 0.060181789100170135, + "learning_rate": 9.350185341846594e-05, + "loss": 0.0116, + "step": 20590 + }, + { + "grad_norm": 0.07781063765287399, + "learning_rate": 9.349369966016134e-05, + "loss": 0.0133, + "step": 20600 + }, + { + "grad_norm": 0.062733955681324, + "learning_rate": 9.348554114545117e-05, + "loss": 0.0117, + "step": 20610 + }, + { + "grad_norm": 0.06292034685611725, + "learning_rate": 9.347737787522758e-05, + "loss": 0.0135, + "step": 20620 + }, + { + "grad_norm": 0.08868899941444397, + "learning_rate": 9.346920985038332e-05, + "loss": 0.0154, + "step": 20630 + }, + { + "grad_norm": 0.07176916301250458, + "learning_rate": 9.346103707181162e-05, + "loss": 0.0135, + "step": 20640 + }, + { + "grad_norm": 0.09815767407417297, + "learning_rate": 9.345285954040626e-05, + "loss": 0.0125, + "step": 20650 + }, + { + "grad_norm": 0.09213843941688538, + "learning_rate": 9.34446772570615e-05, + "loss": 0.0124, + "step": 20660 + }, + { + "grad_norm": 0.1251029521226883, + "learning_rate": 9.343649022267214e-05, + "loss": 0.0125, + "step": 20670 + }, + { + "grad_norm": 0.09877660870552063, + "learning_rate": 9.342829843813353e-05, + "loss": 0.0129, + "step": 20680 + }, + { + "grad_norm": 0.08716394007205963, + "learning_rate": 9.342010190434149e-05, + "loss": 0.0128, + "step": 20690 + }, + { + "grad_norm": 0.05636369436979294, + "learning_rate": 9.34119006221924e-05, + "loss": 0.0123, + "step": 20700 + }, + { + "grad_norm": 0.06633681058883667, + "learning_rate": 9.340369459258313e-05, + "loss": 0.0137, + "step": 20710 + }, + { + "grad_norm": 0.08850602060556412, + "learning_rate": 9.339548381641106e-05, + "loss": 0.0136, + "step": 20720 + }, + { + "grad_norm": 0.08599228411912918, + "learning_rate": 9.338726829457413e-05, + "loss": 0.0122, + "step": 20730 + }, + { + "grad_norm": 0.05441346764564514, + "learning_rate": 9.337904802797078e-05, + "loss": 0.0128, + "step": 20740 + }, + { + "grad_norm": 0.06548526138067245, + "learning_rate": 9.337082301749993e-05, + "loss": 0.0136, + "step": 20750 + }, + { + "grad_norm": 0.08351113647222519, + "learning_rate": 9.336259326406109e-05, + "loss": 0.0113, + "step": 20760 + }, + { + "grad_norm": 0.07235804945230484, + "learning_rate": 9.335435876855427e-05, + "loss": 0.0132, + "step": 20770 + }, + { + "grad_norm": 0.06529303640127182, + "learning_rate": 9.334611953187994e-05, + "loss": 0.014, + "step": 20780 + }, + { + "grad_norm": 0.06449609249830246, + "learning_rate": 9.333787555493914e-05, + "loss": 0.0162, + "step": 20790 + }, + { + "grad_norm": 0.06651897728443146, + "learning_rate": 9.332962683863345e-05, + "loss": 0.0119, + "step": 20800 + }, + { + "grad_norm": 0.09393954277038574, + "learning_rate": 9.332137338386489e-05, + "loss": 0.0139, + "step": 20810 + }, + { + "grad_norm": 0.09198420494794846, + "learning_rate": 9.33131151915361e-05, + "loss": 0.0136, + "step": 20820 + }, + { + "grad_norm": 0.0690983235836029, + "learning_rate": 9.330485226255012e-05, + "loss": 0.0119, + "step": 20830 + }, + { + "grad_norm": 0.06700243800878525, + "learning_rate": 9.329658459781061e-05, + "loss": 0.0123, + "step": 20840 + }, + { + "grad_norm": 0.07252756506204605, + "learning_rate": 9.328831219822172e-05, + "loss": 0.015, + "step": 20850 + }, + { + "grad_norm": 0.07168008387088776, + "learning_rate": 9.328003506468808e-05, + "loss": 0.0119, + "step": 20860 + }, + { + "grad_norm": 0.07987533509731293, + "learning_rate": 9.327175319811488e-05, + "loss": 0.0123, + "step": 20870 + }, + { + "grad_norm": 0.05868144333362579, + "learning_rate": 9.326346659940781e-05, + "loss": 0.0119, + "step": 20880 + }, + { + "grad_norm": 0.08495910465717316, + "learning_rate": 9.325517526947308e-05, + "loss": 0.0132, + "step": 20890 + }, + { + "grad_norm": 0.06718135625123978, + "learning_rate": 9.32468792092174e-05, + "loss": 0.0138, + "step": 20900 + }, + { + "grad_norm": 0.06529372930526733, + "learning_rate": 9.323857841954803e-05, + "loss": 0.012, + "step": 20910 + }, + { + "grad_norm": 0.09520366787910461, + "learning_rate": 9.323027290137276e-05, + "loss": 0.0126, + "step": 20920 + }, + { + "grad_norm": 0.06357331573963165, + "learning_rate": 9.322196265559981e-05, + "loss": 0.012, + "step": 20930 + }, + { + "grad_norm": 0.0570567287504673, + "learning_rate": 9.321364768313803e-05, + "loss": 0.0133, + "step": 20940 + }, + { + "grad_norm": 0.07153379172086716, + "learning_rate": 9.32053279848967e-05, + "loss": 0.0117, + "step": 20950 + }, + { + "grad_norm": 0.06423597037792206, + "learning_rate": 9.319700356178567e-05, + "loss": 0.0133, + "step": 20960 + }, + { + "grad_norm": 0.07873474061489105, + "learning_rate": 9.318867441471527e-05, + "loss": 0.0116, + "step": 20970 + }, + { + "grad_norm": 0.08072840422391891, + "learning_rate": 9.318034054459637e-05, + "loss": 0.0123, + "step": 20980 + }, + { + "grad_norm": 0.09068774431943893, + "learning_rate": 9.317200195234034e-05, + "loss": 0.0145, + "step": 20990 + }, + { + "grad_norm": 0.06660063564777374, + "learning_rate": 9.316365863885909e-05, + "loss": 0.0117, + "step": 21000 + }, + { + "grad_norm": 0.05460226535797119, + "learning_rate": 9.315531060506502e-05, + "loss": 0.0162, + "step": 21010 + }, + { + "grad_norm": 0.06872059404850006, + "learning_rate": 9.314695785187108e-05, + "loss": 0.0138, + "step": 21020 + }, + { + "grad_norm": 0.08260168135166168, + "learning_rate": 9.313860038019069e-05, + "loss": 0.0122, + "step": 21030 + }, + { + "grad_norm": 0.06587257236242294, + "learning_rate": 9.313023819093782e-05, + "loss": 0.011, + "step": 21040 + }, + { + "grad_norm": 0.07964158803224564, + "learning_rate": 9.312187128502695e-05, + "loss": 0.0169, + "step": 21050 + }, + { + "grad_norm": 0.09189444035291672, + "learning_rate": 9.311349966337307e-05, + "loss": 0.0128, + "step": 21060 + }, + { + "grad_norm": 0.07271469384431839, + "learning_rate": 9.310512332689169e-05, + "loss": 0.0156, + "step": 21070 + }, + { + "grad_norm": 0.0795682817697525, + "learning_rate": 9.309674227649883e-05, + "loss": 0.0127, + "step": 21080 + }, + { + "grad_norm": 0.06593406945466995, + "learning_rate": 9.308835651311103e-05, + "loss": 0.0115, + "step": 21090 + }, + { + "grad_norm": 0.09774960577487946, + "learning_rate": 9.307996603764533e-05, + "loss": 0.0109, + "step": 21100 + }, + { + "grad_norm": 0.06490223109722137, + "learning_rate": 9.307157085101932e-05, + "loss": 0.0155, + "step": 21110 + }, + { + "grad_norm": 0.13494884967803955, + "learning_rate": 9.306317095415109e-05, + "loss": 0.0161, + "step": 21120 + }, + { + "grad_norm": 0.09651137888431549, + "learning_rate": 9.305476634795922e-05, + "loss": 0.0124, + "step": 21130 + }, + { + "grad_norm": 0.06487906724214554, + "learning_rate": 9.304635703336284e-05, + "loss": 0.0128, + "step": 21140 + }, + { + "grad_norm": 0.06640094518661499, + "learning_rate": 9.303794301128157e-05, + "loss": 0.0124, + "step": 21150 + }, + { + "grad_norm": 0.08654491603374481, + "learning_rate": 9.302952428263555e-05, + "loss": 0.0127, + "step": 21160 + }, + { + "grad_norm": 0.0565912239253521, + "learning_rate": 9.302110084834545e-05, + "loss": 0.0116, + "step": 21170 + }, + { + "grad_norm": 0.07122615724802017, + "learning_rate": 9.301267270933245e-05, + "loss": 0.0144, + "step": 21180 + }, + { + "grad_norm": 0.08205242455005646, + "learning_rate": 9.300423986651823e-05, + "loss": 0.0126, + "step": 21190 + }, + { + "grad_norm": 0.06128653883934021, + "learning_rate": 9.299580232082501e-05, + "loss": 0.0148, + "step": 21200 + }, + { + "grad_norm": 0.0664830282330513, + "learning_rate": 9.298736007317547e-05, + "loss": 0.014, + "step": 21210 + }, + { + "grad_norm": 0.11196567863225937, + "learning_rate": 9.297891312449288e-05, + "loss": 0.0146, + "step": 21220 + }, + { + "grad_norm": 0.07565779983997345, + "learning_rate": 9.297046147570094e-05, + "loss": 0.0127, + "step": 21230 + }, + { + "grad_norm": 0.1043301448225975, + "learning_rate": 9.296200512772396e-05, + "loss": 0.0127, + "step": 21240 + }, + { + "grad_norm": 0.06515074521303177, + "learning_rate": 9.295354408148668e-05, + "loss": 0.0128, + "step": 21250 + }, + { + "grad_norm": 0.0717102438211441, + "learning_rate": 9.294507833791441e-05, + "loss": 0.0169, + "step": 21260 + }, + { + "grad_norm": 0.08235345035791397, + "learning_rate": 9.293660789793295e-05, + "loss": 0.0136, + "step": 21270 + }, + { + "grad_norm": 0.09803633391857147, + "learning_rate": 9.292813276246858e-05, + "loss": 0.0153, + "step": 21280 + }, + { + "grad_norm": 0.05216284096240997, + "learning_rate": 9.291965293244816e-05, + "loss": 0.0118, + "step": 21290 + }, + { + "grad_norm": 0.06866440176963806, + "learning_rate": 9.291116840879904e-05, + "loss": 0.0124, + "step": 21300 + }, + { + "grad_norm": 0.10614234209060669, + "learning_rate": 9.290267919244904e-05, + "loss": 0.0169, + "step": 21310 + }, + { + "grad_norm": 0.09201007336378098, + "learning_rate": 9.289418528432655e-05, + "loss": 0.0128, + "step": 21320 + }, + { + "grad_norm": 0.0783790722489357, + "learning_rate": 9.288568668536045e-05, + "loss": 0.0125, + "step": 21330 + }, + { + "grad_norm": 0.06552160531282425, + "learning_rate": 9.287718339648013e-05, + "loss": 0.0124, + "step": 21340 + }, + { + "grad_norm": 0.07067441940307617, + "learning_rate": 9.28686754186155e-05, + "loss": 0.0125, + "step": 21350 + }, + { + "grad_norm": 0.07312019914388657, + "learning_rate": 9.286016275269698e-05, + "loss": 0.0122, + "step": 21360 + }, + { + "grad_norm": 0.07705198973417282, + "learning_rate": 9.285164539965551e-05, + "loss": 0.0134, + "step": 21370 + }, + { + "grad_norm": 0.06333067268133163, + "learning_rate": 9.284312336042251e-05, + "loss": 0.0129, + "step": 21380 + }, + { + "grad_norm": 0.08880626410245895, + "learning_rate": 9.283459663592996e-05, + "loss": 0.0143, + "step": 21390 + }, + { + "grad_norm": 0.07014274597167969, + "learning_rate": 9.282606522711033e-05, + "loss": 0.0122, + "step": 21400 + }, + { + "grad_norm": 0.06213567033410072, + "learning_rate": 9.281752913489657e-05, + "loss": 0.0137, + "step": 21410 + }, + { + "grad_norm": 0.0934387668967247, + "learning_rate": 9.280898836022222e-05, + "loss": 0.0151, + "step": 21420 + }, + { + "grad_norm": 0.05543307214975357, + "learning_rate": 9.280044290402126e-05, + "loss": 0.0149, + "step": 21430 + }, + { + "grad_norm": 0.08440818637609482, + "learning_rate": 9.279189276722821e-05, + "loss": 0.0139, + "step": 21440 + }, + { + "grad_norm": 0.0767325684428215, + "learning_rate": 9.278333795077812e-05, + "loss": 0.0118, + "step": 21450 + }, + { + "grad_norm": 0.10272660106420517, + "learning_rate": 9.27747784556065e-05, + "loss": 0.0138, + "step": 21460 + }, + { + "grad_norm": 0.07827284932136536, + "learning_rate": 9.276621428264942e-05, + "loss": 0.0114, + "step": 21470 + }, + { + "grad_norm": 0.0642191469669342, + "learning_rate": 9.275764543284345e-05, + "loss": 0.0129, + "step": 21480 + }, + { + "grad_norm": 0.08528076857328415, + "learning_rate": 9.274907190712566e-05, + "loss": 0.0131, + "step": 21490 + }, + { + "grad_norm": 0.06238287687301636, + "learning_rate": 9.274049370643363e-05, + "loss": 0.0107, + "step": 21500 + }, + { + "grad_norm": 0.09327691793441772, + "learning_rate": 9.273191083170547e-05, + "loss": 0.0125, + "step": 21510 + }, + { + "grad_norm": 0.11083368211984634, + "learning_rate": 9.27233232838798e-05, + "loss": 0.0113, + "step": 21520 + }, + { + "grad_norm": 0.06160636246204376, + "learning_rate": 9.27147310638957e-05, + "loss": 0.0112, + "step": 21530 + }, + { + "grad_norm": 0.08074478805065155, + "learning_rate": 9.270613417269286e-05, + "loss": 0.0113, + "step": 21540 + }, + { + "grad_norm": 0.09177753329277039, + "learning_rate": 9.269753261121138e-05, + "loss": 0.0132, + "step": 21550 + }, + { + "grad_norm": 0.08159054070711136, + "learning_rate": 9.268892638039194e-05, + "loss": 0.0116, + "step": 21560 + }, + { + "grad_norm": 0.08159305900335312, + "learning_rate": 9.268031548117569e-05, + "loss": 0.0116, + "step": 21570 + }, + { + "grad_norm": 0.07202894240617752, + "learning_rate": 9.26716999145043e-05, + "loss": 0.0115, + "step": 21580 + }, + { + "grad_norm": 0.07156944274902344, + "learning_rate": 9.266307968131998e-05, + "loss": 0.0138, + "step": 21590 + }, + { + "grad_norm": 0.07029912620782852, + "learning_rate": 9.26544547825654e-05, + "loss": 0.0137, + "step": 21600 + }, + { + "grad_norm": 0.09164537489414215, + "learning_rate": 9.264582521918376e-05, + "loss": 0.0119, + "step": 21610 + }, + { + "grad_norm": 0.07353480160236359, + "learning_rate": 9.263719099211881e-05, + "loss": 0.0138, + "step": 21620 + }, + { + "grad_norm": 0.09685192257165909, + "learning_rate": 9.262855210231476e-05, + "loss": 0.0125, + "step": 21630 + }, + { + "grad_norm": 0.0845077708363533, + "learning_rate": 9.261990855071633e-05, + "loss": 0.0139, + "step": 21640 + }, + { + "grad_norm": 0.08635369688272476, + "learning_rate": 9.261126033826878e-05, + "loss": 0.0132, + "step": 21650 + }, + { + "grad_norm": 0.13969644904136658, + "learning_rate": 9.260260746591786e-05, + "loss": 0.0142, + "step": 21660 + }, + { + "grad_norm": 0.0819544866681099, + "learning_rate": 9.259394993460985e-05, + "loss": 0.0124, + "step": 21670 + }, + { + "grad_norm": 0.11278475075960159, + "learning_rate": 9.258528774529151e-05, + "loss": 0.0129, + "step": 21680 + }, + { + "grad_norm": 0.06973827630281448, + "learning_rate": 9.257662089891013e-05, + "loss": 0.0121, + "step": 21690 + }, + { + "grad_norm": 0.06296367198228836, + "learning_rate": 9.25679493964135e-05, + "loss": 0.013, + "step": 21700 + }, + { + "grad_norm": 0.08507392555475235, + "learning_rate": 9.255927323874994e-05, + "loss": 0.011, + "step": 21710 + }, + { + "grad_norm": 0.09240379929542542, + "learning_rate": 9.255059242686822e-05, + "loss": 0.0114, + "step": 21720 + }, + { + "grad_norm": 0.09327200800180435, + "learning_rate": 9.254190696171769e-05, + "loss": 0.011, + "step": 21730 + }, + { + "grad_norm": 0.07233674824237823, + "learning_rate": 9.25332168442482e-05, + "loss": 0.014, + "step": 21740 + }, + { + "grad_norm": 0.10872936248779297, + "learning_rate": 9.252452207541004e-05, + "loss": 0.0129, + "step": 21750 + }, + { + "grad_norm": 0.07421533018350601, + "learning_rate": 9.251582265615409e-05, + "loss": 0.0125, + "step": 21760 + }, + { + "grad_norm": 0.10345764458179474, + "learning_rate": 9.250711858743169e-05, + "loss": 0.0133, + "step": 21770 + }, + { + "grad_norm": 0.08709132671356201, + "learning_rate": 9.24984098701947e-05, + "loss": 0.0126, + "step": 21780 + }, + { + "grad_norm": 0.06848196685314178, + "learning_rate": 9.248969650539552e-05, + "loss": 0.0121, + "step": 21790 + }, + { + "grad_norm": 0.111284539103508, + "learning_rate": 9.2480978493987e-05, + "loss": 0.0131, + "step": 21800 + }, + { + "grad_norm": 0.0776459351181984, + "learning_rate": 9.247225583692256e-05, + "loss": 0.0127, + "step": 21810 + }, + { + "grad_norm": 0.07553330063819885, + "learning_rate": 9.246352853515607e-05, + "loss": 0.0116, + "step": 21820 + }, + { + "grad_norm": 0.10016046464443207, + "learning_rate": 9.245479658964194e-05, + "loss": 0.0136, + "step": 21830 + }, + { + "grad_norm": 0.05723404884338379, + "learning_rate": 9.244606000133507e-05, + "loss": 0.013, + "step": 21840 + }, + { + "grad_norm": 0.10867104679346085, + "learning_rate": 9.24373187711909e-05, + "loss": 0.0114, + "step": 21850 + }, + { + "grad_norm": 0.07821256667375565, + "learning_rate": 9.242857290016537e-05, + "loss": 0.0128, + "step": 21860 + }, + { + "grad_norm": 0.07708839327096939, + "learning_rate": 9.241982238921488e-05, + "loss": 0.0129, + "step": 21870 + }, + { + "grad_norm": 0.07308632135391235, + "learning_rate": 9.24110672392964e-05, + "loss": 0.0115, + "step": 21880 + }, + { + "grad_norm": 0.08320635557174683, + "learning_rate": 9.240230745136737e-05, + "loss": 0.0119, + "step": 21890 + }, + { + "grad_norm": 0.10253797471523285, + "learning_rate": 9.239354302638575e-05, + "loss": 0.0118, + "step": 21900 + }, + { + "grad_norm": 0.059243690222501755, + "learning_rate": 9.238477396531e-05, + "loss": 0.0097, + "step": 21910 + }, + { + "grad_norm": 0.053682442754507065, + "learning_rate": 9.23760002690991e-05, + "loss": 0.0111, + "step": 21920 + }, + { + "grad_norm": 0.052981600165367126, + "learning_rate": 9.236722193871252e-05, + "loss": 0.0113, + "step": 21930 + }, + { + "grad_norm": 0.06685374677181244, + "learning_rate": 9.235843897511023e-05, + "loss": 0.0106, + "step": 21940 + }, + { + "grad_norm": 0.06666896492242813, + "learning_rate": 9.234965137925276e-05, + "loss": 0.0142, + "step": 21950 + }, + { + "grad_norm": 0.08285274356603622, + "learning_rate": 9.234085915210108e-05, + "loss": 0.0144, + "step": 21960 + }, + { + "grad_norm": 0.08094636350870132, + "learning_rate": 9.23320622946167e-05, + "loss": 0.0162, + "step": 21970 + }, + { + "grad_norm": 0.07988988608121872, + "learning_rate": 9.232326080776163e-05, + "loss": 0.0118, + "step": 21980 + }, + { + "grad_norm": 0.07145094126462936, + "learning_rate": 9.23144546924984e-05, + "loss": 0.0157, + "step": 21990 + }, + { + "grad_norm": 0.07144103199243546, + "learning_rate": 9.230564394979e-05, + "loss": 0.0115, + "step": 22000 + }, + { + "grad_norm": 0.09146758913993835, + "learning_rate": 9.22968285806e-05, + "loss": 0.0142, + "step": 22010 + }, + { + "grad_norm": 0.08983413130044937, + "learning_rate": 9.228800858589242e-05, + "loss": 0.0142, + "step": 22020 + }, + { + "grad_norm": 0.10055433213710785, + "learning_rate": 9.227918396663179e-05, + "loss": 0.0126, + "step": 22030 + }, + { + "grad_norm": 0.046857111155986786, + "learning_rate": 9.227035472378319e-05, + "loss": 0.0125, + "step": 22040 + }, + { + "grad_norm": 0.08890251815319061, + "learning_rate": 9.226152085831213e-05, + "loss": 0.0141, + "step": 22050 + }, + { + "grad_norm": 0.08843214809894562, + "learning_rate": 9.22526823711847e-05, + "loss": 0.0125, + "step": 22060 + }, + { + "grad_norm": 0.06601320207118988, + "learning_rate": 9.224383926336745e-05, + "loss": 0.0122, + "step": 22070 + }, + { + "grad_norm": 0.06983937323093414, + "learning_rate": 9.223499153582744e-05, + "loss": 0.0129, + "step": 22080 + }, + { + "grad_norm": 0.07131041586399078, + "learning_rate": 9.222613918953226e-05, + "loss": 0.0118, + "step": 22090 + }, + { + "grad_norm": 0.10862605273723602, + "learning_rate": 9.221728222544999e-05, + "loss": 0.0151, + "step": 22100 + }, + { + "grad_norm": 0.07562939822673798, + "learning_rate": 9.22084206445492e-05, + "loss": 0.0137, + "step": 22110 + }, + { + "grad_norm": 0.0982627123594284, + "learning_rate": 9.2199554447799e-05, + "loss": 0.0133, + "step": 22120 + }, + { + "grad_norm": 0.06953819841146469, + "learning_rate": 9.219068363616897e-05, + "loss": 0.0129, + "step": 22130 + }, + { + "grad_norm": 0.0918317660689354, + "learning_rate": 9.218180821062919e-05, + "loss": 0.0118, + "step": 22140 + }, + { + "grad_norm": 0.07258684188127518, + "learning_rate": 9.21729281721503e-05, + "loss": 0.0148, + "step": 22150 + }, + { + "grad_norm": 0.06958422064781189, + "learning_rate": 9.216404352170339e-05, + "loss": 0.0109, + "step": 22160 + }, + { + "grad_norm": 0.0810585767030716, + "learning_rate": 9.215515426026007e-05, + "loss": 0.0146, + "step": 22170 + }, + { + "grad_norm": 0.07851805537939072, + "learning_rate": 9.214626038879246e-05, + "loss": 0.015, + "step": 22180 + }, + { + "grad_norm": 0.07757508754730225, + "learning_rate": 9.21373619082732e-05, + "loss": 0.012, + "step": 22190 + }, + { + "grad_norm": 0.07727071642875671, + "learning_rate": 9.212845881967535e-05, + "loss": 0.0133, + "step": 22200 + }, + { + "grad_norm": 0.081436887383461, + "learning_rate": 9.211955112397262e-05, + "loss": 0.0127, + "step": 22210 + }, + { + "grad_norm": 0.06047457829117775, + "learning_rate": 9.211063882213909e-05, + "loss": 0.0135, + "step": 22220 + }, + { + "grad_norm": 0.08996164798736572, + "learning_rate": 9.210172191514942e-05, + "loss": 0.0127, + "step": 22230 + }, + { + "grad_norm": 0.07456523925065994, + "learning_rate": 9.209280040397874e-05, + "loss": 0.0137, + "step": 22240 + }, + { + "grad_norm": 0.08107127994298935, + "learning_rate": 9.208387428960268e-05, + "loss": 0.0122, + "step": 22250 + }, + { + "grad_norm": 0.09122536331415176, + "learning_rate": 9.20749435729974e-05, + "loss": 0.0139, + "step": 22260 + }, + { + "grad_norm": 0.05897000804543495, + "learning_rate": 9.206600825513957e-05, + "loss": 0.0108, + "step": 22270 + }, + { + "grad_norm": 0.08410907536745071, + "learning_rate": 9.20570683370063e-05, + "loss": 0.0114, + "step": 22280 + }, + { + "grad_norm": 0.07366377115249634, + "learning_rate": 9.204812381957528e-05, + "loss": 0.0121, + "step": 22290 + }, + { + "grad_norm": 0.0608694963157177, + "learning_rate": 9.203917470382465e-05, + "loss": 0.0128, + "step": 22300 + }, + { + "grad_norm": 0.07491675764322281, + "learning_rate": 9.203022099073309e-05, + "loss": 0.0141, + "step": 22310 + }, + { + "grad_norm": 0.08907347917556763, + "learning_rate": 9.202126268127976e-05, + "loss": 0.0149, + "step": 22320 + }, + { + "grad_norm": 0.108442023396492, + "learning_rate": 9.20122997764443e-05, + "loss": 0.0158, + "step": 22330 + }, + { + "grad_norm": 0.10802390426397324, + "learning_rate": 9.200333227720692e-05, + "loss": 0.0147, + "step": 22340 + }, + { + "grad_norm": 0.08546006679534912, + "learning_rate": 9.199436018454826e-05, + "loss": 0.0123, + "step": 22350 + }, + { + "grad_norm": 0.07100430130958557, + "learning_rate": 9.198538349944952e-05, + "loss": 0.0119, + "step": 22360 + }, + { + "grad_norm": 0.06665410846471786, + "learning_rate": 9.197640222289234e-05, + "loss": 0.0133, + "step": 22370 + }, + { + "grad_norm": 0.06660311669111252, + "learning_rate": 9.196741635585895e-05, + "loss": 0.0123, + "step": 22380 + }, + { + "grad_norm": 0.0762614980340004, + "learning_rate": 9.195842589933199e-05, + "loss": 0.0125, + "step": 22390 + }, + { + "grad_norm": 0.06741651892662048, + "learning_rate": 9.194943085429466e-05, + "loss": 0.015, + "step": 22400 + }, + { + "grad_norm": 0.07849186658859253, + "learning_rate": 9.194043122173065e-05, + "loss": 0.014, + "step": 22410 + }, + { + "grad_norm": 0.04269883781671524, + "learning_rate": 9.193142700262413e-05, + "loss": 0.0121, + "step": 22420 + }, + { + "grad_norm": 0.0965939611196518, + "learning_rate": 9.192241819795979e-05, + "loss": 0.013, + "step": 22430 + }, + { + "grad_norm": 0.07048540562391281, + "learning_rate": 9.191340480872284e-05, + "loss": 0.0133, + "step": 22440 + }, + { + "grad_norm": 0.05701163038611412, + "learning_rate": 9.190438683589895e-05, + "loss": 0.0124, + "step": 22450 + }, + { + "grad_norm": 0.086389921605587, + "learning_rate": 9.189536428047432e-05, + "loss": 0.0136, + "step": 22460 + }, + { + "grad_norm": 0.08928017318248749, + "learning_rate": 9.188633714343564e-05, + "loss": 0.0125, + "step": 22470 + }, + { + "grad_norm": 0.07150637358427048, + "learning_rate": 9.18773054257701e-05, + "loss": 0.0132, + "step": 22480 + }, + { + "grad_norm": 0.06870241463184357, + "learning_rate": 9.18682691284654e-05, + "loss": 0.0148, + "step": 22490 + }, + { + "grad_norm": 0.07000686973333359, + "learning_rate": 9.185922825250974e-05, + "loss": 0.0127, + "step": 22500 + }, + { + "grad_norm": 0.10520769655704498, + "learning_rate": 9.185018279889181e-05, + "loss": 0.0143, + "step": 22510 + }, + { + "grad_norm": 0.07319633662700653, + "learning_rate": 9.184113276860082e-05, + "loss": 0.0131, + "step": 22520 + }, + { + "grad_norm": 0.06495548784732819, + "learning_rate": 9.183207816262645e-05, + "loss": 0.0114, + "step": 22530 + }, + { + "grad_norm": 0.06893372535705566, + "learning_rate": 9.182301898195891e-05, + "loss": 0.0139, + "step": 22540 + }, + { + "grad_norm": 0.06489243358373642, + "learning_rate": 9.181395522758889e-05, + "loss": 0.0123, + "step": 22550 + }, + { + "grad_norm": 0.07642873376607895, + "learning_rate": 9.180488690050759e-05, + "loss": 0.0102, + "step": 22560 + }, + { + "grad_norm": 0.08983872085809708, + "learning_rate": 9.179581400170671e-05, + "loss": 0.0112, + "step": 22570 + }, + { + "grad_norm": 0.08849067240953445, + "learning_rate": 9.178673653217845e-05, + "loss": 0.0109, + "step": 22580 + }, + { + "grad_norm": 0.07936225086450577, + "learning_rate": 9.177765449291551e-05, + "loss": 0.0128, + "step": 22590 + }, + { + "grad_norm": 0.09737654030323029, + "learning_rate": 9.176856788491109e-05, + "loss": 0.014, + "step": 22600 + }, + { + "grad_norm": 0.10242357105016708, + "learning_rate": 9.175947670915887e-05, + "loss": 0.016, + "step": 22610 + }, + { + "grad_norm": 0.05558028444647789, + "learning_rate": 9.175038096665309e-05, + "loss": 0.0114, + "step": 22620 + }, + { + "grad_norm": 0.0914151668548584, + "learning_rate": 9.17412806583884e-05, + "loss": 0.0121, + "step": 22630 + }, + { + "grad_norm": 0.07262474298477173, + "learning_rate": 9.173217578536002e-05, + "loss": 0.0103, + "step": 22640 + }, + { + "grad_norm": 0.09691644459962845, + "learning_rate": 9.172306634856362e-05, + "loss": 0.0126, + "step": 22650 + }, + { + "grad_norm": 0.07869601994752884, + "learning_rate": 9.171395234899545e-05, + "loss": 0.0111, + "step": 22660 + }, + { + "grad_norm": 0.056170374155044556, + "learning_rate": 9.170483378765214e-05, + "loss": 0.0126, + "step": 22670 + }, + { + "grad_norm": 0.070449098944664, + "learning_rate": 9.169571066553091e-05, + "loss": 0.0107, + "step": 22680 + }, + { + "grad_norm": 0.08128318935632706, + "learning_rate": 9.168658298362946e-05, + "loss": 0.0124, + "step": 22690 + }, + { + "grad_norm": 0.08832787722349167, + "learning_rate": 9.167745074294598e-05, + "loss": 0.0113, + "step": 22700 + }, + { + "grad_norm": 0.08226906508207321, + "learning_rate": 9.166831394447913e-05, + "loss": 0.014, + "step": 22710 + }, + { + "grad_norm": 0.06414595246315002, + "learning_rate": 9.165917258922812e-05, + "loss": 0.0105, + "step": 22720 + }, + { + "grad_norm": 0.05053498595952988, + "learning_rate": 9.165002667819262e-05, + "loss": 0.011, + "step": 22730 + }, + { + "grad_norm": 0.06366612017154694, + "learning_rate": 9.164087621237282e-05, + "loss": 0.0098, + "step": 22740 + }, + { + "grad_norm": 0.08182495087385178, + "learning_rate": 9.163172119276942e-05, + "loss": 0.0105, + "step": 22750 + }, + { + "grad_norm": 0.09377168118953705, + "learning_rate": 9.162256162038358e-05, + "loss": 0.0103, + "step": 22760 + }, + { + "grad_norm": 0.06710493564605713, + "learning_rate": 9.161339749621698e-05, + "loss": 0.0104, + "step": 22770 + }, + { + "grad_norm": 0.0632198229432106, + "learning_rate": 9.160422882127177e-05, + "loss": 0.0127, + "step": 22780 + }, + { + "grad_norm": 0.06629215180873871, + "learning_rate": 9.159505559655069e-05, + "loss": 0.0103, + "step": 22790 + }, + { + "grad_norm": 0.07548151910305023, + "learning_rate": 9.158587782305684e-05, + "loss": 0.0139, + "step": 22800 + }, + { + "grad_norm": 0.07803913950920105, + "learning_rate": 9.157669550179391e-05, + "loss": 0.0112, + "step": 22810 + }, + { + "grad_norm": 0.0715150460600853, + "learning_rate": 9.156750863376609e-05, + "loss": 0.0102, + "step": 22820 + }, + { + "grad_norm": 0.08449499309062958, + "learning_rate": 9.155831721997801e-05, + "loss": 0.0122, + "step": 22830 + }, + { + "grad_norm": 0.08019547909498215, + "learning_rate": 9.154912126143484e-05, + "loss": 0.0109, + "step": 22840 + }, + { + "grad_norm": 0.056670043617486954, + "learning_rate": 9.153992075914224e-05, + "loss": 0.0134, + "step": 22850 + }, + { + "grad_norm": 0.07009641081094742, + "learning_rate": 9.153071571410635e-05, + "loss": 0.0147, + "step": 22860 + }, + { + "grad_norm": 0.06787601858377457, + "learning_rate": 9.152150612733384e-05, + "loss": 0.0115, + "step": 22870 + }, + { + "grad_norm": 0.08998970687389374, + "learning_rate": 9.151229199983184e-05, + "loss": 0.0132, + "step": 22880 + }, + { + "grad_norm": 0.04476353898644447, + "learning_rate": 9.150307333260802e-05, + "loss": 0.0097, + "step": 22890 + }, + { + "grad_norm": 0.08823813498020172, + "learning_rate": 9.149385012667048e-05, + "loss": 0.013, + "step": 22900 + }, + { + "grad_norm": 0.10355127602815628, + "learning_rate": 9.148462238302788e-05, + "loss": 0.0117, + "step": 22910 + }, + { + "grad_norm": 0.061525505036115646, + "learning_rate": 9.147539010268936e-05, + "loss": 0.0122, + "step": 22920 + }, + { + "grad_norm": 0.06531984359025955, + "learning_rate": 9.14661532866645e-05, + "loss": 0.0111, + "step": 22930 + }, + { + "grad_norm": 0.05433022603392601, + "learning_rate": 9.145691193596348e-05, + "loss": 0.0133, + "step": 22940 + }, + { + "grad_norm": 0.1122080609202385, + "learning_rate": 9.144766605159691e-05, + "loss": 0.0116, + "step": 22950 + }, + { + "grad_norm": 0.08263754099607468, + "learning_rate": 9.14384156345759e-05, + "loss": 0.0118, + "step": 22960 + }, + { + "grad_norm": 0.10457459092140198, + "learning_rate": 9.142916068591204e-05, + "loss": 0.0121, + "step": 22970 + }, + { + "grad_norm": 0.06416161358356476, + "learning_rate": 9.141990120661746e-05, + "loss": 0.0119, + "step": 22980 + }, + { + "grad_norm": 0.09685714542865753, + "learning_rate": 9.141063719770475e-05, + "loss": 0.0118, + "step": 22990 + }, + { + "grad_norm": 0.05974778160452843, + "learning_rate": 9.140136866018704e-05, + "loss": 0.0133, + "step": 23000 + }, + { + "grad_norm": 0.07808618247509003, + "learning_rate": 9.139209559507788e-05, + "loss": 0.014, + "step": 23010 + }, + { + "grad_norm": 0.0700877234339714, + "learning_rate": 9.13828180033914e-05, + "loss": 0.0121, + "step": 23020 + }, + { + "grad_norm": 0.060015298426151276, + "learning_rate": 9.137353588614212e-05, + "loss": 0.0127, + "step": 23030 + }, + { + "grad_norm": 0.053441956639289856, + "learning_rate": 9.136424924434519e-05, + "loss": 0.0103, + "step": 23040 + }, + { + "grad_norm": 0.06433279812335968, + "learning_rate": 9.135495807901615e-05, + "loss": 0.0148, + "step": 23050 + }, + { + "grad_norm": 0.07444929331541061, + "learning_rate": 9.134566239117108e-05, + "loss": 0.0127, + "step": 23060 + }, + { + "grad_norm": 0.07146142423152924, + "learning_rate": 9.13363621818265e-05, + "loss": 0.0113, + "step": 23070 + }, + { + "grad_norm": 0.11465326696634293, + "learning_rate": 9.132705745199953e-05, + "loss": 0.0144, + "step": 23080 + }, + { + "grad_norm": 0.061054255813360214, + "learning_rate": 9.131774820270768e-05, + "loss": 0.0121, + "step": 23090 + }, + { + "grad_norm": 0.0802350714802742, + "learning_rate": 9.130843443496901e-05, + "loss": 0.0145, + "step": 23100 + }, + { + "grad_norm": 0.08746164292097092, + "learning_rate": 9.129911614980206e-05, + "loss": 0.0131, + "step": 23110 + }, + { + "grad_norm": 0.11913242191076279, + "learning_rate": 9.128979334822584e-05, + "loss": 0.0125, + "step": 23120 + }, + { + "grad_norm": 0.0671694278717041, + "learning_rate": 9.128046603125992e-05, + "loss": 0.0146, + "step": 23130 + }, + { + "grad_norm": 0.07889624685049057, + "learning_rate": 9.12711341999243e-05, + "loss": 0.011, + "step": 23140 + }, + { + "grad_norm": 0.07678806036710739, + "learning_rate": 9.12617978552395e-05, + "loss": 0.0124, + "step": 23150 + }, + { + "grad_norm": 0.07568395882844925, + "learning_rate": 9.12524569982265e-05, + "loss": 0.0142, + "step": 23160 + }, + { + "grad_norm": 0.13994014263153076, + "learning_rate": 9.124311162990684e-05, + "loss": 0.0154, + "step": 23170 + }, + { + "grad_norm": 0.06974758207798004, + "learning_rate": 9.12337617513025e-05, + "loss": 0.0117, + "step": 23180 + }, + { + "grad_norm": 0.07944439351558685, + "learning_rate": 9.122440736343596e-05, + "loss": 0.0116, + "step": 23190 + }, + { + "grad_norm": 0.05696938559412956, + "learning_rate": 9.12150484673302e-05, + "loss": 0.0122, + "step": 23200 + }, + { + "grad_norm": 0.06253613531589508, + "learning_rate": 9.120568506400873e-05, + "loss": 0.0126, + "step": 23210 + }, + { + "grad_norm": 0.07713945209980011, + "learning_rate": 9.119631715449548e-05, + "loss": 0.0117, + "step": 23220 + }, + { + "grad_norm": 0.06834503263235092, + "learning_rate": 9.118694473981493e-05, + "loss": 0.0122, + "step": 23230 + }, + { + "grad_norm": 0.05801789090037346, + "learning_rate": 9.117756782099203e-05, + "loss": 0.0145, + "step": 23240 + }, + { + "grad_norm": 0.09612366557121277, + "learning_rate": 9.11681863990522e-05, + "loss": 0.0144, + "step": 23250 + }, + { + "grad_norm": 0.1053575873374939, + "learning_rate": 9.115880047502142e-05, + "loss": 0.0108, + "step": 23260 + }, + { + "grad_norm": 0.07787153869867325, + "learning_rate": 9.114941004992609e-05, + "loss": 0.0132, + "step": 23270 + }, + { + "grad_norm": 0.06771361827850342, + "learning_rate": 9.114001512479317e-05, + "loss": 0.0118, + "step": 23280 + }, + { + "grad_norm": 0.049454160034656525, + "learning_rate": 9.113061570065003e-05, + "loss": 0.0118, + "step": 23290 + }, + { + "grad_norm": 0.08059076964855194, + "learning_rate": 9.112121177852459e-05, + "loss": 0.0137, + "step": 23300 + }, + { + "grad_norm": 0.07262596487998962, + "learning_rate": 9.111180335944527e-05, + "loss": 0.0123, + "step": 23310 + }, + { + "grad_norm": 0.07873563468456268, + "learning_rate": 9.110239044444093e-05, + "loss": 0.014, + "step": 23320 + }, + { + "grad_norm": 0.09427855908870697, + "learning_rate": 9.109297303454099e-05, + "loss": 0.0135, + "step": 23330 + }, + { + "grad_norm": 0.0942431390285492, + "learning_rate": 9.108355113077526e-05, + "loss": 0.0138, + "step": 23340 + }, + { + "grad_norm": 0.08343586325645447, + "learning_rate": 9.107412473417419e-05, + "loss": 0.0129, + "step": 23350 + }, + { + "grad_norm": 0.07213388383388519, + "learning_rate": 9.106469384576858e-05, + "loss": 0.0102, + "step": 23360 + }, + { + "grad_norm": 0.06837046891450882, + "learning_rate": 9.105525846658978e-05, + "loss": 0.0133, + "step": 23370 + }, + { + "grad_norm": 0.06811095029115677, + "learning_rate": 9.104581859766965e-05, + "loss": 0.0106, + "step": 23380 + }, + { + "grad_norm": 0.07940193265676498, + "learning_rate": 9.10363742400405e-05, + "loss": 0.0133, + "step": 23390 + }, + { + "grad_norm": 0.05458305403590202, + "learning_rate": 9.102692539473518e-05, + "loss": 0.0115, + "step": 23400 + }, + { + "grad_norm": 0.10042702406644821, + "learning_rate": 9.101747206278697e-05, + "loss": 0.0123, + "step": 23410 + }, + { + "grad_norm": 0.08374042063951492, + "learning_rate": 9.100801424522968e-05, + "loss": 0.0118, + "step": 23420 + }, + { + "grad_norm": 0.08205347508192062, + "learning_rate": 9.099855194309762e-05, + "loss": 0.0116, + "step": 23430 + }, + { + "grad_norm": 0.06425795704126358, + "learning_rate": 9.098908515742554e-05, + "loss": 0.0136, + "step": 23440 + }, + { + "grad_norm": 0.08811625093221664, + "learning_rate": 9.097961388924873e-05, + "loss": 0.0117, + "step": 23450 + }, + { + "grad_norm": 0.09900195151567459, + "learning_rate": 9.097013813960298e-05, + "loss": 0.013, + "step": 23460 + }, + { + "grad_norm": 0.07510870695114136, + "learning_rate": 9.09606579095245e-05, + "loss": 0.0116, + "step": 23470 + }, + { + "grad_norm": 0.08244680613279343, + "learning_rate": 9.095117320005008e-05, + "loss": 0.015, + "step": 23480 + }, + { + "grad_norm": 0.09447763860225677, + "learning_rate": 9.094168401221691e-05, + "loss": 0.0118, + "step": 23490 + }, + { + "grad_norm": 0.06990213692188263, + "learning_rate": 9.093219034706273e-05, + "loss": 0.0119, + "step": 23500 + }, + { + "grad_norm": 0.06871993839740753, + "learning_rate": 9.092269220562577e-05, + "loss": 0.0129, + "step": 23510 + }, + { + "grad_norm": 0.08558008074760437, + "learning_rate": 9.09131895889447e-05, + "loss": 0.0129, + "step": 23520 + }, + { + "grad_norm": 0.11905894428491592, + "learning_rate": 9.090368249805873e-05, + "loss": 0.0136, + "step": 23530 + }, + { + "grad_norm": 0.07926132529973984, + "learning_rate": 9.089417093400754e-05, + "loss": 0.0125, + "step": 23540 + }, + { + "grad_norm": 0.06360290199518204, + "learning_rate": 9.088465489783131e-05, + "loss": 0.0139, + "step": 23550 + }, + { + "grad_norm": 0.04889256879687309, + "learning_rate": 9.087513439057068e-05, + "loss": 0.0101, + "step": 23560 + }, + { + "grad_norm": 0.056856587529182434, + "learning_rate": 9.08656094132668e-05, + "loss": 0.0136, + "step": 23570 + }, + { + "grad_norm": 0.05570723116397858, + "learning_rate": 9.085607996696134e-05, + "loss": 0.0127, + "step": 23580 + }, + { + "grad_norm": 0.04248875379562378, + "learning_rate": 9.084654605269639e-05, + "loss": 0.0114, + "step": 23590 + }, + { + "grad_norm": 0.06259770691394806, + "learning_rate": 9.083700767151457e-05, + "loss": 0.013, + "step": 23600 + }, + { + "grad_norm": 0.05411958694458008, + "learning_rate": 9.082746482445898e-05, + "loss": 0.0121, + "step": 23610 + }, + { + "grad_norm": 0.05213863030076027, + "learning_rate": 9.081791751257325e-05, + "loss": 0.0126, + "step": 23620 + }, + { + "grad_norm": 0.06773386895656586, + "learning_rate": 9.080836573690142e-05, + "loss": 0.0114, + "step": 23630 + }, + { + "grad_norm": 0.0752195343375206, + "learning_rate": 9.079880949848805e-05, + "loss": 0.0111, + "step": 23640 + }, + { + "grad_norm": 0.06526979804039001, + "learning_rate": 9.078924879837822e-05, + "loss": 0.0111, + "step": 23650 + }, + { + "grad_norm": 0.07097107917070389, + "learning_rate": 9.077968363761747e-05, + "loss": 0.0135, + "step": 23660 + }, + { + "grad_norm": 0.07123493403196335, + "learning_rate": 9.077011401725182e-05, + "loss": 0.0128, + "step": 23670 + }, + { + "grad_norm": 0.06264451891183853, + "learning_rate": 9.07605399383278e-05, + "loss": 0.0109, + "step": 23680 + }, + { + "grad_norm": 0.06806483864784241, + "learning_rate": 9.075096140189243e-05, + "loss": 0.0103, + "step": 23690 + }, + { + "grad_norm": 0.0754259005188942, + "learning_rate": 9.074137840899318e-05, + "loss": 0.0124, + "step": 23700 + }, + { + "grad_norm": 0.07751594483852386, + "learning_rate": 9.073179096067804e-05, + "loss": 0.015, + "step": 23710 + }, + { + "grad_norm": 0.08017495274543762, + "learning_rate": 9.072219905799549e-05, + "loss": 0.0119, + "step": 23720 + }, + { + "grad_norm": 0.09794259816408157, + "learning_rate": 9.071260270199447e-05, + "loss": 0.0116, + "step": 23730 + }, + { + "grad_norm": 0.05582457780838013, + "learning_rate": 9.070300189372441e-05, + "loss": 0.0101, + "step": 23740 + }, + { + "grad_norm": 0.059666190296411514, + "learning_rate": 9.069339663423528e-05, + "loss": 0.012, + "step": 23750 + }, + { + "grad_norm": 0.06641379743814468, + "learning_rate": 9.068378692457747e-05, + "loss": 0.0115, + "step": 23760 + }, + { + "grad_norm": 0.07015407085418701, + "learning_rate": 9.067417276580189e-05, + "loss": 0.0109, + "step": 23770 + }, + { + "grad_norm": 0.11304566264152527, + "learning_rate": 9.066455415895993e-05, + "loss": 0.0109, + "step": 23780 + }, + { + "grad_norm": 0.07864721119403839, + "learning_rate": 9.065493110510346e-05, + "loss": 0.0104, + "step": 23790 + }, + { + "grad_norm": 0.08711553364992142, + "learning_rate": 9.064530360528484e-05, + "loss": 0.0123, + "step": 23800 + }, + { + "grad_norm": 0.12010364979505539, + "learning_rate": 9.063567166055695e-05, + "loss": 0.0119, + "step": 23810 + }, + { + "grad_norm": 0.08693695813417435, + "learning_rate": 9.062603527197308e-05, + "loss": 0.0125, + "step": 23820 + }, + { + "grad_norm": 0.0687461793422699, + "learning_rate": 9.06163944405871e-05, + "loss": 0.0134, + "step": 23830 + }, + { + "grad_norm": 0.0731625035405159, + "learning_rate": 9.060674916745327e-05, + "loss": 0.0122, + "step": 23840 + }, + { + "grad_norm": 0.08469484001398087, + "learning_rate": 9.05970994536264e-05, + "loss": 0.0123, + "step": 23850 + }, + { + "grad_norm": 0.11006812751293182, + "learning_rate": 9.05874453001618e-05, + "loss": 0.012, + "step": 23860 + }, + { + "grad_norm": 0.05717344209551811, + "learning_rate": 9.057778670811517e-05, + "loss": 0.0128, + "step": 23870 + }, + { + "grad_norm": 0.06571761518716812, + "learning_rate": 9.056812367854281e-05, + "loss": 0.0125, + "step": 23880 + }, + { + "grad_norm": 0.10332079976797104, + "learning_rate": 9.055845621250143e-05, + "loss": 0.0129, + "step": 23890 + }, + { + "grad_norm": 0.10165787488222122, + "learning_rate": 9.054878431104825e-05, + "loss": 0.0131, + "step": 23900 + }, + { + "grad_norm": 0.1022486761212349, + "learning_rate": 9.0539107975241e-05, + "loss": 0.0133, + "step": 23910 + }, + { + "grad_norm": 0.08858468383550644, + "learning_rate": 9.052942720613784e-05, + "loss": 0.0119, + "step": 23920 + }, + { + "grad_norm": 0.09914841502904892, + "learning_rate": 9.051974200479745e-05, + "loss": 0.0129, + "step": 23930 + }, + { + "grad_norm": 0.06871508806943893, + "learning_rate": 9.051005237227901e-05, + "loss": 0.0137, + "step": 23940 + }, + { + "grad_norm": 0.06785311549901962, + "learning_rate": 9.050035830964215e-05, + "loss": 0.013, + "step": 23950 + }, + { + "grad_norm": 0.05763935297727585, + "learning_rate": 9.049065981794698e-05, + "loss": 0.0131, + "step": 23960 + }, + { + "grad_norm": 0.048139579594135284, + "learning_rate": 9.048095689825414e-05, + "loss": 0.0093, + "step": 23970 + }, + { + "grad_norm": 0.05658433586359024, + "learning_rate": 9.047124955162472e-05, + "loss": 0.0112, + "step": 23980 + }, + { + "grad_norm": 0.07199013978242874, + "learning_rate": 9.046153777912028e-05, + "loss": 0.0108, + "step": 23990 + }, + { + "grad_norm": 0.05578765273094177, + "learning_rate": 9.045182158180292e-05, + "loss": 0.0101, + "step": 24000 + }, + { + "grad_norm": 0.06942387670278549, + "learning_rate": 9.044210096073516e-05, + "loss": 0.0132, + "step": 24010 + }, + { + "grad_norm": 0.07203807681798935, + "learning_rate": 9.043237591698004e-05, + "loss": 0.0137, + "step": 24020 + }, + { + "grad_norm": 0.07284518331289291, + "learning_rate": 9.04226464516011e-05, + "loss": 0.0124, + "step": 24030 + }, + { + "grad_norm": 0.0950632095336914, + "learning_rate": 9.041291256566229e-05, + "loss": 0.012, + "step": 24040 + }, + { + "grad_norm": 0.05989207699894905, + "learning_rate": 9.040317426022814e-05, + "loss": 0.0119, + "step": 24050 + }, + { + "grad_norm": 0.07302702963352203, + "learning_rate": 9.03934315363636e-05, + "loss": 0.0139, + "step": 24060 + }, + { + "grad_norm": 0.07891596108675003, + "learning_rate": 9.038368439513409e-05, + "loss": 0.0119, + "step": 24070 + }, + { + "grad_norm": 0.05830038711428642, + "learning_rate": 9.03739328376056e-05, + "loss": 0.0159, + "step": 24080 + }, + { + "grad_norm": 0.07395153492689133, + "learning_rate": 9.036417686484451e-05, + "loss": 0.0115, + "step": 24090 + }, + { + "grad_norm": 0.11902844905853271, + "learning_rate": 9.035441647791773e-05, + "loss": 0.0128, + "step": 24100 + }, + { + "grad_norm": 0.0684194341301918, + "learning_rate": 9.034465167789263e-05, + "loss": 0.0121, + "step": 24110 + }, + { + "grad_norm": 0.07207026332616806, + "learning_rate": 9.033488246583706e-05, + "loss": 0.0125, + "step": 24120 + }, + { + "grad_norm": 0.0970812514424324, + "learning_rate": 9.032510884281941e-05, + "loss": 0.0098, + "step": 24130 + }, + { + "grad_norm": 0.06794407218694687, + "learning_rate": 9.031533080990848e-05, + "loss": 0.0124, + "step": 24140 + }, + { + "grad_norm": 0.04405917227268219, + "learning_rate": 9.030554836817358e-05, + "loss": 0.0097, + "step": 24150 + }, + { + "grad_norm": 0.0621095709502697, + "learning_rate": 9.029576151868451e-05, + "loss": 0.0112, + "step": 24160 + }, + { + "grad_norm": 0.055076275020837784, + "learning_rate": 9.028597026251155e-05, + "loss": 0.0113, + "step": 24170 + }, + { + "grad_norm": 0.06508580595254898, + "learning_rate": 9.027617460072547e-05, + "loss": 0.0102, + "step": 24180 + }, + { + "grad_norm": 0.12252774834632874, + "learning_rate": 9.026637453439745e-05, + "loss": 0.014, + "step": 24190 + }, + { + "grad_norm": 0.09085146337747574, + "learning_rate": 9.025657006459927e-05, + "loss": 0.0135, + "step": 24200 + }, + { + "grad_norm": 0.07451119273900986, + "learning_rate": 9.024676119240311e-05, + "loss": 0.0112, + "step": 24210 + }, + { + "grad_norm": 0.09974651038646698, + "learning_rate": 9.023694791888166e-05, + "loss": 0.0145, + "step": 24220 + }, + { + "grad_norm": 0.06497611850500107, + "learning_rate": 9.022713024510808e-05, + "loss": 0.0103, + "step": 24230 + }, + { + "grad_norm": 0.08507797122001648, + "learning_rate": 9.021730817215601e-05, + "loss": 0.0125, + "step": 24240 + }, + { + "grad_norm": 0.06994592398405075, + "learning_rate": 9.02074817010996e-05, + "loss": 0.0109, + "step": 24250 + }, + { + "grad_norm": 0.08881720900535583, + "learning_rate": 9.019765083301342e-05, + "loss": 0.0122, + "step": 24260 + }, + { + "grad_norm": 0.04916784539818764, + "learning_rate": 9.01878155689726e-05, + "loss": 0.0126, + "step": 24270 + }, + { + "grad_norm": 0.06666525453329086, + "learning_rate": 9.017797591005268e-05, + "loss": 0.0132, + "step": 24280 + }, + { + "grad_norm": 0.051042500883340836, + "learning_rate": 9.016813185732972e-05, + "loss": 0.011, + "step": 24290 + }, + { + "grad_norm": 0.0616622194647789, + "learning_rate": 9.015828341188027e-05, + "loss": 0.01, + "step": 24300 + }, + { + "grad_norm": 0.07893317937850952, + "learning_rate": 9.01484305747813e-05, + "loss": 0.0117, + "step": 24310 + }, + { + "grad_norm": 0.06247490271925926, + "learning_rate": 9.013857334711033e-05, + "loss": 0.014, + "step": 24320 + }, + { + "grad_norm": 0.0877302885055542, + "learning_rate": 9.012871172994534e-05, + "loss": 0.014, + "step": 24330 + }, + { + "grad_norm": 0.07029400765895844, + "learning_rate": 9.011884572436476e-05, + "loss": 0.0116, + "step": 24340 + }, + { + "grad_norm": 0.0631587877869606, + "learning_rate": 9.010897533144754e-05, + "loss": 0.0093, + "step": 24350 + }, + { + "grad_norm": 0.05799131840467453, + "learning_rate": 9.009910055227306e-05, + "loss": 0.0114, + "step": 24360 + }, + { + "grad_norm": 0.052770569920539856, + "learning_rate": 9.008922138792124e-05, + "loss": 0.0105, + "step": 24370 + }, + { + "grad_norm": 0.04402097314596176, + "learning_rate": 9.007933783947244e-05, + "loss": 0.0098, + "step": 24380 + }, + { + "grad_norm": 0.05591050907969475, + "learning_rate": 9.006944990800752e-05, + "loss": 0.0098, + "step": 24390 + }, + { + "grad_norm": 0.04575073719024658, + "learning_rate": 9.005955759460779e-05, + "loss": 0.0093, + "step": 24400 + }, + { + "grad_norm": 0.0719534233212471, + "learning_rate": 9.004966090035508e-05, + "loss": 0.0104, + "step": 24410 + }, + { + "grad_norm": 0.06449224054813385, + "learning_rate": 9.003975982633166e-05, + "loss": 0.01, + "step": 24420 + }, + { + "grad_norm": 0.09396453201770782, + "learning_rate": 9.00298543736203e-05, + "loss": 0.0107, + "step": 24430 + }, + { + "grad_norm": 0.07163118571043015, + "learning_rate": 9.001994454330427e-05, + "loss": 0.0121, + "step": 24440 + }, + { + "grad_norm": 0.07134078443050385, + "learning_rate": 9.001003033646727e-05, + "loss": 0.0113, + "step": 24450 + }, + { + "grad_norm": 0.10597314685583115, + "learning_rate": 9.00001117541935e-05, + "loss": 0.0109, + "step": 24460 + }, + { + "grad_norm": 0.0948633998632431, + "learning_rate": 8.999018879756764e-05, + "loss": 0.0122, + "step": 24470 + }, + { + "grad_norm": 0.10636141896247864, + "learning_rate": 8.998026146767487e-05, + "loss": 0.0119, + "step": 24480 + }, + { + "grad_norm": 0.06424765288829803, + "learning_rate": 8.99703297656008e-05, + "loss": 0.0112, + "step": 24490 + }, + { + "grad_norm": 0.05628672614693642, + "learning_rate": 8.996039369243156e-05, + "loss": 0.0113, + "step": 24500 + }, + { + "grad_norm": 0.06831100583076477, + "learning_rate": 8.995045324925378e-05, + "loss": 0.0093, + "step": 24510 + }, + { + "grad_norm": 0.06761655956506729, + "learning_rate": 8.994050843715448e-05, + "loss": 0.0096, + "step": 24520 + }, + { + "grad_norm": 0.05476953089237213, + "learning_rate": 8.993055925722121e-05, + "loss": 0.0112, + "step": 24530 + }, + { + "grad_norm": 0.08612125366926193, + "learning_rate": 8.992060571054202e-05, + "loss": 0.0129, + "step": 24540 + }, + { + "grad_norm": 0.07502663135528564, + "learning_rate": 8.991064779820542e-05, + "loss": 0.0117, + "step": 24550 + }, + { + "grad_norm": 0.05631255358457565, + "learning_rate": 8.990068552130036e-05, + "loss": 0.0106, + "step": 24560 + }, + { + "grad_norm": 0.07166197896003723, + "learning_rate": 8.989071888091634e-05, + "loss": 0.0125, + "step": 24570 + }, + { + "grad_norm": 0.06193261221051216, + "learning_rate": 8.988074787814329e-05, + "loss": 0.0115, + "step": 24580 + }, + { + "grad_norm": 0.07808050513267517, + "learning_rate": 8.987077251407158e-05, + "loss": 0.0127, + "step": 24590 + }, + { + "grad_norm": 0.11140437424182892, + "learning_rate": 8.986079278979216e-05, + "loss": 0.0134, + "step": 24600 + }, + { + "grad_norm": 0.13487018644809723, + "learning_rate": 8.985080870639635e-05, + "loss": 0.0122, + "step": 24610 + }, + { + "grad_norm": 0.07908067852258682, + "learning_rate": 8.984082026497603e-05, + "loss": 0.0115, + "step": 24620 + }, + { + "grad_norm": 0.062018461525440216, + "learning_rate": 8.98308274666235e-05, + "loss": 0.0108, + "step": 24630 + }, + { + "grad_norm": 0.057432565838098526, + "learning_rate": 8.982083031243155e-05, + "loss": 0.0106, + "step": 24640 + }, + { + "grad_norm": 0.07251432538032532, + "learning_rate": 8.98108288034935e-05, + "loss": 0.0113, + "step": 24650 + }, + { + "grad_norm": 0.06408245116472244, + "learning_rate": 8.980082294090305e-05, + "loss": 0.0115, + "step": 24660 + }, + { + "grad_norm": 0.06104855239391327, + "learning_rate": 8.979081272575443e-05, + "loss": 0.0099, + "step": 24670 + }, + { + "grad_norm": 0.0729132667183876, + "learning_rate": 8.978079815914236e-05, + "loss": 0.0093, + "step": 24680 + }, + { + "grad_norm": 0.05777515843510628, + "learning_rate": 8.977077924216202e-05, + "loss": 0.0098, + "step": 24690 + }, + { + "grad_norm": 0.0684749186038971, + "learning_rate": 8.976075597590905e-05, + "loss": 0.0102, + "step": 24700 + }, + { + "grad_norm": 0.11327634751796722, + "learning_rate": 8.975072836147958e-05, + "loss": 0.012, + "step": 24710 + }, + { + "grad_norm": 0.09305562824010849, + "learning_rate": 8.974069639997025e-05, + "loss": 0.0143, + "step": 24720 + }, + { + "grad_norm": 0.06704527884721756, + "learning_rate": 8.973066009247808e-05, + "loss": 0.0099, + "step": 24730 + }, + { + "grad_norm": 0.08366426825523376, + "learning_rate": 8.972061944010066e-05, + "loss": 0.0105, + "step": 24740 + }, + { + "grad_norm": 0.06760212779045105, + "learning_rate": 8.971057444393603e-05, + "loss": 0.0125, + "step": 24750 + }, + { + "grad_norm": 0.07794041186571121, + "learning_rate": 8.970052510508268e-05, + "loss": 0.0124, + "step": 24760 + }, + { + "grad_norm": 0.06963370740413666, + "learning_rate": 8.969047142463959e-05, + "loss": 0.0115, + "step": 24770 + }, + { + "grad_norm": 0.0861382856965065, + "learning_rate": 8.968041340370621e-05, + "loss": 0.0097, + "step": 24780 + }, + { + "grad_norm": 0.047589387744665146, + "learning_rate": 8.96703510433825e-05, + "loss": 0.0101, + "step": 24790 + }, + { + "grad_norm": 0.06293578445911407, + "learning_rate": 8.966028434476883e-05, + "loss": 0.0117, + "step": 24800 + }, + { + "grad_norm": 0.07534723728895187, + "learning_rate": 8.96502133089661e-05, + "loss": 0.0116, + "step": 24810 + }, + { + "grad_norm": 0.05295528098940849, + "learning_rate": 8.964013793707564e-05, + "loss": 0.0108, + "step": 24820 + }, + { + "grad_norm": 0.056243497878313065, + "learning_rate": 8.963005823019932e-05, + "loss": 0.0109, + "step": 24830 + }, + { + "grad_norm": 0.07317247241735458, + "learning_rate": 8.961997418943939e-05, + "loss": 0.0129, + "step": 24840 + }, + { + "grad_norm": 0.06544160097837448, + "learning_rate": 8.960988581589865e-05, + "loss": 0.0117, + "step": 24850 + }, + { + "grad_norm": 0.07948657125234604, + "learning_rate": 8.959979311068037e-05, + "loss": 0.0117, + "step": 24860 + }, + { + "grad_norm": 0.06594881415367126, + "learning_rate": 8.958969607488823e-05, + "loss": 0.0112, + "step": 24870 + }, + { + "grad_norm": 0.059516970068216324, + "learning_rate": 8.957959470962647e-05, + "loss": 0.0117, + "step": 24880 + }, + { + "grad_norm": 0.1035076454281807, + "learning_rate": 8.956948901599971e-05, + "loss": 0.0113, + "step": 24890 + }, + { + "grad_norm": 0.08531444519758224, + "learning_rate": 8.955937899511315e-05, + "loss": 0.0131, + "step": 24900 + }, + { + "grad_norm": 0.06315915286540985, + "learning_rate": 8.954926464807238e-05, + "loss": 0.0112, + "step": 24910 + }, + { + "grad_norm": 0.0851447805762291, + "learning_rate": 8.953914597598347e-05, + "loss": 0.0107, + "step": 24920 + }, + { + "grad_norm": 0.06537319719791412, + "learning_rate": 8.952902297995303e-05, + "loss": 0.0122, + "step": 24930 + }, + { + "grad_norm": 0.08425691723823547, + "learning_rate": 8.951889566108804e-05, + "loss": 0.0114, + "step": 24940 + }, + { + "grad_norm": 0.050675611943006516, + "learning_rate": 8.950876402049606e-05, + "loss": 0.0121, + "step": 24950 + }, + { + "grad_norm": 0.0736597329378128, + "learning_rate": 8.949862805928504e-05, + "loss": 0.0124, + "step": 24960 + }, + { + "grad_norm": 0.07574900984764099, + "learning_rate": 8.948848777856343e-05, + "loss": 0.0099, + "step": 24970 + }, + { + "grad_norm": 0.04151364415884018, + "learning_rate": 8.947834317944017e-05, + "loss": 0.0121, + "step": 24980 + }, + { + "grad_norm": 0.0781414583325386, + "learning_rate": 8.946819426302466e-05, + "loss": 0.0087, + "step": 24990 + }, + { + "grad_norm": 0.08144041150808334, + "learning_rate": 8.945804103042676e-05, + "loss": 0.0104, + "step": 25000 + }, + { + "grad_norm": 0.06687798351049423, + "learning_rate": 8.944788348275681e-05, + "loss": 0.0101, + "step": 25010 + }, + { + "grad_norm": 0.07064452022314072, + "learning_rate": 8.943772162112565e-05, + "loss": 0.0118, + "step": 25020 + }, + { + "grad_norm": 0.08203534036874771, + "learning_rate": 8.942755544664454e-05, + "loss": 0.0121, + "step": 25030 + }, + { + "grad_norm": 0.07762103527784348, + "learning_rate": 8.941738496042525e-05, + "loss": 0.0095, + "step": 25040 + }, + { + "grad_norm": 0.06595105677843094, + "learning_rate": 8.940721016357999e-05, + "loss": 0.0117, + "step": 25050 + }, + { + "grad_norm": 0.06747239083051682, + "learning_rate": 8.939703105722148e-05, + "loss": 0.0136, + "step": 25060 + }, + { + "grad_norm": 0.06602945923805237, + "learning_rate": 8.93868476424629e-05, + "loss": 0.0114, + "step": 25070 + }, + { + "grad_norm": 0.09630858898162842, + "learning_rate": 8.937665992041786e-05, + "loss": 0.0117, + "step": 25080 + }, + { + "grad_norm": 0.10758476704359055, + "learning_rate": 8.93664678922005e-05, + "loss": 0.011, + "step": 25090 + }, + { + "grad_norm": 0.08109549432992935, + "learning_rate": 8.93562715589254e-05, + "loss": 0.0118, + "step": 25100 + }, + { + "grad_norm": 0.06852302700281143, + "learning_rate": 8.934607092170762e-05, + "loss": 0.0113, + "step": 25110 + }, + { + "grad_norm": 0.07260569930076599, + "learning_rate": 8.933586598166266e-05, + "loss": 0.0107, + "step": 25120 + }, + { + "grad_norm": 0.05013340339064598, + "learning_rate": 8.932565673990655e-05, + "loss": 0.0126, + "step": 25130 + }, + { + "grad_norm": 0.04268249496817589, + "learning_rate": 8.931544319755574e-05, + "loss": 0.0102, + "step": 25140 + }, + { + "grad_norm": 0.05600746348500252, + "learning_rate": 8.930522535572718e-05, + "loss": 0.0113, + "step": 25150 + }, + { + "grad_norm": 0.06703299283981323, + "learning_rate": 8.929500321553826e-05, + "loss": 0.0105, + "step": 25160 + }, + { + "grad_norm": 0.06391408294439316, + "learning_rate": 8.928477677810686e-05, + "loss": 0.0093, + "step": 25170 + }, + { + "grad_norm": 0.06622371822595596, + "learning_rate": 8.927454604455137e-05, + "loss": 0.0107, + "step": 25180 + }, + { + "grad_norm": 0.06542129814624786, + "learning_rate": 8.926431101599053e-05, + "loss": 0.0112, + "step": 25190 + }, + { + "grad_norm": 0.07687932252883911, + "learning_rate": 8.925407169354369e-05, + "loss": 0.0105, + "step": 25200 + }, + { + "grad_norm": 0.044621024280786514, + "learning_rate": 8.92438280783306e-05, + "loss": 0.0109, + "step": 25210 + }, + { + "grad_norm": 0.08268065005540848, + "learning_rate": 8.923358017147146e-05, + "loss": 0.012, + "step": 25220 + }, + { + "grad_norm": 0.05707167461514473, + "learning_rate": 8.922332797408697e-05, + "loss": 0.0094, + "step": 25230 + }, + { + "grad_norm": 0.07374458014965057, + "learning_rate": 8.921307148729831e-05, + "loss": 0.0127, + "step": 25240 + }, + { + "grad_norm": 0.07348703593015671, + "learning_rate": 8.920281071222712e-05, + "loss": 0.0098, + "step": 25250 + }, + { + "grad_norm": 0.07150103896856308, + "learning_rate": 8.919254564999548e-05, + "loss": 0.0122, + "step": 25260 + }, + { + "grad_norm": 0.06439197808504105, + "learning_rate": 8.918227630172598e-05, + "loss": 0.01, + "step": 25270 + }, + { + "grad_norm": 0.0735301822423935, + "learning_rate": 8.917200266854165e-05, + "loss": 0.0095, + "step": 25280 + }, + { + "grad_norm": 0.053222835063934326, + "learning_rate": 8.9161724751566e-05, + "loss": 0.0109, + "step": 25290 + }, + { + "grad_norm": 0.06069662421941757, + "learning_rate": 8.915144255192302e-05, + "loss": 0.0114, + "step": 25300 + }, + { + "grad_norm": 0.07265958189964294, + "learning_rate": 8.914115607073714e-05, + "loss": 0.0116, + "step": 25310 + }, + { + "grad_norm": 0.09354212135076523, + "learning_rate": 8.913086530913327e-05, + "loss": 0.0127, + "step": 25320 + }, + { + "grad_norm": 0.06470208615064621, + "learning_rate": 8.912057026823681e-05, + "loss": 0.0096, + "step": 25330 + }, + { + "grad_norm": 0.058756690472364426, + "learning_rate": 8.91102709491736e-05, + "loss": 0.0107, + "step": 25340 + }, + { + "grad_norm": 0.06861034035682678, + "learning_rate": 8.909996735306996e-05, + "loss": 0.0101, + "step": 25350 + }, + { + "grad_norm": 0.09760144352912903, + "learning_rate": 8.908965948105268e-05, + "loss": 0.0104, + "step": 25360 + }, + { + "grad_norm": 0.08781108260154724, + "learning_rate": 8.907934733424901e-05, + "loss": 0.0101, + "step": 25370 + }, + { + "grad_norm": 0.061607152223587036, + "learning_rate": 8.906903091378666e-05, + "loss": 0.0109, + "step": 25380 + }, + { + "grad_norm": 0.07450121641159058, + "learning_rate": 8.905871022079384e-05, + "loss": 0.0116, + "step": 25390 + }, + { + "grad_norm": 0.06920848041772842, + "learning_rate": 8.90483852563992e-05, + "loss": 0.0116, + "step": 25400 + }, + { + "grad_norm": 0.06980931758880615, + "learning_rate": 8.903805602173185e-05, + "loss": 0.0108, + "step": 25410 + }, + { + "grad_norm": 0.05053359270095825, + "learning_rate": 8.902772251792137e-05, + "loss": 0.0108, + "step": 25420 + }, + { + "grad_norm": 0.10332752019166946, + "learning_rate": 8.901738474609786e-05, + "loss": 0.0136, + "step": 25430 + }, + { + "grad_norm": 0.05420573055744171, + "learning_rate": 8.900704270739179e-05, + "loss": 0.0112, + "step": 25440 + }, + { + "grad_norm": 0.08534837514162064, + "learning_rate": 8.89966964029342e-05, + "loss": 0.0108, + "step": 25450 + }, + { + "grad_norm": 0.07386038452386856, + "learning_rate": 8.898634583385652e-05, + "loss": 0.0141, + "step": 25460 + }, + { + "grad_norm": 0.054298728704452515, + "learning_rate": 8.897599100129065e-05, + "loss": 0.0104, + "step": 25470 + }, + { + "grad_norm": 0.07308315485715866, + "learning_rate": 8.896563190636903e-05, + "loss": 0.0135, + "step": 25480 + }, + { + "grad_norm": 0.0667404979467392, + "learning_rate": 8.895526855022448e-05, + "loss": 0.012, + "step": 25490 + }, + { + "grad_norm": 0.08548206835985184, + "learning_rate": 8.894490093399033e-05, + "loss": 0.0122, + "step": 25500 + }, + { + "grad_norm": 0.07689207792282104, + "learning_rate": 8.893452905880035e-05, + "loss": 0.0123, + "step": 25510 + }, + { + "grad_norm": 0.0756186693906784, + "learning_rate": 8.892415292578883e-05, + "loss": 0.0124, + "step": 25520 + }, + { + "grad_norm": 0.07810994982719421, + "learning_rate": 8.891377253609046e-05, + "loss": 0.0114, + "step": 25530 + }, + { + "grad_norm": 0.09293977171182632, + "learning_rate": 8.890338789084043e-05, + "loss": 0.0092, + "step": 25540 + }, + { + "grad_norm": 0.04883860796689987, + "learning_rate": 8.88929989911744e-05, + "loss": 0.0118, + "step": 25550 + }, + { + "grad_norm": 0.08813192695379257, + "learning_rate": 8.888260583822847e-05, + "loss": 0.0145, + "step": 25560 + }, + { + "grad_norm": 0.08667748421430588, + "learning_rate": 8.887220843313921e-05, + "loss": 0.0105, + "step": 25570 + }, + { + "grad_norm": 0.05833839997649193, + "learning_rate": 8.88618067770437e-05, + "loss": 0.0141, + "step": 25580 + }, + { + "grad_norm": 0.06601551175117493, + "learning_rate": 8.885140087107942e-05, + "loss": 0.0115, + "step": 25590 + }, + { + "grad_norm": 0.060085367411375046, + "learning_rate": 8.884099071638436e-05, + "loss": 0.0098, + "step": 25600 + }, + { + "grad_norm": 0.07266167551279068, + "learning_rate": 8.883057631409695e-05, + "loss": 0.0115, + "step": 25610 + }, + { + "grad_norm": 0.09219089895486832, + "learning_rate": 8.882015766535608e-05, + "loss": 0.0147, + "step": 25620 + }, + { + "grad_norm": 0.0853685811161995, + "learning_rate": 8.880973477130115e-05, + "loss": 0.0121, + "step": 25630 + }, + { + "grad_norm": 0.07749525457620621, + "learning_rate": 8.879930763307197e-05, + "loss": 0.0105, + "step": 25640 + }, + { + "grad_norm": 0.06339532136917114, + "learning_rate": 8.878887625180884e-05, + "loss": 0.0093, + "step": 25650 + }, + { + "grad_norm": 0.05098492279648781, + "learning_rate": 8.877844062865253e-05, + "loss": 0.0104, + "step": 25660 + }, + { + "grad_norm": 0.07636255770921707, + "learning_rate": 8.876800076474424e-05, + "loss": 0.0115, + "step": 25670 + }, + { + "grad_norm": 0.1216287836432457, + "learning_rate": 8.875755666122568e-05, + "loss": 0.0128, + "step": 25680 + }, + { + "grad_norm": 0.09368305653333664, + "learning_rate": 8.8747108319239e-05, + "loss": 0.0125, + "step": 25690 + }, + { + "grad_norm": 0.0700559914112091, + "learning_rate": 8.87366557399268e-05, + "loss": 0.0116, + "step": 25700 + }, + { + "grad_norm": 0.08461187034845352, + "learning_rate": 8.872619892443217e-05, + "loss": 0.0115, + "step": 25710 + }, + { + "grad_norm": 0.055156856775283813, + "learning_rate": 8.871573787389865e-05, + "loss": 0.0105, + "step": 25720 + }, + { + "grad_norm": 0.07974468171596527, + "learning_rate": 8.870527258947024e-05, + "loss": 0.0118, + "step": 25730 + }, + { + "grad_norm": 0.06663290411233902, + "learning_rate": 8.869480307229143e-05, + "loss": 0.0109, + "step": 25740 + }, + { + "grad_norm": 0.06683249026536942, + "learning_rate": 8.868432932350712e-05, + "loss": 0.0103, + "step": 25750 + }, + { + "grad_norm": 0.06816543638706207, + "learning_rate": 8.867385134426272e-05, + "loss": 0.0106, + "step": 25760 + }, + { + "grad_norm": 0.08580340445041656, + "learning_rate": 8.866336913570407e-05, + "loss": 0.0097, + "step": 25770 + }, + { + "grad_norm": 0.10248767584562302, + "learning_rate": 8.865288269897751e-05, + "loss": 0.0114, + "step": 25780 + }, + { + "grad_norm": 0.0904194712638855, + "learning_rate": 8.864239203522981e-05, + "loss": 0.0113, + "step": 25790 + }, + { + "grad_norm": 0.09864185005426407, + "learning_rate": 8.863189714560822e-05, + "loss": 0.0106, + "step": 25800 + }, + { + "grad_norm": 0.08937656134366989, + "learning_rate": 8.862139803126043e-05, + "loss": 0.0107, + "step": 25810 + }, + { + "grad_norm": 0.06255406886339188, + "learning_rate": 8.861089469333463e-05, + "loss": 0.0111, + "step": 25820 + }, + { + "grad_norm": 0.06954924017190933, + "learning_rate": 8.860038713297944e-05, + "loss": 0.0102, + "step": 25830 + }, + { + "grad_norm": 0.07017858326435089, + "learning_rate": 8.858987535134394e-05, + "loss": 0.0122, + "step": 25840 + }, + { + "grad_norm": 0.1047702357172966, + "learning_rate": 8.857935934957769e-05, + "loss": 0.0132, + "step": 25850 + }, + { + "grad_norm": 0.06196035072207451, + "learning_rate": 8.856883912883071e-05, + "loss": 0.0095, + "step": 25860 + }, + { + "grad_norm": 0.0817309021949768, + "learning_rate": 8.855831469025346e-05, + "loss": 0.0114, + "step": 25870 + }, + { + "grad_norm": 0.09572640806436539, + "learning_rate": 8.854778603499689e-05, + "loss": 0.0114, + "step": 25880 + }, + { + "grad_norm": 0.09274868667125702, + "learning_rate": 8.85372531642124e-05, + "loss": 0.0111, + "step": 25890 + }, + { + "grad_norm": 0.07829276472330093, + "learning_rate": 8.852671607905185e-05, + "loss": 0.0106, + "step": 25900 + }, + { + "grad_norm": 0.05473360791802406, + "learning_rate": 8.851617478066754e-05, + "loss": 0.0127, + "step": 25910 + }, + { + "grad_norm": 0.06573645025491714, + "learning_rate": 8.850562927021227e-05, + "loss": 0.0106, + "step": 25920 + }, + { + "grad_norm": 0.08737030625343323, + "learning_rate": 8.849507954883928e-05, + "loss": 0.0118, + "step": 25930 + }, + { + "grad_norm": 0.07397264242172241, + "learning_rate": 8.848452561770226e-05, + "loss": 0.0167, + "step": 25940 + }, + { + "grad_norm": 0.09526374191045761, + "learning_rate": 8.847396747795538e-05, + "loss": 0.0147, + "step": 25950 + }, + { + "grad_norm": 0.06249076500535011, + "learning_rate": 8.846340513075327e-05, + "loss": 0.012, + "step": 25960 + }, + { + "grad_norm": 0.08357302099466324, + "learning_rate": 8.845283857725099e-05, + "loss": 0.0101, + "step": 25970 + }, + { + "grad_norm": 0.08993510901927948, + "learning_rate": 8.844226781860409e-05, + "loss": 0.0118, + "step": 25980 + }, + { + "grad_norm": 0.09419005364179611, + "learning_rate": 8.84316928559686e-05, + "loss": 0.0102, + "step": 25990 + }, + { + "grad_norm": 0.06079823151230812, + "learning_rate": 8.842111369050094e-05, + "loss": 0.0133, + "step": 26000 + }, + { + "grad_norm": 0.05902041122317314, + "learning_rate": 8.841053032335808e-05, + "loss": 0.0122, + "step": 26010 + }, + { + "grad_norm": 0.09128464758396149, + "learning_rate": 8.839994275569735e-05, + "loss": 0.0115, + "step": 26020 + }, + { + "grad_norm": 0.09485946595668793, + "learning_rate": 8.838935098867662e-05, + "loss": 0.011, + "step": 26030 + }, + { + "grad_norm": 0.07313086092472076, + "learning_rate": 8.837875502345418e-05, + "loss": 0.0123, + "step": 26040 + }, + { + "grad_norm": 0.06546403467655182, + "learning_rate": 8.83681548611888e-05, + "loss": 0.0136, + "step": 26050 + }, + { + "grad_norm": 0.05192537233233452, + "learning_rate": 8.835755050303969e-05, + "loss": 0.0141, + "step": 26060 + }, + { + "grad_norm": 0.07769855111837387, + "learning_rate": 8.834694195016653e-05, + "loss": 0.0107, + "step": 26070 + }, + { + "grad_norm": 0.054742805659770966, + "learning_rate": 8.833632920372942e-05, + "loss": 0.0096, + "step": 26080 + }, + { + "grad_norm": 0.05546695366501808, + "learning_rate": 8.832571226488903e-05, + "loss": 0.0119, + "step": 26090 + }, + { + "grad_norm": 0.08278195559978485, + "learning_rate": 8.831509113480634e-05, + "loss": 0.0114, + "step": 26100 + }, + { + "grad_norm": 0.08277077227830887, + "learning_rate": 8.83044658146429e-05, + "loss": 0.0094, + "step": 26110 + }, + { + "grad_norm": 0.07169787585735321, + "learning_rate": 8.829383630556067e-05, + "loss": 0.0124, + "step": 26120 + }, + { + "grad_norm": 0.06107035279273987, + "learning_rate": 8.828320260872207e-05, + "loss": 0.0092, + "step": 26130 + }, + { + "grad_norm": 0.04418516904115677, + "learning_rate": 8.827256472529e-05, + "loss": 0.0102, + "step": 26140 + }, + { + "grad_norm": 0.0859520211815834, + "learning_rate": 8.826192265642778e-05, + "loss": 0.0108, + "step": 26150 + }, + { + "grad_norm": 0.07472452521324158, + "learning_rate": 8.825127640329923e-05, + "loss": 0.0116, + "step": 26160 + }, + { + "grad_norm": 0.10000235587358475, + "learning_rate": 8.824062596706861e-05, + "loss": 0.0117, + "step": 26170 + }, + { + "grad_norm": 0.0714007169008255, + "learning_rate": 8.822997134890062e-05, + "loss": 0.0098, + "step": 26180 + }, + { + "grad_norm": 0.07058817893266678, + "learning_rate": 8.821931254996044e-05, + "loss": 0.0116, + "step": 26190 + }, + { + "grad_norm": 0.06751994043588638, + "learning_rate": 8.82086495714137e-05, + "loss": 0.0103, + "step": 26200 + }, + { + "grad_norm": 0.053450971841812134, + "learning_rate": 8.81979824144265e-05, + "loss": 0.0124, + "step": 26210 + }, + { + "grad_norm": 0.08253885060548782, + "learning_rate": 8.818731108016536e-05, + "loss": 0.0128, + "step": 26220 + }, + { + "grad_norm": 0.07803954184055328, + "learning_rate": 8.81766355697973e-05, + "loss": 0.0129, + "step": 26230 + }, + { + "grad_norm": 0.0665193498134613, + "learning_rate": 8.816595588448977e-05, + "loss": 0.0095, + "step": 26240 + }, + { + "grad_norm": 0.09989971667528152, + "learning_rate": 8.81552720254107e-05, + "loss": 0.0108, + "step": 26250 + }, + { + "grad_norm": 0.06635235995054245, + "learning_rate": 8.814458399372842e-05, + "loss": 0.0094, + "step": 26260 + }, + { + "grad_norm": 0.08031062036752701, + "learning_rate": 8.813389179061181e-05, + "loss": 0.0112, + "step": 26270 + }, + { + "grad_norm": 0.056513722985982895, + "learning_rate": 8.812319541723012e-05, + "loss": 0.0126, + "step": 26280 + }, + { + "grad_norm": 0.05388905853033066, + "learning_rate": 8.811249487475309e-05, + "loss": 0.0101, + "step": 26290 + }, + { + "grad_norm": 0.10680124163627625, + "learning_rate": 8.810179016435092e-05, + "loss": 0.0108, + "step": 26300 + }, + { + "grad_norm": 0.07575053721666336, + "learning_rate": 8.809108128719428e-05, + "loss": 0.0118, + "step": 26310 + }, + { + "grad_norm": 0.05440732464194298, + "learning_rate": 8.808036824445424e-05, + "loss": 0.0119, + "step": 26320 + }, + { + "grad_norm": 0.08604631572961807, + "learning_rate": 8.806965103730238e-05, + "loss": 0.0114, + "step": 26330 + }, + { + "grad_norm": 0.060549426823854446, + "learning_rate": 8.805892966691074e-05, + "loss": 0.0096, + "step": 26340 + }, + { + "grad_norm": 0.0615871287882328, + "learning_rate": 8.804820413445175e-05, + "loss": 0.0098, + "step": 26350 + }, + { + "grad_norm": 0.0777655765414238, + "learning_rate": 8.803747444109837e-05, + "loss": 0.0107, + "step": 26360 + }, + { + "grad_norm": 0.059434451162815094, + "learning_rate": 8.802674058802399e-05, + "loss": 0.0128, + "step": 26370 + }, + { + "grad_norm": 0.06001640111207962, + "learning_rate": 8.801600257640241e-05, + "loss": 0.0094, + "step": 26380 + }, + { + "grad_norm": 0.07281716167926788, + "learning_rate": 8.800526040740795e-05, + "loss": 0.0112, + "step": 26390 + }, + { + "grad_norm": 0.0661473274230957, + "learning_rate": 8.799451408221535e-05, + "loss": 0.0115, + "step": 26400 + }, + { + "grad_norm": 0.07007303088903427, + "learning_rate": 8.798376360199982e-05, + "loss": 0.0102, + "step": 26410 + }, + { + "grad_norm": 0.09317179024219513, + "learning_rate": 8.797300896793701e-05, + "loss": 0.0108, + "step": 26420 + }, + { + "grad_norm": 0.07400081306695938, + "learning_rate": 8.796225018120302e-05, + "loss": 0.009, + "step": 26430 + }, + { + "grad_norm": 0.07362546771764755, + "learning_rate": 8.795148724297444e-05, + "loss": 0.0114, + "step": 26440 + }, + { + "grad_norm": 0.04131339490413666, + "learning_rate": 8.794072015442825e-05, + "loss": 0.0096, + "step": 26450 + }, + { + "grad_norm": 0.07098903506994247, + "learning_rate": 8.792994891674198e-05, + "loss": 0.0099, + "step": 26460 + }, + { + "grad_norm": 0.08461166173219681, + "learning_rate": 8.79191735310935e-05, + "loss": 0.0122, + "step": 26470 + }, + { + "grad_norm": 0.11180908232927322, + "learning_rate": 8.790839399866122e-05, + "loss": 0.0113, + "step": 26480 + }, + { + "grad_norm": 0.07779522985219955, + "learning_rate": 8.789761032062397e-05, + "loss": 0.0101, + "step": 26490 + }, + { + "grad_norm": 0.06543993204832077, + "learning_rate": 8.788682249816103e-05, + "loss": 0.0094, + "step": 26500 + }, + { + "grad_norm": 0.06102810055017471, + "learning_rate": 8.787603053245215e-05, + "loss": 0.0105, + "step": 26510 + }, + { + "grad_norm": 0.06394463777542114, + "learning_rate": 8.78652344246775e-05, + "loss": 0.011, + "step": 26520 + }, + { + "grad_norm": 0.07862093299627304, + "learning_rate": 8.785443417601776e-05, + "loss": 0.0093, + "step": 26530 + }, + { + "grad_norm": 0.07962948828935623, + "learning_rate": 8.784362978765401e-05, + "loss": 0.0118, + "step": 26540 + }, + { + "grad_norm": 0.07053820788860321, + "learning_rate": 8.783282126076779e-05, + "loss": 0.0108, + "step": 26550 + }, + { + "grad_norm": 0.06928926706314087, + "learning_rate": 8.782200859654112e-05, + "loss": 0.0109, + "step": 26560 + }, + { + "grad_norm": 0.07324729859828949, + "learning_rate": 8.781119179615646e-05, + "loss": 0.0135, + "step": 26570 + }, + { + "grad_norm": 0.1027989313006401, + "learning_rate": 8.780037086079674e-05, + "loss": 0.0102, + "step": 26580 + }, + { + "grad_norm": 0.07583539187908173, + "learning_rate": 8.778954579164527e-05, + "loss": 0.0106, + "step": 26590 + }, + { + "grad_norm": 0.11228267103433609, + "learning_rate": 8.777871658988588e-05, + "loss": 0.0131, + "step": 26600 + }, + { + "grad_norm": 0.07222297042608261, + "learning_rate": 8.776788325670285e-05, + "loss": 0.012, + "step": 26610 + }, + { + "grad_norm": 0.05962241813540459, + "learning_rate": 8.775704579328089e-05, + "loss": 0.0089, + "step": 26620 + }, + { + "grad_norm": 0.08818729221820831, + "learning_rate": 8.774620420080517e-05, + "loss": 0.011, + "step": 26630 + }, + { + "grad_norm": 0.08047772943973541, + "learning_rate": 8.773535848046131e-05, + "loss": 0.0113, + "step": 26640 + }, + { + "grad_norm": 0.06802690029144287, + "learning_rate": 8.772450863343538e-05, + "loss": 0.0113, + "step": 26650 + }, + { + "grad_norm": 0.044186875224113464, + "learning_rate": 8.77136546609139e-05, + "loss": 0.0115, + "step": 26660 + }, + { + "grad_norm": 0.06989802420139313, + "learning_rate": 8.770279656408385e-05, + "loss": 0.0105, + "step": 26670 + }, + { + "grad_norm": 0.08290854096412659, + "learning_rate": 8.769193434413265e-05, + "loss": 0.0133, + "step": 26680 + }, + { + "grad_norm": 0.06426209211349487, + "learning_rate": 8.76810680022482e-05, + "loss": 0.0106, + "step": 26690 + }, + { + "grad_norm": 0.0663241446018219, + "learning_rate": 8.767019753961878e-05, + "loss": 0.0097, + "step": 26700 + }, + { + "grad_norm": 0.08322715014219284, + "learning_rate": 8.765932295743321e-05, + "loss": 0.0089, + "step": 26710 + }, + { + "grad_norm": 0.06361830234527588, + "learning_rate": 8.764844425688068e-05, + "loss": 0.0091, + "step": 26720 + }, + { + "grad_norm": 0.1059017926454544, + "learning_rate": 8.763756143915092e-05, + "loss": 0.0114, + "step": 26730 + }, + { + "grad_norm": 0.07799675315618515, + "learning_rate": 8.7626674505434e-05, + "loss": 0.0103, + "step": 26740 + }, + { + "grad_norm": 0.07207705825567245, + "learning_rate": 8.761578345692053e-05, + "loss": 0.0132, + "step": 26750 + }, + { + "grad_norm": 0.07616905122995377, + "learning_rate": 8.760488829480156e-05, + "loss": 0.0113, + "step": 26760 + }, + { + "grad_norm": 0.08007347583770752, + "learning_rate": 8.759398902026854e-05, + "loss": 0.012, + "step": 26770 + }, + { + "grad_norm": 0.05464629456400871, + "learning_rate": 8.758308563451339e-05, + "loss": 0.0109, + "step": 26780 + }, + { + "grad_norm": 0.049503251910209656, + "learning_rate": 8.75721781387285e-05, + "loss": 0.0107, + "step": 26790 + }, + { + "grad_norm": 0.05456032231450081, + "learning_rate": 8.75612665341067e-05, + "loss": 0.0107, + "step": 26800 + }, + { + "grad_norm": 0.061560869216918945, + "learning_rate": 8.755035082184126e-05, + "loss": 0.0107, + "step": 26810 + }, + { + "grad_norm": 0.07566557079553604, + "learning_rate": 8.753943100312592e-05, + "loss": 0.011, + "step": 26820 + }, + { + "grad_norm": 0.06478225439786911, + "learning_rate": 8.752850707915484e-05, + "loss": 0.0115, + "step": 26830 + }, + { + "grad_norm": 0.08239030092954636, + "learning_rate": 8.751757905112264e-05, + "loss": 0.0107, + "step": 26840 + }, + { + "grad_norm": 0.1031942293047905, + "learning_rate": 8.75066469202244e-05, + "loss": 0.0114, + "step": 26850 + }, + { + "grad_norm": 0.050498127937316895, + "learning_rate": 8.749571068765567e-05, + "loss": 0.0097, + "step": 26860 + }, + { + "grad_norm": 0.07945623993873596, + "learning_rate": 8.748477035461238e-05, + "loss": 0.0115, + "step": 26870 + }, + { + "grad_norm": 0.05920981615781784, + "learning_rate": 8.747382592229095e-05, + "loss": 0.0095, + "step": 26880 + }, + { + "grad_norm": 0.0735166147351265, + "learning_rate": 8.746287739188828e-05, + "loss": 0.0103, + "step": 26890 + }, + { + "grad_norm": 0.08700094372034073, + "learning_rate": 8.745192476460165e-05, + "loss": 0.0121, + "step": 26900 + }, + { + "grad_norm": 0.06173097714781761, + "learning_rate": 8.744096804162882e-05, + "loss": 0.0111, + "step": 26910 + }, + { + "grad_norm": 0.08705215901136398, + "learning_rate": 8.743000722416804e-05, + "loss": 0.0125, + "step": 26920 + }, + { + "grad_norm": 0.09108871221542358, + "learning_rate": 8.741904231341793e-05, + "loss": 0.0097, + "step": 26930 + }, + { + "grad_norm": 0.08890659362077713, + "learning_rate": 8.740807331057762e-05, + "loss": 0.0096, + "step": 26940 + }, + { + "grad_norm": 0.10515842586755753, + "learning_rate": 8.739710021684667e-05, + "loss": 0.011, + "step": 26950 + }, + { + "grad_norm": 0.08350313454866409, + "learning_rate": 8.738612303342503e-05, + "loss": 0.0149, + "step": 26960 + }, + { + "grad_norm": 0.07583027333021164, + "learning_rate": 8.73751417615132e-05, + "loss": 0.0104, + "step": 26970 + }, + { + "grad_norm": 0.06524135917425156, + "learning_rate": 8.736415640231208e-05, + "loss": 0.0101, + "step": 26980 + }, + { + "grad_norm": 0.06573499739170074, + "learning_rate": 8.735316695702297e-05, + "loss": 0.011, + "step": 26990 + }, + { + "grad_norm": 0.06998388469219208, + "learning_rate": 8.734217342684769e-05, + "loss": 0.0115, + "step": 27000 + }, + { + "grad_norm": 0.06696115434169769, + "learning_rate": 8.733117581298847e-05, + "loss": 0.0114, + "step": 27010 + }, + { + "grad_norm": 0.07960862666368484, + "learning_rate": 8.732017411664796e-05, + "loss": 0.011, + "step": 27020 + }, + { + "grad_norm": 0.07307135313749313, + "learning_rate": 8.730916833902936e-05, + "loss": 0.0125, + "step": 27030 + }, + { + "grad_norm": 0.07200326025485992, + "learning_rate": 8.729815848133618e-05, + "loss": 0.0115, + "step": 27040 + }, + { + "grad_norm": 0.07247133553028107, + "learning_rate": 8.728714454477247e-05, + "loss": 0.0101, + "step": 27050 + }, + { + "grad_norm": 0.06419311463832855, + "learning_rate": 8.727612653054269e-05, + "loss": 0.0107, + "step": 27060 + }, + { + "grad_norm": 0.05855653062462807, + "learning_rate": 8.726510443985176e-05, + "loss": 0.0113, + "step": 27070 + }, + { + "grad_norm": 0.07473420351743698, + "learning_rate": 8.725407827390503e-05, + "loss": 0.0105, + "step": 27080 + }, + { + "grad_norm": 0.07858258485794067, + "learning_rate": 8.724304803390833e-05, + "loss": 0.0117, + "step": 27090 + }, + { + "grad_norm": 0.05973905697464943, + "learning_rate": 8.723201372106788e-05, + "loss": 0.0114, + "step": 27100 + }, + { + "grad_norm": 0.05891378968954086, + "learning_rate": 8.722097533659038e-05, + "loss": 0.012, + "step": 27110 + }, + { + "grad_norm": 0.09265824407339096, + "learning_rate": 8.720993288168299e-05, + "loss": 0.0129, + "step": 27120 + }, + { + "grad_norm": 0.08337894827127457, + "learning_rate": 8.719888635755327e-05, + "loss": 0.0093, + "step": 27130 + }, + { + "grad_norm": 0.06662779301404953, + "learning_rate": 8.718783576540928e-05, + "loss": 0.0095, + "step": 27140 + }, + { + "grad_norm": 0.08553696423768997, + "learning_rate": 8.717678110645948e-05, + "loss": 0.0132, + "step": 27150 + }, + { + "grad_norm": 0.1157907098531723, + "learning_rate": 8.716572238191279e-05, + "loss": 0.011, + "step": 27160 + }, + { + "grad_norm": 0.08539639413356781, + "learning_rate": 8.715465959297857e-05, + "loss": 0.0115, + "step": 27170 + }, + { + "grad_norm": 0.0648832619190216, + "learning_rate": 8.714359274086665e-05, + "loss": 0.0121, + "step": 27180 + }, + { + "grad_norm": 0.0832768902182579, + "learning_rate": 8.713252182678726e-05, + "loss": 0.0094, + "step": 27190 + }, + { + "grad_norm": 0.06459725648164749, + "learning_rate": 8.712144685195112e-05, + "loss": 0.0101, + "step": 27200 + }, + { + "grad_norm": 0.05659375339746475, + "learning_rate": 8.711036781756936e-05, + "loss": 0.01, + "step": 27210 + }, + { + "grad_norm": 0.060399919748306274, + "learning_rate": 8.709928472485357e-05, + "loss": 0.0116, + "step": 27220 + }, + { + "grad_norm": 0.0798841267824173, + "learning_rate": 8.708819757501579e-05, + "loss": 0.012, + "step": 27230 + }, + { + "grad_norm": 0.08287461847066879, + "learning_rate": 8.707710636926846e-05, + "loss": 0.0135, + "step": 27240 + }, + { + "grad_norm": 0.09417781233787537, + "learning_rate": 8.706601110882455e-05, + "loss": 0.0107, + "step": 27250 + }, + { + "grad_norm": 0.0823042243719101, + "learning_rate": 8.705491179489738e-05, + "loss": 0.0089, + "step": 27260 + }, + { + "grad_norm": 0.06014075502753258, + "learning_rate": 8.704380842870077e-05, + "loss": 0.0123, + "step": 27270 + }, + { + "grad_norm": 0.08185780793428421, + "learning_rate": 8.703270101144895e-05, + "loss": 0.0102, + "step": 27280 + }, + { + "grad_norm": 0.05161486566066742, + "learning_rate": 8.702158954435664e-05, + "loss": 0.0115, + "step": 27290 + }, + { + "grad_norm": 0.0644000917673111, + "learning_rate": 8.701047402863896e-05, + "loss": 0.0098, + "step": 27300 + }, + { + "grad_norm": 0.05048054829239845, + "learning_rate": 8.699935446551148e-05, + "loss": 0.0108, + "step": 27310 + }, + { + "grad_norm": 0.08954580128192902, + "learning_rate": 8.698823085619022e-05, + "loss": 0.0112, + "step": 27320 + }, + { + "grad_norm": 0.09412795305252075, + "learning_rate": 8.697710320189166e-05, + "loss": 0.0098, + "step": 27330 + }, + { + "grad_norm": 0.09312993288040161, + "learning_rate": 8.696597150383268e-05, + "loss": 0.0113, + "step": 27340 + }, + { + "grad_norm": 0.07349997013807297, + "learning_rate": 8.695483576323063e-05, + "loss": 0.0105, + "step": 27350 + }, + { + "grad_norm": 0.06754419207572937, + "learning_rate": 8.69436959813033e-05, + "loss": 0.0135, + "step": 27360 + }, + { + "grad_norm": 0.06871746480464935, + "learning_rate": 8.693255215926892e-05, + "loss": 0.0104, + "step": 27370 + }, + { + "grad_norm": 0.05596905201673508, + "learning_rate": 8.692140429834617e-05, + "loss": 0.0087, + "step": 27380 + }, + { + "grad_norm": 0.06298823654651642, + "learning_rate": 8.691025239975415e-05, + "loss": 0.0113, + "step": 27390 + }, + { + "grad_norm": 0.06315537542104721, + "learning_rate": 8.689909646471243e-05, + "loss": 0.0088, + "step": 27400 + }, + { + "grad_norm": 0.06118614226579666, + "learning_rate": 8.688793649444099e-05, + "loss": 0.0111, + "step": 27410 + }, + { + "grad_norm": 0.06916079670190811, + "learning_rate": 8.687677249016029e-05, + "loss": 0.0101, + "step": 27420 + }, + { + "grad_norm": 0.10704758763313293, + "learning_rate": 8.686560445309118e-05, + "loss": 0.0113, + "step": 27430 + }, + { + "grad_norm": 0.07997213304042816, + "learning_rate": 8.685443238445499e-05, + "loss": 0.0119, + "step": 27440 + }, + { + "grad_norm": 0.06499779969453812, + "learning_rate": 8.68432562854735e-05, + "loss": 0.0138, + "step": 27450 + }, + { + "grad_norm": 0.05787188559770584, + "learning_rate": 8.683207615736887e-05, + "loss": 0.0134, + "step": 27460 + }, + { + "grad_norm": 0.09857769310474396, + "learning_rate": 8.682089200136379e-05, + "loss": 0.0132, + "step": 27470 + }, + { + "grad_norm": 0.09984678030014038, + "learning_rate": 8.680970381868132e-05, + "loss": 0.0128, + "step": 27480 + }, + { + "grad_norm": 0.07838813215494156, + "learning_rate": 8.679851161054498e-05, + "loss": 0.0109, + "step": 27490 + }, + { + "grad_norm": 0.053414806723594666, + "learning_rate": 8.678731537817873e-05, + "loss": 0.0098, + "step": 27500 + }, + { + "grad_norm": 0.05085804685950279, + "learning_rate": 8.677611512280697e-05, + "loss": 0.0121, + "step": 27510 + }, + { + "grad_norm": 0.06677641719579697, + "learning_rate": 8.676491084565457e-05, + "loss": 0.0103, + "step": 27520 + }, + { + "grad_norm": 0.063811294734478, + "learning_rate": 8.675370254794678e-05, + "loss": 0.0128, + "step": 27530 + }, + { + "grad_norm": 0.07777106016874313, + "learning_rate": 8.674249023090935e-05, + "loss": 0.0095, + "step": 27540 + }, + { + "grad_norm": 0.06901233643293381, + "learning_rate": 8.673127389576843e-05, + "loss": 0.0101, + "step": 27550 + }, + { + "grad_norm": 0.08489833027124405, + "learning_rate": 8.67200535437506e-05, + "loss": 0.0112, + "step": 27560 + }, + { + "grad_norm": 0.0754576325416565, + "learning_rate": 8.670882917608296e-05, + "loss": 0.0101, + "step": 27570 + }, + { + "grad_norm": 0.06329666078090668, + "learning_rate": 8.669760079399292e-05, + "loss": 0.0105, + "step": 27580 + }, + { + "grad_norm": 0.06630691885948181, + "learning_rate": 8.668636839870845e-05, + "loss": 0.0104, + "step": 27590 + }, + { + "grad_norm": 0.0902172103524208, + "learning_rate": 8.667513199145789e-05, + "loss": 0.0125, + "step": 27600 + }, + { + "grad_norm": 0.10322301089763641, + "learning_rate": 8.666389157347002e-05, + "loss": 0.0107, + "step": 27610 + }, + { + "grad_norm": 0.09363310039043427, + "learning_rate": 8.66526471459741e-05, + "loss": 0.0103, + "step": 27620 + }, + { + "grad_norm": 0.07703382521867752, + "learning_rate": 8.66413987101998e-05, + "loss": 0.0135, + "step": 27630 + }, + { + "grad_norm": 0.09575937688350677, + "learning_rate": 8.663014626737723e-05, + "loss": 0.0123, + "step": 27640 + }, + { + "grad_norm": 0.07710728794336319, + "learning_rate": 8.661888981873691e-05, + "loss": 0.0111, + "step": 27650 + }, + { + "grad_norm": 0.07271567732095718, + "learning_rate": 8.660762936550988e-05, + "loss": 0.0104, + "step": 27660 + }, + { + "grad_norm": 0.052326854318380356, + "learning_rate": 8.659636490892753e-05, + "loss": 0.011, + "step": 27670 + }, + { + "grad_norm": 0.05755871161818504, + "learning_rate": 8.658509645022174e-05, + "loss": 0.0138, + "step": 27680 + }, + { + "grad_norm": 0.05674261599779129, + "learning_rate": 8.657382399062481e-05, + "loss": 0.0101, + "step": 27690 + }, + { + "grad_norm": 0.06687094271183014, + "learning_rate": 8.656254753136946e-05, + "loss": 0.0093, + "step": 27700 + }, + { + "grad_norm": 0.06744410842657089, + "learning_rate": 8.655126707368891e-05, + "loss": 0.0099, + "step": 27710 + }, + { + "grad_norm": 0.0787522941827774, + "learning_rate": 8.653998261881672e-05, + "loss": 0.0106, + "step": 27720 + }, + { + "grad_norm": 0.051807839423418045, + "learning_rate": 8.652869416798699e-05, + "loss": 0.0092, + "step": 27730 + }, + { + "grad_norm": 0.049858950078487396, + "learning_rate": 8.651740172243417e-05, + "loss": 0.01, + "step": 27740 + }, + { + "grad_norm": 0.06823766231536865, + "learning_rate": 8.65061052833932e-05, + "loss": 0.0093, + "step": 27750 + }, + { + "grad_norm": 0.058413587510585785, + "learning_rate": 8.649480485209945e-05, + "loss": 0.0093, + "step": 27760 + }, + { + "grad_norm": 0.056730207055807114, + "learning_rate": 8.64835004297887e-05, + "loss": 0.0095, + "step": 27770 + }, + { + "grad_norm": 0.057226210832595825, + "learning_rate": 8.64721920176972e-05, + "loss": 0.009, + "step": 27780 + }, + { + "grad_norm": 0.08478134125471115, + "learning_rate": 8.646087961706164e-05, + "loss": 0.0123, + "step": 27790 + }, + { + "grad_norm": 0.08992449194192886, + "learning_rate": 8.644956322911908e-05, + "loss": 0.0118, + "step": 27800 + }, + { + "grad_norm": 0.07615458220243454, + "learning_rate": 8.643824285510709e-05, + "loss": 0.0115, + "step": 27810 + }, + { + "grad_norm": 0.10793975740671158, + "learning_rate": 8.642691849626364e-05, + "loss": 0.012, + "step": 27820 + }, + { + "grad_norm": 0.09231512248516083, + "learning_rate": 8.641559015382717e-05, + "loss": 0.0109, + "step": 27830 + }, + { + "grad_norm": 0.059839148074388504, + "learning_rate": 8.640425782903649e-05, + "loss": 0.0097, + "step": 27840 + }, + { + "grad_norm": 0.054728325456380844, + "learning_rate": 8.639292152313091e-05, + "loss": 0.0116, + "step": 27850 + }, + { + "grad_norm": 0.06661666184663773, + "learning_rate": 8.638158123735015e-05, + "loss": 0.0109, + "step": 27860 + }, + { + "grad_norm": 0.07600661367177963, + "learning_rate": 8.637023697293436e-05, + "loss": 0.0104, + "step": 27870 + }, + { + "grad_norm": 0.04228316992521286, + "learning_rate": 8.635888873112414e-05, + "loss": 0.0105, + "step": 27880 + }, + { + "grad_norm": 0.07757987082004547, + "learning_rate": 8.634753651316052e-05, + "loss": 0.0116, + "step": 27890 + }, + { + "grad_norm": 0.07922618836164474, + "learning_rate": 8.633618032028496e-05, + "loss": 0.0091, + "step": 27900 + }, + { + "grad_norm": 0.06481893360614777, + "learning_rate": 8.632482015373934e-05, + "loss": 0.0111, + "step": 27910 + }, + { + "grad_norm": 0.06098375469446182, + "learning_rate": 8.6313456014766e-05, + "loss": 0.0104, + "step": 27920 + }, + { + "grad_norm": 0.07275677472352982, + "learning_rate": 8.630208790460771e-05, + "loss": 0.0106, + "step": 27930 + }, + { + "grad_norm": 0.053732361644506454, + "learning_rate": 8.629071582450768e-05, + "loss": 0.01, + "step": 27940 + }, + { + "grad_norm": 0.05306340008974075, + "learning_rate": 8.62793397757095e-05, + "loss": 0.0109, + "step": 27950 + }, + { + "grad_norm": 0.06855326145887375, + "learning_rate": 8.626795975945729e-05, + "loss": 0.0104, + "step": 27960 + }, + { + "grad_norm": 0.057416882365942, + "learning_rate": 8.625657577699551e-05, + "loss": 0.0095, + "step": 27970 + }, + { + "grad_norm": 0.081535205245018, + "learning_rate": 8.624518782956914e-05, + "loss": 0.012, + "step": 27980 + }, + { + "grad_norm": 0.09074296802282333, + "learning_rate": 8.62337959184235e-05, + "loss": 0.0102, + "step": 27990 + }, + { + "grad_norm": 0.09705395251512527, + "learning_rate": 8.622240004480441e-05, + "loss": 0.0103, + "step": 28000 + }, + { + "grad_norm": 0.08257239311933517, + "learning_rate": 8.621100020995814e-05, + "loss": 0.0091, + "step": 28010 + }, + { + "grad_norm": 0.08583172410726547, + "learning_rate": 8.619959641513132e-05, + "loss": 0.0127, + "step": 28020 + }, + { + "grad_norm": 0.0646716058254242, + "learning_rate": 8.618818866157105e-05, + "loss": 0.0103, + "step": 28030 + }, + { + "grad_norm": 0.06710757315158844, + "learning_rate": 8.617677695052487e-05, + "loss": 0.0114, + "step": 28040 + }, + { + "grad_norm": 0.09018830955028534, + "learning_rate": 8.616536128324078e-05, + "loss": 0.01, + "step": 28050 + }, + { + "grad_norm": 0.06306938827037811, + "learning_rate": 8.615394166096712e-05, + "loss": 0.0107, + "step": 28060 + }, + { + "grad_norm": 0.07227858155965805, + "learning_rate": 8.614251808495279e-05, + "loss": 0.0099, + "step": 28070 + }, + { + "grad_norm": 0.08643786609172821, + "learning_rate": 8.6131090556447e-05, + "loss": 0.0113, + "step": 28080 + }, + { + "grad_norm": 0.07679539173841476, + "learning_rate": 8.611965907669947e-05, + "loss": 0.0101, + "step": 28090 + }, + { + "grad_norm": 0.05640999227762222, + "learning_rate": 8.610822364696034e-05, + "loss": 0.0088, + "step": 28100 + }, + { + "grad_norm": 0.07217392325401306, + "learning_rate": 8.609678426848015e-05, + "loss": 0.0088, + "step": 28110 + }, + { + "grad_norm": 0.07209845632314682, + "learning_rate": 8.60853409425099e-05, + "loss": 0.0096, + "step": 28120 + }, + { + "grad_norm": 0.05882129818201065, + "learning_rate": 8.607389367030104e-05, + "loss": 0.0108, + "step": 28130 + }, + { + "grad_norm": 0.0481598787009716, + "learning_rate": 8.606244245310538e-05, + "loss": 0.0084, + "step": 28140 + }, + { + "grad_norm": 0.08883179724216461, + "learning_rate": 8.605098729217525e-05, + "loss": 0.0092, + "step": 28150 + }, + { + "grad_norm": 0.06929980218410492, + "learning_rate": 8.603952818876335e-05, + "loss": 0.0094, + "step": 28160 + }, + { + "grad_norm": 0.051660336554050446, + "learning_rate": 8.602806514412281e-05, + "loss": 0.0118, + "step": 28170 + }, + { + "grad_norm": 0.07122717052698135, + "learning_rate": 8.601659815950726e-05, + "loss": 0.0103, + "step": 28180 + }, + { + "grad_norm": 0.0860695019364357, + "learning_rate": 8.600512723617067e-05, + "loss": 0.012, + "step": 28190 + }, + { + "grad_norm": 0.06908632814884186, + "learning_rate": 8.59936523753675e-05, + "loss": 0.0087, + "step": 28200 + }, + { + "grad_norm": 0.051362842321395874, + "learning_rate": 8.598217357835264e-05, + "loss": 0.0091, + "step": 28210 + }, + { + "grad_norm": 0.07147125899791718, + "learning_rate": 8.597069084638135e-05, + "loss": 0.011, + "step": 28220 + }, + { + "grad_norm": 0.06744828820228577, + "learning_rate": 8.595920418070939e-05, + "loss": 0.011, + "step": 28230 + }, + { + "grad_norm": 0.07604867219924927, + "learning_rate": 8.594771358259295e-05, + "loss": 0.011, + "step": 28240 + }, + { + "grad_norm": 0.06482817232608795, + "learning_rate": 8.593621905328858e-05, + "loss": 0.0089, + "step": 28250 + }, + { + "grad_norm": 0.07350543141365051, + "learning_rate": 8.592472059405333e-05, + "loss": 0.0115, + "step": 28260 + }, + { + "grad_norm": 0.0672646015882492, + "learning_rate": 8.591321820614464e-05, + "loss": 0.0114, + "step": 28270 + }, + { + "grad_norm": 0.06489256769418716, + "learning_rate": 8.590171189082041e-05, + "loss": 0.0096, + "step": 28280 + }, + { + "grad_norm": 0.07650889456272125, + "learning_rate": 8.589020164933894e-05, + "loss": 0.012, + "step": 28290 + }, + { + "grad_norm": 0.07953009009361267, + "learning_rate": 8.587868748295898e-05, + "loss": 0.0103, + "step": 28300 + }, + { + "grad_norm": 0.04905785620212555, + "learning_rate": 8.586716939293971e-05, + "loss": 0.0104, + "step": 28310 + }, + { + "grad_norm": 0.05171644687652588, + "learning_rate": 8.58556473805407e-05, + "loss": 0.0113, + "step": 28320 + }, + { + "grad_norm": 0.09778998047113419, + "learning_rate": 8.584412144702202e-05, + "loss": 0.0116, + "step": 28330 + }, + { + "grad_norm": 0.08318382501602173, + "learning_rate": 8.58325915936441e-05, + "loss": 0.0108, + "step": 28340 + }, + { + "grad_norm": 0.05519875884056091, + "learning_rate": 8.582105782166783e-05, + "loss": 0.0102, + "step": 28350 + }, + { + "grad_norm": 0.08815515041351318, + "learning_rate": 8.580952013235455e-05, + "loss": 0.0108, + "step": 28360 + }, + { + "grad_norm": 0.06849632412195206, + "learning_rate": 8.579797852696596e-05, + "loss": 0.0105, + "step": 28370 + }, + { + "grad_norm": 0.07511256635189056, + "learning_rate": 8.578643300676428e-05, + "loss": 0.0099, + "step": 28380 + }, + { + "grad_norm": 0.08724360913038254, + "learning_rate": 8.577488357301209e-05, + "loss": 0.0101, + "step": 28390 + }, + { + "grad_norm": 0.07118374109268188, + "learning_rate": 8.576333022697242e-05, + "loss": 0.0094, + "step": 28400 + }, + { + "grad_norm": 0.0709829106926918, + "learning_rate": 8.575177296990873e-05, + "loss": 0.0101, + "step": 28410 + }, + { + "grad_norm": 0.06150805205106735, + "learning_rate": 8.574021180308489e-05, + "loss": 0.0116, + "step": 28420 + }, + { + "grad_norm": 0.04692322388291359, + "learning_rate": 8.572864672776523e-05, + "loss": 0.0092, + "step": 28430 + }, + { + "grad_norm": 0.07375450432300568, + "learning_rate": 8.571707774521447e-05, + "loss": 0.0112, + "step": 28440 + }, + { + "grad_norm": 0.06448157876729965, + "learning_rate": 8.57055048566978e-05, + "loss": 0.0115, + "step": 28450 + }, + { + "grad_norm": 0.06551893055438995, + "learning_rate": 8.569392806348078e-05, + "loss": 0.0099, + "step": 28460 + }, + { + "grad_norm": 0.06197969242930412, + "learning_rate": 8.568234736682947e-05, + "loss": 0.0102, + "step": 28470 + }, + { + "grad_norm": 0.05620419234037399, + "learning_rate": 8.567076276801029e-05, + "loss": 0.0106, + "step": 28480 + }, + { + "grad_norm": 0.08855967223644257, + "learning_rate": 8.565917426829013e-05, + "loss": 0.0117, + "step": 28490 + }, + { + "grad_norm": 0.06414008140563965, + "learning_rate": 8.564758186893628e-05, + "loss": 0.0086, + "step": 28500 + }, + { + "grad_norm": 0.053533781319856644, + "learning_rate": 8.563598557121649e-05, + "loss": 0.0102, + "step": 28510 + }, + { + "grad_norm": 0.06324410438537598, + "learning_rate": 8.562438537639888e-05, + "loss": 0.0102, + "step": 28520 + }, + { + "grad_norm": 0.0644838958978653, + "learning_rate": 8.561278128575206e-05, + "loss": 0.0102, + "step": 28530 + }, + { + "grad_norm": 0.07475923001766205, + "learning_rate": 8.5601173300545e-05, + "loss": 0.0108, + "step": 28540 + }, + { + "grad_norm": 0.058873556554317474, + "learning_rate": 8.558956142204717e-05, + "loss": 0.0109, + "step": 28550 + }, + { + "grad_norm": 0.06491872668266296, + "learning_rate": 8.55779456515284e-05, + "loss": 0.0092, + "step": 28560 + }, + { + "grad_norm": 0.06756184250116348, + "learning_rate": 8.556632599025898e-05, + "loss": 0.0119, + "step": 28570 + }, + { + "grad_norm": 0.064939945936203, + "learning_rate": 8.555470243950964e-05, + "loss": 0.0096, + "step": 28580 + }, + { + "grad_norm": 0.081581711769104, + "learning_rate": 8.554307500055148e-05, + "loss": 0.0107, + "step": 28590 + }, + { + "grad_norm": 0.041376180946826935, + "learning_rate": 8.553144367465609e-05, + "loss": 0.0097, + "step": 28600 + }, + { + "grad_norm": 0.07040801644325256, + "learning_rate": 8.551980846309544e-05, + "loss": 0.0098, + "step": 28610 + }, + { + "grad_norm": 0.07353593409061432, + "learning_rate": 8.550816936714193e-05, + "loss": 0.0094, + "step": 28620 + }, + { + "grad_norm": 0.06609233468770981, + "learning_rate": 8.549652638806841e-05, + "loss": 0.0106, + "step": 28630 + }, + { + "grad_norm": 0.05371759831905365, + "learning_rate": 8.548487952714812e-05, + "loss": 0.0097, + "step": 28640 + }, + { + "grad_norm": 0.09229908883571625, + "learning_rate": 8.547322878565478e-05, + "loss": 0.0121, + "step": 28650 + }, + { + "grad_norm": 0.06623736023902893, + "learning_rate": 8.546157416486245e-05, + "loss": 0.0093, + "step": 28660 + }, + { + "grad_norm": 0.06262039393186569, + "learning_rate": 8.54499156660457e-05, + "loss": 0.0088, + "step": 28670 + }, + { + "grad_norm": 0.09352150559425354, + "learning_rate": 8.543825329047947e-05, + "loss": 0.0118, + "step": 28680 + }, + { + "grad_norm": 0.08067680150270462, + "learning_rate": 8.542658703943913e-05, + "loss": 0.0114, + "step": 28690 + }, + { + "grad_norm": 0.08324920386075974, + "learning_rate": 8.541491691420051e-05, + "loss": 0.01, + "step": 28700 + }, + { + "grad_norm": 0.059317827224731445, + "learning_rate": 8.54032429160398e-05, + "loss": 0.0095, + "step": 28710 + }, + { + "grad_norm": 0.07078014314174652, + "learning_rate": 8.539156504623369e-05, + "loss": 0.0103, + "step": 28720 + }, + { + "grad_norm": 0.08691394329071045, + "learning_rate": 8.537988330605923e-05, + "loss": 0.0118, + "step": 28730 + }, + { + "grad_norm": 0.05883949622511864, + "learning_rate": 8.536819769679393e-05, + "loss": 0.0105, + "step": 28740 + }, + { + "grad_norm": 0.06505319476127625, + "learning_rate": 8.53565082197157e-05, + "loss": 0.0095, + "step": 28750 + }, + { + "grad_norm": 0.05597406625747681, + "learning_rate": 8.534481487610289e-05, + "loss": 0.0119, + "step": 28760 + }, + { + "grad_norm": 0.05898537114262581, + "learning_rate": 8.533311766723428e-05, + "loss": 0.0094, + "step": 28770 + }, + { + "grad_norm": 0.07485922425985336, + "learning_rate": 8.532141659438901e-05, + "loss": 0.0121, + "step": 28780 + }, + { + "grad_norm": 0.04786830022931099, + "learning_rate": 8.530971165884675e-05, + "loss": 0.0097, + "step": 28790 + }, + { + "grad_norm": 0.06558030098676682, + "learning_rate": 8.529800286188752e-05, + "loss": 0.0124, + "step": 28800 + }, + { + "grad_norm": 0.05234929174184799, + "learning_rate": 8.528629020479175e-05, + "loss": 0.0093, + "step": 28810 + }, + { + "grad_norm": 0.06669576466083527, + "learning_rate": 8.527457368884033e-05, + "loss": 0.0104, + "step": 28820 + }, + { + "grad_norm": 0.053488876670598984, + "learning_rate": 8.526285331531458e-05, + "loss": 0.0096, + "step": 28830 + }, + { + "grad_norm": 0.06537417322397232, + "learning_rate": 8.525112908549621e-05, + "loss": 0.0096, + "step": 28840 + }, + { + "grad_norm": 0.06960075348615646, + "learning_rate": 8.523940100066735e-05, + "loss": 0.0108, + "step": 28850 + }, + { + "grad_norm": 0.058930136263370514, + "learning_rate": 8.52276690621106e-05, + "loss": 0.0094, + "step": 28860 + }, + { + "grad_norm": 0.07483838498592377, + "learning_rate": 8.521593327110889e-05, + "loss": 0.0103, + "step": 28870 + }, + { + "grad_norm": 0.06836245954036713, + "learning_rate": 8.520419362894569e-05, + "loss": 0.0098, + "step": 28880 + }, + { + "grad_norm": 0.08797565847635269, + "learning_rate": 8.51924501369048e-05, + "loss": 0.0102, + "step": 28890 + }, + { + "grad_norm": 0.0747690424323082, + "learning_rate": 8.518070279627047e-05, + "loss": 0.0091, + "step": 28900 + }, + { + "grad_norm": 0.06790631264448166, + "learning_rate": 8.516895160832737e-05, + "loss": 0.0086, + "step": 28910 + }, + { + "grad_norm": 0.06733353435993195, + "learning_rate": 8.515719657436061e-05, + "loss": 0.0093, + "step": 28920 + }, + { + "grad_norm": 0.04764072597026825, + "learning_rate": 8.514543769565568e-05, + "loss": 0.0086, + "step": 28930 + }, + { + "grad_norm": 0.07240590453147888, + "learning_rate": 8.513367497349853e-05, + "loss": 0.0107, + "step": 28940 + }, + { + "grad_norm": 0.04245023429393768, + "learning_rate": 8.51219084091755e-05, + "loss": 0.0111, + "step": 28950 + }, + { + "grad_norm": 0.07775990664958954, + "learning_rate": 8.511013800397338e-05, + "loss": 0.0096, + "step": 28960 + }, + { + "grad_norm": 0.07098802924156189, + "learning_rate": 8.509836375917937e-05, + "loss": 0.0109, + "step": 28970 + }, + { + "grad_norm": 0.06680795550346375, + "learning_rate": 8.508658567608104e-05, + "loss": 0.01, + "step": 28980 + }, + { + "grad_norm": 0.051796041429042816, + "learning_rate": 8.507480375596647e-05, + "loss": 0.0101, + "step": 28990 + }, + { + "grad_norm": 0.05344679206609726, + "learning_rate": 8.506301800012408e-05, + "loss": 0.0113, + "step": 29000 + }, + { + "grad_norm": 0.06422470510005951, + "learning_rate": 8.505122840984278e-05, + "loss": 0.0092, + "step": 29010 + }, + { + "grad_norm": 0.06892473995685577, + "learning_rate": 8.503943498641182e-05, + "loss": 0.0123, + "step": 29020 + }, + { + "grad_norm": 0.06900984048843384, + "learning_rate": 8.502763773112095e-05, + "loss": 0.0113, + "step": 29030 + }, + { + "grad_norm": 0.07405035197734833, + "learning_rate": 8.501583664526026e-05, + "loss": 0.01, + "step": 29040 + }, + { + "grad_norm": 0.06916702538728714, + "learning_rate": 8.500403173012032e-05, + "loss": 0.0098, + "step": 29050 + }, + { + "grad_norm": 0.08887720853090286, + "learning_rate": 8.499222298699211e-05, + "loss": 0.0092, + "step": 29060 + }, + { + "grad_norm": 0.08470473438501358, + "learning_rate": 8.498041041716701e-05, + "loss": 0.0099, + "step": 29070 + }, + { + "grad_norm": 0.08084338158369064, + "learning_rate": 8.496859402193681e-05, + "loss": 0.0105, + "step": 29080 + }, + { + "grad_norm": 0.054567914456129074, + "learning_rate": 8.495677380259374e-05, + "loss": 0.0085, + "step": 29090 + }, + { + "grad_norm": 0.059994425624608994, + "learning_rate": 8.494494976043045e-05, + "loss": 0.0115, + "step": 29100 + }, + { + "grad_norm": 0.08417055010795593, + "learning_rate": 8.493312189673998e-05, + "loss": 0.0118, + "step": 29110 + }, + { + "grad_norm": 0.08751149475574493, + "learning_rate": 8.492129021281584e-05, + "loss": 0.0136, + "step": 29120 + }, + { + "grad_norm": 0.06731480360031128, + "learning_rate": 8.490945470995188e-05, + "loss": 0.0096, + "step": 29130 + }, + { + "grad_norm": 0.04650771990418434, + "learning_rate": 8.489761538944247e-05, + "loss": 0.0093, + "step": 29140 + }, + { + "grad_norm": 0.07866691797971725, + "learning_rate": 8.48857722525823e-05, + "loss": 0.0109, + "step": 29150 + }, + { + "grad_norm": 0.05291660130023956, + "learning_rate": 8.487392530066652e-05, + "loss": 0.0101, + "step": 29160 + }, + { + "grad_norm": 0.07809603214263916, + "learning_rate": 8.486207453499069e-05, + "loss": 0.0119, + "step": 29170 + }, + { + "grad_norm": 0.05156182497739792, + "learning_rate": 8.485021995685082e-05, + "loss": 0.0097, + "step": 29180 + }, + { + "grad_norm": 0.10803613811731339, + "learning_rate": 8.483836156754328e-05, + "loss": 0.0127, + "step": 29190 + }, + { + "grad_norm": 0.0875147357583046, + "learning_rate": 8.482649936836491e-05, + "loss": 0.0101, + "step": 29200 + }, + { + "grad_norm": 0.06401905417442322, + "learning_rate": 8.481463336061293e-05, + "loss": 0.0089, + "step": 29210 + }, + { + "grad_norm": 0.06257221102714539, + "learning_rate": 8.480276354558496e-05, + "loss": 0.0109, + "step": 29220 + }, + { + "grad_norm": 0.07188758999109268, + "learning_rate": 8.479088992457913e-05, + "loss": 0.0102, + "step": 29230 + }, + { + "grad_norm": 0.06904711574316025, + "learning_rate": 8.477901249889387e-05, + "loss": 0.012, + "step": 29240 + }, + { + "grad_norm": 0.05901928246021271, + "learning_rate": 8.47671312698281e-05, + "loss": 0.0089, + "step": 29250 + }, + { + "grad_norm": 0.08254657685756683, + "learning_rate": 8.475524623868112e-05, + "loss": 0.0098, + "step": 29260 + }, + { + "grad_norm": 0.07064244896173477, + "learning_rate": 8.474335740675266e-05, + "loss": 0.0114, + "step": 29270 + }, + { + "grad_norm": 0.07462765276432037, + "learning_rate": 8.473146477534289e-05, + "loss": 0.0089, + "step": 29280 + }, + { + "grad_norm": 0.03702063858509064, + "learning_rate": 8.471956834575232e-05, + "loss": 0.0081, + "step": 29290 + }, + { + "grad_norm": 0.05733676999807358, + "learning_rate": 8.470766811928197e-05, + "loss": 0.0123, + "step": 29300 + }, + { + "grad_norm": 0.07634445279836655, + "learning_rate": 8.469576409723323e-05, + "loss": 0.0119, + "step": 29310 + }, + { + "grad_norm": 0.0760822519659996, + "learning_rate": 8.468385628090788e-05, + "loss": 0.0105, + "step": 29320 + }, + { + "grad_norm": 0.064167320728302, + "learning_rate": 8.467194467160815e-05, + "loss": 0.0101, + "step": 29330 + }, + { + "grad_norm": 0.05674073472619057, + "learning_rate": 8.466002927063667e-05, + "loss": 0.0099, + "step": 29340 + }, + { + "grad_norm": 0.04885587468743324, + "learning_rate": 8.464811007929651e-05, + "loss": 0.0078, + "step": 29350 + }, + { + "grad_norm": 0.06162796542048454, + "learning_rate": 8.463618709889114e-05, + "loss": 0.0099, + "step": 29360 + }, + { + "grad_norm": 0.09385164082050323, + "learning_rate": 8.462426033072442e-05, + "loss": 0.0114, + "step": 29370 + }, + { + "grad_norm": 0.06211688369512558, + "learning_rate": 8.461232977610061e-05, + "loss": 0.009, + "step": 29380 + }, + { + "grad_norm": 0.04696791619062424, + "learning_rate": 8.46003954363245e-05, + "loss": 0.0101, + "step": 29390 + }, + { + "grad_norm": 0.08365952968597412, + "learning_rate": 8.458845731270115e-05, + "loss": 0.0105, + "step": 29400 + }, + { + "grad_norm": 0.06829580664634705, + "learning_rate": 8.45765154065361e-05, + "loss": 0.0097, + "step": 29410 + }, + { + "grad_norm": 0.0582728236913681, + "learning_rate": 8.456456971913532e-05, + "loss": 0.0105, + "step": 29420 + }, + { + "grad_norm": 0.06550891697406769, + "learning_rate": 8.455262025180517e-05, + "loss": 0.0108, + "step": 29430 + }, + { + "grad_norm": 0.04913477599620819, + "learning_rate": 8.454066700585242e-05, + "loss": 0.0076, + "step": 29440 + }, + { + "grad_norm": 0.06552720814943314, + "learning_rate": 8.452870998258423e-05, + "loss": 0.0122, + "step": 29450 + }, + { + "grad_norm": 0.07449924200773239, + "learning_rate": 8.451674918330825e-05, + "loss": 0.0091, + "step": 29460 + }, + { + "grad_norm": 0.050364624708890915, + "learning_rate": 8.450478460933246e-05, + "loss": 0.0101, + "step": 29470 + }, + { + "grad_norm": 0.07140706479549408, + "learning_rate": 8.449281626196532e-05, + "loss": 0.009, + "step": 29480 + }, + { + "grad_norm": 0.07952141016721725, + "learning_rate": 8.448084414251564e-05, + "loss": 0.0108, + "step": 29490 + }, + { + "grad_norm": 0.06245967373251915, + "learning_rate": 8.446886825229271e-05, + "loss": 0.0103, + "step": 29500 + }, + { + "grad_norm": 0.07276960462331772, + "learning_rate": 8.445688859260615e-05, + "loss": 0.0099, + "step": 29510 + }, + { + "grad_norm": 0.06515802443027496, + "learning_rate": 8.444490516476606e-05, + "loss": 0.0112, + "step": 29520 + }, + { + "grad_norm": 0.0670699030160904, + "learning_rate": 8.443291797008293e-05, + "loss": 0.0094, + "step": 29530 + }, + { + "grad_norm": 0.0789499580860138, + "learning_rate": 8.442092700986765e-05, + "loss": 0.0101, + "step": 29540 + }, + { + "grad_norm": 0.0571364127099514, + "learning_rate": 8.440893228543156e-05, + "loss": 0.0113, + "step": 29550 + }, + { + "grad_norm": 0.06025459244847298, + "learning_rate": 8.439693379808638e-05, + "loss": 0.0093, + "step": 29560 + }, + { + "grad_norm": 0.05266544595360756, + "learning_rate": 8.43849315491442e-05, + "loss": 0.0099, + "step": 29570 + }, + { + "grad_norm": 0.06495992094278336, + "learning_rate": 8.437292553991763e-05, + "loss": 0.0105, + "step": 29580 + }, + { + "grad_norm": 0.05143166333436966, + "learning_rate": 8.436091577171959e-05, + "loss": 0.0089, + "step": 29590 + }, + { + "grad_norm": 0.07561924308538437, + "learning_rate": 8.434890224586347e-05, + "loss": 0.0095, + "step": 29600 + }, + { + "grad_norm": 0.06591593474149704, + "learning_rate": 8.433688496366303e-05, + "loss": 0.009, + "step": 29610 + }, + { + "grad_norm": 0.06560372561216354, + "learning_rate": 8.432486392643248e-05, + "loss": 0.0091, + "step": 29620 + }, + { + "grad_norm": 0.10260824114084244, + "learning_rate": 8.431283913548643e-05, + "loss": 0.0126, + "step": 29630 + }, + { + "grad_norm": 0.05902416259050369, + "learning_rate": 8.430081059213985e-05, + "loss": 0.0093, + "step": 29640 + }, + { + "grad_norm": 0.05581432580947876, + "learning_rate": 8.428877829770823e-05, + "loss": 0.013, + "step": 29650 + }, + { + "grad_norm": 0.053968336433172226, + "learning_rate": 8.427674225350735e-05, + "loss": 0.0106, + "step": 29660 + }, + { + "grad_norm": 0.055729661136865616, + "learning_rate": 8.426470246085347e-05, + "loss": 0.0086, + "step": 29670 + }, + { + "grad_norm": 0.0717683732509613, + "learning_rate": 8.425265892106324e-05, + "loss": 0.0097, + "step": 29680 + }, + { + "grad_norm": 0.0983436182141304, + "learning_rate": 8.424061163545374e-05, + "loss": 0.0098, + "step": 29690 + }, + { + "grad_norm": 0.07921205461025238, + "learning_rate": 8.422856060534243e-05, + "loss": 0.0113, + "step": 29700 + }, + { + "grad_norm": 0.06405115872621536, + "learning_rate": 8.421650583204718e-05, + "loss": 0.0096, + "step": 29710 + }, + { + "grad_norm": 0.0663847029209137, + "learning_rate": 8.420444731688633e-05, + "loss": 0.0085, + "step": 29720 + }, + { + "grad_norm": 0.06066584959626198, + "learning_rate": 8.419238506117852e-05, + "loss": 0.0101, + "step": 29730 + }, + { + "grad_norm": 0.09473434835672379, + "learning_rate": 8.418031906624289e-05, + "loss": 0.0099, + "step": 29740 + }, + { + "grad_norm": 0.08149068802595139, + "learning_rate": 8.416824933339898e-05, + "loss": 0.0116, + "step": 29750 + }, + { + "grad_norm": 0.05690263584256172, + "learning_rate": 8.415617586396667e-05, + "loss": 0.0108, + "step": 29760 + }, + { + "grad_norm": 0.0746070146560669, + "learning_rate": 8.414409865926632e-05, + "loss": 0.0091, + "step": 29770 + }, + { + "grad_norm": 0.0934516191482544, + "learning_rate": 8.413201772061867e-05, + "loss": 0.0096, + "step": 29780 + }, + { + "grad_norm": 0.060170918703079224, + "learning_rate": 8.411993304934488e-05, + "loss": 0.0107, + "step": 29790 + }, + { + "grad_norm": 0.06918977200984955, + "learning_rate": 8.410784464676654e-05, + "loss": 0.0096, + "step": 29800 + }, + { + "grad_norm": 0.05743572488427162, + "learning_rate": 8.409575251420556e-05, + "loss": 0.0096, + "step": 29810 + }, + { + "grad_norm": 0.055466827005147934, + "learning_rate": 8.408365665298435e-05, + "loss": 0.0106, + "step": 29820 + }, + { + "grad_norm": 0.03992502763867378, + "learning_rate": 8.40715570644257e-05, + "loss": 0.0102, + "step": 29830 + }, + { + "grad_norm": 0.07728619873523712, + "learning_rate": 8.40594537498528e-05, + "loss": 0.0128, + "step": 29840 + }, + { + "grad_norm": 0.059346627444028854, + "learning_rate": 8.404734671058924e-05, + "loss": 0.0099, + "step": 29850 + }, + { + "grad_norm": 0.08341528475284576, + "learning_rate": 8.403523594795902e-05, + "loss": 0.0091, + "step": 29860 + }, + { + "grad_norm": 0.056296974420547485, + "learning_rate": 8.402312146328659e-05, + "loss": 0.0103, + "step": 29870 + }, + { + "grad_norm": 0.05686924606561661, + "learning_rate": 8.401100325789675e-05, + "loss": 0.0094, + "step": 29880 + }, + { + "grad_norm": 0.057612352073192596, + "learning_rate": 8.399888133311472e-05, + "loss": 0.0085, + "step": 29890 + }, + { + "grad_norm": 0.0849294662475586, + "learning_rate": 8.398675569026613e-05, + "loss": 0.0091, + "step": 29900 + }, + { + "grad_norm": 0.047040265053510666, + "learning_rate": 8.397462633067705e-05, + "loss": 0.0108, + "step": 29910 + }, + { + "grad_norm": 0.059813275933265686, + "learning_rate": 8.396249325567392e-05, + "loss": 0.0093, + "step": 29920 + }, + { + "grad_norm": 0.05194931849837303, + "learning_rate": 8.395035646658357e-05, + "loss": 0.0097, + "step": 29930 + }, + { + "grad_norm": 0.06988657265901566, + "learning_rate": 8.39382159647333e-05, + "loss": 0.0111, + "step": 29940 + }, + { + "grad_norm": 0.06067775934934616, + "learning_rate": 8.392607175145075e-05, + "loss": 0.0095, + "step": 29950 + }, + { + "grad_norm": 0.05847765877842903, + "learning_rate": 8.3913923828064e-05, + "loss": 0.0119, + "step": 29960 + }, + { + "grad_norm": 0.08612541854381561, + "learning_rate": 8.390177219590152e-05, + "loss": 0.01, + "step": 29970 + }, + { + "grad_norm": 0.06778641790151596, + "learning_rate": 8.388961685629222e-05, + "loss": 0.009, + "step": 29980 + }, + { + "grad_norm": 0.05296630039811134, + "learning_rate": 8.387745781056536e-05, + "loss": 0.0088, + "step": 29990 + }, + { + "grad_norm": 0.07174292206764221, + "learning_rate": 8.386529506005065e-05, + "loss": 0.0106, + "step": 30000 + }, + { + "grad_norm": 0.07757196575403214, + "learning_rate": 8.38531286060782e-05, + "loss": 0.0133, + "step": 30010 + }, + { + "grad_norm": 0.07669873535633087, + "learning_rate": 8.384095844997849e-05, + "loss": 0.0118, + "step": 30020 + }, + { + "grad_norm": 0.07371801882982254, + "learning_rate": 8.382878459308245e-05, + "loss": 0.011, + "step": 30030 + }, + { + "grad_norm": 0.05413452535867691, + "learning_rate": 8.381660703672138e-05, + "loss": 0.0096, + "step": 30040 + }, + { + "grad_norm": 0.09788563847541809, + "learning_rate": 8.380442578222702e-05, + "loss": 0.0102, + "step": 30050 + }, + { + "grad_norm": 0.06303906440734863, + "learning_rate": 8.379224083093146e-05, + "loss": 0.0115, + "step": 30060 + }, + { + "grad_norm": 0.06841795891523361, + "learning_rate": 8.378005218416727e-05, + "loss": 0.0125, + "step": 30070 + }, + { + "grad_norm": 0.10470515489578247, + "learning_rate": 8.376785984326735e-05, + "loss": 0.0139, + "step": 30080 + }, + { + "grad_norm": 0.0835881307721138, + "learning_rate": 8.375566380956506e-05, + "loss": 0.0097, + "step": 30090 + }, + { + "grad_norm": 0.08308401703834534, + "learning_rate": 8.374346408439411e-05, + "loss": 0.0108, + "step": 30100 + }, + { + "grad_norm": 0.0646706074476242, + "learning_rate": 8.373126066908868e-05, + "loss": 0.0098, + "step": 30110 + }, + { + "grad_norm": 0.08444757014513016, + "learning_rate": 8.371905356498326e-05, + "loss": 0.0098, + "step": 30120 + }, + { + "grad_norm": 0.05729012191295624, + "learning_rate": 8.370684277341288e-05, + "loss": 0.0093, + "step": 30130 + }, + { + "grad_norm": 0.06639302521944046, + "learning_rate": 8.369462829571282e-05, + "loss": 0.0091, + "step": 30140 + }, + { + "grad_norm": 0.04999241605401039, + "learning_rate": 8.36824101332189e-05, + "loss": 0.0085, + "step": 30150 + }, + { + "grad_norm": 0.05241038277745247, + "learning_rate": 8.367018828726721e-05, + "loss": 0.0113, + "step": 30160 + }, + { + "grad_norm": 0.08074601739645004, + "learning_rate": 8.365796275919438e-05, + "loss": 0.0113, + "step": 30170 + }, + { + "grad_norm": 0.060258761048316956, + "learning_rate": 8.364573355033734e-05, + "loss": 0.0093, + "step": 30180 + }, + { + "grad_norm": 0.07559333741664886, + "learning_rate": 8.363350066203346e-05, + "loss": 0.0105, + "step": 30190 + }, + { + "grad_norm": 0.06161624938249588, + "learning_rate": 8.362126409562053e-05, + "loss": 0.0111, + "step": 30200 + }, + { + "grad_norm": 0.09024052321910858, + "learning_rate": 8.360902385243667e-05, + "loss": 0.0086, + "step": 30210 + }, + { + "grad_norm": 0.056442342698574066, + "learning_rate": 8.359677993382052e-05, + "loss": 0.0109, + "step": 30220 + }, + { + "grad_norm": 0.07203790545463562, + "learning_rate": 8.358453234111103e-05, + "loss": 0.0099, + "step": 30230 + }, + { + "grad_norm": 0.04773034155368805, + "learning_rate": 8.357228107564756e-05, + "loss": 0.0091, + "step": 30240 + }, + { + "grad_norm": 0.0556766651570797, + "learning_rate": 8.356002613876993e-05, + "loss": 0.01, + "step": 30250 + }, + { + "grad_norm": 0.0740688368678093, + "learning_rate": 8.35477675318183e-05, + "loss": 0.0103, + "step": 30260 + }, + { + "grad_norm": 0.07131264358758926, + "learning_rate": 8.353550525613323e-05, + "loss": 0.0103, + "step": 30270 + }, + { + "grad_norm": 0.055426981300115585, + "learning_rate": 8.352323931305572e-05, + "loss": 0.0105, + "step": 30280 + }, + { + "grad_norm": 0.04764513671398163, + "learning_rate": 8.351096970392717e-05, + "loss": 0.0083, + "step": 30290 + }, + { + "grad_norm": 0.06838788837194443, + "learning_rate": 8.349869643008937e-05, + "loss": 0.012, + "step": 30300 + }, + { + "grad_norm": 0.070427805185318, + "learning_rate": 8.348641949288449e-05, + "loss": 0.0095, + "step": 30310 + }, + { + "grad_norm": 0.08559920638799667, + "learning_rate": 8.34741388936551e-05, + "loss": 0.009, + "step": 30320 + }, + { + "grad_norm": 0.047474682331085205, + "learning_rate": 8.346185463374423e-05, + "loss": 0.0097, + "step": 30330 + }, + { + "grad_norm": 0.07611316442489624, + "learning_rate": 8.344956671449524e-05, + "loss": 0.0117, + "step": 30340 + }, + { + "grad_norm": 0.06096412613987923, + "learning_rate": 8.343727513725192e-05, + "loss": 0.0098, + "step": 30350 + }, + { + "grad_norm": 0.04474838823080063, + "learning_rate": 8.342497990335847e-05, + "loss": 0.0099, + "step": 30360 + }, + { + "grad_norm": 0.09328620880842209, + "learning_rate": 8.341268101415946e-05, + "loss": 0.0111, + "step": 30370 + }, + { + "grad_norm": 0.09073927998542786, + "learning_rate": 8.34003784709999e-05, + "loss": 0.0106, + "step": 30380 + }, + { + "grad_norm": 0.08313833177089691, + "learning_rate": 8.338807227522516e-05, + "loss": 0.0089, + "step": 30390 + }, + { + "grad_norm": 0.04670808091759682, + "learning_rate": 8.337576242818103e-05, + "loss": 0.0093, + "step": 30400 + }, + { + "grad_norm": 0.05204812437295914, + "learning_rate": 8.336344893121372e-05, + "loss": 0.0076, + "step": 30410 + }, + { + "grad_norm": 0.05758281797170639, + "learning_rate": 8.335113178566977e-05, + "loss": 0.0099, + "step": 30420 + }, + { + "grad_norm": 0.05383795499801636, + "learning_rate": 8.33388109928962e-05, + "loss": 0.0086, + "step": 30430 + }, + { + "grad_norm": 0.06638531386852264, + "learning_rate": 8.33264865542404e-05, + "loss": 0.0091, + "step": 30440 + }, + { + "grad_norm": 0.052828915417194366, + "learning_rate": 8.331415847105013e-05, + "loss": 0.01, + "step": 30450 + }, + { + "grad_norm": 0.06335140019655228, + "learning_rate": 8.330182674467357e-05, + "loss": 0.0112, + "step": 30460 + }, + { + "grad_norm": 0.05422741919755936, + "learning_rate": 8.32894913764593e-05, + "loss": 0.0093, + "step": 30470 + }, + { + "grad_norm": 0.04809584468603134, + "learning_rate": 8.327715236775633e-05, + "loss": 0.0087, + "step": 30480 + }, + { + "grad_norm": 0.05560777708888054, + "learning_rate": 8.326480971991398e-05, + "loss": 0.0096, + "step": 30490 + }, + { + "grad_norm": 0.06764404475688934, + "learning_rate": 8.325246343428206e-05, + "loss": 0.0103, + "step": 30500 + }, + { + "grad_norm": 0.06260915100574493, + "learning_rate": 8.324011351221072e-05, + "loss": 0.0084, + "step": 30510 + }, + { + "grad_norm": 0.07634025067090988, + "learning_rate": 8.322775995505057e-05, + "loss": 0.0109, + "step": 30520 + }, + { + "grad_norm": 0.04959246516227722, + "learning_rate": 8.321540276415254e-05, + "loss": 0.0101, + "step": 30530 + }, + { + "grad_norm": 0.03492593392729759, + "learning_rate": 8.320304194086798e-05, + "loss": 0.0103, + "step": 30540 + }, + { + "grad_norm": 0.08651383966207504, + "learning_rate": 8.31906774865487e-05, + "loss": 0.0108, + "step": 30550 + }, + { + "grad_norm": 0.07159215211868286, + "learning_rate": 8.317830940254682e-05, + "loss": 0.0092, + "step": 30560 + }, + { + "grad_norm": 0.07361400127410889, + "learning_rate": 8.316593769021491e-05, + "loss": 0.0101, + "step": 30570 + }, + { + "grad_norm": 0.06570982187986374, + "learning_rate": 8.315356235090592e-05, + "loss": 0.009, + "step": 30580 + }, + { + "grad_norm": 0.06280873715877533, + "learning_rate": 8.314118338597319e-05, + "loss": 0.0073, + "step": 30590 + }, + { + "grad_norm": 0.05293428525328636, + "learning_rate": 8.312880079677048e-05, + "loss": 0.0087, + "step": 30600 + }, + { + "grad_norm": 0.08089785277843475, + "learning_rate": 8.311641458465191e-05, + "loss": 0.0093, + "step": 30610 + }, + { + "grad_norm": 0.05891507491469383, + "learning_rate": 8.310402475097205e-05, + "loss": 0.0095, + "step": 30620 + }, + { + "grad_norm": 0.06989660859107971, + "learning_rate": 8.309163129708581e-05, + "loss": 0.0088, + "step": 30630 + }, + { + "grad_norm": 0.06479176878929138, + "learning_rate": 8.307923422434852e-05, + "loss": 0.0122, + "step": 30640 + }, + { + "grad_norm": 0.0557851567864418, + "learning_rate": 8.30668335341159e-05, + "loss": 0.0097, + "step": 30650 + }, + { + "grad_norm": 0.06289505958557129, + "learning_rate": 8.305442922774408e-05, + "loss": 0.0102, + "step": 30660 + }, + { + "grad_norm": 0.07849457114934921, + "learning_rate": 8.304202130658959e-05, + "loss": 0.0096, + "step": 30670 + }, + { + "grad_norm": 0.07683049887418747, + "learning_rate": 8.302960977200931e-05, + "loss": 0.0106, + "step": 30680 + }, + { + "grad_norm": 0.06410747021436691, + "learning_rate": 8.301719462536058e-05, + "loss": 0.0079, + "step": 30690 + }, + { + "grad_norm": 0.07286671549081802, + "learning_rate": 8.300477586800108e-05, + "loss": 0.0089, + "step": 30700 + }, + { + "grad_norm": 0.06003856658935547, + "learning_rate": 8.299235350128892e-05, + "loss": 0.009, + "step": 30710 + }, + { + "grad_norm": 0.07199908047914505, + "learning_rate": 8.297992752658258e-05, + "loss": 0.0106, + "step": 30720 + }, + { + "grad_norm": 0.05021079629659653, + "learning_rate": 8.296749794524094e-05, + "loss": 0.0091, + "step": 30730 + }, + { + "grad_norm": 0.07827874273061752, + "learning_rate": 8.295506475862332e-05, + "loss": 0.0098, + "step": 30740 + }, + { + "grad_norm": 0.05121954530477524, + "learning_rate": 8.294262796808933e-05, + "loss": 0.0121, + "step": 30750 + }, + { + "grad_norm": 0.06454674154520035, + "learning_rate": 8.293018757499909e-05, + "loss": 0.0127, + "step": 30760 + }, + { + "grad_norm": 0.05585162341594696, + "learning_rate": 8.291774358071305e-05, + "loss": 0.0104, + "step": 30770 + }, + { + "grad_norm": 0.06322260200977325, + "learning_rate": 8.290529598659205e-05, + "loss": 0.0094, + "step": 30780 + }, + { + "grad_norm": 0.0869043618440628, + "learning_rate": 8.289284479399738e-05, + "loss": 0.0109, + "step": 30790 + }, + { + "grad_norm": 0.053383223712444305, + "learning_rate": 8.288039000429064e-05, + "loss": 0.0086, + "step": 30800 + }, + { + "grad_norm": 0.05747707560658455, + "learning_rate": 8.286793161883388e-05, + "loss": 0.0101, + "step": 30810 + }, + { + "grad_norm": 0.07797562330961227, + "learning_rate": 8.285546963898954e-05, + "loss": 0.0105, + "step": 30820 + }, + { + "grad_norm": 0.06754084676504135, + "learning_rate": 8.284300406612044e-05, + "loss": 0.0097, + "step": 30830 + }, + { + "grad_norm": 0.05687053129076958, + "learning_rate": 8.283053490158978e-05, + "loss": 0.0087, + "step": 30840 + }, + { + "grad_norm": 0.08217648416757584, + "learning_rate": 8.28180621467612e-05, + "loss": 0.0119, + "step": 30850 + }, + { + "grad_norm": 0.04642387852072716, + "learning_rate": 8.280558580299867e-05, + "loss": 0.0096, + "step": 30860 + }, + { + "grad_norm": 0.05971401557326317, + "learning_rate": 8.279310587166661e-05, + "loss": 0.0105, + "step": 30870 + }, + { + "grad_norm": 0.06114511936903, + "learning_rate": 8.278062235412978e-05, + "loss": 0.0108, + "step": 30880 + }, + { + "grad_norm": 0.06788422167301178, + "learning_rate": 8.276813525175339e-05, + "loss": 0.0118, + "step": 30890 + }, + { + "grad_norm": 0.04649658873677254, + "learning_rate": 8.2755644565903e-05, + "loss": 0.0092, + "step": 30900 + }, + { + "grad_norm": 0.04447044059634209, + "learning_rate": 8.274315029794454e-05, + "loss": 0.0075, + "step": 30910 + }, + { + "grad_norm": 0.06337841600179672, + "learning_rate": 8.273065244924443e-05, + "loss": 0.0106, + "step": 30920 + }, + { + "grad_norm": 0.07766278088092804, + "learning_rate": 8.271815102116936e-05, + "loss": 0.0084, + "step": 30930 + }, + { + "grad_norm": 0.07214797288179398, + "learning_rate": 8.270564601508648e-05, + "loss": 0.0104, + "step": 30940 + }, + { + "grad_norm": 0.058115944266319275, + "learning_rate": 8.269313743236333e-05, + "loss": 0.0099, + "step": 30950 + }, + { + "grad_norm": 0.08991585671901703, + "learning_rate": 8.268062527436783e-05, + "loss": 0.0113, + "step": 30960 + }, + { + "grad_norm": 0.07634659111499786, + "learning_rate": 8.266810954246828e-05, + "loss": 0.0083, + "step": 30970 + }, + { + "grad_norm": 0.07000186294317245, + "learning_rate": 8.265559023803338e-05, + "loss": 0.0091, + "step": 30980 + }, + { + "grad_norm": 0.05200362578034401, + "learning_rate": 8.264306736243223e-05, + "loss": 0.0112, + "step": 30990 + }, + { + "grad_norm": 0.07250773161649704, + "learning_rate": 8.263054091703432e-05, + "loss": 0.0099, + "step": 31000 + }, + { + "grad_norm": 0.048850782215595245, + "learning_rate": 8.26180109032095e-05, + "loss": 0.0102, + "step": 31010 + }, + { + "grad_norm": 0.050406455993652344, + "learning_rate": 8.260547732232807e-05, + "loss": 0.0092, + "step": 31020 + }, + { + "grad_norm": 0.0704440250992775, + "learning_rate": 8.259294017576063e-05, + "loss": 0.0085, + "step": 31030 + }, + { + "grad_norm": 0.05581372603774071, + "learning_rate": 8.258039946487828e-05, + "loss": 0.0092, + "step": 31040 + }, + { + "grad_norm": 0.04703902453184128, + "learning_rate": 8.256785519105241e-05, + "loss": 0.0101, + "step": 31050 + }, + { + "grad_norm": 0.057671722024679184, + "learning_rate": 8.255530735565488e-05, + "loss": 0.0108, + "step": 31060 + }, + { + "grad_norm": 0.061264898627996445, + "learning_rate": 8.254275596005787e-05, + "loss": 0.0104, + "step": 31070 + }, + { + "grad_norm": 0.055325232446193695, + "learning_rate": 8.2530201005634e-05, + "loss": 0.0111, + "step": 31080 + }, + { + "grad_norm": 0.044782690703868866, + "learning_rate": 8.251764249375626e-05, + "loss": 0.0085, + "step": 31090 + }, + { + "grad_norm": 0.07296866178512573, + "learning_rate": 8.250508042579803e-05, + "loss": 0.0111, + "step": 31100 + }, + { + "grad_norm": 0.05782005190849304, + "learning_rate": 8.249251480313307e-05, + "loss": 0.0111, + "step": 31110 + }, + { + "grad_norm": 0.06053957715630531, + "learning_rate": 8.247994562713555e-05, + "loss": 0.0091, + "step": 31120 + }, + { + "grad_norm": 0.0702984482049942, + "learning_rate": 8.246737289917998e-05, + "loss": 0.0096, + "step": 31130 + }, + { + "grad_norm": 0.049648839980363846, + "learning_rate": 8.245479662064135e-05, + "loss": 0.0093, + "step": 31140 + }, + { + "grad_norm": 0.0765315368771553, + "learning_rate": 8.244221679289496e-05, + "loss": 0.0107, + "step": 31150 + }, + { + "grad_norm": 0.055040646344423294, + "learning_rate": 8.242963341731652e-05, + "loss": 0.0081, + "step": 31160 + }, + { + "grad_norm": 0.0919472873210907, + "learning_rate": 8.24170464952821e-05, + "loss": 0.0082, + "step": 31170 + }, + { + "grad_norm": 0.06729723513126373, + "learning_rate": 8.240445602816824e-05, + "loss": 0.0074, + "step": 31180 + }, + { + "grad_norm": 0.0659179538488388, + "learning_rate": 8.239186201735179e-05, + "loss": 0.0103, + "step": 31190 + }, + { + "grad_norm": 0.10498956590890884, + "learning_rate": 8.237926446420998e-05, + "loss": 0.0121, + "step": 31200 + }, + { + "grad_norm": 0.07819326221942902, + "learning_rate": 8.236666337012052e-05, + "loss": 0.0096, + "step": 31210 + }, + { + "grad_norm": 0.08104666322469711, + "learning_rate": 8.23540587364614e-05, + "loss": 0.0099, + "step": 31220 + }, + { + "grad_norm": 0.05173890292644501, + "learning_rate": 8.234145056461107e-05, + "loss": 0.0089, + "step": 31230 + }, + { + "grad_norm": 0.08409108966588974, + "learning_rate": 8.232883885594832e-05, + "loss": 0.0099, + "step": 31240 + }, + { + "grad_norm": 0.07057248800992966, + "learning_rate": 8.231622361185236e-05, + "loss": 0.0092, + "step": 31250 + }, + { + "grad_norm": 0.07213360071182251, + "learning_rate": 8.230360483370278e-05, + "loss": 0.0088, + "step": 31260 + }, + { + "grad_norm": 0.06359097361564636, + "learning_rate": 8.229098252287953e-05, + "loss": 0.0083, + "step": 31270 + }, + { + "grad_norm": 0.09935145825147629, + "learning_rate": 8.2278356680763e-05, + "loss": 0.0085, + "step": 31280 + }, + { + "grad_norm": 0.05402519926428795, + "learning_rate": 8.22657273087339e-05, + "loss": 0.0094, + "step": 31290 + }, + { + "grad_norm": 0.06651995331048965, + "learning_rate": 8.225309440817336e-05, + "loss": 0.0082, + "step": 31300 + }, + { + "grad_norm": 0.057942699640989304, + "learning_rate": 8.224045798046293e-05, + "loss": 0.0099, + "step": 31310 + }, + { + "grad_norm": 0.07608316838741302, + "learning_rate": 8.22278180269845e-05, + "loss": 0.01, + "step": 31320 + }, + { + "grad_norm": 0.0680142194032669, + "learning_rate": 8.221517454912031e-05, + "loss": 0.0086, + "step": 31330 + }, + { + "grad_norm": 0.07144486159086227, + "learning_rate": 8.220252754825308e-05, + "loss": 0.0098, + "step": 31340 + }, + { + "grad_norm": 0.08310982584953308, + "learning_rate": 8.218987702576586e-05, + "loss": 0.0099, + "step": 31350 + }, + { + "grad_norm": 0.058680012822151184, + "learning_rate": 8.217722298304207e-05, + "loss": 0.0088, + "step": 31360 + }, + { + "grad_norm": 0.06454992294311523, + "learning_rate": 8.216456542146557e-05, + "loss": 0.0105, + "step": 31370 + }, + { + "grad_norm": 0.05794082209467888, + "learning_rate": 8.215190434242055e-05, + "loss": 0.0102, + "step": 31380 + }, + { + "grad_norm": 0.04020671546459198, + "learning_rate": 8.213923974729161e-05, + "loss": 0.0088, + "step": 31390 + }, + { + "grad_norm": 0.07692617923021317, + "learning_rate": 8.212657163746373e-05, + "loss": 0.0094, + "step": 31400 + }, + { + "grad_norm": 0.07429664582014084, + "learning_rate": 8.211390001432227e-05, + "loss": 0.0101, + "step": 31410 + }, + { + "grad_norm": 0.08222249895334244, + "learning_rate": 8.210122487925297e-05, + "loss": 0.0109, + "step": 31420 + }, + { + "grad_norm": 0.06354741007089615, + "learning_rate": 8.208854623364202e-05, + "loss": 0.0088, + "step": 31430 + }, + { + "grad_norm": 0.05520617589354515, + "learning_rate": 8.207586407887589e-05, + "loss": 0.0081, + "step": 31440 + }, + { + "grad_norm": 0.07159548997879028, + "learning_rate": 8.206317841634148e-05, + "loss": 0.0137, + "step": 31450 + }, + { + "grad_norm": 0.0625755786895752, + "learning_rate": 8.205048924742609e-05, + "loss": 0.0112, + "step": 31460 + }, + { + "grad_norm": 0.07563799619674683, + "learning_rate": 8.203779657351738e-05, + "loss": 0.0112, + "step": 31470 + }, + { + "grad_norm": 0.05988377705216408, + "learning_rate": 8.20251003960034e-05, + "loss": 0.0109, + "step": 31480 + }, + { + "grad_norm": 0.04638398066163063, + "learning_rate": 8.201240071627258e-05, + "loss": 0.0118, + "step": 31490 + }, + { + "grad_norm": 0.05530327931046486, + "learning_rate": 8.199969753571377e-05, + "loss": 0.0103, + "step": 31500 + }, + { + "grad_norm": 0.09905234724283218, + "learning_rate": 8.198699085571615e-05, + "loss": 0.0103, + "step": 31510 + }, + { + "grad_norm": 0.13258668780326843, + "learning_rate": 8.197428067766928e-05, + "loss": 0.0094, + "step": 31520 + }, + { + "grad_norm": 0.12068299949169159, + "learning_rate": 8.196156700296316e-05, + "loss": 0.013, + "step": 31530 + }, + { + "grad_norm": 0.06800536066293716, + "learning_rate": 8.194884983298814e-05, + "loss": 0.0134, + "step": 31540 + }, + { + "grad_norm": 0.10355666279792786, + "learning_rate": 8.193612916913491e-05, + "loss": 0.0105, + "step": 31550 + }, + { + "grad_norm": 0.08717864751815796, + "learning_rate": 8.192340501279463e-05, + "loss": 0.0121, + "step": 31560 + }, + { + "grad_norm": 0.07122638076543808, + "learning_rate": 8.191067736535876e-05, + "loss": 0.0106, + "step": 31570 + }, + { + "grad_norm": 0.06294141709804535, + "learning_rate": 8.18979462282192e-05, + "loss": 0.0086, + "step": 31580 + }, + { + "grad_norm": 0.04926826059818268, + "learning_rate": 8.188521160276819e-05, + "loss": 0.0078, + "step": 31590 + }, + { + "grad_norm": 0.043373916298151016, + "learning_rate": 8.187247349039837e-05, + "loss": 0.0072, + "step": 31600 + }, + { + "grad_norm": 0.05247466266155243, + "learning_rate": 8.185973189250278e-05, + "loss": 0.0094, + "step": 31610 + }, + { + "grad_norm": 0.040765464305877686, + "learning_rate": 8.184698681047482e-05, + "loss": 0.0089, + "step": 31620 + }, + { + "grad_norm": 0.0891541838645935, + "learning_rate": 8.183423824570827e-05, + "loss": 0.011, + "step": 31630 + }, + { + "grad_norm": 0.05400476232171059, + "learning_rate": 8.182148619959725e-05, + "loss": 0.0082, + "step": 31640 + }, + { + "grad_norm": 0.06960922479629517, + "learning_rate": 8.180873067353636e-05, + "loss": 0.0097, + "step": 31650 + }, + { + "grad_norm": 0.06267822533845901, + "learning_rate": 8.179597166892052e-05, + "loss": 0.0128, + "step": 31660 + }, + { + "grad_norm": 0.06376559287309647, + "learning_rate": 8.178320918714501e-05, + "loss": 0.0078, + "step": 31670 + }, + { + "grad_norm": 0.09705716371536255, + "learning_rate": 8.177044322960554e-05, + "loss": 0.0095, + "step": 31680 + }, + { + "grad_norm": 0.07178214192390442, + "learning_rate": 8.175767379769816e-05, + "loss": 0.0107, + "step": 31690 + }, + { + "grad_norm": 0.05171038955450058, + "learning_rate": 8.174490089281932e-05, + "loss": 0.0101, + "step": 31700 + }, + { + "grad_norm": 0.054259151220321655, + "learning_rate": 8.173212451636584e-05, + "loss": 0.0099, + "step": 31710 + }, + { + "grad_norm": 0.13235688209533691, + "learning_rate": 8.171934466973493e-05, + "loss": 0.0102, + "step": 31720 + }, + { + "grad_norm": 0.05948112532496452, + "learning_rate": 8.170656135432418e-05, + "loss": 0.0101, + "step": 31730 + }, + { + "grad_norm": 0.07326234877109528, + "learning_rate": 8.169377457153155e-05, + "loss": 0.0089, + "step": 31740 + }, + { + "grad_norm": 0.11096491664648056, + "learning_rate": 8.168098432275539e-05, + "loss": 0.0115, + "step": 31750 + }, + { + "grad_norm": 0.07520467042922974, + "learning_rate": 8.166819060939442e-05, + "loss": 0.0125, + "step": 31760 + }, + { + "grad_norm": 0.06747693568468094, + "learning_rate": 8.165539343284772e-05, + "loss": 0.0101, + "step": 31770 + }, + { + "grad_norm": 0.07117845863103867, + "learning_rate": 8.16425927945148e-05, + "loss": 0.0086, + "step": 31780 + }, + { + "grad_norm": 0.05255997180938721, + "learning_rate": 8.162978869579551e-05, + "loss": 0.0094, + "step": 31790 + }, + { + "grad_norm": 0.045006103813648224, + "learning_rate": 8.161698113809007e-05, + "loss": 0.0089, + "step": 31800 + }, + { + "grad_norm": 0.08358240127563477, + "learning_rate": 8.160417012279911e-05, + "loss": 0.0103, + "step": 31810 + }, + { + "grad_norm": 0.05948030948638916, + "learning_rate": 8.159135565132363e-05, + "loss": 0.0096, + "step": 31820 + }, + { + "grad_norm": 0.0785202756524086, + "learning_rate": 8.157853772506498e-05, + "loss": 0.01, + "step": 31830 + }, + { + "grad_norm": 0.12160501629114151, + "learning_rate": 8.156571634542494e-05, + "loss": 0.0118, + "step": 31840 + }, + { + "grad_norm": 0.07445114850997925, + "learning_rate": 8.15528915138056e-05, + "loss": 0.0103, + "step": 31850 + }, + { + "grad_norm": 0.07670064270496368, + "learning_rate": 8.154006323160949e-05, + "loss": 0.0118, + "step": 31860 + }, + { + "grad_norm": 0.07625847309827805, + "learning_rate": 8.152723150023949e-05, + "loss": 0.0095, + "step": 31870 + }, + { + "grad_norm": 0.06443900614976883, + "learning_rate": 8.151439632109886e-05, + "loss": 0.0095, + "step": 31880 + }, + { + "grad_norm": 0.07444475591182709, + "learning_rate": 8.150155769559122e-05, + "loss": 0.0083, + "step": 31890 + }, + { + "grad_norm": 0.07740038633346558, + "learning_rate": 8.148871562512058e-05, + "loss": 0.0102, + "step": 31900 + }, + { + "grad_norm": 0.06248581036925316, + "learning_rate": 8.147587011109136e-05, + "loss": 0.0085, + "step": 31910 + }, + { + "grad_norm": 0.06237035617232323, + "learning_rate": 8.14630211549083e-05, + "loss": 0.0102, + "step": 31920 + }, + { + "grad_norm": 0.07495313882827759, + "learning_rate": 8.145016875797655e-05, + "loss": 0.0091, + "step": 31930 + }, + { + "grad_norm": 0.052265871316194534, + "learning_rate": 8.143731292170164e-05, + "loss": 0.0071, + "step": 31940 + }, + { + "grad_norm": 0.06421558558940887, + "learning_rate": 8.142445364748944e-05, + "loss": 0.008, + "step": 31950 + }, + { + "grad_norm": 0.04469960182905197, + "learning_rate": 8.141159093674624e-05, + "loss": 0.0078, + "step": 31960 + }, + { + "grad_norm": 0.06602239608764648, + "learning_rate": 8.139872479087869e-05, + "loss": 0.0091, + "step": 31970 + }, + { + "grad_norm": 0.06853438168764114, + "learning_rate": 8.13858552112938e-05, + "loss": 0.0095, + "step": 31980 + }, + { + "grad_norm": 0.060859788209199905, + "learning_rate": 8.137298219939895e-05, + "loss": 0.0101, + "step": 31990 + }, + { + "grad_norm": 0.07438953965902328, + "learning_rate": 8.136010575660196e-05, + "loss": 0.0114, + "step": 32000 + }, + { + "grad_norm": 0.09816449135541916, + "learning_rate": 8.134722588431095e-05, + "loss": 0.0101, + "step": 32010 + }, + { + "grad_norm": 0.05742732435464859, + "learning_rate": 8.133434258393444e-05, + "loss": 0.0103, + "step": 32020 + }, + { + "grad_norm": 0.05956244096159935, + "learning_rate": 8.132145585688134e-05, + "loss": 0.0106, + "step": 32030 + }, + { + "grad_norm": 0.06549081951379776, + "learning_rate": 8.130856570456093e-05, + "loss": 0.0096, + "step": 32040 + }, + { + "grad_norm": 0.07209392637014389, + "learning_rate": 8.129567212838283e-05, + "loss": 0.0103, + "step": 32050 + }, + { + "grad_norm": 0.08708705008029938, + "learning_rate": 8.128277512975708e-05, + "loss": 0.0092, + "step": 32060 + }, + { + "grad_norm": 0.10969625413417816, + "learning_rate": 8.126987471009408e-05, + "loss": 0.0124, + "step": 32070 + }, + { + "grad_norm": 0.06554620712995529, + "learning_rate": 8.125697087080459e-05, + "loss": 0.0106, + "step": 32080 + }, + { + "grad_norm": 0.042833734303712845, + "learning_rate": 8.124406361329976e-05, + "loss": 0.0084, + "step": 32090 + }, + { + "grad_norm": 0.056517887860536575, + "learning_rate": 8.123115293899111e-05, + "loss": 0.0081, + "step": 32100 + }, + { + "grad_norm": 0.06662890315055847, + "learning_rate": 8.121823884929055e-05, + "loss": 0.0088, + "step": 32110 + }, + { + "grad_norm": 0.051257532089948654, + "learning_rate": 8.12053213456103e-05, + "loss": 0.0104, + "step": 32120 + }, + { + "grad_norm": 0.06118126958608627, + "learning_rate": 8.119240042936303e-05, + "loss": 0.0101, + "step": 32130 + }, + { + "grad_norm": 0.04172506555914879, + "learning_rate": 8.117947610196175e-05, + "loss": 0.0092, + "step": 32140 + }, + { + "grad_norm": 0.06417485326528549, + "learning_rate": 8.116654836481982e-05, + "loss": 0.0092, + "step": 32150 + }, + { + "grad_norm": 0.08097387105226517, + "learning_rate": 8.115361721935106e-05, + "loss": 0.009, + "step": 32160 + }, + { + "grad_norm": 0.05881884694099426, + "learning_rate": 8.114068266696953e-05, + "loss": 0.0088, + "step": 32170 + }, + { + "grad_norm": 0.07777175307273865, + "learning_rate": 8.112774470908978e-05, + "loss": 0.0093, + "step": 32180 + }, + { + "grad_norm": 0.049120090901851654, + "learning_rate": 8.111480334712665e-05, + "loss": 0.0086, + "step": 32190 + }, + { + "grad_norm": 0.09079038351774216, + "learning_rate": 8.110185858249542e-05, + "loss": 0.0125, + "step": 32200 + }, + { + "grad_norm": 0.06002012640237808, + "learning_rate": 8.108891041661168e-05, + "loss": 0.009, + "step": 32210 + }, + { + "grad_norm": 0.056577105075120926, + "learning_rate": 8.107595885089146e-05, + "loss": 0.0084, + "step": 32220 + }, + { + "grad_norm": 0.06931925565004349, + "learning_rate": 8.106300388675108e-05, + "loss": 0.0102, + "step": 32230 + }, + { + "grad_norm": 0.05567097291350365, + "learning_rate": 8.105004552560731e-05, + "loss": 0.0108, + "step": 32240 + }, + { + "grad_norm": 0.04941194877028465, + "learning_rate": 8.103708376887724e-05, + "loss": 0.011, + "step": 32250 + }, + { + "grad_norm": 0.053379788994789124, + "learning_rate": 8.102411861797836e-05, + "loss": 0.0086, + "step": 32260 + }, + { + "grad_norm": 0.045128222554922104, + "learning_rate": 8.101115007432851e-05, + "loss": 0.0088, + "step": 32270 + }, + { + "grad_norm": 0.05059792473912239, + "learning_rate": 8.09981781393459e-05, + "loss": 0.0088, + "step": 32280 + }, + { + "grad_norm": 0.07612229883670807, + "learning_rate": 8.098520281444915e-05, + "loss": 0.011, + "step": 32290 + }, + { + "grad_norm": 0.10474861413240433, + "learning_rate": 8.09722241010572e-05, + "loss": 0.0082, + "step": 32300 + }, + { + "grad_norm": 0.06176815927028656, + "learning_rate": 8.095924200058939e-05, + "loss": 0.0102, + "step": 32310 + }, + { + "grad_norm": 0.05922417342662811, + "learning_rate": 8.094625651446541e-05, + "loss": 0.0089, + "step": 32320 + }, + { + "grad_norm": 0.05901024490594864, + "learning_rate": 8.093326764410536e-05, + "loss": 0.0102, + "step": 32330 + }, + { + "grad_norm": 0.06976433098316193, + "learning_rate": 8.092027539092966e-05, + "loss": 0.0097, + "step": 32340 + }, + { + "grad_norm": 0.058496441692113876, + "learning_rate": 8.090727975635913e-05, + "loss": 0.0085, + "step": 32350 + }, + { + "grad_norm": 0.03846200555562973, + "learning_rate": 8.089428074181497e-05, + "loss": 0.0082, + "step": 32360 + }, + { + "grad_norm": 0.05572528392076492, + "learning_rate": 8.088127834871871e-05, + "loss": 0.0083, + "step": 32370 + }, + { + "grad_norm": 0.03872951492667198, + "learning_rate": 8.086827257849226e-05, + "loss": 0.0093, + "step": 32380 + }, + { + "grad_norm": 0.06766422837972641, + "learning_rate": 8.085526343255795e-05, + "loss": 0.0093, + "step": 32390 + }, + { + "grad_norm": 0.06642191112041473, + "learning_rate": 8.084225091233842e-05, + "loss": 0.0084, + "step": 32400 + }, + { + "grad_norm": 0.04490929841995239, + "learning_rate": 8.082923501925668e-05, + "loss": 0.0089, + "step": 32410 + }, + { + "grad_norm": 0.039574816823005676, + "learning_rate": 8.081621575473617e-05, + "loss": 0.0077, + "step": 32420 + }, + { + "grad_norm": 0.06124534830451012, + "learning_rate": 8.080319312020064e-05, + "loss": 0.0092, + "step": 32430 + }, + { + "grad_norm": 0.07908494025468826, + "learning_rate": 8.079016711707421e-05, + "loss": 0.0081, + "step": 32440 + }, + { + "grad_norm": 0.05345477536320686, + "learning_rate": 8.077713774678139e-05, + "loss": 0.0094, + "step": 32450 + }, + { + "grad_norm": 0.053173430263996124, + "learning_rate": 8.076410501074707e-05, + "loss": 0.0087, + "step": 32460 + }, + { + "grad_norm": 0.05614132806658745, + "learning_rate": 8.075106891039647e-05, + "loss": 0.0095, + "step": 32470 + }, + { + "grad_norm": 0.04603938013315201, + "learning_rate": 8.073802944715523e-05, + "loss": 0.0093, + "step": 32480 + }, + { + "grad_norm": 0.05814945325255394, + "learning_rate": 8.072498662244929e-05, + "loss": 0.0102, + "step": 32490 + }, + { + "grad_norm": 0.05335627496242523, + "learning_rate": 8.0711940437705e-05, + "loss": 0.0096, + "step": 32500 + }, + { + "grad_norm": 0.0870281234383583, + "learning_rate": 8.06988908943491e-05, + "loss": 0.0097, + "step": 32510 + }, + { + "grad_norm": 0.0700884461402893, + "learning_rate": 8.068583799380863e-05, + "loss": 0.0072, + "step": 32520 + }, + { + "grad_norm": 0.049042414873838425, + "learning_rate": 8.067278173751104e-05, + "loss": 0.0097, + "step": 32530 + }, + { + "grad_norm": 0.05228426307439804, + "learning_rate": 8.065972212688417e-05, + "loss": 0.0086, + "step": 32540 + }, + { + "grad_norm": 0.07675322890281677, + "learning_rate": 8.064665916335618e-05, + "loss": 0.0091, + "step": 32550 + }, + { + "grad_norm": 0.07819338887929916, + "learning_rate": 8.063359284835564e-05, + "loss": 0.0094, + "step": 32560 + }, + { + "grad_norm": 0.06835991889238358, + "learning_rate": 8.062052318331142e-05, + "loss": 0.0093, + "step": 32570 + }, + { + "grad_norm": 0.07076342403888702, + "learning_rate": 8.060745016965283e-05, + "loss": 0.0109, + "step": 32580 + }, + { + "grad_norm": 0.060349661856889725, + "learning_rate": 8.059437380880952e-05, + "loss": 0.0108, + "step": 32590 + }, + { + "grad_norm": 0.0600600466132164, + "learning_rate": 8.058129410221146e-05, + "loss": 0.0077, + "step": 32600 + }, + { + "grad_norm": 0.07673339545726776, + "learning_rate": 8.056821105128908e-05, + "loss": 0.0115, + "step": 32610 + }, + { + "grad_norm": 0.07210202515125275, + "learning_rate": 8.05551246574731e-05, + "loss": 0.0097, + "step": 32620 + }, + { + "grad_norm": 0.055661823600530624, + "learning_rate": 8.05420349221946e-05, + "loss": 0.0086, + "step": 32630 + }, + { + "grad_norm": 0.07409267127513885, + "learning_rate": 8.05289418468851e-05, + "loss": 0.009, + "step": 32640 + }, + { + "grad_norm": 0.0677647739648819, + "learning_rate": 8.051584543297642e-05, + "loss": 0.0092, + "step": 32650 + }, + { + "grad_norm": 0.0748215839266777, + "learning_rate": 8.050274568190074e-05, + "loss": 0.0095, + "step": 32660 + }, + { + "grad_norm": 0.055616866797208786, + "learning_rate": 8.048964259509067e-05, + "loss": 0.0096, + "step": 32670 + }, + { + "grad_norm": 0.08023913949728012, + "learning_rate": 8.047653617397914e-05, + "loss": 0.0084, + "step": 32680 + }, + { + "grad_norm": 0.06778740137815475, + "learning_rate": 8.046342641999941e-05, + "loss": 0.0094, + "step": 32690 + }, + { + "grad_norm": 0.08272966742515564, + "learning_rate": 8.045031333458517e-05, + "loss": 0.0108, + "step": 32700 + }, + { + "grad_norm": 0.07473905384540558, + "learning_rate": 8.043719691917047e-05, + "loss": 0.0095, + "step": 32710 + }, + { + "grad_norm": 0.0669669434428215, + "learning_rate": 8.042407717518966e-05, + "loss": 0.0093, + "step": 32720 + }, + { + "grad_norm": 0.0628041997551918, + "learning_rate": 8.041095410407751e-05, + "loss": 0.0096, + "step": 32730 + }, + { + "grad_norm": 0.10479876399040222, + "learning_rate": 8.039782770726913e-05, + "loss": 0.0083, + "step": 32740 + }, + { + "grad_norm": 0.07976023107767105, + "learning_rate": 8.038469798620004e-05, + "loss": 0.01, + "step": 32750 + }, + { + "grad_norm": 0.07262463867664337, + "learning_rate": 8.037156494230604e-05, + "loss": 0.0113, + "step": 32760 + }, + { + "grad_norm": 0.06899212300777435, + "learning_rate": 8.035842857702338e-05, + "loss": 0.0103, + "step": 32770 + }, + { + "grad_norm": 0.06221238151192665, + "learning_rate": 8.03452888917886e-05, + "loss": 0.0097, + "step": 32780 + }, + { + "grad_norm": 0.06356707215309143, + "learning_rate": 8.033214588803866e-05, + "loss": 0.0085, + "step": 32790 + }, + { + "grad_norm": 0.09152013063430786, + "learning_rate": 8.031899956721083e-05, + "loss": 0.0101, + "step": 32800 + }, + { + "grad_norm": 0.06691724807024002, + "learning_rate": 8.030584993074282e-05, + "loss": 0.0099, + "step": 32810 + }, + { + "grad_norm": 0.08732567727565765, + "learning_rate": 8.02926969800726e-05, + "loss": 0.0092, + "step": 32820 + }, + { + "grad_norm": 0.05673089250922203, + "learning_rate": 8.027954071663859e-05, + "loss": 0.0115, + "step": 32830 + }, + { + "grad_norm": 0.05396921560168266, + "learning_rate": 8.026638114187954e-05, + "loss": 0.0099, + "step": 32840 + }, + { + "grad_norm": 0.05109027400612831, + "learning_rate": 8.025321825723456e-05, + "loss": 0.0084, + "step": 32850 + }, + { + "grad_norm": 0.06235812231898308, + "learning_rate": 8.02400520641431e-05, + "loss": 0.0083, + "step": 32860 + }, + { + "grad_norm": 0.07015563547611237, + "learning_rate": 8.022688256404501e-05, + "loss": 0.0102, + "step": 32870 + }, + { + "grad_norm": 0.07201700657606125, + "learning_rate": 8.02137097583805e-05, + "loss": 0.0096, + "step": 32880 + }, + { + "grad_norm": 0.09285320341587067, + "learning_rate": 8.02005336485901e-05, + "loss": 0.013, + "step": 32890 + }, + { + "grad_norm": 0.07348840683698654, + "learning_rate": 8.018735423611476e-05, + "loss": 0.011, + "step": 32900 + }, + { + "grad_norm": 0.07107436656951904, + "learning_rate": 8.017417152239574e-05, + "loss": 0.0103, + "step": 32910 + }, + { + "grad_norm": 0.04881977289915085, + "learning_rate": 8.01609855088747e-05, + "loss": 0.0081, + "step": 32920 + }, + { + "grad_norm": 0.05552692338824272, + "learning_rate": 8.014779619699362e-05, + "loss": 0.0086, + "step": 32930 + }, + { + "grad_norm": 0.05992819368839264, + "learning_rate": 8.013460358819489e-05, + "loss": 0.0085, + "step": 32940 + }, + { + "grad_norm": 0.07147642225027084, + "learning_rate": 8.01214076839212e-05, + "loss": 0.0133, + "step": 32950 + }, + { + "grad_norm": 0.09117699414491653, + "learning_rate": 8.010820848561565e-05, + "loss": 0.0094, + "step": 32960 + }, + { + "grad_norm": 0.08886099606752396, + "learning_rate": 8.009500599472171e-05, + "loss": 0.0089, + "step": 32970 + }, + { + "grad_norm": 0.07017754763364792, + "learning_rate": 8.008180021268314e-05, + "loss": 0.0095, + "step": 32980 + }, + { + "grad_norm": 0.04742906987667084, + "learning_rate": 8.006859114094414e-05, + "loss": 0.0085, + "step": 32990 + }, + { + "grad_norm": 0.05676064267754555, + "learning_rate": 8.005537878094921e-05, + "loss": 0.0085, + "step": 33000 + }, + { + "grad_norm": 0.05398652330040932, + "learning_rate": 8.004216313414323e-05, + "loss": 0.0109, + "step": 33010 + }, + { + "grad_norm": 0.0570361502468586, + "learning_rate": 8.002894420197149e-05, + "loss": 0.0081, + "step": 33020 + }, + { + "grad_norm": 0.048812542110681534, + "learning_rate": 8.001572198587954e-05, + "loss": 0.0103, + "step": 33030 + }, + { + "grad_norm": 0.09126198291778564, + "learning_rate": 8.000249648731338e-05, + "loss": 0.0108, + "step": 33040 + }, + { + "grad_norm": 0.05167738348245621, + "learning_rate": 7.998926770771928e-05, + "loss": 0.0088, + "step": 33050 + }, + { + "grad_norm": 0.0824449434876442, + "learning_rate": 7.997603564854397e-05, + "loss": 0.0106, + "step": 33060 + }, + { + "grad_norm": 0.07628034800291061, + "learning_rate": 7.996280031123448e-05, + "loss": 0.0096, + "step": 33070 + }, + { + "grad_norm": 0.053595248609781265, + "learning_rate": 7.994956169723818e-05, + "loss": 0.0091, + "step": 33080 + }, + { + "grad_norm": 0.044052112847566605, + "learning_rate": 7.993631980800285e-05, + "loss": 0.0084, + "step": 33090 + }, + { + "grad_norm": 0.04868858680129051, + "learning_rate": 7.992307464497659e-05, + "loss": 0.0081, + "step": 33100 + }, + { + "grad_norm": 0.08099538832902908, + "learning_rate": 7.990982620960787e-05, + "loss": 0.0118, + "step": 33110 + }, + { + "grad_norm": 0.05508343130350113, + "learning_rate": 7.989657450334554e-05, + "loss": 0.0105, + "step": 33120 + }, + { + "grad_norm": 0.06023716926574707, + "learning_rate": 7.988331952763877e-05, + "loss": 0.0087, + "step": 33130 + }, + { + "grad_norm": 0.04404933750629425, + "learning_rate": 7.987006128393709e-05, + "loss": 0.0069, + "step": 33140 + }, + { + "grad_norm": 0.05110744759440422, + "learning_rate": 7.985679977369043e-05, + "loss": 0.0093, + "step": 33150 + }, + { + "grad_norm": 0.05790717899799347, + "learning_rate": 7.984353499834902e-05, + "loss": 0.0083, + "step": 33160 + }, + { + "grad_norm": 0.07583462446928024, + "learning_rate": 7.983026695936351e-05, + "loss": 0.0099, + "step": 33170 + }, + { + "grad_norm": 0.06526331603527069, + "learning_rate": 7.981699565818486e-05, + "loss": 0.0096, + "step": 33180 + }, + { + "grad_norm": 0.04034942016005516, + "learning_rate": 7.980372109626437e-05, + "loss": 0.0091, + "step": 33190 + }, + { + "grad_norm": 0.045953117311000824, + "learning_rate": 7.979044327505375e-05, + "loss": 0.0084, + "step": 33200 + }, + { + "grad_norm": 0.09713510423898697, + "learning_rate": 7.977716219600506e-05, + "loss": 0.0099, + "step": 33210 + }, + { + "grad_norm": 0.055322080850601196, + "learning_rate": 7.97638778605707e-05, + "loss": 0.0077, + "step": 33220 + }, + { + "grad_norm": 0.04944276809692383, + "learning_rate": 7.975059027020338e-05, + "loss": 0.0078, + "step": 33230 + }, + { + "grad_norm": 0.05154932662844658, + "learning_rate": 7.973729942635623e-05, + "loss": 0.0087, + "step": 33240 + }, + { + "grad_norm": 0.03916417062282562, + "learning_rate": 7.972400533048273e-05, + "loss": 0.0093, + "step": 33250 + }, + { + "grad_norm": 0.080724336206913, + "learning_rate": 7.97107079840367e-05, + "loss": 0.0087, + "step": 33260 + }, + { + "grad_norm": 0.09271682798862457, + "learning_rate": 7.969740738847231e-05, + "loss": 0.009, + "step": 33270 + }, + { + "grad_norm": 0.060989730060100555, + "learning_rate": 7.968410354524411e-05, + "loss": 0.0087, + "step": 33280 + }, + { + "grad_norm": 0.06001696363091469, + "learning_rate": 7.967079645580697e-05, + "loss": 0.0076, + "step": 33290 + }, + { + "grad_norm": 0.07883070409297943, + "learning_rate": 7.965748612161612e-05, + "loss": 0.0112, + "step": 33300 + }, + { + "grad_norm": 0.08579603582620621, + "learning_rate": 7.96441725441272e-05, + "loss": 0.0079, + "step": 33310 + }, + { + "grad_norm": 0.06017429754137993, + "learning_rate": 7.963085572479614e-05, + "loss": 0.009, + "step": 33320 + }, + { + "grad_norm": 0.04629390314221382, + "learning_rate": 7.961753566507924e-05, + "loss": 0.0092, + "step": 33330 + }, + { + "grad_norm": 0.0782664567232132, + "learning_rate": 7.960421236643316e-05, + "loss": 0.009, + "step": 33340 + }, + { + "grad_norm": 0.08908846229314804, + "learning_rate": 7.959088583031496e-05, + "loss": 0.0081, + "step": 33350 + }, + { + "grad_norm": 0.07992212474346161, + "learning_rate": 7.957755605818194e-05, + "loss": 0.0093, + "step": 33360 + }, + { + "grad_norm": 0.05512632057070732, + "learning_rate": 7.956422305149185e-05, + "loss": 0.0084, + "step": 33370 + }, + { + "grad_norm": 0.0748966708779335, + "learning_rate": 7.95508868117028e-05, + "loss": 0.0078, + "step": 33380 + }, + { + "grad_norm": 0.05469821020960808, + "learning_rate": 7.953754734027318e-05, + "loss": 0.0107, + "step": 33390 + }, + { + "grad_norm": 0.05494758486747742, + "learning_rate": 7.952420463866182e-05, + "loss": 0.0082, + "step": 33400 + }, + { + "grad_norm": 0.052443236112594604, + "learning_rate": 7.951085870832782e-05, + "loss": 0.0099, + "step": 33410 + }, + { + "grad_norm": 0.06514479964971542, + "learning_rate": 7.949750955073067e-05, + "loss": 0.0097, + "step": 33420 + }, + { + "grad_norm": 0.05437902733683586, + "learning_rate": 7.948415716733022e-05, + "loss": 0.0091, + "step": 33430 + }, + { + "grad_norm": 0.07258640229701996, + "learning_rate": 7.947080155958669e-05, + "loss": 0.0085, + "step": 33440 + }, + { + "grad_norm": 0.05795162543654442, + "learning_rate": 7.94574427289606e-05, + "loss": 0.0075, + "step": 33450 + }, + { + "grad_norm": 0.06822375953197479, + "learning_rate": 7.944408067691284e-05, + "loss": 0.0086, + "step": 33460 + }, + { + "grad_norm": 0.06326568871736526, + "learning_rate": 7.943071540490473e-05, + "loss": 0.0115, + "step": 33470 + }, + { + "grad_norm": 0.0801779180765152, + "learning_rate": 7.94173469143978e-05, + "loss": 0.0097, + "step": 33480 + }, + { + "grad_norm": 0.09955580532550812, + "learning_rate": 7.940397520685406e-05, + "loss": 0.0088, + "step": 33490 + }, + { + "grad_norm": 0.07540398836135864, + "learning_rate": 7.939060028373577e-05, + "loss": 0.009, + "step": 33500 + }, + { + "grad_norm": 0.06434854120016098, + "learning_rate": 7.937722214650565e-05, + "loss": 0.0095, + "step": 33510 + }, + { + "grad_norm": 0.06918632984161377, + "learning_rate": 7.936384079662666e-05, + "loss": 0.0111, + "step": 33520 + }, + { + "grad_norm": 0.06431401520967484, + "learning_rate": 7.93504562355622e-05, + "loss": 0.0089, + "step": 33530 + }, + { + "grad_norm": 0.05535825714468956, + "learning_rate": 7.933706846477599e-05, + "loss": 0.0079, + "step": 33540 + }, + { + "grad_norm": 0.05215870961546898, + "learning_rate": 7.932367748573206e-05, + "loss": 0.0104, + "step": 33550 + }, + { + "grad_norm": 0.07171984761953354, + "learning_rate": 7.931028329989485e-05, + "loss": 0.009, + "step": 33560 + }, + { + "grad_norm": 0.09146517515182495, + "learning_rate": 7.929688590872913e-05, + "loss": 0.009, + "step": 33570 + }, + { + "grad_norm": 0.08076253533363342, + "learning_rate": 7.928348531370003e-05, + "loss": 0.0097, + "step": 33580 + }, + { + "grad_norm": 0.07999927550554276, + "learning_rate": 7.927008151627297e-05, + "loss": 0.0089, + "step": 33590 + }, + { + "grad_norm": 0.06282489001750946, + "learning_rate": 7.925667451791383e-05, + "loss": 0.0071, + "step": 33600 + }, + { + "grad_norm": 0.09626957774162292, + "learning_rate": 7.924326432008874e-05, + "loss": 0.0102, + "step": 33610 + }, + { + "grad_norm": 0.069429911673069, + "learning_rate": 7.922985092426422e-05, + "loss": 0.0083, + "step": 33620 + }, + { + "grad_norm": 0.06983331590890884, + "learning_rate": 7.921643433190717e-05, + "loss": 0.0084, + "step": 33630 + }, + { + "grad_norm": 0.06728195399045944, + "learning_rate": 7.920301454448478e-05, + "loss": 0.0089, + "step": 33640 + }, + { + "grad_norm": 0.06796871870756149, + "learning_rate": 7.918959156346461e-05, + "loss": 0.0115, + "step": 33650 + }, + { + "grad_norm": 0.0885085016489029, + "learning_rate": 7.91761653903146e-05, + "loss": 0.0121, + "step": 33660 + }, + { + "grad_norm": 0.08450372517108917, + "learning_rate": 7.916273602650302e-05, + "loss": 0.0092, + "step": 33670 + }, + { + "grad_norm": 0.06036858260631561, + "learning_rate": 7.914930347349847e-05, + "loss": 0.0082, + "step": 33680 + }, + { + "grad_norm": 0.09696558117866516, + "learning_rate": 7.913586773276992e-05, + "loss": 0.0105, + "step": 33690 + }, + { + "grad_norm": 0.045999862253665924, + "learning_rate": 7.912242880578667e-05, + "loss": 0.0081, + "step": 33700 + }, + { + "grad_norm": 0.05520831048488617, + "learning_rate": 7.910898669401839e-05, + "loss": 0.0093, + "step": 33710 + }, + { + "grad_norm": 0.05371830239892006, + "learning_rate": 7.909554139893511e-05, + "loss": 0.0101, + "step": 33720 + }, + { + "grad_norm": 0.05434143543243408, + "learning_rate": 7.908209292200715e-05, + "loss": 0.0092, + "step": 33730 + }, + { + "grad_norm": 0.0487607903778553, + "learning_rate": 7.906864126470523e-05, + "loss": 0.0094, + "step": 33740 + }, + { + "grad_norm": 0.05098196864128113, + "learning_rate": 7.905518642850041e-05, + "loss": 0.009, + "step": 33750 + }, + { + "grad_norm": 0.07843765616416931, + "learning_rate": 7.904172841486409e-05, + "loss": 0.0089, + "step": 33760 + }, + { + "grad_norm": 0.059820666909217834, + "learning_rate": 7.902826722526801e-05, + "loss": 0.0088, + "step": 33770 + }, + { + "grad_norm": 0.08311822265386581, + "learning_rate": 7.901480286118427e-05, + "loss": 0.0098, + "step": 33780 + }, + { + "grad_norm": 0.0581614151597023, + "learning_rate": 7.900133532408531e-05, + "loss": 0.0075, + "step": 33790 + }, + { + "grad_norm": 0.05388614907860756, + "learning_rate": 7.898786461544395e-05, + "loss": 0.0079, + "step": 33800 + }, + { + "grad_norm": 0.05667612701654434, + "learning_rate": 7.897439073673325e-05, + "loss": 0.0098, + "step": 33810 + }, + { + "grad_norm": 0.07110437750816345, + "learning_rate": 7.896091368942677e-05, + "loss": 0.0094, + "step": 33820 + }, + { + "grad_norm": 0.09371413290500641, + "learning_rate": 7.894743347499832e-05, + "loss": 0.009, + "step": 33830 + }, + { + "grad_norm": 0.08152700960636139, + "learning_rate": 7.893395009492203e-05, + "loss": 0.0082, + "step": 33840 + }, + { + "grad_norm": 0.07225392758846283, + "learning_rate": 7.892046355067248e-05, + "loss": 0.0094, + "step": 33850 + }, + { + "grad_norm": 0.08151423931121826, + "learning_rate": 7.890697384372451e-05, + "loss": 0.0099, + "step": 33860 + }, + { + "grad_norm": 0.05354785546660423, + "learning_rate": 7.889348097555336e-05, + "loss": 0.0089, + "step": 33870 + }, + { + "grad_norm": 0.07577694207429886, + "learning_rate": 7.887998494763455e-05, + "loss": 0.0092, + "step": 33880 + }, + { + "grad_norm": 0.05227834731340408, + "learning_rate": 7.886648576144404e-05, + "loss": 0.0088, + "step": 33890 + }, + { + "grad_norm": 0.07410194724798203, + "learning_rate": 7.885298341845802e-05, + "loss": 0.0121, + "step": 33900 + }, + { + "grad_norm": 0.06606439501047134, + "learning_rate": 7.883947792015311e-05, + "loss": 0.0093, + "step": 33910 + }, + { + "grad_norm": 0.037825167179107666, + "learning_rate": 7.882596926800628e-05, + "loss": 0.0081, + "step": 33920 + }, + { + "grad_norm": 0.05417391285300255, + "learning_rate": 7.881245746349477e-05, + "loss": 0.0097, + "step": 33930 + }, + { + "grad_norm": 0.053145263344049454, + "learning_rate": 7.879894250809623e-05, + "loss": 0.0105, + "step": 33940 + }, + { + "grad_norm": 0.07122893631458282, + "learning_rate": 7.878542440328865e-05, + "loss": 0.0105, + "step": 33950 + }, + { + "grad_norm": 0.06862280517816544, + "learning_rate": 7.877190315055031e-05, + "loss": 0.0104, + "step": 33960 + }, + { + "grad_norm": 0.07392364740371704, + "learning_rate": 7.875837875135991e-05, + "loss": 0.0084, + "step": 33970 + }, + { + "grad_norm": 0.09909310191869736, + "learning_rate": 7.874485120719646e-05, + "loss": 0.0096, + "step": 33980 + }, + { + "grad_norm": 0.07238095998764038, + "learning_rate": 7.873132051953928e-05, + "loss": 0.0098, + "step": 33990 + }, + { + "grad_norm": 0.05434523895382881, + "learning_rate": 7.87177866898681e-05, + "loss": 0.0099, + "step": 34000 + }, + { + "grad_norm": 0.04535417631268501, + "learning_rate": 7.870424971966294e-05, + "loss": 0.0072, + "step": 34010 + }, + { + "grad_norm": 0.062189117074012756, + "learning_rate": 7.869070961040419e-05, + "loss": 0.0117, + "step": 34020 + }, + { + "grad_norm": 0.07544189691543579, + "learning_rate": 7.867716636357257e-05, + "loss": 0.011, + "step": 34030 + }, + { + "grad_norm": 0.050999462604522705, + "learning_rate": 7.866361998064915e-05, + "loss": 0.0093, + "step": 34040 + }, + { + "grad_norm": 0.1097223088145256, + "learning_rate": 7.865007046311534e-05, + "loss": 0.0108, + "step": 34050 + }, + { + "grad_norm": 0.06941992044448853, + "learning_rate": 7.86365178124529e-05, + "loss": 0.0079, + "step": 34060 + }, + { + "grad_norm": 0.0631285235285759, + "learning_rate": 7.862296203014394e-05, + "loss": 0.01, + "step": 34070 + }, + { + "grad_norm": 0.08280680328607559, + "learning_rate": 7.860940311767088e-05, + "loss": 0.0099, + "step": 34080 + }, + { + "grad_norm": 0.09877130389213562, + "learning_rate": 7.85958410765165e-05, + "loss": 0.0097, + "step": 34090 + }, + { + "grad_norm": 0.0692889392375946, + "learning_rate": 7.858227590816394e-05, + "loss": 0.0088, + "step": 34100 + }, + { + "grad_norm": 0.05794373154640198, + "learning_rate": 7.856870761409664e-05, + "loss": 0.0101, + "step": 34110 + }, + { + "grad_norm": 0.03476683422923088, + "learning_rate": 7.855513619579846e-05, + "loss": 0.0085, + "step": 34120 + }, + { + "grad_norm": 0.05912259593605995, + "learning_rate": 7.85415616547535e-05, + "loss": 0.01, + "step": 34130 + }, + { + "grad_norm": 0.06859909743070602, + "learning_rate": 7.852798399244627e-05, + "loss": 0.0084, + "step": 34140 + }, + { + "grad_norm": 0.04685547947883606, + "learning_rate": 7.851440321036161e-05, + "loss": 0.0094, + "step": 34150 + }, + { + "grad_norm": 0.05789569765329361, + "learning_rate": 7.850081930998468e-05, + "loss": 0.0095, + "step": 34160 + }, + { + "grad_norm": 0.08652117848396301, + "learning_rate": 7.8487232292801e-05, + "loss": 0.0084, + "step": 34170 + }, + { + "grad_norm": 0.08879147469997406, + "learning_rate": 7.847364216029642e-05, + "loss": 0.0098, + "step": 34180 + }, + { + "grad_norm": 0.08524429053068161, + "learning_rate": 7.846004891395716e-05, + "loss": 0.0088, + "step": 34190 + }, + { + "grad_norm": 0.08214467763900757, + "learning_rate": 7.844645255526972e-05, + "loss": 0.0094, + "step": 34200 + }, + { + "grad_norm": 0.042969755828380585, + "learning_rate": 7.843285308572101e-05, + "loss": 0.0066, + "step": 34210 + }, + { + "grad_norm": 0.05290450528264046, + "learning_rate": 7.841925050679823e-05, + "loss": 0.0096, + "step": 34220 + }, + { + "grad_norm": 0.038520392030477524, + "learning_rate": 7.840564481998895e-05, + "loss": 0.0099, + "step": 34230 + }, + { + "grad_norm": 0.05341049283742905, + "learning_rate": 7.839203602678105e-05, + "loss": 0.0076, + "step": 34240 + }, + { + "grad_norm": 0.05817975103855133, + "learning_rate": 7.837842412866279e-05, + "loss": 0.009, + "step": 34250 + }, + { + "grad_norm": 0.0675949901342392, + "learning_rate": 7.836480912712272e-05, + "loss": 0.0073, + "step": 34260 + }, + { + "grad_norm": 0.07661116123199463, + "learning_rate": 7.835119102364976e-05, + "loss": 0.0095, + "step": 34270 + }, + { + "grad_norm": 0.06677821278572083, + "learning_rate": 7.83375698197332e-05, + "loss": 0.0101, + "step": 34280 + }, + { + "grad_norm": 0.06612630188465118, + "learning_rate": 7.83239455168626e-05, + "loss": 0.0072, + "step": 34290 + }, + { + "grad_norm": 0.060570672154426575, + "learning_rate": 7.83103181165279e-05, + "loss": 0.0082, + "step": 34300 + }, + { + "grad_norm": 0.05627619847655296, + "learning_rate": 7.829668762021937e-05, + "loss": 0.0074, + "step": 34310 + }, + { + "grad_norm": 0.09030017256736755, + "learning_rate": 7.828305402942764e-05, + "loss": 0.0102, + "step": 34320 + }, + { + "grad_norm": 0.08406803756952286, + "learning_rate": 7.826941734564363e-05, + "loss": 0.0103, + "step": 34330 + }, + { + "grad_norm": 0.057238128036260605, + "learning_rate": 7.825577757035865e-05, + "loss": 0.0079, + "step": 34340 + }, + { + "grad_norm": 0.08562155812978745, + "learning_rate": 7.824213470506431e-05, + "loss": 0.0075, + "step": 34350 + }, + { + "grad_norm": 0.06015324592590332, + "learning_rate": 7.822848875125257e-05, + "loss": 0.0075, + "step": 34360 + }, + { + "grad_norm": 0.07492796331644058, + "learning_rate": 7.821483971041576e-05, + "loss": 0.0096, + "step": 34370 + }, + { + "grad_norm": 0.05474745109677315, + "learning_rate": 7.820118758404649e-05, + "loss": 0.0077, + "step": 34380 + }, + { + "grad_norm": 0.05138446018099785, + "learning_rate": 7.818753237363776e-05, + "loss": 0.0082, + "step": 34390 + }, + { + "grad_norm": 0.05497808754444122, + "learning_rate": 7.817387408068286e-05, + "loss": 0.0088, + "step": 34400 + }, + { + "grad_norm": 0.05337353050708771, + "learning_rate": 7.816021270667544e-05, + "loss": 0.0095, + "step": 34410 + }, + { + "grad_norm": 0.05622713267803192, + "learning_rate": 7.81465482531095e-05, + "loss": 0.0086, + "step": 34420 + }, + { + "grad_norm": 0.06337340921163559, + "learning_rate": 7.813288072147938e-05, + "loss": 0.0101, + "step": 34430 + }, + { + "grad_norm": 0.06802414357662201, + "learning_rate": 7.811921011327972e-05, + "loss": 0.009, + "step": 34440 + }, + { + "grad_norm": 0.0932912603020668, + "learning_rate": 7.810553643000549e-05, + "loss": 0.0092, + "step": 34450 + }, + { + "grad_norm": 0.0893191248178482, + "learning_rate": 7.809185967315206e-05, + "loss": 0.0109, + "step": 34460 + }, + { + "grad_norm": 0.0631389319896698, + "learning_rate": 7.80781798442151e-05, + "loss": 0.0111, + "step": 34470 + }, + { + "grad_norm": 0.07905782014131546, + "learning_rate": 7.806449694469061e-05, + "loss": 0.0088, + "step": 34480 + }, + { + "grad_norm": 0.059059541672468185, + "learning_rate": 7.805081097607492e-05, + "loss": 0.0081, + "step": 34490 + }, + { + "grad_norm": 0.08142119646072388, + "learning_rate": 7.803712193986474e-05, + "loss": 0.0093, + "step": 34500 + }, + { + "grad_norm": 0.0824245736002922, + "learning_rate": 7.802342983755702e-05, + "loss": 0.0097, + "step": 34510 + }, + { + "grad_norm": 0.05238554626703262, + "learning_rate": 7.800973467064918e-05, + "loss": 0.0098, + "step": 34520 + }, + { + "grad_norm": 0.0746847465634346, + "learning_rate": 7.799603644063884e-05, + "loss": 0.0099, + "step": 34530 + }, + { + "grad_norm": 0.06733821332454681, + "learning_rate": 7.798233514902405e-05, + "loss": 0.011, + "step": 34540 + }, + { + "grad_norm": 0.0512721873819828, + "learning_rate": 7.796863079730318e-05, + "loss": 0.0091, + "step": 34550 + }, + { + "grad_norm": 0.07186884433031082, + "learning_rate": 7.795492338697488e-05, + "loss": 0.0104, + "step": 34560 + }, + { + "grad_norm": 0.06374401599168777, + "learning_rate": 7.794121291953819e-05, + "loss": 0.0097, + "step": 34570 + }, + { + "grad_norm": 0.09319420158863068, + "learning_rate": 7.792749939649246e-05, + "loss": 0.0103, + "step": 34580 + }, + { + "grad_norm": 0.06100812181830406, + "learning_rate": 7.79137828193374e-05, + "loss": 0.0097, + "step": 34590 + }, + { + "grad_norm": 0.059428904205560684, + "learning_rate": 7.790006318957301e-05, + "loss": 0.0083, + "step": 34600 + }, + { + "grad_norm": 0.04419975355267525, + "learning_rate": 7.788634050869965e-05, + "loss": 0.0081, + "step": 34610 + }, + { + "grad_norm": 0.06560224294662476, + "learning_rate": 7.787261477821803e-05, + "loss": 0.0112, + "step": 34620 + }, + { + "grad_norm": 0.05379380285739899, + "learning_rate": 7.785888599962916e-05, + "loss": 0.0096, + "step": 34630 + }, + { + "grad_norm": 0.04154759272933006, + "learning_rate": 7.784515417443439e-05, + "loss": 0.0086, + "step": 34640 + }, + { + "grad_norm": 0.08297640085220337, + "learning_rate": 7.783141930413545e-05, + "loss": 0.0094, + "step": 34650 + }, + { + "grad_norm": 0.08371850103139877, + "learning_rate": 7.78176813902343e-05, + "loss": 0.0106, + "step": 34660 + }, + { + "grad_norm": 0.038069188594818115, + "learning_rate": 7.780394043423336e-05, + "loss": 0.0088, + "step": 34670 + }, + { + "grad_norm": 0.05073149874806404, + "learning_rate": 7.77901964376353e-05, + "loss": 0.0099, + "step": 34680 + }, + { + "grad_norm": 0.06543881446123123, + "learning_rate": 7.777644940194316e-05, + "loss": 0.0089, + "step": 34690 + }, + { + "grad_norm": 0.09357982873916626, + "learning_rate": 7.776269932866023e-05, + "loss": 0.009, + "step": 34700 + }, + { + "grad_norm": 0.06743744760751724, + "learning_rate": 7.774894621929026e-05, + "loss": 0.0093, + "step": 34710 + }, + { + "grad_norm": 0.07999222725629807, + "learning_rate": 7.773519007533725e-05, + "loss": 0.0079, + "step": 34720 + }, + { + "grad_norm": 0.06707431375980377, + "learning_rate": 7.772143089830556e-05, + "loss": 0.0088, + "step": 34730 + }, + { + "grad_norm": 0.06310528516769409, + "learning_rate": 7.770766868969985e-05, + "loss": 0.0075, + "step": 34740 + }, + { + "grad_norm": 0.06563655287027359, + "learning_rate": 7.769390345102518e-05, + "loss": 0.0083, + "step": 34750 + }, + { + "grad_norm": 0.07737554609775543, + "learning_rate": 7.768013518378683e-05, + "loss": 0.0077, + "step": 34760 + }, + { + "grad_norm": 0.06485651433467865, + "learning_rate": 7.766636388949053e-05, + "loss": 0.0078, + "step": 34770 + }, + { + "grad_norm": 0.052309032529592514, + "learning_rate": 7.765258956964229e-05, + "loss": 0.0081, + "step": 34780 + }, + { + "grad_norm": 0.04714994877576828, + "learning_rate": 7.76388122257484e-05, + "loss": 0.0095, + "step": 34790 + }, + { + "grad_norm": 0.06142337992787361, + "learning_rate": 7.762503185931558e-05, + "loss": 0.0093, + "step": 34800 + }, + { + "grad_norm": 0.05780591443181038, + "learning_rate": 7.76112484718508e-05, + "loss": 0.0094, + "step": 34810 + }, + { + "grad_norm": 0.06762459874153137, + "learning_rate": 7.75974620648614e-05, + "loss": 0.0088, + "step": 34820 + }, + { + "grad_norm": 0.05407922714948654, + "learning_rate": 7.758367263985503e-05, + "loss": 0.0087, + "step": 34830 + }, + { + "grad_norm": 0.06623527407646179, + "learning_rate": 7.75698801983397e-05, + "loss": 0.0097, + "step": 34840 + }, + { + "grad_norm": 0.07129108160734177, + "learning_rate": 7.755608474182372e-05, + "loss": 0.0101, + "step": 34850 + }, + { + "grad_norm": 0.057502735406160355, + "learning_rate": 7.754228627181574e-05, + "loss": 0.0092, + "step": 34860 + }, + { + "grad_norm": 0.05170201510190964, + "learning_rate": 7.752848478982476e-05, + "loss": 0.0107, + "step": 34870 + }, + { + "grad_norm": 0.06950045377016068, + "learning_rate": 7.751468029736006e-05, + "loss": 0.0093, + "step": 34880 + }, + { + "grad_norm": 0.0803132876753807, + "learning_rate": 7.750087279593129e-05, + "loss": 0.0099, + "step": 34890 + }, + { + "grad_norm": 0.07550134509801865, + "learning_rate": 7.748706228704843e-05, + "loss": 0.0097, + "step": 34900 + }, + { + "grad_norm": 0.08225661516189575, + "learning_rate": 7.747324877222176e-05, + "loss": 0.0087, + "step": 34910 + }, + { + "grad_norm": 0.08440366387367249, + "learning_rate": 7.745943225296188e-05, + "loss": 0.0093, + "step": 34920 + }, + { + "grad_norm": 0.06771862506866455, + "learning_rate": 7.744561273077981e-05, + "loss": 0.0089, + "step": 34930 + }, + { + "grad_norm": 0.044064126908779144, + "learning_rate": 7.743179020718678e-05, + "loss": 0.0071, + "step": 34940 + }, + { + "grad_norm": 0.04946913197636604, + "learning_rate": 7.741796468369443e-05, + "loss": 0.0086, + "step": 34950 + }, + { + "grad_norm": 0.06975158303976059, + "learning_rate": 7.740413616181466e-05, + "loss": 0.01, + "step": 34960 + }, + { + "grad_norm": 0.09117342531681061, + "learning_rate": 7.739030464305978e-05, + "loss": 0.0084, + "step": 34970 + }, + { + "grad_norm": 0.12113401293754578, + "learning_rate": 7.737647012894235e-05, + "loss": 0.0088, + "step": 34980 + }, + { + "grad_norm": 0.059407789260149, + "learning_rate": 7.736263262097532e-05, + "loss": 0.0085, + "step": 34990 + }, + { + "grad_norm": 0.07498694211244583, + "learning_rate": 7.734879212067192e-05, + "loss": 0.0094, + "step": 35000 + }, + { + "grad_norm": 0.0758432000875473, + "learning_rate": 7.733494862954573e-05, + "loss": 0.0099, + "step": 35010 + }, + { + "grad_norm": 0.05203739553689957, + "learning_rate": 7.732110214911066e-05, + "loss": 0.0089, + "step": 35020 + }, + { + "grad_norm": 0.04957215487957001, + "learning_rate": 7.730725268088094e-05, + "loss": 0.008, + "step": 35030 + }, + { + "grad_norm": 0.07270657271146774, + "learning_rate": 7.729340022637112e-05, + "loss": 0.0119, + "step": 35040 + }, + { + "grad_norm": 0.08892940729856491, + "learning_rate": 7.727954478709607e-05, + "loss": 0.0099, + "step": 35050 + }, + { + "grad_norm": 0.0401296503841877, + "learning_rate": 7.726568636457103e-05, + "loss": 0.0076, + "step": 35060 + }, + { + "grad_norm": 0.047326281666755676, + "learning_rate": 7.725182496031153e-05, + "loss": 0.0095, + "step": 35070 + }, + { + "grad_norm": 0.06356912851333618, + "learning_rate": 7.723796057583341e-05, + "loss": 0.0106, + "step": 35080 + }, + { + "grad_norm": 0.06955086439847946, + "learning_rate": 7.722409321265287e-05, + "loss": 0.0088, + "step": 35090 + }, + { + "grad_norm": 0.06372782588005066, + "learning_rate": 7.721022287228645e-05, + "loss": 0.0104, + "step": 35100 + }, + { + "grad_norm": 0.08080269396305084, + "learning_rate": 7.719634955625094e-05, + "loss": 0.0096, + "step": 35110 + }, + { + "grad_norm": 0.08346597850322723, + "learning_rate": 7.718247326606355e-05, + "loss": 0.0096, + "step": 35120 + }, + { + "grad_norm": 0.07257570326328278, + "learning_rate": 7.716859400324175e-05, + "loss": 0.0108, + "step": 35130 + }, + { + "grad_norm": 0.046115126460790634, + "learning_rate": 7.715471176930335e-05, + "loss": 0.0088, + "step": 35140 + }, + { + "grad_norm": 0.05936802551150322, + "learning_rate": 7.714082656576651e-05, + "loss": 0.0093, + "step": 35150 + }, + { + "grad_norm": 0.06935510039329529, + "learning_rate": 7.712693839414968e-05, + "loss": 0.0085, + "step": 35160 + }, + { + "grad_norm": 0.08545587211847305, + "learning_rate": 7.711304725597164e-05, + "loss": 0.0086, + "step": 35170 + }, + { + "grad_norm": 0.045210570096969604, + "learning_rate": 7.709915315275151e-05, + "loss": 0.0105, + "step": 35180 + }, + { + "grad_norm": 0.07809995859861374, + "learning_rate": 7.708525608600876e-05, + "loss": 0.0087, + "step": 35190 + }, + { + "grad_norm": 0.05611608549952507, + "learning_rate": 7.707135605726311e-05, + "loss": 0.0084, + "step": 35200 + }, + { + "grad_norm": 0.05161663517355919, + "learning_rate": 7.705745306803466e-05, + "loss": 0.007, + "step": 35210 + }, + { + "grad_norm": 0.039752501994371414, + "learning_rate": 7.704354711984383e-05, + "loss": 0.0082, + "step": 35220 + }, + { + "grad_norm": 0.056117963045835495, + "learning_rate": 7.702963821421133e-05, + "loss": 0.0077, + "step": 35230 + }, + { + "grad_norm": 0.07034383714199066, + "learning_rate": 7.701572635265826e-05, + "loss": 0.0084, + "step": 35240 + }, + { + "grad_norm": 0.07360661774873734, + "learning_rate": 7.700181153670596e-05, + "loss": 0.0084, + "step": 35250 + }, + { + "grad_norm": 0.06689673662185669, + "learning_rate": 7.698789376787616e-05, + "loss": 0.0096, + "step": 35260 + }, + { + "grad_norm": 0.05015142634510994, + "learning_rate": 7.697397304769085e-05, + "loss": 0.0094, + "step": 35270 + }, + { + "grad_norm": 0.04227178543806076, + "learning_rate": 7.696004937767241e-05, + "loss": 0.0097, + "step": 35280 + }, + { + "grad_norm": 0.12252222746610641, + "learning_rate": 7.694612275934352e-05, + "loss": 0.0104, + "step": 35290 + }, + { + "grad_norm": 0.08857709169387817, + "learning_rate": 7.693219319422714e-05, + "loss": 0.0099, + "step": 35300 + }, + { + "grad_norm": 0.07141581922769547, + "learning_rate": 7.69182606838466e-05, + "loss": 0.009, + "step": 35310 + }, + { + "grad_norm": 0.06296537071466446, + "learning_rate": 7.690432522972558e-05, + "loss": 0.0096, + "step": 35320 + }, + { + "grad_norm": 0.06802424788475037, + "learning_rate": 7.689038683338796e-05, + "loss": 0.011, + "step": 35330 + }, + { + "grad_norm": 0.07277482002973557, + "learning_rate": 7.687644549635808e-05, + "loss": 0.0103, + "step": 35340 + }, + { + "grad_norm": 0.0771317407488823, + "learning_rate": 7.686250122016053e-05, + "loss": 0.008, + "step": 35350 + }, + { + "grad_norm": 0.0666443258523941, + "learning_rate": 7.684855400632022e-05, + "loss": 0.0079, + "step": 35360 + }, + { + "grad_norm": 0.07040128111839294, + "learning_rate": 7.683460385636243e-05, + "loss": 0.009, + "step": 35370 + }, + { + "grad_norm": 0.07841245830059052, + "learning_rate": 7.68206507718127e-05, + "loss": 0.0086, + "step": 35380 + }, + { + "grad_norm": 0.088328056037426, + "learning_rate": 7.680669475419693e-05, + "loss": 0.0109, + "step": 35390 + }, + { + "grad_norm": 0.06551249325275421, + "learning_rate": 7.679273580504132e-05, + "loss": 0.0117, + "step": 35400 + }, + { + "grad_norm": 0.08413197100162506, + "learning_rate": 7.67787739258724e-05, + "loss": 0.0103, + "step": 35410 + }, + { + "grad_norm": 0.10181311517953873, + "learning_rate": 7.676480911821705e-05, + "loss": 0.0098, + "step": 35420 + }, + { + "grad_norm": 0.0892852395772934, + "learning_rate": 7.675084138360238e-05, + "loss": 0.0091, + "step": 35430 + }, + { + "grad_norm": 0.05235644429922104, + "learning_rate": 7.673687072355592e-05, + "loss": 0.0078, + "step": 35440 + }, + { + "grad_norm": 0.06826240569353104, + "learning_rate": 7.67228971396055e-05, + "loss": 0.0075, + "step": 35450 + }, + { + "grad_norm": 0.07313347607851028, + "learning_rate": 7.670892063327922e-05, + "loss": 0.0095, + "step": 35460 + }, + { + "grad_norm": 0.06150249019265175, + "learning_rate": 7.669494120610552e-05, + "loss": 0.0088, + "step": 35470 + }, + { + "grad_norm": 0.07128283381462097, + "learning_rate": 7.66809588596132e-05, + "loss": 0.0078, + "step": 35480 + }, + { + "grad_norm": 0.06066935509443283, + "learning_rate": 7.666697359533132e-05, + "loss": 0.0104, + "step": 35490 + }, + { + "grad_norm": 0.06698368489742279, + "learning_rate": 7.665298541478932e-05, + "loss": 0.0082, + "step": 35500 + }, + { + "grad_norm": 0.044936761260032654, + "learning_rate": 7.66389943195169e-05, + "loss": 0.0068, + "step": 35510 + }, + { + "grad_norm": 0.05450865626335144, + "learning_rate": 7.66250003110441e-05, + "loss": 0.0084, + "step": 35520 + }, + { + "grad_norm": 0.06863921880722046, + "learning_rate": 7.66110033909013e-05, + "loss": 0.0081, + "step": 35530 + }, + { + "grad_norm": 0.058299172669649124, + "learning_rate": 7.659700356061918e-05, + "loss": 0.0097, + "step": 35540 + }, + { + "grad_norm": 0.05997718870639801, + "learning_rate": 7.658300082172875e-05, + "loss": 0.0084, + "step": 35550 + }, + { + "grad_norm": 0.06682436913251877, + "learning_rate": 7.65689951757613e-05, + "loss": 0.0096, + "step": 35560 + }, + { + "grad_norm": 0.06445273756980896, + "learning_rate": 7.655498662424849e-05, + "loss": 0.0086, + "step": 35570 + }, + { + "grad_norm": 0.05792718380689621, + "learning_rate": 7.654097516872227e-05, + "loss": 0.0078, + "step": 35580 + }, + { + "grad_norm": 0.06081075966358185, + "learning_rate": 7.65269608107149e-05, + "loss": 0.0081, + "step": 35590 + }, + { + "grad_norm": 0.07141481339931488, + "learning_rate": 7.6512943551759e-05, + "loss": 0.0094, + "step": 35600 + }, + { + "grad_norm": 0.037523575127124786, + "learning_rate": 7.649892339338743e-05, + "loss": 0.0083, + "step": 35610 + }, + { + "grad_norm": 0.03491964936256409, + "learning_rate": 7.648490033713344e-05, + "loss": 0.009, + "step": 35620 + }, + { + "grad_norm": 0.09034712612628937, + "learning_rate": 7.647087438453058e-05, + "loss": 0.0089, + "step": 35630 + }, + { + "grad_norm": 0.0703291967511177, + "learning_rate": 7.64568455371127e-05, + "loss": 0.0105, + "step": 35640 + }, + { + "grad_norm": 0.06721556186676025, + "learning_rate": 7.644281379641396e-05, + "loss": 0.0081, + "step": 35650 + }, + { + "grad_norm": 0.062381893396377563, + "learning_rate": 7.642877916396887e-05, + "loss": 0.0117, + "step": 35660 + }, + { + "grad_norm": 0.05137443542480469, + "learning_rate": 7.641474164131221e-05, + "loss": 0.0077, + "step": 35670 + }, + { + "grad_norm": 0.04076661542057991, + "learning_rate": 7.640070122997913e-05, + "loss": 0.0066, + "step": 35680 + }, + { + "grad_norm": 0.051403120160102844, + "learning_rate": 7.638665793150505e-05, + "loss": 0.0091, + "step": 35690 + }, + { + "grad_norm": 0.05523626133799553, + "learning_rate": 7.637261174742574e-05, + "loss": 0.0075, + "step": 35700 + }, + { + "grad_norm": 0.06494210660457611, + "learning_rate": 7.635856267927725e-05, + "loss": 0.0064, + "step": 35710 + }, + { + "grad_norm": 0.06902151554822922, + "learning_rate": 7.634451072859597e-05, + "loss": 0.0085, + "step": 35720 + }, + { + "grad_norm": 0.04927060753107071, + "learning_rate": 7.633045589691863e-05, + "loss": 0.0073, + "step": 35730 + }, + { + "grad_norm": 0.06781480461359024, + "learning_rate": 7.63163981857822e-05, + "loss": 0.0111, + "step": 35740 + }, + { + "grad_norm": 0.07120037078857422, + "learning_rate": 7.630233759672403e-05, + "loss": 0.0104, + "step": 35750 + }, + { + "grad_norm": 0.05198446661233902, + "learning_rate": 7.628827413128177e-05, + "loss": 0.0097, + "step": 35760 + }, + { + "grad_norm": 0.07237011194229126, + "learning_rate": 7.627420779099338e-05, + "loss": 0.0096, + "step": 35770 + }, + { + "grad_norm": 0.07009875774383545, + "learning_rate": 7.626013857739711e-05, + "loss": 0.0103, + "step": 35780 + }, + { + "grad_norm": 0.06599503755569458, + "learning_rate": 7.624606649203158e-05, + "loss": 0.0072, + "step": 35790 + }, + { + "grad_norm": 0.08648604899644852, + "learning_rate": 7.623199153643569e-05, + "loss": 0.009, + "step": 35800 + }, + { + "grad_norm": 0.061808668076992035, + "learning_rate": 7.621791371214863e-05, + "loss": 0.0073, + "step": 35810 + }, + { + "grad_norm": 0.07340284436941147, + "learning_rate": 7.620383302070995e-05, + "loss": 0.0082, + "step": 35820 + }, + { + "grad_norm": 0.04696594923734665, + "learning_rate": 7.61897494636595e-05, + "loss": 0.0071, + "step": 35830 + }, + { + "grad_norm": 0.04372727870941162, + "learning_rate": 7.617566304253739e-05, + "loss": 0.0078, + "step": 35840 + }, + { + "grad_norm": 0.09912550449371338, + "learning_rate": 7.616157375888416e-05, + "loss": 0.0101, + "step": 35850 + }, + { + "grad_norm": 0.08202742040157318, + "learning_rate": 7.614748161424053e-05, + "loss": 0.0091, + "step": 35860 + }, + { + "grad_norm": 0.05003098398447037, + "learning_rate": 7.613338661014763e-05, + "loss": 0.0083, + "step": 35870 + }, + { + "grad_norm": 0.05969087406992912, + "learning_rate": 7.611928874814686e-05, + "loss": 0.0085, + "step": 35880 + }, + { + "grad_norm": 0.06750152260065079, + "learning_rate": 7.610518802977996e-05, + "loss": 0.0081, + "step": 35890 + }, + { + "grad_norm": 0.0668291449546814, + "learning_rate": 7.609108445658893e-05, + "loss": 0.007, + "step": 35900 + }, + { + "grad_norm": 0.07471954822540283, + "learning_rate": 7.607697803011612e-05, + "loss": 0.0093, + "step": 35910 + }, + { + "grad_norm": 0.07107339054346085, + "learning_rate": 7.606286875190421e-05, + "loss": 0.0118, + "step": 35920 + }, + { + "grad_norm": 0.08388003706932068, + "learning_rate": 7.604875662349617e-05, + "loss": 0.0099, + "step": 35930 + }, + { + "grad_norm": 0.07834774255752563, + "learning_rate": 7.603464164643525e-05, + "loss": 0.0113, + "step": 35940 + }, + { + "grad_norm": 0.0632089376449585, + "learning_rate": 7.602052382226507e-05, + "loss": 0.0097, + "step": 35950 + }, + { + "grad_norm": 0.059493083506822586, + "learning_rate": 7.600640315252954e-05, + "loss": 0.0086, + "step": 35960 + }, + { + "grad_norm": 0.04018396884202957, + "learning_rate": 7.599227963877284e-05, + "loss": 0.0073, + "step": 35970 + }, + { + "grad_norm": 0.051920972764492035, + "learning_rate": 7.597815328253954e-05, + "loss": 0.0074, + "step": 35980 + }, + { + "grad_norm": 0.035456541925668716, + "learning_rate": 7.596402408537443e-05, + "loss": 0.0084, + "step": 35990 + }, + { + "grad_norm": 0.05670539662241936, + "learning_rate": 7.59498920488227e-05, + "loss": 0.009, + "step": 36000 + }, + { + "grad_norm": 0.07128715515136719, + "learning_rate": 7.593575717442979e-05, + "loss": 0.0097, + "step": 36010 + }, + { + "grad_norm": 0.07802385091781616, + "learning_rate": 7.592161946374147e-05, + "loss": 0.0103, + "step": 36020 + }, + { + "grad_norm": 0.06465918570756912, + "learning_rate": 7.590747891830381e-05, + "loss": 0.0093, + "step": 36030 + }, + { + "grad_norm": 0.072089783847332, + "learning_rate": 7.58933355396632e-05, + "loss": 0.0102, + "step": 36040 + }, + { + "grad_norm": 0.0723298117518425, + "learning_rate": 7.587918932936636e-05, + "loss": 0.0085, + "step": 36050 + }, + { + "grad_norm": 0.06522191315889359, + "learning_rate": 7.586504028896028e-05, + "loss": 0.0097, + "step": 36060 + }, + { + "grad_norm": 0.0621720626950264, + "learning_rate": 7.585088841999228e-05, + "loss": 0.0081, + "step": 36070 + }, + { + "grad_norm": 0.05916761979460716, + "learning_rate": 7.583673372400999e-05, + "loss": 0.0094, + "step": 36080 + }, + { + "grad_norm": 0.07929997891187668, + "learning_rate": 7.582257620256134e-05, + "loss": 0.0092, + "step": 36090 + }, + { + "grad_norm": 0.09394922852516174, + "learning_rate": 7.580841585719458e-05, + "loss": 0.011, + "step": 36100 + }, + { + "grad_norm": 0.07169429957866669, + "learning_rate": 7.579425268945825e-05, + "loss": 0.0091, + "step": 36110 + }, + { + "grad_norm": 0.051440365612506866, + "learning_rate": 7.578008670090127e-05, + "loss": 0.0077, + "step": 36120 + }, + { + "grad_norm": 0.03685536980628967, + "learning_rate": 7.576591789307272e-05, + "loss": 0.0085, + "step": 36130 + }, + { + "grad_norm": 0.06761753559112549, + "learning_rate": 7.575174626752216e-05, + "loss": 0.0072, + "step": 36140 + }, + { + "grad_norm": 0.0668235570192337, + "learning_rate": 7.573757182579934e-05, + "loss": 0.0076, + "step": 36150 + }, + { + "grad_norm": 0.06984100490808487, + "learning_rate": 7.572339456945435e-05, + "loss": 0.0094, + "step": 36160 + }, + { + "grad_norm": 0.05704806372523308, + "learning_rate": 7.570921450003762e-05, + "loss": 0.01, + "step": 36170 + }, + { + "grad_norm": 0.06804841011762619, + "learning_rate": 7.569503161909983e-05, + "loss": 0.0075, + "step": 36180 + }, + { + "grad_norm": 0.04601828008890152, + "learning_rate": 7.568084592819202e-05, + "loss": 0.0076, + "step": 36190 + }, + { + "grad_norm": 0.05855371430516243, + "learning_rate": 7.566665742886551e-05, + "loss": 0.0094, + "step": 36200 + }, + { + "grad_norm": 0.04610889032483101, + "learning_rate": 7.565246612267194e-05, + "loss": 0.0089, + "step": 36210 + }, + { + "grad_norm": 0.05957828834652901, + "learning_rate": 7.563827201116325e-05, + "loss": 0.0094, + "step": 36220 + }, + { + "grad_norm": 0.0678081139922142, + "learning_rate": 7.562407509589164e-05, + "loss": 0.0093, + "step": 36230 + }, + { + "grad_norm": 0.06102342531085014, + "learning_rate": 7.560987537840973e-05, + "loss": 0.0096, + "step": 36240 + }, + { + "grad_norm": 0.057230640202760696, + "learning_rate": 7.559567286027036e-05, + "loss": 0.0106, + "step": 36250 + }, + { + "grad_norm": 0.05311381816864014, + "learning_rate": 7.558146754302668e-05, + "loss": 0.0071, + "step": 36260 + }, + { + "grad_norm": 0.0426969937980175, + "learning_rate": 7.556725942823217e-05, + "loss": 0.0085, + "step": 36270 + }, + { + "grad_norm": 0.046997688710689545, + "learning_rate": 7.555304851744061e-05, + "loss": 0.0085, + "step": 36280 + }, + { + "grad_norm": 0.05317792296409607, + "learning_rate": 7.553883481220608e-05, + "loss": 0.0071, + "step": 36290 + }, + { + "grad_norm": 0.054949499666690826, + "learning_rate": 7.552461831408298e-05, + "loss": 0.0079, + "step": 36300 + }, + { + "grad_norm": 0.06317616999149323, + "learning_rate": 7.551039902462599e-05, + "loss": 0.0069, + "step": 36310 + }, + { + "grad_norm": 0.054522883147001266, + "learning_rate": 7.549617694539011e-05, + "loss": 0.0077, + "step": 36320 + }, + { + "grad_norm": 0.0592828169465065, + "learning_rate": 7.548195207793066e-05, + "loss": 0.0079, + "step": 36330 + }, + { + "grad_norm": 0.08698629587888718, + "learning_rate": 7.546772442380323e-05, + "loss": 0.0095, + "step": 36340 + }, + { + "grad_norm": 0.05801745131611824, + "learning_rate": 7.545349398456376e-05, + "loss": 0.007, + "step": 36350 + }, + { + "grad_norm": 0.06559114158153534, + "learning_rate": 7.543926076176845e-05, + "loss": 0.0098, + "step": 36360 + }, + { + "grad_norm": 0.07777122408151627, + "learning_rate": 7.542502475697385e-05, + "loss": 0.0106, + "step": 36370 + }, + { + "grad_norm": 0.0602751187980175, + "learning_rate": 7.541078597173675e-05, + "loss": 0.0089, + "step": 36380 + }, + { + "grad_norm": 0.06804963946342468, + "learning_rate": 7.539654440761431e-05, + "loss": 0.0082, + "step": 36390 + }, + { + "grad_norm": 0.0834302082657814, + "learning_rate": 7.538230006616395e-05, + "loss": 0.0078, + "step": 36400 + }, + { + "grad_norm": 0.05620754137635231, + "learning_rate": 7.536805294894342e-05, + "loss": 0.0092, + "step": 36410 + }, + { + "grad_norm": 0.04591730609536171, + "learning_rate": 7.535380305751076e-05, + "loss": 0.01, + "step": 36420 + }, + { + "grad_norm": 0.05279707536101341, + "learning_rate": 7.533955039342431e-05, + "loss": 0.0085, + "step": 36430 + }, + { + "grad_norm": 0.07035575807094574, + "learning_rate": 7.532529495824274e-05, + "loss": 0.0105, + "step": 36440 + }, + { + "grad_norm": 0.07663489133119583, + "learning_rate": 7.531103675352497e-05, + "loss": 0.0082, + "step": 36450 + }, + { + "grad_norm": 0.07336420565843582, + "learning_rate": 7.529677578083029e-05, + "loss": 0.009, + "step": 36460 + }, + { + "grad_norm": 0.08517823368310928, + "learning_rate": 7.528251204171823e-05, + "loss": 0.007, + "step": 36470 + }, + { + "grad_norm": 0.07138539850711823, + "learning_rate": 7.526824553774867e-05, + "loss": 0.0077, + "step": 36480 + }, + { + "grad_norm": 0.051267050206661224, + "learning_rate": 7.525397627048178e-05, + "loss": 0.0073, + "step": 36490 + }, + { + "grad_norm": 0.0467180460691452, + "learning_rate": 7.523970424147802e-05, + "loss": 0.0079, + "step": 36500 + }, + { + "grad_norm": 0.06537896394729614, + "learning_rate": 7.522542945229813e-05, + "loss": 0.0083, + "step": 36510 + }, + { + "grad_norm": 0.06671801209449768, + "learning_rate": 7.521115190450321e-05, + "loss": 0.0095, + "step": 36520 + }, + { + "grad_norm": 0.0889543667435646, + "learning_rate": 7.519687159965462e-05, + "loss": 0.0091, + "step": 36530 + }, + { + "grad_norm": 0.060373302549123764, + "learning_rate": 7.518258853931403e-05, + "loss": 0.0075, + "step": 36540 + }, + { + "grad_norm": 0.07655898481607437, + "learning_rate": 7.516830272504342e-05, + "loss": 0.0082, + "step": 36550 + }, + { + "grad_norm": 0.04545805603265762, + "learning_rate": 7.515401415840509e-05, + "loss": 0.0071, + "step": 36560 + }, + { + "grad_norm": 0.056131474673748016, + "learning_rate": 7.513972284096155e-05, + "loss": 0.0096, + "step": 36570 + }, + { + "grad_norm": 0.06390322744846344, + "learning_rate": 7.512542877427574e-05, + "loss": 0.0073, + "step": 36580 + }, + { + "grad_norm": 0.043434590101242065, + "learning_rate": 7.51111319599108e-05, + "loss": 0.0076, + "step": 36590 + }, + { + "grad_norm": 0.0462808720767498, + "learning_rate": 7.509683239943024e-05, + "loss": 0.0071, + "step": 36600 + }, + { + "grad_norm": 0.04330957308411598, + "learning_rate": 7.50825300943978e-05, + "loss": 0.0104, + "step": 36610 + }, + { + "grad_norm": 0.05806614086031914, + "learning_rate": 7.50682250463776e-05, + "loss": 0.0096, + "step": 36620 + }, + { + "grad_norm": 0.05413532629609108, + "learning_rate": 7.5053917256934e-05, + "loss": 0.0081, + "step": 36630 + }, + { + "grad_norm": 0.05438511446118355, + "learning_rate": 7.503960672763166e-05, + "loss": 0.0071, + "step": 36640 + }, + { + "grad_norm": 0.033026959747076035, + "learning_rate": 7.502529346003559e-05, + "loss": 0.0081, + "step": 36650 + }, + { + "grad_norm": 0.04885672777891159, + "learning_rate": 7.501097745571106e-05, + "loss": 0.007, + "step": 36660 + }, + { + "grad_norm": 0.049274057149887085, + "learning_rate": 7.499665871622361e-05, + "loss": 0.0075, + "step": 36670 + }, + { + "grad_norm": 0.06188281252980232, + "learning_rate": 7.498233724313917e-05, + "loss": 0.0077, + "step": 36680 + }, + { + "grad_norm": 0.05501294881105423, + "learning_rate": 7.496801303802387e-05, + "loss": 0.0071, + "step": 36690 + }, + { + "grad_norm": 0.04718391224741936, + "learning_rate": 7.495368610244423e-05, + "loss": 0.0086, + "step": 36700 + }, + { + "grad_norm": 0.047094833105802536, + "learning_rate": 7.493935643796697e-05, + "loss": 0.0079, + "step": 36710 + }, + { + "grad_norm": 0.08486564457416534, + "learning_rate": 7.49250240461592e-05, + "loss": 0.0099, + "step": 36720 + }, + { + "grad_norm": 0.08308353275060654, + "learning_rate": 7.491068892858826e-05, + "loss": 0.0084, + "step": 36730 + }, + { + "grad_norm": 0.10033369809389114, + "learning_rate": 7.489635108682184e-05, + "loss": 0.0098, + "step": 36740 + }, + { + "grad_norm": 0.056798119097948074, + "learning_rate": 7.48820105224279e-05, + "loss": 0.0087, + "step": 36750 + }, + { + "grad_norm": 0.06212857365608215, + "learning_rate": 7.48676672369747e-05, + "loss": 0.0082, + "step": 36760 + }, + { + "grad_norm": 0.06273670494556427, + "learning_rate": 7.485332123203079e-05, + "loss": 0.0077, + "step": 36770 + }, + { + "grad_norm": 0.06916702538728714, + "learning_rate": 7.483897250916504e-05, + "loss": 0.0108, + "step": 36780 + }, + { + "grad_norm": 0.06775794178247452, + "learning_rate": 7.48246210699466e-05, + "loss": 0.0101, + "step": 36790 + }, + { + "grad_norm": 0.05539553984999657, + "learning_rate": 7.481026691594492e-05, + "loss": 0.0092, + "step": 36800 + }, + { + "grad_norm": 0.05564363673329353, + "learning_rate": 7.479591004872975e-05, + "loss": 0.0093, + "step": 36810 + }, + { + "grad_norm": 0.05807704105973244, + "learning_rate": 7.478155046987114e-05, + "loss": 0.007, + "step": 36820 + }, + { + "grad_norm": 0.056353554129600525, + "learning_rate": 7.476718818093944e-05, + "loss": 0.0105, + "step": 36830 + }, + { + "grad_norm": 0.048213012516498566, + "learning_rate": 7.475282318350527e-05, + "loss": 0.0099, + "step": 36840 + }, + { + "grad_norm": 0.04193131625652313, + "learning_rate": 7.473845547913957e-05, + "loss": 0.0075, + "step": 36850 + }, + { + "grad_norm": 0.05329926684498787, + "learning_rate": 7.472408506941357e-05, + "loss": 0.0079, + "step": 36860 + }, + { + "grad_norm": 0.049795348197221756, + "learning_rate": 7.470971195589881e-05, + "loss": 0.0073, + "step": 36870 + }, + { + "grad_norm": 0.04652847349643707, + "learning_rate": 7.46953361401671e-05, + "loss": 0.0063, + "step": 36880 + }, + { + "grad_norm": 0.04490065202116966, + "learning_rate": 7.468095762379055e-05, + "loss": 0.0093, + "step": 36890 + }, + { + "grad_norm": 0.06678474694490433, + "learning_rate": 7.466657640834158e-05, + "loss": 0.0075, + "step": 36900 + }, + { + "grad_norm": 0.05424720421433449, + "learning_rate": 7.46521924953929e-05, + "loss": 0.0064, + "step": 36910 + }, + { + "grad_norm": 0.04709966853260994, + "learning_rate": 7.463780588651752e-05, + "loss": 0.0086, + "step": 36920 + }, + { + "grad_norm": 0.05787397921085358, + "learning_rate": 7.462341658328873e-05, + "loss": 0.0086, + "step": 36930 + }, + { + "grad_norm": 0.06593353301286697, + "learning_rate": 7.460902458728012e-05, + "loss": 0.0103, + "step": 36940 + }, + { + "grad_norm": 0.04826364293694496, + "learning_rate": 7.459462990006558e-05, + "loss": 0.0095, + "step": 36950 + }, + { + "grad_norm": 0.09099572151899338, + "learning_rate": 7.458023252321929e-05, + "loss": 0.0083, + "step": 36960 + }, + { + "grad_norm": 0.06282197684049606, + "learning_rate": 7.456583245831573e-05, + "loss": 0.0078, + "step": 36970 + }, + { + "grad_norm": 0.06289525330066681, + "learning_rate": 7.455142970692968e-05, + "loss": 0.007, + "step": 36980 + }, + { + "grad_norm": 0.08000416308641434, + "learning_rate": 7.453702427063615e-05, + "loss": 0.0091, + "step": 36990 + }, + { + "grad_norm": 0.0487174466252327, + "learning_rate": 7.452261615101057e-05, + "loss": 0.0081, + "step": 37000 + }, + { + "grad_norm": 0.06304118782281876, + "learning_rate": 7.450820534962856e-05, + "loss": 0.0078, + "step": 37010 + }, + { + "grad_norm": 0.06691629439592361, + "learning_rate": 7.449379186806604e-05, + "loss": 0.0082, + "step": 37020 + }, + { + "grad_norm": 0.050866544246673584, + "learning_rate": 7.44793757078993e-05, + "loss": 0.0074, + "step": 37030 + }, + { + "grad_norm": 0.0612175315618515, + "learning_rate": 7.44649568707048e-05, + "loss": 0.0105, + "step": 37040 + }, + { + "grad_norm": 0.07326172292232513, + "learning_rate": 7.445053535805942e-05, + "loss": 0.0086, + "step": 37050 + }, + { + "grad_norm": 0.0440250001847744, + "learning_rate": 7.443611117154024e-05, + "loss": 0.008, + "step": 37060 + }, + { + "grad_norm": 0.07383135706186295, + "learning_rate": 7.442168431272469e-05, + "loss": 0.0082, + "step": 37070 + }, + { + "grad_norm": 0.059708449989557266, + "learning_rate": 7.440725478319044e-05, + "loss": 0.0076, + "step": 37080 + }, + { + "grad_norm": 0.052430409938097, + "learning_rate": 7.439282258451553e-05, + "loss": 0.0082, + "step": 37090 + }, + { + "grad_norm": 0.07875518500804901, + "learning_rate": 7.43783877182782e-05, + "loss": 0.0093, + "step": 37100 + }, + { + "grad_norm": 0.05658772215247154, + "learning_rate": 7.436395018605707e-05, + "loss": 0.0078, + "step": 37110 + }, + { + "grad_norm": 0.06753315031528473, + "learning_rate": 7.434950998943094e-05, + "loss": 0.0095, + "step": 37120 + }, + { + "grad_norm": 0.04919448122382164, + "learning_rate": 7.433506712997902e-05, + "loss": 0.0072, + "step": 37130 + }, + { + "grad_norm": 0.04955650120973587, + "learning_rate": 7.432062160928077e-05, + "loss": 0.0091, + "step": 37140 + }, + { + "grad_norm": 0.06009000539779663, + "learning_rate": 7.430617342891588e-05, + "loss": 0.0087, + "step": 37150 + }, + { + "grad_norm": 0.036332905292510986, + "learning_rate": 7.429172259046442e-05, + "loss": 0.0088, + "step": 37160 + }, + { + "grad_norm": 0.04983198642730713, + "learning_rate": 7.42772690955067e-05, + "loss": 0.0087, + "step": 37170 + }, + { + "grad_norm": 0.09888037294149399, + "learning_rate": 7.426281294562332e-05, + "loss": 0.011, + "step": 37180 + }, + { + "grad_norm": 0.04568241536617279, + "learning_rate": 7.424835414239519e-05, + "loss": 0.0087, + "step": 37190 + }, + { + "grad_norm": 0.04733895882964134, + "learning_rate": 7.423389268740352e-05, + "loss": 0.0079, + "step": 37200 + }, + { + "grad_norm": 0.05210587754845619, + "learning_rate": 7.42194285822298e-05, + "loss": 0.0078, + "step": 37210 + }, + { + "grad_norm": 0.05745123699307442, + "learning_rate": 7.420496182845576e-05, + "loss": 0.0063, + "step": 37220 + }, + { + "grad_norm": 0.08403569459915161, + "learning_rate": 7.419049242766348e-05, + "loss": 0.0111, + "step": 37230 + }, + { + "grad_norm": 0.061250243335962296, + "learning_rate": 7.417602038143533e-05, + "loss": 0.0084, + "step": 37240 + }, + { + "grad_norm": 0.06368165463209152, + "learning_rate": 7.416154569135393e-05, + "loss": 0.0079, + "step": 37250 + }, + { + "grad_norm": 0.053756337612867355, + "learning_rate": 7.414706835900222e-05, + "loss": 0.0079, + "step": 37260 + }, + { + "grad_norm": 0.05358142778277397, + "learning_rate": 7.413258838596345e-05, + "loss": 0.0078, + "step": 37270 + }, + { + "grad_norm": 0.0682104229927063, + "learning_rate": 7.411810577382106e-05, + "loss": 0.009, + "step": 37280 + }, + { + "grad_norm": 0.04732954874634743, + "learning_rate": 7.410362052415889e-05, + "loss": 0.0074, + "step": 37290 + }, + { + "grad_norm": 0.07052797824144363, + "learning_rate": 7.408913263856102e-05, + "loss": 0.008, + "step": 37300 + }, + { + "grad_norm": 0.06084561347961426, + "learning_rate": 7.407464211861183e-05, + "loss": 0.0086, + "step": 37310 + }, + { + "grad_norm": 0.07967741787433624, + "learning_rate": 7.406014896589597e-05, + "loss": 0.0082, + "step": 37320 + }, + { + "grad_norm": 0.0633559599518776, + "learning_rate": 7.404565318199838e-05, + "loss": 0.0083, + "step": 37330 + }, + { + "grad_norm": 0.06882713735103607, + "learning_rate": 7.403115476850432e-05, + "loss": 0.0091, + "step": 37340 + }, + { + "grad_norm": 0.04958261176943779, + "learning_rate": 7.401665372699932e-05, + "loss": 0.0074, + "step": 37350 + }, + { + "grad_norm": 0.04861575737595558, + "learning_rate": 7.400215005906916e-05, + "loss": 0.0072, + "step": 37360 + }, + { + "grad_norm": 0.06261317431926727, + "learning_rate": 7.398764376629996e-05, + "loss": 0.0086, + "step": 37370 + }, + { + "grad_norm": 0.07773038744926453, + "learning_rate": 7.39731348502781e-05, + "loss": 0.0091, + "step": 37380 + }, + { + "grad_norm": 0.0699739158153534, + "learning_rate": 7.395862331259028e-05, + "loss": 0.0072, + "step": 37390 + }, + { + "grad_norm": 0.07774220407009125, + "learning_rate": 7.394410915482341e-05, + "loss": 0.0096, + "step": 37400 + }, + { + "grad_norm": 0.055295590311288834, + "learning_rate": 7.392959237856476e-05, + "loss": 0.0081, + "step": 37410 + }, + { + "grad_norm": 0.04672793671488762, + "learning_rate": 7.391507298540187e-05, + "loss": 0.0101, + "step": 37420 + }, + { + "grad_norm": 0.08668512105941772, + "learning_rate": 7.390055097692257e-05, + "loss": 0.0095, + "step": 37430 + }, + { + "grad_norm": 0.0797509104013443, + "learning_rate": 7.388602635471494e-05, + "loss": 0.0081, + "step": 37440 + }, + { + "grad_norm": 0.06537000089883804, + "learning_rate": 7.387149912036738e-05, + "loss": 0.0103, + "step": 37450 + }, + { + "grad_norm": 0.08333057165145874, + "learning_rate": 7.385696927546854e-05, + "loss": 0.0087, + "step": 37460 + }, + { + "grad_norm": 0.07838740199804306, + "learning_rate": 7.384243682160743e-05, + "loss": 0.0071, + "step": 37470 + }, + { + "grad_norm": 0.047257550060749054, + "learning_rate": 7.382790176037329e-05, + "loss": 0.0081, + "step": 37480 + }, + { + "grad_norm": 0.05523942410945892, + "learning_rate": 7.381336409335562e-05, + "loss": 0.0073, + "step": 37490 + }, + { + "grad_norm": 0.0519549660384655, + "learning_rate": 7.379882382214426e-05, + "loss": 0.0078, + "step": 37500 + }, + { + "grad_norm": 0.04452618211507797, + "learning_rate": 7.378428094832931e-05, + "loss": 0.0089, + "step": 37510 + }, + { + "grad_norm": 0.07688479125499725, + "learning_rate": 7.376973547350114e-05, + "loss": 0.0106, + "step": 37520 + }, + { + "grad_norm": 0.07083530724048615, + "learning_rate": 7.375518739925046e-05, + "loss": 0.0092, + "step": 37530 + }, + { + "grad_norm": 0.06414107978343964, + "learning_rate": 7.374063672716818e-05, + "loss": 0.0095, + "step": 37540 + }, + { + "grad_norm": 0.07267563045024872, + "learning_rate": 7.372608345884558e-05, + "loss": 0.0104, + "step": 37550 + }, + { + "grad_norm": 0.05553022772073746, + "learning_rate": 7.371152759587415e-05, + "loss": 0.0079, + "step": 37560 + }, + { + "grad_norm": 0.0633024126291275, + "learning_rate": 7.369696913984572e-05, + "loss": 0.0075, + "step": 37570 + }, + { + "grad_norm": 0.06448372453451157, + "learning_rate": 7.368240809235237e-05, + "loss": 0.0072, + "step": 37580 + }, + { + "grad_norm": 0.08850754052400589, + "learning_rate": 7.366784445498649e-05, + "loss": 0.0083, + "step": 37590 + }, + { + "grad_norm": 0.0686020627617836, + "learning_rate": 7.36532782293407e-05, + "loss": 0.0101, + "step": 37600 + }, + { + "grad_norm": 0.07379226386547089, + "learning_rate": 7.363870941700797e-05, + "loss": 0.0085, + "step": 37610 + }, + { + "grad_norm": 0.06727227568626404, + "learning_rate": 7.362413801958152e-05, + "loss": 0.0079, + "step": 37620 + }, + { + "grad_norm": 0.07503978163003922, + "learning_rate": 7.360956403865486e-05, + "loss": 0.0073, + "step": 37630 + }, + { + "grad_norm": 0.0740475207567215, + "learning_rate": 7.359498747582177e-05, + "loss": 0.0061, + "step": 37640 + }, + { + "grad_norm": 0.05657259747385979, + "learning_rate": 7.358040833267634e-05, + "loss": 0.0082, + "step": 37650 + }, + { + "grad_norm": 0.04327058419585228, + "learning_rate": 7.356582661081288e-05, + "loss": 0.0088, + "step": 37660 + }, + { + "grad_norm": 0.04669550806283951, + "learning_rate": 7.355124231182607e-05, + "loss": 0.0076, + "step": 37670 + }, + { + "grad_norm": 0.05353321135044098, + "learning_rate": 7.353665543731083e-05, + "loss": 0.0069, + "step": 37680 + }, + { + "grad_norm": 0.04219420999288559, + "learning_rate": 7.352206598886229e-05, + "loss": 0.0076, + "step": 37690 + }, + { + "grad_norm": 0.06561043113470078, + "learning_rate": 7.350747396807601e-05, + "loss": 0.0078, + "step": 37700 + }, + { + "grad_norm": 0.06669773906469345, + "learning_rate": 7.349287937654772e-05, + "loss": 0.0091, + "step": 37710 + }, + { + "grad_norm": 0.049489427357912064, + "learning_rate": 7.347828221587345e-05, + "loss": 0.008, + "step": 37720 + }, + { + "grad_norm": 0.044736348092556, + "learning_rate": 7.346368248764955e-05, + "loss": 0.0086, + "step": 37730 + }, + { + "grad_norm": 0.04585606977343559, + "learning_rate": 7.34490801934726e-05, + "loss": 0.0094, + "step": 37740 + }, + { + "grad_norm": 0.04603338986635208, + "learning_rate": 7.343447533493947e-05, + "loss": 0.0089, + "step": 37750 + }, + { + "grad_norm": 0.03835167735815048, + "learning_rate": 7.341986791364739e-05, + "loss": 0.0078, + "step": 37760 + }, + { + "grad_norm": 0.05371613800525665, + "learning_rate": 7.340525793119374e-05, + "loss": 0.0089, + "step": 37770 + }, + { + "grad_norm": 0.07578379660844803, + "learning_rate": 7.33906453891763e-05, + "loss": 0.0074, + "step": 37780 + }, + { + "grad_norm": 0.05631643161177635, + "learning_rate": 7.337603028919303e-05, + "loss": 0.008, + "step": 37790 + }, + { + "grad_norm": 0.08026324212551117, + "learning_rate": 7.336141263284225e-05, + "loss": 0.0083, + "step": 37800 + }, + { + "grad_norm": 0.07097702473402023, + "learning_rate": 7.334679242172249e-05, + "loss": 0.008, + "step": 37810 + }, + { + "grad_norm": 0.07012926787137985, + "learning_rate": 7.33321696574326e-05, + "loss": 0.0096, + "step": 37820 + }, + { + "grad_norm": 0.059353090822696686, + "learning_rate": 7.331754434157174e-05, + "loss": 0.007, + "step": 37830 + }, + { + "grad_norm": 0.04101753979921341, + "learning_rate": 7.330291647573928e-05, + "loss": 0.0073, + "step": 37840 + }, + { + "grad_norm": 0.036038197576999664, + "learning_rate": 7.32882860615349e-05, + "loss": 0.0064, + "step": 37850 + }, + { + "grad_norm": 0.051442742347717285, + "learning_rate": 7.327365310055858e-05, + "loss": 0.0078, + "step": 37860 + }, + { + "grad_norm": 0.05568506941199303, + "learning_rate": 7.325901759441055e-05, + "loss": 0.0085, + "step": 37870 + }, + { + "grad_norm": 0.09177827835083008, + "learning_rate": 7.324437954469132e-05, + "loss": 0.0071, + "step": 37880 + }, + { + "grad_norm": 0.0660829022526741, + "learning_rate": 7.32297389530017e-05, + "loss": 0.0083, + "step": 37890 + }, + { + "grad_norm": 0.05032157152891159, + "learning_rate": 7.321509582094274e-05, + "loss": 0.0086, + "step": 37900 + }, + { + "grad_norm": 0.03930800035595894, + "learning_rate": 7.320045015011582e-05, + "loss": 0.0081, + "step": 37910 + }, + { + "grad_norm": 0.04787285253405571, + "learning_rate": 7.318580194212253e-05, + "loss": 0.0088, + "step": 37920 + }, + { + "grad_norm": 0.06280340254306793, + "learning_rate": 7.317115119856483e-05, + "loss": 0.0081, + "step": 37930 + }, + { + "grad_norm": 0.06163405627012253, + "learning_rate": 7.315649792104485e-05, + "loss": 0.0089, + "step": 37940 + }, + { + "grad_norm": 0.06455356627702713, + "learning_rate": 7.314184211116507e-05, + "loss": 0.0097, + "step": 37950 + }, + { + "grad_norm": 0.05602305755019188, + "learning_rate": 7.312718377052823e-05, + "loss": 0.0066, + "step": 37960 + }, + { + "grad_norm": 0.0725470557808876, + "learning_rate": 7.311252290073737e-05, + "loss": 0.008, + "step": 37970 + }, + { + "grad_norm": 0.05082955211400986, + "learning_rate": 7.309785950339572e-05, + "loss": 0.0077, + "step": 37980 + }, + { + "grad_norm": 0.05791261047124863, + "learning_rate": 7.308319358010692e-05, + "loss": 0.0089, + "step": 37990 + }, + { + "grad_norm": 0.05803631991147995, + "learning_rate": 7.306852513247474e-05, + "loss": 0.0079, + "step": 38000 + }, + { + "grad_norm": 0.07118013501167297, + "learning_rate": 7.305385416210336e-05, + "loss": 0.01, + "step": 38010 + }, + { + "grad_norm": 0.049638815224170685, + "learning_rate": 7.303918067059711e-05, + "loss": 0.0074, + "step": 38020 + }, + { + "grad_norm": 0.04764178395271301, + "learning_rate": 7.302450465956075e-05, + "loss": 0.0077, + "step": 38030 + }, + { + "grad_norm": 0.06959004700183868, + "learning_rate": 7.300982613059914e-05, + "loss": 0.0094, + "step": 38040 + }, + { + "grad_norm": 0.05992555245757103, + "learning_rate": 7.299514508531757e-05, + "loss": 0.0075, + "step": 38050 + }, + { + "grad_norm": 0.04270455241203308, + "learning_rate": 7.298046152532148e-05, + "loss": 0.0074, + "step": 38060 + }, + { + "grad_norm": 0.08466912060976028, + "learning_rate": 7.296577545221668e-05, + "loss": 0.0112, + "step": 38070 + }, + { + "grad_norm": 0.06820330768823624, + "learning_rate": 7.29510868676092e-05, + "loss": 0.0089, + "step": 38080 + }, + { + "grad_norm": 0.05868639051914215, + "learning_rate": 7.293639577310538e-05, + "loss": 0.0097, + "step": 38090 + }, + { + "grad_norm": 0.044817693531513214, + "learning_rate": 7.292170217031179e-05, + "loss": 0.0091, + "step": 38100 + }, + { + "grad_norm": 0.0613132119178772, + "learning_rate": 7.290700606083532e-05, + "loss": 0.0079, + "step": 38110 + }, + { + "grad_norm": 0.06423576176166534, + "learning_rate": 7.28923074462831e-05, + "loss": 0.0088, + "step": 38120 + }, + { + "grad_norm": 0.05821511894464493, + "learning_rate": 7.287760632826255e-05, + "loss": 0.008, + "step": 38130 + }, + { + "grad_norm": 0.04636881500482559, + "learning_rate": 7.286290270838139e-05, + "loss": 0.0086, + "step": 38140 + }, + { + "grad_norm": 0.0464785061776638, + "learning_rate": 7.284819658824756e-05, + "loss": 0.0093, + "step": 38150 + }, + { + "grad_norm": 0.08402376621961594, + "learning_rate": 7.283348796946929e-05, + "loss": 0.0079, + "step": 38160 + }, + { + "grad_norm": 0.06257039308547974, + "learning_rate": 7.28187768536551e-05, + "loss": 0.0074, + "step": 38170 + }, + { + "grad_norm": 0.049627114087343216, + "learning_rate": 7.280406324241379e-05, + "loss": 0.0083, + "step": 38180 + }, + { + "grad_norm": 0.06368131935596466, + "learning_rate": 7.278934713735441e-05, + "loss": 0.0096, + "step": 38190 + }, + { + "grad_norm": 0.045313045382499695, + "learning_rate": 7.277462854008629e-05, + "loss": 0.0067, + "step": 38200 + }, + { + "grad_norm": 0.06145802140235901, + "learning_rate": 7.275990745221904e-05, + "loss": 0.0077, + "step": 38210 + }, + { + "grad_norm": 0.054420653730630875, + "learning_rate": 7.274518387536252e-05, + "loss": 0.0094, + "step": 38220 + }, + { + "grad_norm": 0.05962321162223816, + "learning_rate": 7.273045781112689e-05, + "loss": 0.008, + "step": 38230 + }, + { + "grad_norm": 0.058246079832315445, + "learning_rate": 7.271572926112257e-05, + "loss": 0.0074, + "step": 38240 + }, + { + "grad_norm": 0.05725992098450661, + "learning_rate": 7.270099822696024e-05, + "loss": 0.0091, + "step": 38250 + }, + { + "grad_norm": 0.05191377177834511, + "learning_rate": 7.26862647102509e-05, + "loss": 0.0077, + "step": 38260 + }, + { + "grad_norm": 0.07474447041749954, + "learning_rate": 7.267152871260573e-05, + "loss": 0.008, + "step": 38270 + }, + { + "grad_norm": 0.05392444506287575, + "learning_rate": 7.265679023563629e-05, + "loss": 0.0065, + "step": 38280 + }, + { + "grad_norm": 0.04438793659210205, + "learning_rate": 7.264204928095433e-05, + "loss": 0.0079, + "step": 38290 + }, + { + "grad_norm": 0.05779464542865753, + "learning_rate": 7.262730585017188e-05, + "loss": 0.0082, + "step": 38300 + }, + { + "grad_norm": 0.07221371680498123, + "learning_rate": 7.261255994490131e-05, + "loss": 0.0089, + "step": 38310 + }, + { + "grad_norm": 0.07767545431852341, + "learning_rate": 7.259781156675517e-05, + "loss": 0.0098, + "step": 38320 + }, + { + "grad_norm": 0.07467276602983475, + "learning_rate": 7.258306071734633e-05, + "loss": 0.0081, + "step": 38330 + }, + { + "grad_norm": 0.06225668638944626, + "learning_rate": 7.256830739828792e-05, + "loss": 0.0089, + "step": 38340 + }, + { + "grad_norm": 0.07995936274528503, + "learning_rate": 7.255355161119336e-05, + "loss": 0.0088, + "step": 38350 + }, + { + "grad_norm": 0.09410261362791061, + "learning_rate": 7.253879335767628e-05, + "loss": 0.0082, + "step": 38360 + }, + { + "grad_norm": 0.07926265150308609, + "learning_rate": 7.252403263935067e-05, + "loss": 0.0087, + "step": 38370 + }, + { + "grad_norm": 0.06265964359045029, + "learning_rate": 7.25092694578307e-05, + "loss": 0.0085, + "step": 38380 + }, + { + "grad_norm": 0.05881159007549286, + "learning_rate": 7.249450381473087e-05, + "loss": 0.0076, + "step": 38390 + }, + { + "grad_norm": 0.07616504281759262, + "learning_rate": 7.247973571166593e-05, + "loss": 0.0085, + "step": 38400 + }, + { + "grad_norm": 0.046434637159109116, + "learning_rate": 7.246496515025089e-05, + "loss": 0.0072, + "step": 38410 + }, + { + "grad_norm": 0.06709200143814087, + "learning_rate": 7.245019213210101e-05, + "loss": 0.0081, + "step": 38420 + }, + { + "grad_norm": 0.05221172049641609, + "learning_rate": 7.24354166588319e-05, + "loss": 0.0077, + "step": 38430 + }, + { + "grad_norm": 0.034318309277296066, + "learning_rate": 7.242063873205935e-05, + "loss": 0.0065, + "step": 38440 + }, + { + "grad_norm": 0.05585482344031334, + "learning_rate": 7.240585835339946e-05, + "loss": 0.0088, + "step": 38450 + }, + { + "grad_norm": 0.04951586201786995, + "learning_rate": 7.239107552446857e-05, + "loss": 0.0067, + "step": 38460 + }, + { + "grad_norm": 0.05238928645849228, + "learning_rate": 7.237629024688334e-05, + "loss": 0.0067, + "step": 38470 + }, + { + "grad_norm": 0.07152634859085083, + "learning_rate": 7.236150252226064e-05, + "loss": 0.0086, + "step": 38480 + }, + { + "grad_norm": 0.07100438326597214, + "learning_rate": 7.234671235221765e-05, + "loss": 0.0085, + "step": 38490 + }, + { + "grad_norm": 0.06211738660931587, + "learning_rate": 7.233191973837179e-05, + "loss": 0.0074, + "step": 38500 + }, + { + "grad_norm": 0.08095794916152954, + "learning_rate": 7.231712468234078e-05, + "loss": 0.0087, + "step": 38510 + }, + { + "grad_norm": 0.07000883668661118, + "learning_rate": 7.230232718574254e-05, + "loss": 0.0088, + "step": 38520 + }, + { + "grad_norm": 0.050122711807489395, + "learning_rate": 7.228752725019535e-05, + "loss": 0.0074, + "step": 38530 + }, + { + "grad_norm": 0.04573208838701248, + "learning_rate": 7.227272487731769e-05, + "loss": 0.0093, + "step": 38540 + }, + { + "grad_norm": 0.06264923512935638, + "learning_rate": 7.225792006872831e-05, + "loss": 0.0083, + "step": 38550 + }, + { + "grad_norm": 0.046957533806562424, + "learning_rate": 7.224311282604628e-05, + "loss": 0.0068, + "step": 38560 + }, + { + "grad_norm": 0.05426778644323349, + "learning_rate": 7.222830315089085e-05, + "loss": 0.0078, + "step": 38570 + }, + { + "grad_norm": 0.06567785888910294, + "learning_rate": 7.22134910448816e-05, + "loss": 0.0092, + "step": 38580 + }, + { + "grad_norm": 0.044448383152484894, + "learning_rate": 7.219867650963839e-05, + "loss": 0.0081, + "step": 38590 + }, + { + "grad_norm": 0.06029331684112549, + "learning_rate": 7.21838595467813e-05, + "loss": 0.0082, + "step": 38600 + }, + { + "grad_norm": 0.05989250913262367, + "learning_rate": 7.216904015793067e-05, + "loss": 0.0079, + "step": 38610 + }, + { + "grad_norm": 0.051526471972465515, + "learning_rate": 7.215421834470713e-05, + "loss": 0.0075, + "step": 38620 + }, + { + "grad_norm": 0.07105184346437454, + "learning_rate": 7.213939410873158e-05, + "loss": 0.0094, + "step": 38630 + }, + { + "grad_norm": 0.06634173542261124, + "learning_rate": 7.212456745162519e-05, + "loss": 0.0086, + "step": 38640 + }, + { + "grad_norm": 0.05257374793291092, + "learning_rate": 7.210973837500937e-05, + "loss": 0.0086, + "step": 38650 + }, + { + "grad_norm": 0.038741327822208405, + "learning_rate": 7.209490688050578e-05, + "loss": 0.0083, + "step": 38660 + }, + { + "grad_norm": 0.03524458408355713, + "learning_rate": 7.208007296973641e-05, + "loss": 0.0072, + "step": 38670 + }, + { + "grad_norm": 0.047198645770549774, + "learning_rate": 7.206523664432345e-05, + "loss": 0.0074, + "step": 38680 + }, + { + "grad_norm": 0.05224379152059555, + "learning_rate": 7.205039790588939e-05, + "loss": 0.0076, + "step": 38690 + }, + { + "grad_norm": 0.05939783900976181, + "learning_rate": 7.203555675605697e-05, + "loss": 0.008, + "step": 38700 + }, + { + "grad_norm": 0.05286114662885666, + "learning_rate": 7.202071319644917e-05, + "loss": 0.0078, + "step": 38710 + }, + { + "grad_norm": 0.045967891812324524, + "learning_rate": 7.200586722868932e-05, + "loss": 0.0066, + "step": 38720 + }, + { + "grad_norm": 0.06157560274004936, + "learning_rate": 7.199101885440088e-05, + "loss": 0.0083, + "step": 38730 + }, + { + "grad_norm": 0.06022723764181137, + "learning_rate": 7.197616807520771e-05, + "loss": 0.0074, + "step": 38740 + }, + { + "grad_norm": 0.07427223026752472, + "learning_rate": 7.196131489273381e-05, + "loss": 0.0077, + "step": 38750 + }, + { + "grad_norm": 0.10644561797380447, + "learning_rate": 7.194645930860355e-05, + "loss": 0.0104, + "step": 38760 + }, + { + "grad_norm": 0.10509537905454636, + "learning_rate": 7.19316013244415e-05, + "loss": 0.0069, + "step": 38770 + }, + { + "grad_norm": 0.0632430911064148, + "learning_rate": 7.191674094187248e-05, + "loss": 0.0084, + "step": 38780 + }, + { + "grad_norm": 0.055250171571969986, + "learning_rate": 7.190187816252165e-05, + "loss": 0.0078, + "step": 38790 + }, + { + "grad_norm": 0.043441105633974075, + "learning_rate": 7.188701298801435e-05, + "loss": 0.0093, + "step": 38800 + }, + { + "grad_norm": 0.04650827869772911, + "learning_rate": 7.18721454199762e-05, + "loss": 0.009, + "step": 38810 + }, + { + "grad_norm": 0.09134817868471146, + "learning_rate": 7.185727546003312e-05, + "loss": 0.0083, + "step": 38820 + }, + { + "grad_norm": 0.05303478613495827, + "learning_rate": 7.184240310981126e-05, + "loss": 0.0069, + "step": 38830 + }, + { + "grad_norm": 0.062430936843156815, + "learning_rate": 7.182752837093704e-05, + "loss": 0.0095, + "step": 38840 + }, + { + "grad_norm": 0.057860106229782104, + "learning_rate": 7.181265124503711e-05, + "loss": 0.009, + "step": 38850 + }, + { + "grad_norm": 0.11383724957704544, + "learning_rate": 7.179777173373847e-05, + "loss": 0.0091, + "step": 38860 + }, + { + "grad_norm": 0.10316877067089081, + "learning_rate": 7.178288983866826e-05, + "loss": 0.01, + "step": 38870 + }, + { + "grad_norm": 0.06365399062633514, + "learning_rate": 7.176800556145397e-05, + "loss": 0.0098, + "step": 38880 + }, + { + "grad_norm": 0.07377298921346664, + "learning_rate": 7.175311890372334e-05, + "loss": 0.0097, + "step": 38890 + }, + { + "grad_norm": 0.05359859764575958, + "learning_rate": 7.17382298671043e-05, + "loss": 0.0107, + "step": 38900 + }, + { + "grad_norm": 0.05900599807500839, + "learning_rate": 7.172333845322515e-05, + "loss": 0.0091, + "step": 38910 + }, + { + "grad_norm": 0.05228494107723236, + "learning_rate": 7.170844466371436e-05, + "loss": 0.0099, + "step": 38920 + }, + { + "grad_norm": 0.05120079591870308, + "learning_rate": 7.16935485002007e-05, + "loss": 0.0077, + "step": 38930 + }, + { + "grad_norm": 0.05324206128716469, + "learning_rate": 7.167864996431319e-05, + "loss": 0.0073, + "step": 38940 + }, + { + "grad_norm": 0.04830080270767212, + "learning_rate": 7.166374905768111e-05, + "loss": 0.0073, + "step": 38950 + }, + { + "grad_norm": 0.05635576695203781, + "learning_rate": 7.164884578193401e-05, + "loss": 0.0091, + "step": 38960 + }, + { + "grad_norm": 0.05308983474969864, + "learning_rate": 7.163394013870168e-05, + "loss": 0.0066, + "step": 38970 + }, + { + "grad_norm": 0.07042425870895386, + "learning_rate": 7.161903212961419e-05, + "loss": 0.0089, + "step": 38980 + }, + { + "grad_norm": 0.07280943542718887, + "learning_rate": 7.160412175630183e-05, + "loss": 0.0072, + "step": 38990 + }, + { + "grad_norm": 0.07827257364988327, + "learning_rate": 7.158920902039521e-05, + "loss": 0.0079, + "step": 39000 + }, + { + "grad_norm": 0.04113142564892769, + "learning_rate": 7.157429392352514e-05, + "loss": 0.008, + "step": 39010 + }, + { + "grad_norm": 0.09540022164583206, + "learning_rate": 7.155937646732275e-05, + "loss": 0.01, + "step": 39020 + }, + { + "grad_norm": 0.06057918071746826, + "learning_rate": 7.154445665341933e-05, + "loss": 0.01, + "step": 39030 + }, + { + "grad_norm": 0.08149078488349915, + "learning_rate": 7.152953448344654e-05, + "loss": 0.0114, + "step": 39040 + }, + { + "grad_norm": 0.0779283195734024, + "learning_rate": 7.151460995903624e-05, + "loss": 0.0085, + "step": 39050 + }, + { + "grad_norm": 0.060559406876564026, + "learning_rate": 7.149968308182052e-05, + "loss": 0.0095, + "step": 39060 + }, + { + "grad_norm": 0.055103935301303864, + "learning_rate": 7.14847538534318e-05, + "loss": 0.0084, + "step": 39070 + }, + { + "grad_norm": 0.04797394201159477, + "learning_rate": 7.14698222755027e-05, + "loss": 0.0066, + "step": 39080 + }, + { + "grad_norm": 0.07666755467653275, + "learning_rate": 7.14548883496661e-05, + "loss": 0.0077, + "step": 39090 + }, + { + "grad_norm": 0.06382032483816147, + "learning_rate": 7.143995207755517e-05, + "loss": 0.0064, + "step": 39100 + }, + { + "grad_norm": 0.06394185870885849, + "learning_rate": 7.142501346080333e-05, + "loss": 0.0092, + "step": 39110 + }, + { + "grad_norm": 0.05439372360706329, + "learning_rate": 7.141007250104421e-05, + "loss": 0.0075, + "step": 39120 + }, + { + "grad_norm": 0.043888792395591736, + "learning_rate": 7.139512919991176e-05, + "loss": 0.0067, + "step": 39130 + }, + { + "grad_norm": 0.06705842167139053, + "learning_rate": 7.138018355904014e-05, + "loss": 0.009, + "step": 39140 + }, + { + "grad_norm": 0.06771920621395111, + "learning_rate": 7.13652355800638e-05, + "loss": 0.0073, + "step": 39150 + }, + { + "grad_norm": 0.08130213618278503, + "learning_rate": 7.135028526461743e-05, + "loss": 0.0073, + "step": 39160 + }, + { + "grad_norm": 0.04962070286273956, + "learning_rate": 7.133533261433594e-05, + "loss": 0.0084, + "step": 39170 + }, + { + "grad_norm": 0.05952700972557068, + "learning_rate": 7.132037763085457e-05, + "loss": 0.0082, + "step": 39180 + }, + { + "grad_norm": 0.06724099814891815, + "learning_rate": 7.130542031580875e-05, + "loss": 0.0081, + "step": 39190 + }, + { + "grad_norm": 0.051775891333818436, + "learning_rate": 7.12904606708342e-05, + "loss": 0.007, + "step": 39200 + }, + { + "grad_norm": 0.04089704155921936, + "learning_rate": 7.127549869756687e-05, + "loss": 0.0095, + "step": 39210 + }, + { + "grad_norm": 0.0729939267039299, + "learning_rate": 7.126053439764299e-05, + "loss": 0.0115, + "step": 39220 + }, + { + "grad_norm": 0.0594962015748024, + "learning_rate": 7.124556777269904e-05, + "loss": 0.0075, + "step": 39230 + }, + { + "grad_norm": 0.0483117550611496, + "learning_rate": 7.123059882437174e-05, + "loss": 0.0075, + "step": 39240 + }, + { + "grad_norm": 0.03739434480667114, + "learning_rate": 7.121562755429807e-05, + "loss": 0.0089, + "step": 39250 + }, + { + "grad_norm": 0.06336085498332977, + "learning_rate": 7.120065396411528e-05, + "loss": 0.0077, + "step": 39260 + }, + { + "grad_norm": 0.07088667154312134, + "learning_rate": 7.118567805546084e-05, + "loss": 0.0081, + "step": 39270 + }, + { + "grad_norm": 0.06359900534152985, + "learning_rate": 7.117069982997248e-05, + "loss": 0.0078, + "step": 39280 + }, + { + "grad_norm": 0.060519371181726456, + "learning_rate": 7.115571928928825e-05, + "loss": 0.0087, + "step": 39290 + }, + { + "grad_norm": 0.07513485848903656, + "learning_rate": 7.114073643504635e-05, + "loss": 0.0078, + "step": 39300 + }, + { + "grad_norm": 0.0401153638958931, + "learning_rate": 7.11257512688853e-05, + "loss": 0.0074, + "step": 39310 + }, + { + "grad_norm": 0.043602388352155685, + "learning_rate": 7.111076379244384e-05, + "loss": 0.0085, + "step": 39320 + }, + { + "grad_norm": 0.0579400509595871, + "learning_rate": 7.109577400736101e-05, + "loss": 0.0085, + "step": 39330 + }, + { + "grad_norm": 0.05847614258527756, + "learning_rate": 7.108078191527605e-05, + "loss": 0.0086, + "step": 39340 + }, + { + "grad_norm": 0.05453656241297722, + "learning_rate": 7.106578751782847e-05, + "loss": 0.0088, + "step": 39350 + }, + { + "grad_norm": 0.07560881227254868, + "learning_rate": 7.105079081665803e-05, + "loss": 0.0087, + "step": 39360 + }, + { + "grad_norm": 0.06511498987674713, + "learning_rate": 7.103579181340476e-05, + "loss": 0.0089, + "step": 39370 + }, + { + "grad_norm": 0.088877834379673, + "learning_rate": 7.102079050970893e-05, + "loss": 0.0072, + "step": 39380 + }, + { + "grad_norm": 0.04927246272563934, + "learning_rate": 7.100578690721105e-05, + "loss": 0.0081, + "step": 39390 + }, + { + "grad_norm": 0.05791039764881134, + "learning_rate": 7.09907810075519e-05, + "loss": 0.0082, + "step": 39400 + }, + { + "grad_norm": 0.05915409326553345, + "learning_rate": 7.097577281237249e-05, + "loss": 0.0085, + "step": 39410 + }, + { + "grad_norm": 0.053234077990055084, + "learning_rate": 7.09607623233141e-05, + "loss": 0.008, + "step": 39420 + }, + { + "grad_norm": 0.05434441193938255, + "learning_rate": 7.094574954201827e-05, + "loss": 0.008, + "step": 39430 + }, + { + "grad_norm": 0.06408333778381348, + "learning_rate": 7.093073447012675e-05, + "loss": 0.0099, + "step": 39440 + }, + { + "grad_norm": 0.03873661532998085, + "learning_rate": 7.09157171092816e-05, + "loss": 0.0063, + "step": 39450 + }, + { + "grad_norm": 0.05079269781708717, + "learning_rate": 7.090069746112504e-05, + "loss": 0.0062, + "step": 39460 + }, + { + "grad_norm": 0.05820511281490326, + "learning_rate": 7.088567552729965e-05, + "loss": 0.0097, + "step": 39470 + }, + { + "grad_norm": 0.040252938866615295, + "learning_rate": 7.087065130944818e-05, + "loss": 0.0067, + "step": 39480 + }, + { + "grad_norm": 0.06109447777271271, + "learning_rate": 7.085562480921366e-05, + "loss": 0.0082, + "step": 39490 + }, + { + "grad_norm": 0.054439935833215714, + "learning_rate": 7.084059602823937e-05, + "loss": 0.0078, + "step": 39500 + }, + { + "grad_norm": 0.04857541248202324, + "learning_rate": 7.082556496816882e-05, + "loss": 0.0064, + "step": 39510 + }, + { + "grad_norm": 0.05634095519781113, + "learning_rate": 7.081053163064582e-05, + "loss": 0.0062, + "step": 39520 + }, + { + "grad_norm": 0.08178559690713882, + "learning_rate": 7.079549601731434e-05, + "loss": 0.0086, + "step": 39530 + }, + { + "grad_norm": 0.09156296402215958, + "learning_rate": 7.07804581298187e-05, + "loss": 0.0073, + "step": 39540 + }, + { + "grad_norm": 0.04949105530977249, + "learning_rate": 7.07654179698034e-05, + "loss": 0.0088, + "step": 39550 + }, + { + "grad_norm": 0.0427805557847023, + "learning_rate": 7.075037553891321e-05, + "loss": 0.007, + "step": 39560 + }, + { + "grad_norm": 0.04322442412376404, + "learning_rate": 7.073533083879315e-05, + "loss": 0.0062, + "step": 39570 + }, + { + "grad_norm": 0.07905631512403488, + "learning_rate": 7.072028387108849e-05, + "loss": 0.0087, + "step": 39580 + }, + { + "grad_norm": 0.06934299319982529, + "learning_rate": 7.070523463744472e-05, + "loss": 0.0075, + "step": 39590 + }, + { + "grad_norm": 0.06221234053373337, + "learning_rate": 7.069018313950763e-05, + "loss": 0.0079, + "step": 39600 + }, + { + "grad_norm": 0.08266325294971466, + "learning_rate": 7.067512937892322e-05, + "loss": 0.007, + "step": 39610 + }, + { + "grad_norm": 0.04453746974468231, + "learning_rate": 7.066007335733775e-05, + "loss": 0.0067, + "step": 39620 + }, + { + "grad_norm": 0.0546966977417469, + "learning_rate": 7.064501507639772e-05, + "loss": 0.0078, + "step": 39630 + }, + { + "grad_norm": 0.04348641261458397, + "learning_rate": 7.062995453774987e-05, + "loss": 0.009, + "step": 39640 + }, + { + "grad_norm": 0.0525679774582386, + "learning_rate": 7.061489174304121e-05, + "loss": 0.0084, + "step": 39650 + }, + { + "grad_norm": 0.05082926154136658, + "learning_rate": 7.0599826693919e-05, + "loss": 0.0067, + "step": 39660 + }, + { + "grad_norm": 0.050355758517980576, + "learning_rate": 7.05847593920307e-05, + "loss": 0.0088, + "step": 39670 + }, + { + "grad_norm": 0.06365548074245453, + "learning_rate": 7.056968983902406e-05, + "loss": 0.0084, + "step": 39680 + }, + { + "grad_norm": 0.07076599448919296, + "learning_rate": 7.055461803654706e-05, + "loss": 0.007, + "step": 39690 + }, + { + "grad_norm": 0.05630141496658325, + "learning_rate": 7.053954398624794e-05, + "loss": 0.0076, + "step": 39700 + }, + { + "grad_norm": 0.07783308625221252, + "learning_rate": 7.052446768977518e-05, + "loss": 0.0097, + "step": 39710 + }, + { + "grad_norm": 0.059805359691381454, + "learning_rate": 7.050938914877748e-05, + "loss": 0.0074, + "step": 39720 + }, + { + "grad_norm": 0.047839272767305374, + "learning_rate": 7.049430836490382e-05, + "loss": 0.0064, + "step": 39730 + }, + { + "grad_norm": 0.051142461597919464, + "learning_rate": 7.047922533980342e-05, + "loss": 0.0076, + "step": 39740 + }, + { + "grad_norm": 0.06789460778236389, + "learning_rate": 7.046414007512571e-05, + "loss": 0.0066, + "step": 39750 + }, + { + "grad_norm": 0.06533181667327881, + "learning_rate": 7.044905257252042e-05, + "loss": 0.0089, + "step": 39760 + }, + { + "grad_norm": 0.05181031674146652, + "learning_rate": 7.04339628336375e-05, + "loss": 0.0066, + "step": 39770 + }, + { + "grad_norm": 0.0517396554350853, + "learning_rate": 7.041887086012711e-05, + "loss": 0.0077, + "step": 39780 + }, + { + "grad_norm": 0.06078574061393738, + "learning_rate": 7.040377665363969e-05, + "loss": 0.0083, + "step": 39790 + }, + { + "grad_norm": 0.04464176297187805, + "learning_rate": 7.038868021582594e-05, + "loss": 0.0082, + "step": 39800 + }, + { + "grad_norm": 0.04727984592318535, + "learning_rate": 7.037358154833679e-05, + "loss": 0.0074, + "step": 39810 + }, + { + "grad_norm": 0.05338886380195618, + "learning_rate": 7.035848065282339e-05, + "loss": 0.0073, + "step": 39820 + }, + { + "grad_norm": 0.07028140127658844, + "learning_rate": 7.034337753093714e-05, + "loss": 0.0076, + "step": 39830 + }, + { + "grad_norm": 0.0641181543469429, + "learning_rate": 7.032827218432972e-05, + "loss": 0.0085, + "step": 39840 + }, + { + "grad_norm": 0.08024713397026062, + "learning_rate": 7.031316461465302e-05, + "loss": 0.0095, + "step": 39850 + }, + { + "grad_norm": 0.04340182617306709, + "learning_rate": 7.029805482355915e-05, + "loss": 0.0083, + "step": 39860 + }, + { + "grad_norm": 0.04990290477871895, + "learning_rate": 7.028294281270055e-05, + "loss": 0.0074, + "step": 39870 + }, + { + "grad_norm": 0.03473324328660965, + "learning_rate": 7.026782858372982e-05, + "loss": 0.0089, + "step": 39880 + }, + { + "grad_norm": 0.06533315032720566, + "learning_rate": 7.025271213829982e-05, + "loss": 0.0101, + "step": 39890 + }, + { + "grad_norm": 0.061363037675619125, + "learning_rate": 7.023759347806366e-05, + "loss": 0.0073, + "step": 39900 + }, + { + "grad_norm": 0.07177280634641647, + "learning_rate": 7.022247260467472e-05, + "loss": 0.0081, + "step": 39910 + }, + { + "grad_norm": 0.05772266164422035, + "learning_rate": 7.020734951978658e-05, + "loss": 0.0071, + "step": 39920 + }, + { + "grad_norm": 0.06196872517466545, + "learning_rate": 7.019222422505307e-05, + "loss": 0.007, + "step": 39930 + }, + { + "grad_norm": 0.047553643584251404, + "learning_rate": 7.017709672212827e-05, + "loss": 0.0083, + "step": 39940 + }, + { + "grad_norm": 0.07470451295375824, + "learning_rate": 7.016196701266652e-05, + "loss": 0.0089, + "step": 39950 + }, + { + "grad_norm": 0.06197798624634743, + "learning_rate": 7.014683509832238e-05, + "loss": 0.0103, + "step": 39960 + }, + { + "grad_norm": 0.06549069285392761, + "learning_rate": 7.013170098075063e-05, + "loss": 0.006, + "step": 39970 + }, + { + "grad_norm": 0.05961313471198082, + "learning_rate": 7.011656466160632e-05, + "loss": 0.007, + "step": 39980 + }, + { + "grad_norm": 0.06476326286792755, + "learning_rate": 7.010142614254475e-05, + "loss": 0.0082, + "step": 39990 + }, + { + "grad_norm": 0.05395246669650078, + "learning_rate": 7.008628542522147e-05, + "loss": 0.0077, + "step": 40000 + }, + { + "grad_norm": 0.04947962239384651, + "learning_rate": 7.007114251129218e-05, + "loss": 0.0076, + "step": 40010 + }, + { + "grad_norm": 0.049306027591228485, + "learning_rate": 7.005599740241293e-05, + "loss": 0.0077, + "step": 40020 + }, + { + "grad_norm": 0.07821040600538254, + "learning_rate": 7.004085010023996e-05, + "loss": 0.0078, + "step": 40030 + }, + { + "grad_norm": 0.06655087321996689, + "learning_rate": 7.002570060642976e-05, + "loss": 0.0077, + "step": 40040 + }, + { + "grad_norm": 0.0968986377120018, + "learning_rate": 7.001054892263903e-05, + "loss": 0.01, + "step": 40050 + }, + { + "grad_norm": 0.08080632239580154, + "learning_rate": 6.999539505052477e-05, + "loss": 0.0094, + "step": 40060 + }, + { + "grad_norm": 0.08521395176649094, + "learning_rate": 6.998023899174418e-05, + "loss": 0.0086, + "step": 40070 + }, + { + "grad_norm": 0.07889541238546371, + "learning_rate": 6.996508074795467e-05, + "loss": 0.0084, + "step": 40080 + }, + { + "grad_norm": 0.06928499788045883, + "learning_rate": 6.994992032081396e-05, + "loss": 0.0083, + "step": 40090 + }, + { + "grad_norm": 0.06706976145505905, + "learning_rate": 6.993475771197995e-05, + "loss": 0.0087, + "step": 40100 + }, + { + "grad_norm": 0.057448189705610275, + "learning_rate": 6.991959292311082e-05, + "loss": 0.0066, + "step": 40110 + }, + { + "grad_norm": 0.0654650554060936, + "learning_rate": 6.990442595586495e-05, + "loss": 0.0098, + "step": 40120 + }, + { + "grad_norm": 0.0713191106915474, + "learning_rate": 6.988925681190098e-05, + "loss": 0.0084, + "step": 40130 + }, + { + "grad_norm": 0.06903170049190521, + "learning_rate": 6.987408549287778e-05, + "loss": 0.0082, + "step": 40140 + }, + { + "grad_norm": 0.054681695997714996, + "learning_rate": 6.985891200045449e-05, + "loss": 0.0064, + "step": 40150 + }, + { + "grad_norm": 0.04114748537540436, + "learning_rate": 6.984373633629045e-05, + "loss": 0.0105, + "step": 40160 + }, + { + "grad_norm": 0.03890092298388481, + "learning_rate": 6.98285585020452e-05, + "loss": 0.0061, + "step": 40170 + }, + { + "grad_norm": 0.05548648163676262, + "learning_rate": 6.981337849937864e-05, + "loss": 0.0079, + "step": 40180 + }, + { + "grad_norm": 0.04801216349005699, + "learning_rate": 6.979819632995078e-05, + "loss": 0.0063, + "step": 40190 + }, + { + "grad_norm": 0.050497617572546005, + "learning_rate": 6.978301199542193e-05, + "loss": 0.0057, + "step": 40200 + }, + { + "grad_norm": 0.05538531020283699, + "learning_rate": 6.976782549745263e-05, + "loss": 0.0079, + "step": 40210 + }, + { + "grad_norm": 0.04349804297089577, + "learning_rate": 6.975263683770367e-05, + "loss": 0.0086, + "step": 40220 + }, + { + "grad_norm": 0.06468820571899414, + "learning_rate": 6.973744601783604e-05, + "loss": 0.0068, + "step": 40230 + }, + { + "grad_norm": 0.061044204980134964, + "learning_rate": 6.972225303951097e-05, + "loss": 0.0084, + "step": 40240 + }, + { + "grad_norm": 0.06366939842700958, + "learning_rate": 6.970705790438998e-05, + "loss": 0.0066, + "step": 40250 + }, + { + "grad_norm": 0.058194201439619064, + "learning_rate": 6.969186061413477e-05, + "loss": 0.0076, + "step": 40260 + }, + { + "grad_norm": 0.05098713934421539, + "learning_rate": 6.967666117040727e-05, + "loss": 0.0076, + "step": 40270 + }, + { + "grad_norm": 0.06550533324480057, + "learning_rate": 6.966145957486972e-05, + "loss": 0.007, + "step": 40280 + }, + { + "grad_norm": 0.05835976079106331, + "learning_rate": 6.964625582918449e-05, + "loss": 0.0068, + "step": 40290 + }, + { + "grad_norm": 0.06984388083219528, + "learning_rate": 6.963104993501425e-05, + "loss": 0.0084, + "step": 40300 + }, + { + "grad_norm": 0.07689247280359268, + "learning_rate": 6.961584189402192e-05, + "loss": 0.0074, + "step": 40310 + }, + { + "grad_norm": 0.06499718129634857, + "learning_rate": 6.96006317078706e-05, + "loss": 0.0074, + "step": 40320 + }, + { + "grad_norm": 0.08055394142866135, + "learning_rate": 6.958541937822367e-05, + "loss": 0.0084, + "step": 40330 + }, + { + "grad_norm": 0.09365476667881012, + "learning_rate": 6.957020490674473e-05, + "loss": 0.0079, + "step": 40340 + }, + { + "grad_norm": 0.06947208940982819, + "learning_rate": 6.95549882950976e-05, + "loss": 0.0063, + "step": 40350 + }, + { + "grad_norm": 0.0801238864660263, + "learning_rate": 6.953976954494635e-05, + "loss": 0.0084, + "step": 40360 + }, + { + "grad_norm": 0.060200683772563934, + "learning_rate": 6.952454865795528e-05, + "loss": 0.0094, + "step": 40370 + }, + { + "grad_norm": 0.0626673623919487, + "learning_rate": 6.950932563578892e-05, + "loss": 0.0102, + "step": 40380 + }, + { + "grad_norm": 0.0432911291718483, + "learning_rate": 6.949410048011206e-05, + "loss": 0.0072, + "step": 40390 + }, + { + "grad_norm": 0.05657346919178963, + "learning_rate": 6.947887319258966e-05, + "loss": 0.0062, + "step": 40400 + }, + { + "grad_norm": 0.05439772456884384, + "learning_rate": 6.946364377488696e-05, + "loss": 0.0085, + "step": 40410 + }, + { + "grad_norm": 0.050730664283037186, + "learning_rate": 6.944841222866947e-05, + "loss": 0.0075, + "step": 40420 + }, + { + "grad_norm": 0.0459885410964489, + "learning_rate": 6.943317855560284e-05, + "loss": 0.0073, + "step": 40430 + }, + { + "grad_norm": 0.06654293835163116, + "learning_rate": 6.941794275735302e-05, + "loss": 0.0079, + "step": 40440 + }, + { + "grad_norm": 0.05811358988285065, + "learning_rate": 6.94027048355862e-05, + "loss": 0.008, + "step": 40450 + }, + { + "grad_norm": 0.05442667007446289, + "learning_rate": 6.938746479196871e-05, + "loss": 0.0087, + "step": 40460 + }, + { + "grad_norm": 0.05051586404442787, + "learning_rate": 6.937222262816724e-05, + "loss": 0.0078, + "step": 40470 + }, + { + "grad_norm": 0.05338919535279274, + "learning_rate": 6.935697834584865e-05, + "loss": 0.0078, + "step": 40480 + }, + { + "grad_norm": 0.054312676191329956, + "learning_rate": 6.934173194667998e-05, + "loss": 0.0107, + "step": 40490 + }, + { + "grad_norm": 0.05421331524848938, + "learning_rate": 6.93264834323286e-05, + "loss": 0.0081, + "step": 40500 + }, + { + "grad_norm": 0.10122297704219818, + "learning_rate": 6.931123280446206e-05, + "loss": 0.0093, + "step": 40510 + }, + { + "grad_norm": 0.059622906148433685, + "learning_rate": 6.929598006474811e-05, + "loss": 0.0093, + "step": 40520 + }, + { + "grad_norm": 0.0627245381474495, + "learning_rate": 6.92807252148548e-05, + "loss": 0.0061, + "step": 40530 + }, + { + "grad_norm": 0.052805136889219284, + "learning_rate": 6.926546825645037e-05, + "loss": 0.008, + "step": 40540 + }, + { + "grad_norm": 0.04397548362612724, + "learning_rate": 6.92502091912033e-05, + "loss": 0.0062, + "step": 40550 + }, + { + "grad_norm": 0.0554516576230526, + "learning_rate": 6.923494802078229e-05, + "loss": 0.0073, + "step": 40560 + }, + { + "grad_norm": 0.06848278641700745, + "learning_rate": 6.921968474685628e-05, + "loss": 0.0082, + "step": 40570 + }, + { + "grad_norm": 0.05322015658020973, + "learning_rate": 6.920441937109445e-05, + "loss": 0.0096, + "step": 40580 + }, + { + "grad_norm": 0.05831311270594597, + "learning_rate": 6.91891518951662e-05, + "loss": 0.0063, + "step": 40590 + }, + { + "grad_norm": 0.04960948973894119, + "learning_rate": 6.917388232074114e-05, + "loss": 0.0081, + "step": 40600 + }, + { + "grad_norm": 0.05544249340891838, + "learning_rate": 6.915861064948914e-05, + "loss": 0.0071, + "step": 40610 + }, + { + "grad_norm": 0.05579586327075958, + "learning_rate": 6.914333688308025e-05, + "loss": 0.0092, + "step": 40620 + }, + { + "grad_norm": 0.07058683037757874, + "learning_rate": 6.912806102318487e-05, + "loss": 0.0094, + "step": 40630 + }, + { + "grad_norm": 0.05255395919084549, + "learning_rate": 6.911278307147347e-05, + "loss": 0.0099, + "step": 40640 + }, + { + "grad_norm": 0.06336770206689835, + "learning_rate": 6.909750302961684e-05, + "loss": 0.007, + "step": 40650 + }, + { + "grad_norm": 0.05364316329360008, + "learning_rate": 6.9082220899286e-05, + "loss": 0.0086, + "step": 40660 + }, + { + "grad_norm": 0.05164165422320366, + "learning_rate": 6.906693668215219e-05, + "loss": 0.0077, + "step": 40670 + }, + { + "grad_norm": 0.0495193675160408, + "learning_rate": 6.905165037988683e-05, + "loss": 0.0065, + "step": 40680 + }, + { + "grad_norm": 0.054564058780670166, + "learning_rate": 6.903636199416164e-05, + "loss": 0.0083, + "step": 40690 + }, + { + "grad_norm": 0.07172158360481262, + "learning_rate": 6.902107152664851e-05, + "loss": 0.0077, + "step": 40700 + }, + { + "grad_norm": 0.06320268660783768, + "learning_rate": 6.90057789790196e-05, + "loss": 0.0075, + "step": 40710 + }, + { + "grad_norm": 0.08148522675037384, + "learning_rate": 6.899048435294728e-05, + "loss": 0.0068, + "step": 40720 + }, + { + "grad_norm": 0.06466656178236008, + "learning_rate": 6.897518765010415e-05, + "loss": 0.0076, + "step": 40730 + }, + { + "grad_norm": 0.05570802837610245, + "learning_rate": 6.895988887216302e-05, + "loss": 0.0087, + "step": 40740 + }, + { + "grad_norm": 0.04922957718372345, + "learning_rate": 6.894458802079694e-05, + "loss": 0.0088, + "step": 40750 + }, + { + "grad_norm": 0.050884198397397995, + "learning_rate": 6.892928509767922e-05, + "loss": 0.0074, + "step": 40760 + }, + { + "grad_norm": 0.05459513142704964, + "learning_rate": 6.891398010448333e-05, + "loss": 0.0068, + "step": 40770 + }, + { + "grad_norm": 0.04497300088405609, + "learning_rate": 6.8898673042883e-05, + "loss": 0.0079, + "step": 40780 + }, + { + "grad_norm": 0.07228973507881165, + "learning_rate": 6.888336391455222e-05, + "loss": 0.0079, + "step": 40790 + }, + { + "grad_norm": 0.06941306591033936, + "learning_rate": 6.886805272116513e-05, + "loss": 0.0083, + "step": 40800 + }, + { + "grad_norm": 0.048254143446683884, + "learning_rate": 6.885273946439617e-05, + "loss": 0.0059, + "step": 40810 + }, + { + "grad_norm": 0.05341745913028717, + "learning_rate": 6.883742414591998e-05, + "loss": 0.0065, + "step": 40820 + }, + { + "grad_norm": 0.05000094696879387, + "learning_rate": 6.88221067674114e-05, + "loss": 0.0083, + "step": 40830 + }, + { + "grad_norm": 0.05353971943259239, + "learning_rate": 6.88067873305455e-05, + "loss": 0.0074, + "step": 40840 + }, + { + "grad_norm": 0.05770906060934067, + "learning_rate": 6.879146583699765e-05, + "loss": 0.0096, + "step": 40850 + }, + { + "grad_norm": 0.07988651841878891, + "learning_rate": 6.877614228844334e-05, + "loss": 0.0067, + "step": 40860 + }, + { + "grad_norm": 0.06196431815624237, + "learning_rate": 6.876081668655832e-05, + "loss": 0.0065, + "step": 40870 + }, + { + "grad_norm": 0.05928327515721321, + "learning_rate": 6.874548903301861e-05, + "loss": 0.0081, + "step": 40880 + }, + { + "grad_norm": 0.05531932786107063, + "learning_rate": 6.873015932950039e-05, + "loss": 0.0073, + "step": 40890 + }, + { + "grad_norm": 0.06751757860183716, + "learning_rate": 6.871482757768012e-05, + "loss": 0.0069, + "step": 40900 + }, + { + "grad_norm": 0.05785239860415459, + "learning_rate": 6.869949377923442e-05, + "loss": 0.0075, + "step": 40910 + }, + { + "grad_norm": 0.06692349165678024, + "learning_rate": 6.868415793584022e-05, + "loss": 0.0075, + "step": 40920 + }, + { + "grad_norm": 0.0539034940302372, + "learning_rate": 6.86688200491746e-05, + "loss": 0.0088, + "step": 40930 + }, + { + "grad_norm": 0.05982846021652222, + "learning_rate": 6.865348012091488e-05, + "loss": 0.0087, + "step": 40940 + }, + { + "grad_norm": 0.06535152345895767, + "learning_rate": 6.86381381527386e-05, + "loss": 0.0085, + "step": 40950 + }, + { + "grad_norm": 0.053264327347278595, + "learning_rate": 6.862279414632359e-05, + "loss": 0.0062, + "step": 40960 + }, + { + "grad_norm": 0.03937552869319916, + "learning_rate": 6.860744810334778e-05, + "loss": 0.0065, + "step": 40970 + }, + { + "grad_norm": 0.06786076724529266, + "learning_rate": 6.859210002548943e-05, + "loss": 0.0097, + "step": 40980 + }, + { + "grad_norm": 0.06692834943532944, + "learning_rate": 6.8576749914427e-05, + "loss": 0.0084, + "step": 40990 + }, + { + "grad_norm": 0.06050063297152519, + "learning_rate": 6.85613977718391e-05, + "loss": 0.0068, + "step": 41000 + }, + { + "grad_norm": 0.06534554809331894, + "learning_rate": 6.854604359940464e-05, + "loss": 0.0074, + "step": 41010 + }, + { + "grad_norm": 0.06322316080331802, + "learning_rate": 6.853068739880276e-05, + "loss": 0.0076, + "step": 41020 + }, + { + "grad_norm": 0.060013603419065475, + "learning_rate": 6.851532917171276e-05, + "loss": 0.0075, + "step": 41030 + }, + { + "grad_norm": 0.07163479179143906, + "learning_rate": 6.849996891981418e-05, + "loss": 0.0074, + "step": 41040 + }, + { + "grad_norm": 0.05142606794834137, + "learning_rate": 6.848460664478684e-05, + "loss": 0.0075, + "step": 41050 + }, + { + "grad_norm": 0.06378056854009628, + "learning_rate": 6.846924234831069e-05, + "loss": 0.0073, + "step": 41060 + }, + { + "grad_norm": 0.0478401854634285, + "learning_rate": 6.845387603206597e-05, + "loss": 0.0066, + "step": 41070 + }, + { + "grad_norm": 0.05029834434390068, + "learning_rate": 6.843850769773312e-05, + "loss": 0.0071, + "step": 41080 + }, + { + "grad_norm": 0.05126627907156944, + "learning_rate": 6.842313734699278e-05, + "loss": 0.0071, + "step": 41090 + }, + { + "grad_norm": 0.06790380924940109, + "learning_rate": 6.840776498152584e-05, + "loss": 0.0083, + "step": 41100 + }, + { + "grad_norm": 0.06892986595630646, + "learning_rate": 6.83923906030134e-05, + "loss": 0.0089, + "step": 41110 + }, + { + "grad_norm": 0.06693699955940247, + "learning_rate": 6.837701421313677e-05, + "loss": 0.0066, + "step": 41120 + }, + { + "grad_norm": 0.07091010361909866, + "learning_rate": 6.83616358135775e-05, + "loss": 0.0072, + "step": 41130 + }, + { + "grad_norm": 0.070062056183815, + "learning_rate": 6.834625540601734e-05, + "loss": 0.0082, + "step": 41140 + }, + { + "grad_norm": 0.06290470063686371, + "learning_rate": 6.833087299213829e-05, + "loss": 0.008, + "step": 41150 + }, + { + "grad_norm": 0.04047272726893425, + "learning_rate": 6.83154885736225e-05, + "loss": 0.008, + "step": 41160 + }, + { + "grad_norm": 0.06403202563524246, + "learning_rate": 6.830010215215244e-05, + "loss": 0.0073, + "step": 41170 + }, + { + "grad_norm": 0.06875070184469223, + "learning_rate": 6.828471372941072e-05, + "loss": 0.0061, + "step": 41180 + }, + { + "grad_norm": 0.05779410898685455, + "learning_rate": 6.826932330708018e-05, + "loss": 0.0085, + "step": 41190 + }, + { + "grad_norm": 0.058907367289066315, + "learning_rate": 6.825393088684393e-05, + "loss": 0.0066, + "step": 41200 + }, + { + "grad_norm": 0.0416654571890831, + "learning_rate": 6.823853647038524e-05, + "loss": 0.0072, + "step": 41210 + }, + { + "grad_norm": 0.04194960370659828, + "learning_rate": 6.822314005938762e-05, + "loss": 0.0062, + "step": 41220 + }, + { + "grad_norm": 0.0575970858335495, + "learning_rate": 6.820774165553479e-05, + "loss": 0.0105, + "step": 41230 + }, + { + "grad_norm": 0.06612683832645416, + "learning_rate": 6.819234126051072e-05, + "loss": 0.007, + "step": 41240 + }, + { + "grad_norm": 0.05610157549381256, + "learning_rate": 6.817693887599956e-05, + "loss": 0.0086, + "step": 41250 + }, + { + "grad_norm": 0.03869062289595604, + "learning_rate": 6.816153450368568e-05, + "loss": 0.0072, + "step": 41260 + }, + { + "grad_norm": 0.04002370312809944, + "learning_rate": 6.814612814525372e-05, + "loss": 0.0074, + "step": 41270 + }, + { + "grad_norm": 0.06344068795442581, + "learning_rate": 6.813071980238846e-05, + "loss": 0.0081, + "step": 41280 + }, + { + "grad_norm": 0.06753505766391754, + "learning_rate": 6.811530947677492e-05, + "loss": 0.0096, + "step": 41290 + }, + { + "grad_norm": 0.057857248932123184, + "learning_rate": 6.809989717009839e-05, + "loss": 0.0099, + "step": 41300 + }, + { + "grad_norm": 0.06475003063678741, + "learning_rate": 6.808448288404431e-05, + "loss": 0.008, + "step": 41310 + }, + { + "grad_norm": 0.07630280405282974, + "learning_rate": 6.806906662029838e-05, + "loss": 0.009, + "step": 41320 + }, + { + "grad_norm": 0.06346382200717926, + "learning_rate": 6.805364838054647e-05, + "loss": 0.0066, + "step": 41330 + }, + { + "grad_norm": 0.05530714616179466, + "learning_rate": 6.803822816647474e-05, + "loss": 0.0079, + "step": 41340 + }, + { + "grad_norm": 0.060454487800598145, + "learning_rate": 6.802280597976949e-05, + "loss": 0.0077, + "step": 41350 + }, + { + "grad_norm": 0.051856305450201035, + "learning_rate": 6.800738182211727e-05, + "loss": 0.0068, + "step": 41360 + }, + { + "grad_norm": 0.07189567387104034, + "learning_rate": 6.799195569520485e-05, + "loss": 0.0085, + "step": 41370 + }, + { + "grad_norm": 0.09397125244140625, + "learning_rate": 6.79765276007192e-05, + "loss": 0.0074, + "step": 41380 + }, + { + "grad_norm": 0.059967488050460815, + "learning_rate": 6.796109754034753e-05, + "loss": 0.0081, + "step": 41390 + }, + { + "grad_norm": 0.050760384649038315, + "learning_rate": 6.794566551577724e-05, + "loss": 0.0071, + "step": 41400 + }, + { + "grad_norm": 0.06893869489431381, + "learning_rate": 6.793023152869594e-05, + "loss": 0.0074, + "step": 41410 + }, + { + "grad_norm": 0.040962330996990204, + "learning_rate": 6.791479558079147e-05, + "loss": 0.0065, + "step": 41420 + }, + { + "grad_norm": 0.0711832121014595, + "learning_rate": 6.789935767375191e-05, + "loss": 0.0086, + "step": 41430 + }, + { + "grad_norm": 0.05507403239607811, + "learning_rate": 6.78839178092655e-05, + "loss": 0.0056, + "step": 41440 + }, + { + "grad_norm": 0.05678330361843109, + "learning_rate": 6.786847598902072e-05, + "loss": 0.0063, + "step": 41450 + }, + { + "grad_norm": 0.06459486484527588, + "learning_rate": 6.785303221470627e-05, + "loss": 0.0068, + "step": 41460 + }, + { + "grad_norm": 0.05704733356833458, + "learning_rate": 6.783758648801108e-05, + "loss": 0.0071, + "step": 41470 + }, + { + "grad_norm": 0.04475538432598114, + "learning_rate": 6.782213881062422e-05, + "loss": 0.0072, + "step": 41480 + }, + { + "grad_norm": 0.05048883706331253, + "learning_rate": 6.780668918423508e-05, + "loss": 0.0071, + "step": 41490 + }, + { + "grad_norm": 0.05757227912545204, + "learning_rate": 6.779123761053317e-05, + "loss": 0.0085, + "step": 41500 + }, + { + "grad_norm": 0.050609759986400604, + "learning_rate": 6.777578409120826e-05, + "loss": 0.0071, + "step": 41510 + }, + { + "grad_norm": 0.06675445288419724, + "learning_rate": 6.776032862795033e-05, + "loss": 0.0068, + "step": 41520 + }, + { + "grad_norm": 0.047639939934015274, + "learning_rate": 6.774487122244956e-05, + "loss": 0.0063, + "step": 41530 + }, + { + "grad_norm": 0.05831020697951317, + "learning_rate": 6.772941187639637e-05, + "loss": 0.0067, + "step": 41540 + }, + { + "grad_norm": 0.07377860695123672, + "learning_rate": 6.771395059148134e-05, + "loss": 0.0075, + "step": 41550 + }, + { + "grad_norm": 0.05056414008140564, + "learning_rate": 6.76984873693953e-05, + "loss": 0.0063, + "step": 41560 + }, + { + "grad_norm": 0.05438686162233353, + "learning_rate": 6.768302221182931e-05, + "loss": 0.0084, + "step": 41570 + }, + { + "grad_norm": 0.07100310176610947, + "learning_rate": 6.766755512047457e-05, + "loss": 0.0083, + "step": 41580 + }, + { + "grad_norm": 0.05460037663578987, + "learning_rate": 6.765208609702259e-05, + "loss": 0.0074, + "step": 41590 + }, + { + "grad_norm": 0.07307857275009155, + "learning_rate": 6.763661514316499e-05, + "loss": 0.0087, + "step": 41600 + }, + { + "grad_norm": 0.06387989968061447, + "learning_rate": 6.76211422605937e-05, + "loss": 0.0058, + "step": 41610 + }, + { + "grad_norm": 0.055224668234586716, + "learning_rate": 6.760566745100076e-05, + "loss": 0.0063, + "step": 41620 + }, + { + "grad_norm": 0.09157317876815796, + "learning_rate": 6.75901907160785e-05, + "loss": 0.0082, + "step": 41630 + }, + { + "grad_norm": 0.07877278327941895, + "learning_rate": 6.757471205751943e-05, + "loss": 0.0076, + "step": 41640 + }, + { + "grad_norm": 0.054986048489809036, + "learning_rate": 6.75592314770163e-05, + "loss": 0.0077, + "step": 41650 + }, + { + "grad_norm": 0.04919520765542984, + "learning_rate": 6.7543748976262e-05, + "loss": 0.0055, + "step": 41660 + }, + { + "grad_norm": 0.060383282601833344, + "learning_rate": 6.752826455694968e-05, + "loss": 0.0081, + "step": 41670 + }, + { + "grad_norm": 0.04966985061764717, + "learning_rate": 6.751277822077271e-05, + "loss": 0.0063, + "step": 41680 + }, + { + "grad_norm": 0.07141781598329544, + "learning_rate": 6.749728996942465e-05, + "loss": 0.0068, + "step": 41690 + }, + { + "grad_norm": 0.04224513843655586, + "learning_rate": 6.748179980459924e-05, + "loss": 0.0078, + "step": 41700 + }, + { + "grad_norm": 0.05912189558148384, + "learning_rate": 6.746630772799052e-05, + "loss": 0.0068, + "step": 41710 + }, + { + "grad_norm": 0.059023600071668625, + "learning_rate": 6.745081374129262e-05, + "loss": 0.0062, + "step": 41720 + }, + { + "grad_norm": 0.033447474241256714, + "learning_rate": 6.743531784619998e-05, + "loss": 0.0056, + "step": 41730 + }, + { + "grad_norm": 0.05546460300683975, + "learning_rate": 6.741982004440719e-05, + "loss": 0.0076, + "step": 41740 + }, + { + "grad_norm": 0.04637811332941055, + "learning_rate": 6.740432033760907e-05, + "loss": 0.0063, + "step": 41750 + }, + { + "grad_norm": 0.028896059840917587, + "learning_rate": 6.738881872750066e-05, + "loss": 0.0076, + "step": 41760 + }, + { + "grad_norm": 0.047442108392715454, + "learning_rate": 6.737331521577715e-05, + "loss": 0.0071, + "step": 41770 + }, + { + "grad_norm": 0.052041638642549515, + "learning_rate": 6.735780980413403e-05, + "loss": 0.0075, + "step": 41780 + }, + { + "grad_norm": 0.05837913230061531, + "learning_rate": 6.734230249426692e-05, + "loss": 0.0086, + "step": 41790 + }, + { + "grad_norm": 0.06638123095035553, + "learning_rate": 6.732679328787168e-05, + "loss": 0.0076, + "step": 41800 + }, + { + "grad_norm": 0.0560302697122097, + "learning_rate": 6.731128218664438e-05, + "loss": 0.0084, + "step": 41810 + }, + { + "grad_norm": 0.04207568243145943, + "learning_rate": 6.729576919228129e-05, + "loss": 0.0072, + "step": 41820 + }, + { + "grad_norm": 0.07121196389198303, + "learning_rate": 6.728025430647888e-05, + "loss": 0.0082, + "step": 41830 + }, + { + "grad_norm": 0.051833078265190125, + "learning_rate": 6.726473753093383e-05, + "loss": 0.0074, + "step": 41840 + }, + { + "grad_norm": 0.0486704558134079, + "learning_rate": 6.724921886734305e-05, + "loss": 0.0072, + "step": 41850 + }, + { + "grad_norm": 0.06911402195692062, + "learning_rate": 6.723369831740363e-05, + "loss": 0.0073, + "step": 41860 + }, + { + "grad_norm": 0.06322810053825378, + "learning_rate": 6.721817588281288e-05, + "loss": 0.007, + "step": 41870 + }, + { + "grad_norm": 0.06203952804207802, + "learning_rate": 6.720265156526828e-05, + "loss": 0.0061, + "step": 41880 + }, + { + "grad_norm": 0.05950020253658295, + "learning_rate": 6.718712536646758e-05, + "loss": 0.007, + "step": 41890 + }, + { + "grad_norm": 0.07113482803106308, + "learning_rate": 6.71715972881087e-05, + "loss": 0.0083, + "step": 41900 + }, + { + "grad_norm": 0.052271127700805664, + "learning_rate": 6.715606733188974e-05, + "loss": 0.0073, + "step": 41910 + }, + { + "grad_norm": 0.057251885533332825, + "learning_rate": 6.714053549950909e-05, + "loss": 0.0091, + "step": 41920 + }, + { + "grad_norm": 0.05351640656590462, + "learning_rate": 6.71250017926652e-05, + "loss": 0.0076, + "step": 41930 + }, + { + "grad_norm": 0.061739541590213776, + "learning_rate": 6.71094662130569e-05, + "loss": 0.0083, + "step": 41940 + }, + { + "grad_norm": 0.05374715104699135, + "learning_rate": 6.709392876238307e-05, + "loss": 0.0071, + "step": 41950 + }, + { + "grad_norm": 0.05591040477156639, + "learning_rate": 6.707838944234289e-05, + "loss": 0.0095, + "step": 41960 + }, + { + "grad_norm": 0.04433199390769005, + "learning_rate": 6.706284825463574e-05, + "loss": 0.0068, + "step": 41970 + }, + { + "grad_norm": 0.056513845920562744, + "learning_rate": 6.704730520096114e-05, + "loss": 0.0067, + "step": 41980 + }, + { + "grad_norm": 0.04676041007041931, + "learning_rate": 6.703176028301888e-05, + "loss": 0.0072, + "step": 41990 + }, + { + "grad_norm": 0.0467195101082325, + "learning_rate": 6.701621350250892e-05, + "loss": 0.0084, + "step": 42000 + }, + { + "grad_norm": 0.06291917711496353, + "learning_rate": 6.700066486113144e-05, + "loss": 0.0063, + "step": 42010 + }, + { + "grad_norm": 0.06342970579862595, + "learning_rate": 6.69851143605868e-05, + "loss": 0.0089, + "step": 42020 + }, + { + "grad_norm": 0.07533004879951477, + "learning_rate": 6.696956200257561e-05, + "loss": 0.008, + "step": 42030 + }, + { + "grad_norm": 0.04822695627808571, + "learning_rate": 6.695400778879863e-05, + "loss": 0.0054, + "step": 42040 + }, + { + "grad_norm": 0.07095114886760712, + "learning_rate": 6.693845172095683e-05, + "loss": 0.0072, + "step": 42050 + }, + { + "grad_norm": 0.07385826110839844, + "learning_rate": 6.692289380075142e-05, + "loss": 0.008, + "step": 42060 + }, + { + "grad_norm": 0.04714265838265419, + "learning_rate": 6.690733402988379e-05, + "loss": 0.0077, + "step": 42070 + }, + { + "grad_norm": 0.07234479486942291, + "learning_rate": 6.689177241005553e-05, + "loss": 0.0083, + "step": 42080 + }, + { + "grad_norm": 0.04022373631596565, + "learning_rate": 6.687620894296844e-05, + "loss": 0.0059, + "step": 42090 + }, + { + "grad_norm": 0.0516703724861145, + "learning_rate": 6.686064363032451e-05, + "loss": 0.0076, + "step": 42100 + }, + { + "grad_norm": 0.04360736533999443, + "learning_rate": 6.684507647382596e-05, + "loss": 0.0082, + "step": 42110 + }, + { + "grad_norm": 0.06273283809423447, + "learning_rate": 6.682950747517513e-05, + "loss": 0.0074, + "step": 42120 + }, + { + "grad_norm": 0.05699149891734123, + "learning_rate": 6.68139366360747e-05, + "loss": 0.009, + "step": 42130 + }, + { + "grad_norm": 0.05170596390962601, + "learning_rate": 6.679836395822744e-05, + "loss": 0.0089, + "step": 42140 + }, + { + "grad_norm": 0.06956782937049866, + "learning_rate": 6.678278944333633e-05, + "loss": 0.0098, + "step": 42150 + }, + { + "grad_norm": 0.0515826940536499, + "learning_rate": 6.676721309310462e-05, + "loss": 0.0078, + "step": 42160 + }, + { + "grad_norm": 0.04026634991168976, + "learning_rate": 6.67516349092357e-05, + "loss": 0.0072, + "step": 42170 + }, + { + "grad_norm": 0.04139690473675728, + "learning_rate": 6.673605489343315e-05, + "loss": 0.0068, + "step": 42180 + }, + { + "grad_norm": 0.069291852414608, + "learning_rate": 6.672047304740081e-05, + "loss": 0.0076, + "step": 42190 + }, + { + "grad_norm": 0.05204644054174423, + "learning_rate": 6.670488937284268e-05, + "loss": 0.0081, + "step": 42200 + }, + { + "grad_norm": 0.0690489187836647, + "learning_rate": 6.668930387146294e-05, + "loss": 0.0073, + "step": 42210 + }, + { + "grad_norm": 0.0709243044257164, + "learning_rate": 6.667371654496605e-05, + "loss": 0.0074, + "step": 42220 + }, + { + "grad_norm": 0.05899015814065933, + "learning_rate": 6.665812739505659e-05, + "loss": 0.0078, + "step": 42230 + }, + { + "grad_norm": 0.06251039355993271, + "learning_rate": 6.664253642343935e-05, + "loss": 0.007, + "step": 42240 + }, + { + "grad_norm": 0.08701679855585098, + "learning_rate": 6.662694363181935e-05, + "loss": 0.0076, + "step": 42250 + }, + { + "grad_norm": 0.06348737329244614, + "learning_rate": 6.66113490219018e-05, + "loss": 0.0077, + "step": 42260 + }, + { + "grad_norm": 0.09389802813529968, + "learning_rate": 6.659575259539207e-05, + "loss": 0.0077, + "step": 42270 + }, + { + "grad_norm": 0.04556003585457802, + "learning_rate": 6.65801543539958e-05, + "loss": 0.0069, + "step": 42280 + }, + { + "grad_norm": 0.045384831726551056, + "learning_rate": 6.65645542994188e-05, + "loss": 0.0064, + "step": 42290 + }, + { + "grad_norm": 0.05430404841899872, + "learning_rate": 6.654895243336702e-05, + "loss": 0.0065, + "step": 42300 + }, + { + "grad_norm": 0.06579568982124329, + "learning_rate": 6.65333487575467e-05, + "loss": 0.0065, + "step": 42310 + }, + { + "grad_norm": 0.06703734397888184, + "learning_rate": 6.651774327366422e-05, + "loss": 0.0091, + "step": 42320 + }, + { + "grad_norm": 0.041833627969026566, + "learning_rate": 6.650213598342613e-05, + "loss": 0.0064, + "step": 42330 + }, + { + "grad_norm": 0.04807272553443909, + "learning_rate": 6.648652688853931e-05, + "loss": 0.0057, + "step": 42340 + }, + { + "grad_norm": 0.035933058708906174, + "learning_rate": 6.647091599071066e-05, + "loss": 0.0071, + "step": 42350 + }, + { + "grad_norm": 0.05051408335566521, + "learning_rate": 6.645530329164743e-05, + "loss": 0.008, + "step": 42360 + }, + { + "grad_norm": 0.05864965170621872, + "learning_rate": 6.643968879305694e-05, + "loss": 0.0067, + "step": 42370 + }, + { + "grad_norm": 0.058963626623153687, + "learning_rate": 6.642407249664683e-05, + "loss": 0.0073, + "step": 42380 + }, + { + "grad_norm": 0.059335898607969284, + "learning_rate": 6.640845440412483e-05, + "loss": 0.0061, + "step": 42390 + }, + { + "grad_norm": 0.08060883730649948, + "learning_rate": 6.639283451719893e-05, + "loss": 0.0068, + "step": 42400 + }, + { + "grad_norm": 0.07224829494953156, + "learning_rate": 6.637721283757729e-05, + "loss": 0.007, + "step": 42410 + }, + { + "grad_norm": 0.07503804564476013, + "learning_rate": 6.636158936696828e-05, + "loss": 0.0076, + "step": 42420 + }, + { + "grad_norm": 0.055558204650878906, + "learning_rate": 6.634596410708047e-05, + "loss": 0.0086, + "step": 42430 + }, + { + "grad_norm": 0.05424121022224426, + "learning_rate": 6.633033705962257e-05, + "loss": 0.0089, + "step": 42440 + }, + { + "grad_norm": 0.06866896897554398, + "learning_rate": 6.631470822630359e-05, + "loss": 0.0075, + "step": 42450 + }, + { + "grad_norm": 0.049507513642311096, + "learning_rate": 6.629907760883263e-05, + "loss": 0.0078, + "step": 42460 + }, + { + "grad_norm": 0.058496441692113876, + "learning_rate": 6.628344520891905e-05, + "loss": 0.0077, + "step": 42470 + }, + { + "grad_norm": 0.05602193623781204, + "learning_rate": 6.62678110282724e-05, + "loss": 0.0075, + "step": 42480 + }, + { + "grad_norm": 0.07149291783571243, + "learning_rate": 6.625217506860237e-05, + "loss": 0.0072, + "step": 42490 + }, + { + "grad_norm": 0.04187968000769615, + "learning_rate": 6.623653733161892e-05, + "loss": 0.0092, + "step": 42500 + }, + { + "grad_norm": 0.048496801406145096, + "learning_rate": 6.622089781903217e-05, + "loss": 0.0086, + "step": 42510 + }, + { + "grad_norm": 0.04818401485681534, + "learning_rate": 6.620525653255241e-05, + "loss": 0.0058, + "step": 42520 + }, + { + "grad_norm": 0.03892850503325462, + "learning_rate": 6.618961347389015e-05, + "loss": 0.0072, + "step": 42530 + }, + { + "grad_norm": 0.05895136669278145, + "learning_rate": 6.617396864475613e-05, + "loss": 0.0072, + "step": 42540 + }, + { + "grad_norm": 0.048454947769641876, + "learning_rate": 6.61583220468612e-05, + "loss": 0.0058, + "step": 42550 + }, + { + "grad_norm": 0.058478012681007385, + "learning_rate": 6.614267368191645e-05, + "loss": 0.0085, + "step": 42560 + }, + { + "grad_norm": 0.04263891279697418, + "learning_rate": 6.61270235516332e-05, + "loss": 0.0063, + "step": 42570 + }, + { + "grad_norm": 0.05295981094241142, + "learning_rate": 6.611137165772287e-05, + "loss": 0.0058, + "step": 42580 + }, + { + "grad_norm": 0.05318630859255791, + "learning_rate": 6.60957180018972e-05, + "loss": 0.0078, + "step": 42590 + }, + { + "grad_norm": 0.06079864501953125, + "learning_rate": 6.608006258586797e-05, + "loss": 0.0081, + "step": 42600 + }, + { + "grad_norm": 0.07881972193717957, + "learning_rate": 6.606440541134731e-05, + "loss": 0.0069, + "step": 42610 + }, + { + "grad_norm": 0.06177901104092598, + "learning_rate": 6.60487464800474e-05, + "loss": 0.0076, + "step": 42620 + }, + { + "grad_norm": 0.07908639311790466, + "learning_rate": 6.603308579368071e-05, + "loss": 0.0072, + "step": 42630 + }, + { + "grad_norm": 0.0544901117682457, + "learning_rate": 6.601742335395987e-05, + "loss": 0.0071, + "step": 42640 + }, + { + "grad_norm": 0.08174995332956314, + "learning_rate": 6.600175916259769e-05, + "loss": 0.0105, + "step": 42650 + }, + { + "grad_norm": 0.059171367436647415, + "learning_rate": 6.598609322130718e-05, + "loss": 0.0097, + "step": 42660 + }, + { + "grad_norm": 0.07428053766489029, + "learning_rate": 6.597042553180155e-05, + "loss": 0.0076, + "step": 42670 + }, + { + "grad_norm": 0.05497381463646889, + "learning_rate": 6.59547560957942e-05, + "loss": 0.0074, + "step": 42680 + }, + { + "grad_norm": 0.05242554470896721, + "learning_rate": 6.59390849149987e-05, + "loss": 0.0083, + "step": 42690 + }, + { + "grad_norm": 0.03966321796178818, + "learning_rate": 6.592341199112886e-05, + "loss": 0.0068, + "step": 42700 + }, + { + "grad_norm": 0.05298098176717758, + "learning_rate": 6.590773732589861e-05, + "loss": 0.007, + "step": 42710 + }, + { + "grad_norm": 0.05991358309984207, + "learning_rate": 6.589206092102213e-05, + "loss": 0.0083, + "step": 42720 + }, + { + "grad_norm": 0.054905965924263, + "learning_rate": 6.587638277821375e-05, + "loss": 0.0066, + "step": 42730 + }, + { + "grad_norm": 0.060843031853437424, + "learning_rate": 6.586070289918803e-05, + "loss": 0.0078, + "step": 42740 + }, + { + "grad_norm": 0.04730408266186714, + "learning_rate": 6.584502128565968e-05, + "loss": 0.0066, + "step": 42750 + }, + { + "grad_norm": 0.05134085565805435, + "learning_rate": 6.582933793934363e-05, + "loss": 0.0074, + "step": 42760 + }, + { + "grad_norm": 0.058902665972709656, + "learning_rate": 6.581365286195498e-05, + "loss": 0.007, + "step": 42770 + }, + { + "grad_norm": 0.03933332860469818, + "learning_rate": 6.579796605520903e-05, + "loss": 0.0069, + "step": 42780 + }, + { + "grad_norm": 0.04825511947274208, + "learning_rate": 6.578227752082127e-05, + "loss": 0.0061, + "step": 42790 + }, + { + "grad_norm": 0.073561891913414, + "learning_rate": 6.576658726050735e-05, + "loss": 0.0077, + "step": 42800 + }, + { + "grad_norm": 0.06769812852144241, + "learning_rate": 6.575089527598316e-05, + "loss": 0.0083, + "step": 42810 + }, + { + "grad_norm": 0.04875003173947334, + "learning_rate": 6.573520156896476e-05, + "loss": 0.0082, + "step": 42820 + }, + { + "grad_norm": 0.09468165785074234, + "learning_rate": 6.571950614116835e-05, + "loss": 0.0076, + "step": 42830 + }, + { + "grad_norm": 0.05886295437812805, + "learning_rate": 6.570380899431039e-05, + "loss": 0.0073, + "step": 42840 + }, + { + "grad_norm": 0.06682241708040237, + "learning_rate": 6.568811013010749e-05, + "loss": 0.0073, + "step": 42850 + }, + { + "grad_norm": 0.0651569589972496, + "learning_rate": 6.567240955027645e-05, + "loss": 0.0075, + "step": 42860 + }, + { + "grad_norm": 0.08856748789548874, + "learning_rate": 6.565670725653427e-05, + "loss": 0.0073, + "step": 42870 + }, + { + "grad_norm": 0.06484676897525787, + "learning_rate": 6.564100325059812e-05, + "loss": 0.0066, + "step": 42880 + }, + { + "grad_norm": 0.05693868547677994, + "learning_rate": 6.562529753418538e-05, + "loss": 0.0076, + "step": 42890 + }, + { + "grad_norm": 0.05249697342514992, + "learning_rate": 6.56095901090136e-05, + "loss": 0.0068, + "step": 42900 + }, + { + "grad_norm": 0.03790966048836708, + "learning_rate": 6.55938809768005e-05, + "loss": 0.0079, + "step": 42910 + }, + { + "grad_norm": 0.05736794322729111, + "learning_rate": 6.557817013926402e-05, + "loss": 0.006, + "step": 42920 + }, + { + "grad_norm": 0.05615324527025223, + "learning_rate": 6.556245759812227e-05, + "loss": 0.0083, + "step": 42930 + }, + { + "grad_norm": 0.04853339493274689, + "learning_rate": 6.554674335509357e-05, + "loss": 0.0077, + "step": 42940 + }, + { + "grad_norm": 0.056057222187519073, + "learning_rate": 6.553102741189638e-05, + "loss": 0.0079, + "step": 42950 + }, + { + "grad_norm": 0.03913949429988861, + "learning_rate": 6.55153097702494e-05, + "loss": 0.007, + "step": 42960 + }, + { + "grad_norm": 0.04082692787051201, + "learning_rate": 6.549959043187146e-05, + "loss": 0.0073, + "step": 42970 + }, + { + "grad_norm": 0.07170411944389343, + "learning_rate": 6.54838693984816e-05, + "loss": 0.0078, + "step": 42980 + }, + { + "grad_norm": 0.047963470220565796, + "learning_rate": 6.546814667179909e-05, + "loss": 0.0064, + "step": 42990 + }, + { + "grad_norm": 0.04903905466198921, + "learning_rate": 6.545242225354328e-05, + "loss": 0.0075, + "step": 43000 + }, + { + "grad_norm": 0.05754467472434044, + "learning_rate": 6.543669614543383e-05, + "loss": 0.0071, + "step": 43010 + }, + { + "grad_norm": 0.046720024198293686, + "learning_rate": 6.542096834919049e-05, + "loss": 0.0089, + "step": 43020 + }, + { + "grad_norm": 0.06574027240276337, + "learning_rate": 6.540523886653324e-05, + "loss": 0.0064, + "step": 43030 + }, + { + "grad_norm": 0.0938122346997261, + "learning_rate": 6.538950769918222e-05, + "loss": 0.007, + "step": 43040 + }, + { + "grad_norm": 0.08000090718269348, + "learning_rate": 6.537377484885779e-05, + "loss": 0.0072, + "step": 43050 + }, + { + "grad_norm": 0.06643830984830856, + "learning_rate": 6.535804031728045e-05, + "loss": 0.0069, + "step": 43060 + }, + { + "grad_norm": 0.05990150198340416, + "learning_rate": 6.534230410617092e-05, + "loss": 0.0071, + "step": 43070 + }, + { + "grad_norm": 0.0454990491271019, + "learning_rate": 6.532656621725007e-05, + "loss": 0.0081, + "step": 43080 + }, + { + "grad_norm": 0.056026991456747055, + "learning_rate": 6.531082665223898e-05, + "loss": 0.0062, + "step": 43090 + }, + { + "grad_norm": 0.05722775682806969, + "learning_rate": 6.529508541285889e-05, + "loss": 0.0083, + "step": 43100 + }, + { + "grad_norm": 0.04317883402109146, + "learning_rate": 6.527934250083128e-05, + "loss": 0.0073, + "step": 43110 + }, + { + "grad_norm": 0.05346916988492012, + "learning_rate": 6.526359791787775e-05, + "loss": 0.0077, + "step": 43120 + }, + { + "grad_norm": 0.04955014958977699, + "learning_rate": 6.524785166572009e-05, + "loss": 0.0077, + "step": 43130 + }, + { + "grad_norm": 0.06336234509944916, + "learning_rate": 6.523210374608029e-05, + "loss": 0.0078, + "step": 43140 + }, + { + "grad_norm": 0.0691562294960022, + "learning_rate": 6.521635416068054e-05, + "loss": 0.0068, + "step": 43150 + }, + { + "grad_norm": 0.06732677668333054, + "learning_rate": 6.520060291124317e-05, + "loss": 0.0078, + "step": 43160 + }, + { + "grad_norm": 0.07210691273212433, + "learning_rate": 6.518484999949072e-05, + "loss": 0.0072, + "step": 43170 + }, + { + "grad_norm": 0.07270727306604385, + "learning_rate": 6.516909542714591e-05, + "loss": 0.0093, + "step": 43180 + }, + { + "grad_norm": 0.05610862746834755, + "learning_rate": 6.515333919593161e-05, + "loss": 0.0086, + "step": 43190 + }, + { + "grad_norm": 0.07852879911661148, + "learning_rate": 6.513758130757094e-05, + "loss": 0.0072, + "step": 43200 + }, + { + "grad_norm": 0.07503876835107803, + "learning_rate": 6.512182176378713e-05, + "loss": 0.0071, + "step": 43210 + }, + { + "grad_norm": 0.07002539187669754, + "learning_rate": 6.510606056630362e-05, + "loss": 0.0073, + "step": 43220 + }, + { + "grad_norm": 0.06173553690314293, + "learning_rate": 6.509029771684405e-05, + "loss": 0.0094, + "step": 43230 + }, + { + "grad_norm": 0.05597817525267601, + "learning_rate": 6.50745332171322e-05, + "loss": 0.006, + "step": 43240 + }, + { + "grad_norm": 0.05419031158089638, + "learning_rate": 6.505876706889207e-05, + "loss": 0.0074, + "step": 43250 + }, + { + "grad_norm": 0.04541458934545517, + "learning_rate": 6.504299927384781e-05, + "loss": 0.0064, + "step": 43260 + }, + { + "grad_norm": 0.05404338985681534, + "learning_rate": 6.502722983372378e-05, + "loss": 0.0081, + "step": 43270 + }, + { + "grad_norm": 0.06274518370628357, + "learning_rate": 6.501145875024446e-05, + "loss": 0.0064, + "step": 43280 + }, + { + "grad_norm": 0.06391624361276627, + "learning_rate": 6.499568602513459e-05, + "loss": 0.0084, + "step": 43290 + }, + { + "grad_norm": 0.081358402967453, + "learning_rate": 6.497991166011903e-05, + "loss": 0.0083, + "step": 43300 + }, + { + "grad_norm": 0.07635452598333359, + "learning_rate": 6.49641356569229e-05, + "loss": 0.0078, + "step": 43310 + }, + { + "grad_norm": 0.05075513944029808, + "learning_rate": 6.494835801727135e-05, + "loss": 0.0064, + "step": 43320 + }, + { + "grad_norm": 0.08694008737802505, + "learning_rate": 6.493257874288986e-05, + "loss": 0.0071, + "step": 43330 + }, + { + "grad_norm": 0.05830146744847298, + "learning_rate": 6.491679783550399e-05, + "loss": 0.0063, + "step": 43340 + }, + { + "grad_norm": 0.03397137671709061, + "learning_rate": 6.490101529683957e-05, + "loss": 0.0067, + "step": 43350 + }, + { + "grad_norm": 0.0622718408703804, + "learning_rate": 6.488523112862248e-05, + "loss": 0.0074, + "step": 43360 + }, + { + "grad_norm": 0.075664222240448, + "learning_rate": 6.486944533257891e-05, + "loss": 0.0069, + "step": 43370 + }, + { + "grad_norm": 0.03544650226831436, + "learning_rate": 6.485365791043517e-05, + "loss": 0.0062, + "step": 43380 + }, + { + "grad_norm": 0.06062491610646248, + "learning_rate": 6.483786886391772e-05, + "loss": 0.0078, + "step": 43390 + }, + { + "grad_norm": 0.03885777294635773, + "learning_rate": 6.482207819475323e-05, + "loss": 0.007, + "step": 43400 + }, + { + "grad_norm": 0.03244316205382347, + "learning_rate": 6.480628590466857e-05, + "loss": 0.0063, + "step": 43410 + }, + { + "grad_norm": 0.04727006331086159, + "learning_rate": 6.479049199539074e-05, + "loss": 0.0064, + "step": 43420 + }, + { + "grad_norm": 0.057055436074733734, + "learning_rate": 6.477469646864693e-05, + "loss": 0.007, + "step": 43430 + }, + { + "grad_norm": 0.04878535866737366, + "learning_rate": 6.475889932616454e-05, + "loss": 0.0081, + "step": 43440 + }, + { + "grad_norm": 0.05594784393906593, + "learning_rate": 6.474310056967111e-05, + "loss": 0.0058, + "step": 43450 + }, + { + "grad_norm": 0.05818909406661987, + "learning_rate": 6.472730020089437e-05, + "loss": 0.0073, + "step": 43460 + }, + { + "grad_norm": 0.04961571469902992, + "learning_rate": 6.471149822156222e-05, + "loss": 0.0077, + "step": 43470 + }, + { + "grad_norm": 0.07470770180225372, + "learning_rate": 6.469569463340275e-05, + "loss": 0.0092, + "step": 43480 + }, + { + "grad_norm": 0.04938609153032303, + "learning_rate": 6.467988943814418e-05, + "loss": 0.0074, + "step": 43490 + }, + { + "grad_norm": 0.050038427114486694, + "learning_rate": 6.4664082637515e-05, + "loss": 0.007, + "step": 43500 + }, + { + "grad_norm": 0.04564812406897545, + "learning_rate": 6.464827423324377e-05, + "loss": 0.0057, + "step": 43510 + }, + { + "grad_norm": 0.053495511412620544, + "learning_rate": 6.463246422705931e-05, + "loss": 0.008, + "step": 43520 + }, + { + "grad_norm": 0.04678389057517052, + "learning_rate": 6.461665262069055e-05, + "loss": 0.0067, + "step": 43530 + }, + { + "grad_norm": 0.0687221884727478, + "learning_rate": 6.460083941586665e-05, + "loss": 0.0076, + "step": 43540 + }, + { + "grad_norm": 0.07434194535017014, + "learning_rate": 6.45850246143169e-05, + "loss": 0.0086, + "step": 43550 + }, + { + "grad_norm": 0.07466459274291992, + "learning_rate": 6.456920821777077e-05, + "loss": 0.0075, + "step": 43560 + }, + { + "grad_norm": 0.0618617907166481, + "learning_rate": 6.455339022795796e-05, + "loss": 0.0074, + "step": 43570 + }, + { + "grad_norm": 0.05270927771925926, + "learning_rate": 6.453757064660825e-05, + "loss": 0.0089, + "step": 43580 + }, + { + "grad_norm": 0.04990934208035469, + "learning_rate": 6.452174947545169e-05, + "loss": 0.0072, + "step": 43590 + }, + { + "grad_norm": 0.07549446821212769, + "learning_rate": 6.450592671621842e-05, + "loss": 0.0066, + "step": 43600 + }, + { + "grad_norm": 0.049133215099573135, + "learning_rate": 6.449010237063882e-05, + "loss": 0.0084, + "step": 43610 + }, + { + "grad_norm": 0.03210590407252312, + "learning_rate": 6.447427644044343e-05, + "loss": 0.0064, + "step": 43620 + }, + { + "grad_norm": 0.04508182406425476, + "learning_rate": 6.44584489273629e-05, + "loss": 0.0058, + "step": 43630 + }, + { + "grad_norm": 0.05750054493546486, + "learning_rate": 6.444261983312817e-05, + "loss": 0.0071, + "step": 43640 + }, + { + "grad_norm": 0.05318000167608261, + "learning_rate": 6.442678915947023e-05, + "loss": 0.007, + "step": 43650 + }, + { + "grad_norm": 0.060811590403318405, + "learning_rate": 6.441095690812032e-05, + "loss": 0.0059, + "step": 43660 + }, + { + "grad_norm": 0.053335968405008316, + "learning_rate": 6.439512308080982e-05, + "loss": 0.0067, + "step": 43670 + }, + { + "grad_norm": 0.06531035155057907, + "learning_rate": 6.43792876792703e-05, + "loss": 0.0078, + "step": 43680 + }, + { + "grad_norm": 0.07661423832178116, + "learning_rate": 6.43634507052335e-05, + "loss": 0.0073, + "step": 43690 + }, + { + "grad_norm": 0.05322694033384323, + "learning_rate": 6.434761216043135e-05, + "loss": 0.0068, + "step": 43700 + }, + { + "grad_norm": 0.038369543850421906, + "learning_rate": 6.433177204659589e-05, + "loss": 0.0073, + "step": 43710 + }, + { + "grad_norm": 0.07659327238798141, + "learning_rate": 6.431593036545938e-05, + "loss": 0.0073, + "step": 43720 + }, + { + "grad_norm": 0.04376114159822464, + "learning_rate": 6.430008711875426e-05, + "loss": 0.0062, + "step": 43730 + }, + { + "grad_norm": 0.0557129792869091, + "learning_rate": 6.42842423082131e-05, + "loss": 0.0066, + "step": 43740 + }, + { + "grad_norm": 0.05467875674366951, + "learning_rate": 6.426839593556868e-05, + "loss": 0.0079, + "step": 43750 + }, + { + "grad_norm": 0.050305984914302826, + "learning_rate": 6.425254800255392e-05, + "loss": 0.0058, + "step": 43760 + }, + { + "grad_norm": 0.0459662526845932, + "learning_rate": 6.423669851090196e-05, + "loss": 0.0078, + "step": 43770 + }, + { + "grad_norm": 0.03316989541053772, + "learning_rate": 6.422084746234604e-05, + "loss": 0.0064, + "step": 43780 + }, + { + "grad_norm": 0.07478824257850647, + "learning_rate": 6.420499485861965e-05, + "loss": 0.008, + "step": 43790 + }, + { + "grad_norm": 0.07679704576730728, + "learning_rate": 6.418914070145636e-05, + "loss": 0.0079, + "step": 43800 + }, + { + "grad_norm": 0.046180497854948044, + "learning_rate": 6.417328499258996e-05, + "loss": 0.0083, + "step": 43810 + }, + { + "grad_norm": 0.05408841744065285, + "learning_rate": 6.415742773375446e-05, + "loss": 0.0073, + "step": 43820 + }, + { + "grad_norm": 0.05512335151433945, + "learning_rate": 6.414156892668393e-05, + "loss": 0.0072, + "step": 43830 + }, + { + "grad_norm": 0.04532119631767273, + "learning_rate": 6.412570857311267e-05, + "loss": 0.0064, + "step": 43840 + }, + { + "grad_norm": 0.03436973690986633, + "learning_rate": 6.410984667477518e-05, + "loss": 0.0072, + "step": 43850 + }, + { + "grad_norm": 0.0609838105738163, + "learning_rate": 6.409398323340607e-05, + "loss": 0.0071, + "step": 43860 + }, + { + "grad_norm": 0.05276607349514961, + "learning_rate": 6.407811825074012e-05, + "loss": 0.008, + "step": 43870 + }, + { + "grad_norm": 0.05294394493103027, + "learning_rate": 6.406225172851234e-05, + "loss": 0.0064, + "step": 43880 + }, + { + "grad_norm": 0.05683840438723564, + "learning_rate": 6.404638366845786e-05, + "loss": 0.0074, + "step": 43890 + }, + { + "grad_norm": 0.04265894740819931, + "learning_rate": 6.403051407231196e-05, + "loss": 0.0072, + "step": 43900 + }, + { + "grad_norm": 0.06337971240282059, + "learning_rate": 6.401464294181016e-05, + "loss": 0.0084, + "step": 43910 + }, + { + "grad_norm": 0.04644482955336571, + "learning_rate": 6.399877027868808e-05, + "loss": 0.0081, + "step": 43920 + }, + { + "grad_norm": 0.0369589701294899, + "learning_rate": 6.39828960846815e-05, + "loss": 0.0063, + "step": 43930 + }, + { + "grad_norm": 0.048936955630779266, + "learning_rate": 6.396702036152644e-05, + "loss": 0.0082, + "step": 43940 + }, + { + "grad_norm": 0.07964393496513367, + "learning_rate": 6.395114311095905e-05, + "loss": 0.0075, + "step": 43950 + }, + { + "grad_norm": 0.05501263961195946, + "learning_rate": 6.393526433471559e-05, + "loss": 0.0088, + "step": 43960 + }, + { + "grad_norm": 0.049263808876276016, + "learning_rate": 6.391938403453262e-05, + "loss": 0.006, + "step": 43970 + }, + { + "grad_norm": 0.06767433136701584, + "learning_rate": 6.390350221214671e-05, + "loss": 0.0071, + "step": 43980 + }, + { + "grad_norm": 0.05433230847120285, + "learning_rate": 6.38876188692947e-05, + "loss": 0.006, + "step": 43990 + }, + { + "grad_norm": 0.06731005012989044, + "learning_rate": 6.387173400771358e-05, + "loss": 0.0079, + "step": 44000 + }, + { + "grad_norm": 0.07643702626228333, + "learning_rate": 6.385584762914047e-05, + "loss": 0.0068, + "step": 44010 + }, + { + "grad_norm": 0.04473643749952316, + "learning_rate": 6.38399597353127e-05, + "loss": 0.0076, + "step": 44020 + }, + { + "grad_norm": 0.06171203404664993, + "learning_rate": 6.382407032796775e-05, + "loss": 0.0083, + "step": 44030 + }, + { + "grad_norm": 0.05158284306526184, + "learning_rate": 6.380817940884325e-05, + "loss": 0.0062, + "step": 44040 + }, + { + "grad_norm": 0.07817710191011429, + "learning_rate": 6.379228697967702e-05, + "loss": 0.0067, + "step": 44050 + }, + { + "grad_norm": 0.040966760367155075, + "learning_rate": 6.3776393042207e-05, + "loss": 0.007, + "step": 44060 + }, + { + "grad_norm": 0.04167444258928299, + "learning_rate": 6.376049759817137e-05, + "loss": 0.0072, + "step": 44070 + }, + { + "grad_norm": 0.0394289530813694, + "learning_rate": 6.37446006493084e-05, + "loss": 0.0066, + "step": 44080 + }, + { + "grad_norm": 0.049617208540439606, + "learning_rate": 6.372870219735656e-05, + "loss": 0.0062, + "step": 44090 + }, + { + "grad_norm": 0.0379226878285408, + "learning_rate": 6.371280224405452e-05, + "loss": 0.0094, + "step": 44100 + }, + { + "grad_norm": 0.06045668572187424, + "learning_rate": 6.369690079114101e-05, + "loss": 0.0056, + "step": 44110 + }, + { + "grad_norm": 0.07325341552495956, + "learning_rate": 6.368099784035504e-05, + "loss": 0.0082, + "step": 44120 + }, + { + "grad_norm": 0.045886628329753876, + "learning_rate": 6.366509339343572e-05, + "loss": 0.0064, + "step": 44130 + }, + { + "grad_norm": 0.07587558776140213, + "learning_rate": 6.364918745212232e-05, + "loss": 0.009, + "step": 44140 + }, + { + "grad_norm": 0.05190021172165871, + "learning_rate": 6.363328001815431e-05, + "loss": 0.0087, + "step": 44150 + }, + { + "grad_norm": 0.0771910548210144, + "learning_rate": 6.361737109327128e-05, + "loss": 0.0076, + "step": 44160 + }, + { + "grad_norm": 0.057909153401851654, + "learning_rate": 6.360146067921301e-05, + "loss": 0.0059, + "step": 44170 + }, + { + "grad_norm": 0.061261117458343506, + "learning_rate": 6.35855487777195e-05, + "loss": 0.0083, + "step": 44180 + }, + { + "grad_norm": 0.05046169087290764, + "learning_rate": 6.356963539053076e-05, + "loss": 0.0062, + "step": 44190 + }, + { + "grad_norm": 0.049372538924217224, + "learning_rate": 6.355372051938711e-05, + "loss": 0.0083, + "step": 44200 + }, + { + "grad_norm": 0.04172447696328163, + "learning_rate": 6.353780416602894e-05, + "loss": 0.0063, + "step": 44210 + }, + { + "grad_norm": 0.03962530195713043, + "learning_rate": 6.352188633219689e-05, + "loss": 0.0067, + "step": 44220 + }, + { + "grad_norm": 0.04756879806518555, + "learning_rate": 6.350596701963166e-05, + "loss": 0.0081, + "step": 44230 + }, + { + "grad_norm": 0.07053947448730469, + "learning_rate": 6.349004623007419e-05, + "loss": 0.0082, + "step": 44240 + }, + { + "grad_norm": 0.06724164634943008, + "learning_rate": 6.347412396526555e-05, + "loss": 0.008, + "step": 44250 + }, + { + "grad_norm": 0.05094132572412491, + "learning_rate": 6.345820022694696e-05, + "loss": 0.0066, + "step": 44260 + }, + { + "grad_norm": 0.04320726916193962, + "learning_rate": 6.344227501685984e-05, + "loss": 0.0073, + "step": 44270 + }, + { + "grad_norm": 0.07847516983747482, + "learning_rate": 6.342634833674572e-05, + "loss": 0.0079, + "step": 44280 + }, + { + "grad_norm": 0.07032286375761032, + "learning_rate": 6.341042018834635e-05, + "loss": 0.0069, + "step": 44290 + }, + { + "grad_norm": 0.06395560503005981, + "learning_rate": 6.339449057340359e-05, + "loss": 0.0072, + "step": 44300 + }, + { + "grad_norm": 0.06929591298103333, + "learning_rate": 6.337855949365945e-05, + "loss": 0.0078, + "step": 44310 + }, + { + "grad_norm": 0.05548369139432907, + "learning_rate": 6.336262695085619e-05, + "loss": 0.0084, + "step": 44320 + }, + { + "grad_norm": 0.06246302276849747, + "learning_rate": 6.334669294673612e-05, + "loss": 0.0091, + "step": 44330 + }, + { + "grad_norm": 0.05454406142234802, + "learning_rate": 6.333075748304179e-05, + "loss": 0.0064, + "step": 44340 + }, + { + "grad_norm": 0.04950999841094017, + "learning_rate": 6.331482056151585e-05, + "loss": 0.0051, + "step": 44350 + }, + { + "grad_norm": 0.053262144327163696, + "learning_rate": 6.329888218390117e-05, + "loss": 0.0063, + "step": 44360 + }, + { + "grad_norm": 0.04223521426320076, + "learning_rate": 6.328294235194072e-05, + "loss": 0.0056, + "step": 44370 + }, + { + "grad_norm": 0.04157620295882225, + "learning_rate": 6.326700106737765e-05, + "loss": 0.0069, + "step": 44380 + }, + { + "grad_norm": 0.062071543186903, + "learning_rate": 6.325105833195531e-05, + "loss": 0.0078, + "step": 44390 + }, + { + "grad_norm": 0.043643832206726074, + "learning_rate": 6.323511414741715e-05, + "loss": 0.0078, + "step": 44400 + }, + { + "grad_norm": 0.04895438626408577, + "learning_rate": 6.321916851550678e-05, + "loss": 0.0064, + "step": 44410 + }, + { + "grad_norm": 0.030691925436258316, + "learning_rate": 6.320322143796806e-05, + "loss": 0.0072, + "step": 44420 + }, + { + "grad_norm": 0.056823644787073135, + "learning_rate": 6.318727291654488e-05, + "loss": 0.0068, + "step": 44430 + }, + { + "grad_norm": 0.0677594467997551, + "learning_rate": 6.317132295298134e-05, + "loss": 0.0066, + "step": 44440 + }, + { + "grad_norm": 0.04835272952914238, + "learning_rate": 6.315537154902173e-05, + "loss": 0.0068, + "step": 44450 + }, + { + "grad_norm": 0.05220544710755348, + "learning_rate": 6.313941870641048e-05, + "loss": 0.0084, + "step": 44460 + }, + { + "grad_norm": 0.04423655569553375, + "learning_rate": 6.312346442689214e-05, + "loss": 0.0067, + "step": 44470 + }, + { + "grad_norm": 0.07352080196142197, + "learning_rate": 6.310750871221147e-05, + "loss": 0.008, + "step": 44480 + }, + { + "grad_norm": 0.08335172384977341, + "learning_rate": 6.309155156411335e-05, + "loss": 0.0107, + "step": 44490 + }, + { + "grad_norm": 0.05220252275466919, + "learning_rate": 6.307559298434284e-05, + "loss": 0.0073, + "step": 44500 + }, + { + "grad_norm": 0.031318772584199905, + "learning_rate": 6.305963297464512e-05, + "loss": 0.008, + "step": 44510 + }, + { + "grad_norm": 0.06150548532605171, + "learning_rate": 6.304367153676561e-05, + "loss": 0.0075, + "step": 44520 + }, + { + "grad_norm": 0.06010236218571663, + "learning_rate": 6.302770867244978e-05, + "loss": 0.0065, + "step": 44530 + }, + { + "grad_norm": 0.0732431709766388, + "learning_rate": 6.301174438344328e-05, + "loss": 0.009, + "step": 44540 + }, + { + "grad_norm": 0.05492916703224182, + "learning_rate": 6.299577867149202e-05, + "loss": 0.0069, + "step": 44550 + }, + { + "grad_norm": 0.0491560734808445, + "learning_rate": 6.297981153834192e-05, + "loss": 0.007, + "step": 44560 + }, + { + "grad_norm": 0.07921849191188812, + "learning_rate": 6.296384298573916e-05, + "loss": 0.0065, + "step": 44570 + }, + { + "grad_norm": 0.07495097070932388, + "learning_rate": 6.294787301543001e-05, + "loss": 0.0072, + "step": 44580 + }, + { + "grad_norm": 0.047593407332897186, + "learning_rate": 6.293190162916095e-05, + "loss": 0.009, + "step": 44590 + }, + { + "grad_norm": 0.05583319067955017, + "learning_rate": 6.291592882867855e-05, + "loss": 0.0082, + "step": 44600 + }, + { + "grad_norm": 0.05633173882961273, + "learning_rate": 6.28999546157296e-05, + "loss": 0.0075, + "step": 44610 + }, + { + "grad_norm": 0.06618383526802063, + "learning_rate": 6.288397899206102e-05, + "loss": 0.0072, + "step": 44620 + }, + { + "grad_norm": 0.04830862209200859, + "learning_rate": 6.286800195941984e-05, + "loss": 0.0069, + "step": 44630 + }, + { + "grad_norm": 0.09185205399990082, + "learning_rate": 6.285202351955334e-05, + "loss": 0.0082, + "step": 44640 + }, + { + "grad_norm": 0.0586894229054451, + "learning_rate": 6.283604367420887e-05, + "loss": 0.0064, + "step": 44650 + }, + { + "grad_norm": 0.045428745448589325, + "learning_rate": 6.282006242513394e-05, + "loss": 0.0065, + "step": 44660 + }, + { + "grad_norm": 0.04904564470052719, + "learning_rate": 6.280407977407628e-05, + "loss": 0.0073, + "step": 44670 + }, + { + "grad_norm": 0.04811546579003334, + "learning_rate": 6.27880957227837e-05, + "loss": 0.0068, + "step": 44680 + }, + { + "grad_norm": 0.05564416944980621, + "learning_rate": 6.27721102730042e-05, + "loss": 0.0077, + "step": 44690 + }, + { + "grad_norm": 0.06043921038508415, + "learning_rate": 6.275612342648592e-05, + "loss": 0.0071, + "step": 44700 + }, + { + "grad_norm": 0.04473031312227249, + "learning_rate": 6.274013518497716e-05, + "loss": 0.0063, + "step": 44710 + }, + { + "grad_norm": 0.025890972465276718, + "learning_rate": 6.272414555022636e-05, + "loss": 0.0059, + "step": 44720 + }, + { + "grad_norm": 0.045023512095212936, + "learning_rate": 6.270815452398215e-05, + "loss": 0.0066, + "step": 44730 + }, + { + "grad_norm": 0.0303477942943573, + "learning_rate": 6.269216210799326e-05, + "loss": 0.0075, + "step": 44740 + }, + { + "grad_norm": 0.03013383410871029, + "learning_rate": 6.26761683040086e-05, + "loss": 0.0056, + "step": 44750 + }, + { + "grad_norm": 0.0639699399471283, + "learning_rate": 6.266017311377723e-05, + "loss": 0.0085, + "step": 44760 + }, + { + "grad_norm": 0.051675599068403244, + "learning_rate": 6.264417653904839e-05, + "loss": 0.0078, + "step": 44770 + }, + { + "grad_norm": 0.044346533715724945, + "learning_rate": 6.262817858157139e-05, + "loss": 0.0066, + "step": 44780 + }, + { + "grad_norm": 0.04160953313112259, + "learning_rate": 6.261217924309576e-05, + "loss": 0.0057, + "step": 44790 + }, + { + "grad_norm": 0.05225401744246483, + "learning_rate": 6.259617852537118e-05, + "loss": 0.0084, + "step": 44800 + }, + { + "grad_norm": 0.04684465751051903, + "learning_rate": 6.258017643014747e-05, + "loss": 0.0071, + "step": 44810 + }, + { + "grad_norm": 0.04493929073214531, + "learning_rate": 6.256417295917456e-05, + "loss": 0.0066, + "step": 44820 + }, + { + "grad_norm": 0.05146993696689606, + "learning_rate": 6.254816811420258e-05, + "loss": 0.0076, + "step": 44830 + }, + { + "grad_norm": 0.06138813868165016, + "learning_rate": 6.253216189698183e-05, + "loss": 0.0097, + "step": 44840 + }, + { + "grad_norm": 0.056669969111680984, + "learning_rate": 6.251615430926267e-05, + "loss": 0.0079, + "step": 44850 + }, + { + "grad_norm": 0.06327902525663376, + "learning_rate": 6.25001453527957e-05, + "loss": 0.0076, + "step": 44860 + }, + { + "grad_norm": 0.06949151307344437, + "learning_rate": 6.248413502933164e-05, + "loss": 0.0077, + "step": 44870 + }, + { + "grad_norm": 0.06419124454259872, + "learning_rate": 6.246812334062133e-05, + "loss": 0.0088, + "step": 44880 + }, + { + "grad_norm": 0.04673193767666817, + "learning_rate": 6.245211028841579e-05, + "loss": 0.007, + "step": 44890 + }, + { + "grad_norm": 0.04418428987264633, + "learning_rate": 6.24360958744662e-05, + "loss": 0.0072, + "step": 44900 + }, + { + "grad_norm": 0.05226394161581993, + "learning_rate": 6.242008010052387e-05, + "loss": 0.0089, + "step": 44910 + }, + { + "grad_norm": 0.07916418462991714, + "learning_rate": 6.240406296834024e-05, + "loss": 0.0071, + "step": 44920 + }, + { + "grad_norm": 0.04250669106841087, + "learning_rate": 6.238804447966694e-05, + "loss": 0.0053, + "step": 44930 + }, + { + "grad_norm": 0.054764777421951294, + "learning_rate": 6.237202463625573e-05, + "loss": 0.0062, + "step": 44940 + }, + { + "grad_norm": 0.049711454659700394, + "learning_rate": 6.235600343985848e-05, + "loss": 0.0079, + "step": 44950 + }, + { + "grad_norm": 0.04474499449133873, + "learning_rate": 6.233998089222729e-05, + "loss": 0.0064, + "step": 44960 + }, + { + "grad_norm": 0.07001730054616928, + "learning_rate": 6.232395699511433e-05, + "loss": 0.008, + "step": 44970 + }, + { + "grad_norm": 0.03076932579278946, + "learning_rate": 6.230793175027199e-05, + "loss": 0.0078, + "step": 44980 + }, + { + "grad_norm": 0.04272817075252533, + "learning_rate": 6.22919051594527e-05, + "loss": 0.007, + "step": 44990 + }, + { + "grad_norm": 0.05058933421969414, + "learning_rate": 6.227587722440917e-05, + "loss": 0.006, + "step": 45000 + }, + { + "grad_norm": 0.05281269550323486, + "learning_rate": 6.225984794689414e-05, + "loss": 0.008, + "step": 45010 + }, + { + "grad_norm": 0.04957704618573189, + "learning_rate": 6.22438173286606e-05, + "loss": 0.0075, + "step": 45020 + }, + { + "grad_norm": 0.06581113487482071, + "learning_rate": 6.222778537146159e-05, + "loss": 0.0064, + "step": 45030 + }, + { + "grad_norm": 0.08127663284540176, + "learning_rate": 6.221175207705037e-05, + "loss": 0.0078, + "step": 45040 + }, + { + "grad_norm": 0.07411782443523407, + "learning_rate": 6.219571744718027e-05, + "loss": 0.0094, + "step": 45050 + }, + { + "grad_norm": 0.0635618269443512, + "learning_rate": 6.217968148360487e-05, + "loss": 0.0074, + "step": 45060 + }, + { + "grad_norm": 0.05528765544295311, + "learning_rate": 6.216364418807782e-05, + "loss": 0.007, + "step": 45070 + }, + { + "grad_norm": 0.0555732436478138, + "learning_rate": 6.214760556235292e-05, + "loss": 0.0074, + "step": 45080 + }, + { + "grad_norm": 0.07111828774213791, + "learning_rate": 6.213156560818414e-05, + "loss": 0.0088, + "step": 45090 + }, + { + "grad_norm": 0.06020417809486389, + "learning_rate": 6.211552432732559e-05, + "loss": 0.0071, + "step": 45100 + }, + { + "grad_norm": 0.06252002716064453, + "learning_rate": 6.20994817215315e-05, + "loss": 0.0078, + "step": 45110 + }, + { + "grad_norm": 0.09828368574380875, + "learning_rate": 6.208343779255628e-05, + "loss": 0.0095, + "step": 45120 + }, + { + "grad_norm": 0.0649108961224556, + "learning_rate": 6.206739254215449e-05, + "loss": 0.0067, + "step": 45130 + }, + { + "grad_norm": 0.05020270124077797, + "learning_rate": 6.205134597208077e-05, + "loss": 0.0091, + "step": 45140 + }, + { + "grad_norm": 0.05032052472233772, + "learning_rate": 6.203529808409e-05, + "loss": 0.0087, + "step": 45150 + }, + { + "grad_norm": 0.05580511689186096, + "learning_rate": 6.201924887993712e-05, + "loss": 0.0072, + "step": 45160 + }, + { + "grad_norm": 0.04589800164103508, + "learning_rate": 6.200319836137725e-05, + "loss": 0.007, + "step": 45170 + }, + { + "grad_norm": 0.06520179659128189, + "learning_rate": 6.198714653016565e-05, + "loss": 0.0069, + "step": 45180 + }, + { + "grad_norm": 0.05596523359417915, + "learning_rate": 6.197109338805774e-05, + "loss": 0.0065, + "step": 45190 + }, + { + "grad_norm": 0.0366889163851738, + "learning_rate": 6.195503893680903e-05, + "loss": 0.0063, + "step": 45200 + }, + { + "grad_norm": 0.04734932258725166, + "learning_rate": 6.193898317817524e-05, + "loss": 0.0063, + "step": 45210 + }, + { + "grad_norm": 0.05096011981368065, + "learning_rate": 6.192292611391221e-05, + "loss": 0.0073, + "step": 45220 + }, + { + "grad_norm": 0.0376410037279129, + "learning_rate": 6.190686774577591e-05, + "loss": 0.0089, + "step": 45230 + }, + { + "grad_norm": 0.05384059250354767, + "learning_rate": 6.189080807552245e-05, + "loss": 0.0071, + "step": 45240 + }, + { + "grad_norm": 0.05033281445503235, + "learning_rate": 6.187474710490809e-05, + "loss": 0.0065, + "step": 45250 + }, + { + "grad_norm": 0.0565660186111927, + "learning_rate": 6.185868483568926e-05, + "loss": 0.0064, + "step": 45260 + }, + { + "grad_norm": 0.08879884332418442, + "learning_rate": 6.184262126962245e-05, + "loss": 0.0073, + "step": 45270 + }, + { + "grad_norm": 0.09148086607456207, + "learning_rate": 6.182655640846442e-05, + "loss": 0.006, + "step": 45280 + }, + { + "grad_norm": 0.06779436022043228, + "learning_rate": 6.181049025397196e-05, + "loss": 0.0061, + "step": 45290 + }, + { + "grad_norm": 0.04418351873755455, + "learning_rate": 6.179442280790202e-05, + "loss": 0.008, + "step": 45300 + }, + { + "grad_norm": 0.04597906395792961, + "learning_rate": 6.177835407201174e-05, + "loss": 0.0081, + "step": 45310 + }, + { + "grad_norm": 0.04057233780622482, + "learning_rate": 6.176228404805839e-05, + "loss": 0.0065, + "step": 45320 + }, + { + "grad_norm": 0.05747630074620247, + "learning_rate": 6.174621273779932e-05, + "loss": 0.0085, + "step": 45330 + }, + { + "grad_norm": 0.07970195263624191, + "learning_rate": 6.17301401429921e-05, + "loss": 0.0066, + "step": 45340 + }, + { + "grad_norm": 0.059449512511491776, + "learning_rate": 6.171406626539436e-05, + "loss": 0.0057, + "step": 45350 + }, + { + "grad_norm": 0.08069461584091187, + "learning_rate": 6.169799110676398e-05, + "loss": 0.0052, + "step": 45360 + }, + { + "grad_norm": 0.09395992755889893, + "learning_rate": 6.168191466885885e-05, + "loss": 0.0068, + "step": 45370 + }, + { + "grad_norm": 0.06239261105656624, + "learning_rate": 6.166583695343711e-05, + "loss": 0.0085, + "step": 45380 + }, + { + "grad_norm": 0.045309145003557205, + "learning_rate": 6.164975796225698e-05, + "loss": 0.0061, + "step": 45390 + }, + { + "grad_norm": 0.057129424065351486, + "learning_rate": 6.163367769707683e-05, + "loss": 0.008, + "step": 45400 + }, + { + "grad_norm": 0.05495647341012955, + "learning_rate": 6.161759615965519e-05, + "loss": 0.0071, + "step": 45410 + }, + { + "grad_norm": 0.04568574205040932, + "learning_rate": 6.16015133517507e-05, + "loss": 0.0062, + "step": 45420 + }, + { + "grad_norm": 0.054690174758434296, + "learning_rate": 6.158542927512214e-05, + "loss": 0.0063, + "step": 45430 + }, + { + "grad_norm": 0.05403212457895279, + "learning_rate": 6.156934393152846e-05, + "loss": 0.0061, + "step": 45440 + }, + { + "grad_norm": 0.05063396319746971, + "learning_rate": 6.15532573227287e-05, + "loss": 0.006, + "step": 45450 + }, + { + "grad_norm": 0.05993906408548355, + "learning_rate": 6.153716945048212e-05, + "loss": 0.0064, + "step": 45460 + }, + { + "grad_norm": 0.08090988546609879, + "learning_rate": 6.152108031654802e-05, + "loss": 0.0084, + "step": 45470 + }, + { + "grad_norm": 0.06943440437316895, + "learning_rate": 6.15049899226859e-05, + "loss": 0.0069, + "step": 45480 + }, + { + "grad_norm": 0.10574271529912949, + "learning_rate": 6.148889827065537e-05, + "loss": 0.0068, + "step": 45490 + }, + { + "grad_norm": 0.055917106568813324, + "learning_rate": 6.147280536221622e-05, + "loss": 0.0084, + "step": 45500 + }, + { + "grad_norm": 0.05002406984567642, + "learning_rate": 6.145671119912832e-05, + "loss": 0.0065, + "step": 45510 + }, + { + "grad_norm": 0.0511082261800766, + "learning_rate": 6.144061578315169e-05, + "loss": 0.0069, + "step": 45520 + }, + { + "grad_norm": 0.06036490574479103, + "learning_rate": 6.142451911604654e-05, + "loss": 0.0065, + "step": 45530 + }, + { + "grad_norm": 0.06274447590112686, + "learning_rate": 6.140842119957315e-05, + "loss": 0.0062, + "step": 45540 + }, + { + "grad_norm": 0.071309395134449, + "learning_rate": 6.139232203549197e-05, + "loss": 0.0073, + "step": 45550 + }, + { + "grad_norm": 0.06268247216939926, + "learning_rate": 6.137622162556357e-05, + "loss": 0.0072, + "step": 45560 + }, + { + "grad_norm": 0.06636428087949753, + "learning_rate": 6.13601199715487e-05, + "loss": 0.0061, + "step": 45570 + }, + { + "grad_norm": 0.06971890479326248, + "learning_rate": 6.134401707520816e-05, + "loss": 0.007, + "step": 45580 + }, + { + "grad_norm": 0.03342646360397339, + "learning_rate": 6.132791293830298e-05, + "loss": 0.0064, + "step": 45590 + }, + { + "grad_norm": 0.05130387097597122, + "learning_rate": 6.131180756259428e-05, + "loss": 0.0065, + "step": 45600 + }, + { + "grad_norm": 0.0357397198677063, + "learning_rate": 6.129570094984331e-05, + "loss": 0.0059, + "step": 45610 + }, + { + "grad_norm": 0.04476381093263626, + "learning_rate": 6.127959310181145e-05, + "loss": 0.0062, + "step": 45620 + }, + { + "grad_norm": 0.04194534569978714, + "learning_rate": 6.126348402026026e-05, + "loss": 0.0078, + "step": 45630 + }, + { + "grad_norm": 0.038813043385744095, + "learning_rate": 6.12473737069514e-05, + "loss": 0.0063, + "step": 45640 + }, + { + "grad_norm": 0.03699970990419388, + "learning_rate": 6.123126216364665e-05, + "loss": 0.0066, + "step": 45650 + }, + { + "grad_norm": 0.052195291966199875, + "learning_rate": 6.121514939210797e-05, + "loss": 0.0057, + "step": 45660 + }, + { + "grad_norm": 0.06943801045417786, + "learning_rate": 6.119903539409741e-05, + "loss": 0.0079, + "step": 45670 + }, + { + "grad_norm": 0.05117044970393181, + "learning_rate": 6.118292017137716e-05, + "loss": 0.0067, + "step": 45680 + }, + { + "grad_norm": 0.07073280215263367, + "learning_rate": 6.116680372570959e-05, + "loss": 0.0067, + "step": 45690 + }, + { + "grad_norm": 0.07063701003789902, + "learning_rate": 6.115068605885713e-05, + "loss": 0.0069, + "step": 45700 + }, + { + "grad_norm": 0.052199024707078934, + "learning_rate": 6.113456717258243e-05, + "loss": 0.0064, + "step": 45710 + }, + { + "grad_norm": 0.046241797506809235, + "learning_rate": 6.11184470686482e-05, + "loss": 0.0058, + "step": 45720 + }, + { + "grad_norm": 0.048183463513851166, + "learning_rate": 6.110232574881731e-05, + "loss": 0.0085, + "step": 45730 + }, + { + "grad_norm": 0.05999916419386864, + "learning_rate": 6.108620321485277e-05, + "loss": 0.008, + "step": 45740 + }, + { + "grad_norm": 0.04630109295248985, + "learning_rate": 6.107007946851773e-05, + "loss": 0.0063, + "step": 45750 + }, + { + "grad_norm": 0.0676128938794136, + "learning_rate": 6.105395451157542e-05, + "loss": 0.0079, + "step": 45760 + }, + { + "grad_norm": 0.0515739843249321, + "learning_rate": 6.103782834578928e-05, + "loss": 0.0069, + "step": 45770 + }, + { + "grad_norm": 0.06777254492044449, + "learning_rate": 6.102170097292281e-05, + "loss": 0.0082, + "step": 45780 + }, + { + "grad_norm": 0.07285195589065552, + "learning_rate": 6.1005572394739716e-05, + "loss": 0.0066, + "step": 45790 + }, + { + "grad_norm": 0.06251756846904755, + "learning_rate": 6.0989442613003765e-05, + "loss": 0.0073, + "step": 45800 + }, + { + "grad_norm": 0.04656178504228592, + "learning_rate": 6.097331162947888e-05, + "loss": 0.0075, + "step": 45810 + }, + { + "grad_norm": 0.0529671348631382, + "learning_rate": 6.095717944592914e-05, + "loss": 0.0068, + "step": 45820 + }, + { + "grad_norm": 0.0868285745382309, + "learning_rate": 6.094104606411873e-05, + "loss": 0.0078, + "step": 45830 + }, + { + "grad_norm": 0.07659400999546051, + "learning_rate": 6.0924911485811966e-05, + "loss": 0.0061, + "step": 45840 + }, + { + "grad_norm": 0.06347058713436127, + "learning_rate": 6.090877571277331e-05, + "loss": 0.008, + "step": 45850 + }, + { + "grad_norm": 0.036952052265405655, + "learning_rate": 6.089263874676736e-05, + "loss": 0.0057, + "step": 45860 + }, + { + "grad_norm": 0.04958666115999222, + "learning_rate": 6.0876500589558796e-05, + "loss": 0.0072, + "step": 45870 + }, + { + "grad_norm": 0.041143134236335754, + "learning_rate": 6.086036124291248e-05, + "loss": 0.0063, + "step": 45880 + }, + { + "grad_norm": 0.06702110171318054, + "learning_rate": 6.084422070859339e-05, + "loss": 0.0088, + "step": 45890 + }, + { + "grad_norm": 0.050440069288015366, + "learning_rate": 6.082807898836663e-05, + "loss": 0.0078, + "step": 45900 + }, + { + "grad_norm": 0.07047853618860245, + "learning_rate": 6.081193608399742e-05, + "loss": 0.0063, + "step": 45910 + }, + { + "grad_norm": 0.07522844523191452, + "learning_rate": 6.0795791997251164e-05, + "loss": 0.0081, + "step": 45920 + }, + { + "grad_norm": 0.04473874345421791, + "learning_rate": 6.0779646729893294e-05, + "loss": 0.0082, + "step": 45930 + }, + { + "grad_norm": 0.05355091020464897, + "learning_rate": 6.0763500283689476e-05, + "loss": 0.0067, + "step": 45940 + }, + { + "grad_norm": 0.05489353463053703, + "learning_rate": 6.0747352660405455e-05, + "loss": 0.007, + "step": 45950 + }, + { + "grad_norm": 0.05934766307473183, + "learning_rate": 6.073120386180709e-05, + "loss": 0.0074, + "step": 45960 + }, + { + "grad_norm": 0.05651751533150673, + "learning_rate": 6.0715053889660425e-05, + "loss": 0.0058, + "step": 45970 + }, + { + "grad_norm": 0.06387175619602203, + "learning_rate": 6.069890274573157e-05, + "loss": 0.0072, + "step": 45980 + }, + { + "grad_norm": 0.06624062359333038, + "learning_rate": 6.068275043178679e-05, + "loss": 0.0064, + "step": 45990 + }, + { + "grad_norm": 0.0456022247672081, + "learning_rate": 6.066659694959248e-05, + "loss": 0.0056, + "step": 46000 + }, + { + "grad_norm": 0.05124369263648987, + "learning_rate": 6.0650442300915176e-05, + "loss": 0.0059, + "step": 46010 + }, + { + "grad_norm": 0.05241374298930168, + "learning_rate": 6.063428648752152e-05, + "loss": 0.0061, + "step": 46020 + }, + { + "grad_norm": 0.050078701227903366, + "learning_rate": 6.0618129511178266e-05, + "loss": 0.0065, + "step": 46030 + }, + { + "grad_norm": 0.041677169501781464, + "learning_rate": 6.060197137365234e-05, + "loss": 0.0065, + "step": 46040 + }, + { + "grad_norm": 0.05700084567070007, + "learning_rate": 6.058581207671077e-05, + "loss": 0.0072, + "step": 46050 + }, + { + "grad_norm": 0.05052364245057106, + "learning_rate": 6.056965162212072e-05, + "loss": 0.0058, + "step": 46060 + }, + { + "grad_norm": 0.04126855358481407, + "learning_rate": 6.055349001164943e-05, + "loss": 0.0066, + "step": 46070 + }, + { + "grad_norm": 0.041363995522260666, + "learning_rate": 6.0537327247064347e-05, + "loss": 0.0056, + "step": 46080 + }, + { + "grad_norm": 0.04933146759867668, + "learning_rate": 6.0521163330133014e-05, + "loss": 0.0063, + "step": 46090 + }, + { + "grad_norm": 0.06528108566999435, + "learning_rate": 6.050499826262306e-05, + "loss": 0.0058, + "step": 46100 + }, + { + "grad_norm": 0.04185983166098595, + "learning_rate": 6.0488832046302294e-05, + "loss": 0.0063, + "step": 46110 + }, + { + "grad_norm": 0.07279111444950104, + "learning_rate": 6.0472664682938626e-05, + "loss": 0.0055, + "step": 46120 + }, + { + "grad_norm": 0.04376688972115517, + "learning_rate": 6.045649617430009e-05, + "loss": 0.0058, + "step": 46130 + }, + { + "grad_norm": 0.04916579648852348, + "learning_rate": 6.0440326522154866e-05, + "loss": 0.0059, + "step": 46140 + }, + { + "grad_norm": 0.0678071677684784, + "learning_rate": 6.0424155728271224e-05, + "loss": 0.0068, + "step": 46150 + }, + { + "grad_norm": 0.04478305205702782, + "learning_rate": 6.040798379441758e-05, + "loss": 0.0079, + "step": 46160 + }, + { + "grad_norm": 0.05925637483596802, + "learning_rate": 6.0391810722362485e-05, + "loss": 0.0057, + "step": 46170 + }, + { + "grad_norm": 0.048384394496679306, + "learning_rate": 6.037563651387458e-05, + "loss": 0.0059, + "step": 46180 + }, + { + "grad_norm": 0.06752736121416092, + "learning_rate": 6.0359461170722666e-05, + "loss": 0.0065, + "step": 46190 + }, + { + "grad_norm": 0.07585429400205612, + "learning_rate": 6.034328469467566e-05, + "loss": 0.0071, + "step": 46200 + }, + { + "grad_norm": 0.08919773995876312, + "learning_rate": 6.0327107087502596e-05, + "loss": 0.0068, + "step": 46210 + }, + { + "grad_norm": 0.0569613054394722, + "learning_rate": 6.031092835097262e-05, + "loss": 0.006, + "step": 46220 + }, + { + "grad_norm": 0.05337390676140785, + "learning_rate": 6.0294748486855024e-05, + "loss": 0.0059, + "step": 46230 + }, + { + "grad_norm": 0.05456452816724777, + "learning_rate": 6.0278567496919216e-05, + "loss": 0.0078, + "step": 46240 + }, + { + "grad_norm": 0.050157349556684494, + "learning_rate": 6.026238538293472e-05, + "loss": 0.0059, + "step": 46250 + }, + { + "grad_norm": 0.07463489472866058, + "learning_rate": 6.024620214667118e-05, + "loss": 0.0061, + "step": 46260 + }, + { + "grad_norm": 0.07840040326118469, + "learning_rate": 6.0230017789898384e-05, + "loss": 0.006, + "step": 46270 + }, + { + "grad_norm": 0.052054405212402344, + "learning_rate": 6.021383231438622e-05, + "loss": 0.0076, + "step": 46280 + }, + { + "grad_norm": 0.05605929717421532, + "learning_rate": 6.0197645721904704e-05, + "loss": 0.0059, + "step": 46290 + }, + { + "grad_norm": 0.05898040905594826, + "learning_rate": 6.0181458014224e-05, + "loss": 0.0048, + "step": 46300 + }, + { + "grad_norm": 0.07242681831121445, + "learning_rate": 6.016526919311435e-05, + "loss": 0.0064, + "step": 46310 + }, + { + "grad_norm": 0.08438113331794739, + "learning_rate": 6.014907926034613e-05, + "loss": 0.0078, + "step": 46320 + }, + { + "grad_norm": 0.05115766450762749, + "learning_rate": 6.0132888217689875e-05, + "loss": 0.0058, + "step": 46330 + }, + { + "grad_norm": 0.06199609488248825, + "learning_rate": 6.01166960669162e-05, + "loss": 0.0068, + "step": 46340 + }, + { + "grad_norm": 0.06064892187714577, + "learning_rate": 6.0100502809795845e-05, + "loss": 0.0063, + "step": 46350 + }, + { + "grad_norm": 0.059124212712049484, + "learning_rate": 6.00843084480997e-05, + "loss": 0.0058, + "step": 46360 + }, + { + "grad_norm": 0.06139913573861122, + "learning_rate": 6.0068112983598736e-05, + "loss": 0.0057, + "step": 46370 + }, + { + "grad_norm": 0.05699080228805542, + "learning_rate": 6.005191641806407e-05, + "loss": 0.0071, + "step": 46380 + }, + { + "grad_norm": 0.04380242899060249, + "learning_rate": 6.003571875326694e-05, + "loss": 0.0056, + "step": 46390 + }, + { + "grad_norm": 0.05875202268362045, + "learning_rate": 6.00195199909787e-05, + "loss": 0.0065, + "step": 46400 + }, + { + "grad_norm": 0.05544586479663849, + "learning_rate": 6.000332013297082e-05, + "loss": 0.006, + "step": 46410 + }, + { + "grad_norm": 0.06455297023057938, + "learning_rate": 5.998711918101487e-05, + "loss": 0.0058, + "step": 46420 + }, + { + "grad_norm": 0.04972798749804497, + "learning_rate": 5.997091713688261e-05, + "loss": 0.0062, + "step": 46430 + }, + { + "grad_norm": 0.051441047340631485, + "learning_rate": 5.9954714002345836e-05, + "loss": 0.0078, + "step": 46440 + }, + { + "grad_norm": 0.06472425162792206, + "learning_rate": 5.993850977917649e-05, + "loss": 0.0064, + "step": 46450 + }, + { + "grad_norm": 0.04403214156627655, + "learning_rate": 5.992230446914667e-05, + "loss": 0.0071, + "step": 46460 + }, + { + "grad_norm": 0.0484006404876709, + "learning_rate": 5.990609807402855e-05, + "loss": 0.0059, + "step": 46470 + }, + { + "grad_norm": 0.055055923759937286, + "learning_rate": 5.988989059559443e-05, + "loss": 0.0084, + "step": 46480 + }, + { + "grad_norm": 0.06196339800953865, + "learning_rate": 5.987368203561675e-05, + "loss": 0.0057, + "step": 46490 + }, + { + "grad_norm": 0.062121689319610596, + "learning_rate": 5.9857472395868055e-05, + "loss": 0.0076, + "step": 46500 + }, + { + "grad_norm": 0.05027635768055916, + "learning_rate": 5.9841261678120983e-05, + "loss": 0.0057, + "step": 46510 + }, + { + "grad_norm": 0.048911165446043015, + "learning_rate": 5.982504988414834e-05, + "loss": 0.0054, + "step": 46520 + }, + { + "grad_norm": 0.054978858679533005, + "learning_rate": 5.9808837015723015e-05, + "loss": 0.0079, + "step": 46530 + }, + { + "grad_norm": 0.04658237472176552, + "learning_rate": 5.9792623074618016e-05, + "loss": 0.0065, + "step": 46540 + }, + { + "grad_norm": 0.048214711248874664, + "learning_rate": 5.977640806260648e-05, + "loss": 0.0081, + "step": 46550 + }, + { + "grad_norm": 0.05639795958995819, + "learning_rate": 5.976019198146166e-05, + "loss": 0.0084, + "step": 46560 + }, + { + "grad_norm": 0.050567131489515305, + "learning_rate": 5.974397483295692e-05, + "loss": 0.0096, + "step": 46570 + }, + { + "grad_norm": 0.0762893557548523, + "learning_rate": 5.972775661886575e-05, + "loss": 0.0065, + "step": 46580 + }, + { + "grad_norm": 0.050433073192834854, + "learning_rate": 5.971153734096173e-05, + "loss": 0.006, + "step": 46590 + }, + { + "grad_norm": 0.060671839863061905, + "learning_rate": 5.969531700101859e-05, + "loss": 0.0085, + "step": 46600 + }, + { + "grad_norm": 0.061003949493169785, + "learning_rate": 5.9679095600810155e-05, + "loss": 0.0056, + "step": 46610 + }, + { + "grad_norm": 0.049094367772340775, + "learning_rate": 5.9662873142110384e-05, + "loss": 0.0048, + "step": 46620 + }, + { + "grad_norm": 0.04588117450475693, + "learning_rate": 5.964664962669333e-05, + "loss": 0.006, + "step": 46630 + }, + { + "grad_norm": 0.05883815512061119, + "learning_rate": 5.9630425056333186e-05, + "loss": 0.0081, + "step": 46640 + }, + { + "grad_norm": 0.07331296056509018, + "learning_rate": 5.961419943280422e-05, + "loss": 0.0072, + "step": 46650 + }, + { + "grad_norm": 0.07353411614894867, + "learning_rate": 5.959797275788087e-05, + "loss": 0.0073, + "step": 46660 + }, + { + "grad_norm": 0.07229164242744446, + "learning_rate": 5.958174503333765e-05, + "loss": 0.0061, + "step": 46670 + }, + { + "grad_norm": 0.03897930309176445, + "learning_rate": 5.9565516260949195e-05, + "loss": 0.0066, + "step": 46680 + }, + { + "grad_norm": 0.02596457675099373, + "learning_rate": 5.9549286442490273e-05, + "loss": 0.0066, + "step": 46690 + }, + { + "grad_norm": 0.03743985295295715, + "learning_rate": 5.953305557973572e-05, + "loss": 0.0055, + "step": 46700 + }, + { + "grad_norm": 0.053316034376621246, + "learning_rate": 5.9516823674460564e-05, + "loss": 0.0062, + "step": 46710 + }, + { + "grad_norm": 0.06315701454877853, + "learning_rate": 5.9500590728439875e-05, + "loss": 0.0067, + "step": 46720 + }, + { + "grad_norm": 0.04645966365933418, + "learning_rate": 5.948435674344886e-05, + "loss": 0.0061, + "step": 46730 + }, + { + "grad_norm": 0.057315826416015625, + "learning_rate": 5.946812172126285e-05, + "loss": 0.0062, + "step": 46740 + }, + { + "grad_norm": 0.06428902596235275, + "learning_rate": 5.945188566365729e-05, + "loss": 0.0067, + "step": 46750 + }, + { + "grad_norm": 0.04625703766942024, + "learning_rate": 5.943564857240773e-05, + "loss": 0.0053, + "step": 46760 + }, + { + "grad_norm": 0.05707825720310211, + "learning_rate": 5.941941044928981e-05, + "loss": 0.0069, + "step": 46770 + }, + { + "grad_norm": 0.06208327040076256, + "learning_rate": 5.940317129607935e-05, + "loss": 0.0071, + "step": 46780 + }, + { + "grad_norm": 0.07078859210014343, + "learning_rate": 5.9386931114552204e-05, + "loss": 0.0067, + "step": 46790 + }, + { + "grad_norm": 0.0564362108707428, + "learning_rate": 5.937068990648438e-05, + "loss": 0.0063, + "step": 46800 + }, + { + "grad_norm": 0.05496051162481308, + "learning_rate": 5.935444767365199e-05, + "loss": 0.0077, + "step": 46810 + }, + { + "grad_norm": 0.04133359342813492, + "learning_rate": 5.933820441783129e-05, + "loss": 0.006, + "step": 46820 + }, + { + "grad_norm": 0.08164665848016739, + "learning_rate": 5.932196014079857e-05, + "loss": 0.0073, + "step": 46830 + }, + { + "grad_norm": 0.07383808493614197, + "learning_rate": 5.930571484433032e-05, + "loss": 0.0101, + "step": 46840 + }, + { + "grad_norm": 0.08557072281837463, + "learning_rate": 5.928946853020309e-05, + "loss": 0.0069, + "step": 46850 + }, + { + "grad_norm": 0.07769991457462311, + "learning_rate": 5.927322120019352e-05, + "loss": 0.0075, + "step": 46860 + }, + { + "grad_norm": 0.039852436631917953, + "learning_rate": 5.925697285607844e-05, + "loss": 0.0062, + "step": 46870 + }, + { + "grad_norm": 0.04914264380931854, + "learning_rate": 5.924072349963472e-05, + "loss": 0.0061, + "step": 46880 + }, + { + "grad_norm": 0.048187073320150375, + "learning_rate": 5.922447313263938e-05, + "loss": 0.0067, + "step": 46890 + }, + { + "grad_norm": 0.05271361395716667, + "learning_rate": 5.920822175686952e-05, + "loss": 0.0067, + "step": 46900 + }, + { + "grad_norm": 0.05029045790433884, + "learning_rate": 5.919196937410239e-05, + "loss": 0.0068, + "step": 46910 + }, + { + "grad_norm": 0.048735491931438446, + "learning_rate": 5.917571598611529e-05, + "loss": 0.0047, + "step": 46920 + }, + { + "grad_norm": 0.044748444110155106, + "learning_rate": 5.9159461594685686e-05, + "loss": 0.0061, + "step": 46930 + }, + { + "grad_norm": 0.04621845483779907, + "learning_rate": 5.914320620159114e-05, + "loss": 0.009, + "step": 46940 + }, + { + "grad_norm": 0.041621893644332886, + "learning_rate": 5.912694980860931e-05, + "loss": 0.0075, + "step": 46950 + }, + { + "grad_norm": 0.0534682497382164, + "learning_rate": 5.9110692417517964e-05, + "loss": 0.0064, + "step": 46960 + }, + { + "grad_norm": 0.044602371752262115, + "learning_rate": 5.9094434030095e-05, + "loss": 0.0068, + "step": 46970 + }, + { + "grad_norm": 0.057420071214437485, + "learning_rate": 5.907817464811841e-05, + "loss": 0.0062, + "step": 46980 + }, + { + "grad_norm": 0.05710258334875107, + "learning_rate": 5.906191427336627e-05, + "loss": 0.0055, + "step": 46990 + }, + { + "grad_norm": 0.044706251472234726, + "learning_rate": 5.9045652907616835e-05, + "loss": 0.0067, + "step": 47000 + }, + { + "grad_norm": 0.047174397855997086, + "learning_rate": 5.902939055264838e-05, + "loss": 0.0059, + "step": 47010 + }, + { + "grad_norm": 0.062203243374824524, + "learning_rate": 5.901312721023935e-05, + "loss": 0.0083, + "step": 47020 + }, + { + "grad_norm": 0.07187710702419281, + "learning_rate": 5.8996862882168294e-05, + "loss": 0.0068, + "step": 47030 + }, + { + "grad_norm": 0.05260472744703293, + "learning_rate": 5.8980597570213826e-05, + "loss": 0.0088, + "step": 47040 + }, + { + "grad_norm": 0.04741017147898674, + "learning_rate": 5.896433127615471e-05, + "loss": 0.0082, + "step": 47050 + }, + { + "grad_norm": 0.05543525889515877, + "learning_rate": 5.894806400176981e-05, + "loss": 0.0067, + "step": 47060 + }, + { + "grad_norm": 0.06532258540391922, + "learning_rate": 5.893179574883808e-05, + "loss": 0.0063, + "step": 47070 + }, + { + "grad_norm": 0.07376737147569656, + "learning_rate": 5.8915526519138585e-05, + "loss": 0.0069, + "step": 47080 + }, + { + "grad_norm": 0.04861742630600929, + "learning_rate": 5.889925631445053e-05, + "loss": 0.0054, + "step": 47090 + }, + { + "grad_norm": 0.054539889097213745, + "learning_rate": 5.888298513655318e-05, + "loss": 0.006, + "step": 47100 + }, + { + "grad_norm": 0.04612421244382858, + "learning_rate": 5.886671298722592e-05, + "loss": 0.007, + "step": 47110 + }, + { + "grad_norm": 0.05143781006336212, + "learning_rate": 5.885043986824828e-05, + "loss": 0.0074, + "step": 47120 + }, + { + "grad_norm": 0.04555310681462288, + "learning_rate": 5.883416578139982e-05, + "loss": 0.0068, + "step": 47130 + }, + { + "grad_norm": 0.08220788836479187, + "learning_rate": 5.881789072846029e-05, + "loss": 0.008, + "step": 47140 + }, + { + "grad_norm": 0.05839163437485695, + "learning_rate": 5.8801614711209474e-05, + "loss": 0.0068, + "step": 47150 + }, + { + "grad_norm": 0.0650712251663208, + "learning_rate": 5.878533773142732e-05, + "loss": 0.0074, + "step": 47160 + }, + { + "grad_norm": 0.02865639515221119, + "learning_rate": 5.8769059790893834e-05, + "loss": 0.0049, + "step": 47170 + }, + { + "grad_norm": 0.045469000935554504, + "learning_rate": 5.8752780891389145e-05, + "loss": 0.0055, + "step": 47180 + }, + { + "grad_norm": 0.0506497323513031, + "learning_rate": 5.873650103469351e-05, + "loss": 0.0053, + "step": 47190 + }, + { + "grad_norm": 0.04782406613230705, + "learning_rate": 5.872022022258725e-05, + "loss": 0.0051, + "step": 47200 + }, + { + "grad_norm": 0.04939540848135948, + "learning_rate": 5.8703938456850814e-05, + "loss": 0.007, + "step": 47210 + }, + { + "grad_norm": 0.04159420728683472, + "learning_rate": 5.868765573926477e-05, + "loss": 0.0063, + "step": 47220 + }, + { + "grad_norm": 0.043314751237630844, + "learning_rate": 5.867137207160974e-05, + "loss": 0.0062, + "step": 47230 + }, + { + "grad_norm": 0.055773425847291946, + "learning_rate": 5.86550874556665e-05, + "loss": 0.0077, + "step": 47240 + }, + { + "grad_norm": 0.05544980987906456, + "learning_rate": 5.863880189321592e-05, + "loss": 0.0076, + "step": 47250 + }, + { + "grad_norm": 0.0632009208202362, + "learning_rate": 5.862251538603894e-05, + "loss": 0.0063, + "step": 47260 + }, + { + "grad_norm": 0.06268154829740524, + "learning_rate": 5.8606227935916656e-05, + "loss": 0.0075, + "step": 47270 + }, + { + "grad_norm": 0.05180922523140907, + "learning_rate": 5.858993954463021e-05, + "loss": 0.0044, + "step": 47280 + }, + { + "grad_norm": 0.04535236582159996, + "learning_rate": 5.8573650213960896e-05, + "loss": 0.0064, + "step": 47290 + }, + { + "grad_norm": 0.056411705911159515, + "learning_rate": 5.8557359945690084e-05, + "loss": 0.0051, + "step": 47300 + }, + { + "grad_norm": 0.07079200446605682, + "learning_rate": 5.8541068741599246e-05, + "loss": 0.0061, + "step": 47310 + }, + { + "grad_norm": 0.03965405002236366, + "learning_rate": 5.8524776603469985e-05, + "loss": 0.0065, + "step": 47320 + }, + { + "grad_norm": 0.05051158741116524, + "learning_rate": 5.850848353308397e-05, + "loss": 0.0064, + "step": 47330 + }, + { + "grad_norm": 0.045048948377370834, + "learning_rate": 5.849218953222297e-05, + "loss": 0.0062, + "step": 47340 + }, + { + "grad_norm": 0.04195920005440712, + "learning_rate": 5.847589460266891e-05, + "loss": 0.0058, + "step": 47350 + }, + { + "grad_norm": 0.0565461628139019, + "learning_rate": 5.845959874620375e-05, + "loss": 0.0106, + "step": 47360 + }, + { + "grad_norm": 0.08150351047515869, + "learning_rate": 5.84433019646096e-05, + "loss": 0.0082, + "step": 47370 + }, + { + "grad_norm": 0.04790758341550827, + "learning_rate": 5.842700425966863e-05, + "loss": 0.0051, + "step": 47380 + }, + { + "grad_norm": 0.053409185260534286, + "learning_rate": 5.841070563316315e-05, + "loss": 0.0048, + "step": 47390 + }, + { + "grad_norm": 0.041571348905563354, + "learning_rate": 5.839440608687554e-05, + "loss": 0.0062, + "step": 47400 + }, + { + "grad_norm": 0.0561218224465847, + "learning_rate": 5.837810562258831e-05, + "loss": 0.0046, + "step": 47410 + }, + { + "grad_norm": 0.049425553530454636, + "learning_rate": 5.836180424208405e-05, + "loss": 0.0068, + "step": 47420 + }, + { + "grad_norm": 0.06320498138666153, + "learning_rate": 5.834550194714543e-05, + "loss": 0.0088, + "step": 47430 + }, + { + "grad_norm": 0.052327435463666916, + "learning_rate": 5.832919873955528e-05, + "loss": 0.0057, + "step": 47440 + }, + { + "grad_norm": 0.08352319151163101, + "learning_rate": 5.831289462109648e-05, + "loss": 0.0065, + "step": 47450 + }, + { + "grad_norm": 0.052865542471408844, + "learning_rate": 5.829658959355202e-05, + "loss": 0.006, + "step": 47460 + }, + { + "grad_norm": 0.04137881472706795, + "learning_rate": 5.8280283658704995e-05, + "loss": 0.0072, + "step": 47470 + }, + { + "grad_norm": 0.07149654626846313, + "learning_rate": 5.826397681833859e-05, + "loss": 0.0062, + "step": 47480 + }, + { + "grad_norm": 0.04831403121352196, + "learning_rate": 5.824766907423612e-05, + "loss": 0.0075, + "step": 47490 + }, + { + "grad_norm": 0.05050291121006012, + "learning_rate": 5.8231360428180945e-05, + "loss": 0.0056, + "step": 47500 + }, + { + "grad_norm": 0.03764805942773819, + "learning_rate": 5.821505088195658e-05, + "loss": 0.0057, + "step": 47510 + }, + { + "grad_norm": 0.04440665617585182, + "learning_rate": 5.819874043734661e-05, + "loss": 0.0063, + "step": 47520 + }, + { + "grad_norm": 0.08773432672023773, + "learning_rate": 5.8182429096134695e-05, + "loss": 0.0085, + "step": 47530 + }, + { + "grad_norm": 0.03473472222685814, + "learning_rate": 5.816611686010465e-05, + "loss": 0.0063, + "step": 47540 + }, + { + "grad_norm": 0.05070507153868675, + "learning_rate": 5.814980373104033e-05, + "loss": 0.0057, + "step": 47550 + }, + { + "grad_norm": 0.0514756478369236, + "learning_rate": 5.813348971072572e-05, + "loss": 0.0062, + "step": 47560 + }, + { + "grad_norm": 0.04300408810377121, + "learning_rate": 5.811717480094492e-05, + "loss": 0.0059, + "step": 47570 + }, + { + "grad_norm": 0.042815711349248886, + "learning_rate": 5.810085900348209e-05, + "loss": 0.0069, + "step": 47580 + }, + { + "grad_norm": 0.045030657202005386, + "learning_rate": 5.8084542320121483e-05, + "loss": 0.0063, + "step": 47590 + }, + { + "grad_norm": 0.038955505937337875, + "learning_rate": 5.8068224752647497e-05, + "loss": 0.0079, + "step": 47600 + }, + { + "grad_norm": 0.03851201385259628, + "learning_rate": 5.805190630284457e-05, + "loss": 0.0067, + "step": 47610 + }, + { + "grad_norm": 0.06069378927350044, + "learning_rate": 5.803558697249729e-05, + "loss": 0.0064, + "step": 47620 + }, + { + "grad_norm": 0.052776288241147995, + "learning_rate": 5.8019266763390276e-05, + "loss": 0.0069, + "step": 47630 + }, + { + "grad_norm": 0.0694228857755661, + "learning_rate": 5.800294567730833e-05, + "loss": 0.0068, + "step": 47640 + }, + { + "grad_norm": 0.05122019723057747, + "learning_rate": 5.798662371603628e-05, + "loss": 0.007, + "step": 47650 + }, + { + "grad_norm": 0.056735098361968994, + "learning_rate": 5.797030088135904e-05, + "loss": 0.0078, + "step": 47660 + }, + { + "grad_norm": 0.04543592780828476, + "learning_rate": 5.7953977175061705e-05, + "loss": 0.0058, + "step": 47670 + }, + { + "grad_norm": 0.04646649211645126, + "learning_rate": 5.793765259892938e-05, + "loss": 0.0066, + "step": 47680 + }, + { + "grad_norm": 0.06400921195745468, + "learning_rate": 5.792132715474729e-05, + "loss": 0.0075, + "step": 47690 + }, + { + "grad_norm": 0.07281327247619629, + "learning_rate": 5.790500084430078e-05, + "loss": 0.0067, + "step": 47700 + }, + { + "grad_norm": 0.06204711273312569, + "learning_rate": 5.7888673669375274e-05, + "loss": 0.0063, + "step": 47710 + }, + { + "grad_norm": 0.0407237783074379, + "learning_rate": 5.787234563175625e-05, + "loss": 0.0062, + "step": 47720 + }, + { + "grad_norm": 0.048770349472761154, + "learning_rate": 5.7856016733229355e-05, + "loss": 0.0057, + "step": 47730 + }, + { + "grad_norm": 0.058540668338537216, + "learning_rate": 5.7839686975580297e-05, + "loss": 0.0068, + "step": 47740 + }, + { + "grad_norm": 0.035693563520908356, + "learning_rate": 5.782335636059484e-05, + "loss": 0.0054, + "step": 47750 + }, + { + "grad_norm": 0.045693911612033844, + "learning_rate": 5.780702489005889e-05, + "loss": 0.0054, + "step": 47760 + }, + { + "grad_norm": 0.041721321642398834, + "learning_rate": 5.779069256575845e-05, + "loss": 0.006, + "step": 47770 + }, + { + "grad_norm": 0.07547381520271301, + "learning_rate": 5.7774359389479574e-05, + "loss": 0.0062, + "step": 47780 + }, + { + "grad_norm": 0.04039992764592171, + "learning_rate": 5.775802536300845e-05, + "loss": 0.0055, + "step": 47790 + }, + { + "grad_norm": 0.04650693014264107, + "learning_rate": 5.774169048813134e-05, + "loss": 0.0057, + "step": 47800 + }, + { + "grad_norm": 0.03709696978330612, + "learning_rate": 5.77253547666346e-05, + "loss": 0.0062, + "step": 47810 + }, + { + "grad_norm": 0.04634040594100952, + "learning_rate": 5.770901820030465e-05, + "loss": 0.0076, + "step": 47820 + }, + { + "grad_norm": 0.04081552103161812, + "learning_rate": 5.769268079092809e-05, + "loss": 0.0061, + "step": 47830 + }, + { + "grad_norm": 0.045734528452157974, + "learning_rate": 5.767634254029151e-05, + "loss": 0.0055, + "step": 47840 + }, + { + "grad_norm": 0.0544373095035553, + "learning_rate": 5.7660003450181655e-05, + "loss": 0.0068, + "step": 47850 + }, + { + "grad_norm": 0.06069719418883324, + "learning_rate": 5.764366352238534e-05, + "loss": 0.008, + "step": 47860 + }, + { + "grad_norm": 0.06253708153963089, + "learning_rate": 5.7627322758689474e-05, + "loss": 0.006, + "step": 47870 + }, + { + "grad_norm": 0.05222116783261299, + "learning_rate": 5.761098116088105e-05, + "loss": 0.0051, + "step": 47880 + }, + { + "grad_norm": 0.07610207796096802, + "learning_rate": 5.759463873074717e-05, + "loss": 0.0077, + "step": 47890 + }, + { + "grad_norm": 0.04346349462866783, + "learning_rate": 5.757829547007504e-05, + "loss": 0.007, + "step": 47900 + }, + { + "grad_norm": 0.047239452600479126, + "learning_rate": 5.756195138065189e-05, + "loss": 0.0047, + "step": 47910 + }, + { + "grad_norm": 0.055170100182294846, + "learning_rate": 5.754560646426511e-05, + "loss": 0.0073, + "step": 47920 + }, + { + "grad_norm": 0.056909676641225815, + "learning_rate": 5.752926072270216e-05, + "loss": 0.0069, + "step": 47930 + }, + { + "grad_norm": 0.07484812289476395, + "learning_rate": 5.7512914157750563e-05, + "loss": 0.007, + "step": 47940 + }, + { + "grad_norm": 0.06251852214336395, + "learning_rate": 5.749656677119798e-05, + "loss": 0.0057, + "step": 47950 + }, + { + "grad_norm": 0.054201364517211914, + "learning_rate": 5.7480218564832125e-05, + "loss": 0.0073, + "step": 47960 + }, + { + "grad_norm": 0.05809270963072777, + "learning_rate": 5.746386954044082e-05, + "loss": 0.0061, + "step": 47970 + }, + { + "grad_norm": 0.05596749484539032, + "learning_rate": 5.744751969981195e-05, + "loss": 0.0073, + "step": 47980 + }, + { + "grad_norm": 0.04226573556661606, + "learning_rate": 5.7431169044733526e-05, + "loss": 0.0055, + "step": 47990 + }, + { + "grad_norm": 0.056390181183815, + "learning_rate": 5.741481757699364e-05, + "loss": 0.0067, + "step": 48000 + }, + { + "grad_norm": 0.04893675819039345, + "learning_rate": 5.7398465298380434e-05, + "loss": 0.0076, + "step": 48010 + }, + { + "grad_norm": 0.05325990915298462, + "learning_rate": 5.7382112210682193e-05, + "loss": 0.0069, + "step": 48020 + }, + { + "grad_norm": 0.04512833431363106, + "learning_rate": 5.7365758315687266e-05, + "loss": 0.0055, + "step": 48030 + }, + { + "grad_norm": 0.03541749343276024, + "learning_rate": 5.734940361518407e-05, + "loss": 0.005, + "step": 48040 + }, + { + "grad_norm": 0.04595184326171875, + "learning_rate": 5.733304811096116e-05, + "loss": 0.0054, + "step": 48050 + }, + { + "grad_norm": 0.059336818754673004, + "learning_rate": 5.731669180480713e-05, + "loss": 0.0065, + "step": 48060 + }, + { + "grad_norm": 0.03675638884305954, + "learning_rate": 5.730033469851067e-05, + "loss": 0.0051, + "step": 48070 + }, + { + "grad_norm": 0.0459320992231369, + "learning_rate": 5.7283976793860607e-05, + "loss": 0.006, + "step": 48080 + }, + { + "grad_norm": 0.06485895067453384, + "learning_rate": 5.726761809264577e-05, + "loss": 0.0074, + "step": 48090 + }, + { + "grad_norm": 0.06390273571014404, + "learning_rate": 5.7251258596655155e-05, + "loss": 0.0056, + "step": 48100 + }, + { + "grad_norm": 0.04997611790895462, + "learning_rate": 5.72348983076778e-05, + "loss": 0.0065, + "step": 48110 + }, + { + "grad_norm": 0.04777975007891655, + "learning_rate": 5.7218537227502854e-05, + "loss": 0.0065, + "step": 48120 + }, + { + "grad_norm": 0.041226308792829514, + "learning_rate": 5.720217535791951e-05, + "loss": 0.0053, + "step": 48130 + }, + { + "grad_norm": 0.05499844253063202, + "learning_rate": 5.718581270071711e-05, + "loss": 0.0058, + "step": 48140 + }, + { + "grad_norm": 0.03848085552453995, + "learning_rate": 5.716944925768505e-05, + "loss": 0.0058, + "step": 48150 + }, + { + "grad_norm": 0.031117301434278488, + "learning_rate": 5.7153085030612786e-05, + "loss": 0.0061, + "step": 48160 + }, + { + "grad_norm": 0.041450537741184235, + "learning_rate": 5.713672002128989e-05, + "loss": 0.0072, + "step": 48170 + }, + { + "grad_norm": 0.033054254949092865, + "learning_rate": 5.712035423150602e-05, + "loss": 0.0062, + "step": 48180 + }, + { + "grad_norm": 0.062352120876312256, + "learning_rate": 5.710398766305094e-05, + "loss": 0.0066, + "step": 48190 + }, + { + "grad_norm": 0.0636453628540039, + "learning_rate": 5.708762031771442e-05, + "loss": 0.007, + "step": 48200 + }, + { + "grad_norm": 0.04977080598473549, + "learning_rate": 5.707125219728642e-05, + "loss": 0.0076, + "step": 48210 + }, + { + "grad_norm": 0.0484146885573864, + "learning_rate": 5.7054883303556905e-05, + "loss": 0.0061, + "step": 48220 + }, + { + "grad_norm": 0.05629872903227806, + "learning_rate": 5.703851363831595e-05, + "loss": 0.0048, + "step": 48230 + }, + { + "grad_norm": 0.046142615377902985, + "learning_rate": 5.702214320335374e-05, + "loss": 0.0059, + "step": 48240 + }, + { + "grad_norm": 0.061130937188863754, + "learning_rate": 5.7005772000460514e-05, + "loss": 0.006, + "step": 48250 + }, + { + "grad_norm": 0.0563277006149292, + "learning_rate": 5.698940003142656e-05, + "loss": 0.0061, + "step": 48260 + }, + { + "grad_norm": 0.0595366396009922, + "learning_rate": 5.697302729804236e-05, + "loss": 0.0059, + "step": 48270 + }, + { + "grad_norm": 0.045886822044849396, + "learning_rate": 5.695665380209837e-05, + "loss": 0.006, + "step": 48280 + }, + { + "grad_norm": 0.05760663002729416, + "learning_rate": 5.6940279545385165e-05, + "loss": 0.0073, + "step": 48290 + }, + { + "grad_norm": 0.040921587496995926, + "learning_rate": 5.692390452969344e-05, + "loss": 0.0094, + "step": 48300 + }, + { + "grad_norm": 0.06402529031038284, + "learning_rate": 5.690752875681392e-05, + "loss": 0.0065, + "step": 48310 + }, + { + "grad_norm": 0.05357037112116814, + "learning_rate": 5.6891152228537435e-05, + "loss": 0.0057, + "step": 48320 + }, + { + "grad_norm": 0.07561872899532318, + "learning_rate": 5.687477494665492e-05, + "loss": 0.0065, + "step": 48330 + }, + { + "grad_norm": 0.055915988981723785, + "learning_rate": 5.685839691295733e-05, + "loss": 0.006, + "step": 48340 + }, + { + "grad_norm": 0.06239854171872139, + "learning_rate": 5.6842018129235786e-05, + "loss": 0.0101, + "step": 48350 + }, + { + "grad_norm": 0.05468118190765381, + "learning_rate": 5.6825638597281404e-05, + "loss": 0.0063, + "step": 48360 + }, + { + "grad_norm": 0.05700495466589928, + "learning_rate": 5.680925831888546e-05, + "loss": 0.007, + "step": 48370 + }, + { + "grad_norm": 0.0409858338534832, + "learning_rate": 5.6792877295839274e-05, + "loss": 0.0056, + "step": 48380 + }, + { + "grad_norm": 0.037136103957891464, + "learning_rate": 5.6776495529934224e-05, + "loss": 0.0065, + "step": 48390 + }, + { + "grad_norm": 0.05054077133536339, + "learning_rate": 5.6760113022961824e-05, + "loss": 0.0056, + "step": 48400 + }, + { + "grad_norm": 0.06750505417585373, + "learning_rate": 5.6743729776713617e-05, + "loss": 0.0076, + "step": 48410 + }, + { + "grad_norm": 0.04475882649421692, + "learning_rate": 5.672734579298126e-05, + "loss": 0.0052, + "step": 48420 + }, + { + "grad_norm": 0.04618104174733162, + "learning_rate": 5.671096107355649e-05, + "loss": 0.0071, + "step": 48430 + }, + { + "grad_norm": 0.04132257401943207, + "learning_rate": 5.66945756202311e-05, + "loss": 0.0046, + "step": 48440 + }, + { + "grad_norm": 0.04509824886918068, + "learning_rate": 5.667818943479699e-05, + "loss": 0.0063, + "step": 48450 + }, + { + "grad_norm": 0.05221918225288391, + "learning_rate": 5.666180251904612e-05, + "loss": 0.007, + "step": 48460 + }, + { + "grad_norm": 0.03673342615365982, + "learning_rate": 5.6645414874770555e-05, + "loss": 0.0064, + "step": 48470 + }, + { + "grad_norm": 0.04715992510318756, + "learning_rate": 5.66290265037624e-05, + "loss": 0.0052, + "step": 48480 + }, + { + "grad_norm": 0.0642116516828537, + "learning_rate": 5.661263740781386e-05, + "loss": 0.0064, + "step": 48490 + }, + { + "grad_norm": 0.04094836115837097, + "learning_rate": 5.6596247588717254e-05, + "loss": 0.0072, + "step": 48500 + }, + { + "grad_norm": 0.04765041172504425, + "learning_rate": 5.6579857048264926e-05, + "loss": 0.0083, + "step": 48510 + }, + { + "grad_norm": 0.09020636975765228, + "learning_rate": 5.6563465788249314e-05, + "loss": 0.009, + "step": 48520 + }, + { + "grad_norm": 0.06272269785404205, + "learning_rate": 5.6547073810462956e-05, + "loss": 0.0047, + "step": 48530 + }, + { + "grad_norm": 0.06770332902669907, + "learning_rate": 5.653068111669846e-05, + "loss": 0.0063, + "step": 48540 + }, + { + "grad_norm": 0.03177180886268616, + "learning_rate": 5.651428770874848e-05, + "loss": 0.0054, + "step": 48550 + }, + { + "grad_norm": 0.05665323883295059, + "learning_rate": 5.64978935884058e-05, + "loss": 0.0053, + "step": 48560 + }, + { + "grad_norm": 0.06848148256540298, + "learning_rate": 5.6481498757463244e-05, + "loss": 0.0069, + "step": 48570 + }, + { + "grad_norm": 0.05282492935657501, + "learning_rate": 5.646510321771373e-05, + "loss": 0.0064, + "step": 48580 + }, + { + "grad_norm": 0.05117543041706085, + "learning_rate": 5.644870697095024e-05, + "loss": 0.0068, + "step": 48590 + }, + { + "grad_norm": 0.0657866969704628, + "learning_rate": 5.643231001896586e-05, + "loss": 0.0076, + "step": 48600 + }, + { + "grad_norm": 0.044434912502765656, + "learning_rate": 5.6415912363553726e-05, + "loss": 0.007, + "step": 48610 + }, + { + "grad_norm": 0.05084364861249924, + "learning_rate": 5.639951400650706e-05, + "loss": 0.0053, + "step": 48620 + }, + { + "grad_norm": 0.05922464281320572, + "learning_rate": 5.6383114949619165e-05, + "loss": 0.0087, + "step": 48630 + }, + { + "grad_norm": 0.060365382581949234, + "learning_rate": 5.636671519468342e-05, + "loss": 0.0073, + "step": 48640 + }, + { + "grad_norm": 0.04159995913505554, + "learning_rate": 5.635031474349327e-05, + "loss": 0.0062, + "step": 48650 + }, + { + "grad_norm": 0.07162898778915405, + "learning_rate": 5.6333913597842246e-05, + "loss": 0.0079, + "step": 48660 + }, + { + "grad_norm": 0.0610126294195652, + "learning_rate": 5.6317511759523955e-05, + "loss": 0.0066, + "step": 48670 + }, + { + "grad_norm": 0.05593756586313248, + "learning_rate": 5.630110923033207e-05, + "loss": 0.0063, + "step": 48680 + }, + { + "grad_norm": 0.06693848222494125, + "learning_rate": 5.628470601206036e-05, + "loss": 0.007, + "step": 48690 + }, + { + "grad_norm": 0.04331620782613754, + "learning_rate": 5.626830210650266e-05, + "loss": 0.005, + "step": 48700 + }, + { + "grad_norm": 0.03412161394953728, + "learning_rate": 5.625189751545285e-05, + "loss": 0.0075, + "step": 48710 + }, + { + "grad_norm": 0.04692443460226059, + "learning_rate": 5.6235492240704936e-05, + "loss": 0.0068, + "step": 48720 + }, + { + "grad_norm": 0.05191435664892197, + "learning_rate": 5.621908628405296e-05, + "loss": 0.0056, + "step": 48730 + }, + { + "grad_norm": 0.04685590788722038, + "learning_rate": 5.620267964729106e-05, + "loss": 0.0058, + "step": 48740 + }, + { + "grad_norm": 0.052023544907569885, + "learning_rate": 5.618627233221344e-05, + "loss": 0.0058, + "step": 48750 + }, + { + "grad_norm": 0.050386931747198105, + "learning_rate": 5.616986434061438e-05, + "loss": 0.0079, + "step": 48760 + }, + { + "grad_norm": 0.06602558493614197, + "learning_rate": 5.615345567428822e-05, + "loss": 0.0066, + "step": 48770 + }, + { + "grad_norm": 0.05476875230669975, + "learning_rate": 5.613704633502941e-05, + "loss": 0.0071, + "step": 48780 + }, + { + "grad_norm": 0.04406599700450897, + "learning_rate": 5.612063632463245e-05, + "loss": 0.0045, + "step": 48790 + }, + { + "grad_norm": 0.053553204983472824, + "learning_rate": 5.610422564489188e-05, + "loss": 0.0052, + "step": 48800 + }, + { + "grad_norm": 0.06592311710119247, + "learning_rate": 5.608781429760239e-05, + "loss": 0.0074, + "step": 48810 + }, + { + "grad_norm": 0.04313194006681442, + "learning_rate": 5.607140228455866e-05, + "loss": 0.0074, + "step": 48820 + }, + { + "grad_norm": 0.05381868779659271, + "learning_rate": 5.605498960755553e-05, + "loss": 0.0065, + "step": 48830 + }, + { + "grad_norm": 0.057758428156375885, + "learning_rate": 5.603857626838782e-05, + "loss": 0.0081, + "step": 48840 + }, + { + "grad_norm": 0.04908338189125061, + "learning_rate": 5.60221622688505e-05, + "loss": 0.0054, + "step": 48850 + }, + { + "grad_norm": 0.05160261690616608, + "learning_rate": 5.6005747610738565e-05, + "loss": 0.0057, + "step": 48860 + }, + { + "grad_norm": 0.062066078186035156, + "learning_rate": 5.59893322958471e-05, + "loss": 0.0059, + "step": 48870 + }, + { + "grad_norm": 0.0697995200753212, + "learning_rate": 5.5972916325971256e-05, + "loss": 0.0053, + "step": 48880 + }, + { + "grad_norm": 0.035129085183143616, + "learning_rate": 5.595649970290628e-05, + "loss": 0.0064, + "step": 48890 + }, + { + "grad_norm": 0.05259355530142784, + "learning_rate": 5.5940082428447426e-05, + "loss": 0.0058, + "step": 48900 + }, + { + "grad_norm": 0.03379293158650398, + "learning_rate": 5.592366450439012e-05, + "loss": 0.0056, + "step": 48910 + }, + { + "grad_norm": 0.07219348847866058, + "learning_rate": 5.590724593252975e-05, + "loss": 0.0069, + "step": 48920 + }, + { + "grad_norm": 0.072885662317276, + "learning_rate": 5.589082671466184e-05, + "loss": 0.0064, + "step": 48930 + }, + { + "grad_norm": 0.03647923842072487, + "learning_rate": 5.587440685258199e-05, + "loss": 0.0057, + "step": 48940 + }, + { + "grad_norm": 0.06609509885311127, + "learning_rate": 5.585798634808583e-05, + "loss": 0.0094, + "step": 48950 + }, + { + "grad_norm": 0.049879077821969986, + "learning_rate": 5.584156520296909e-05, + "loss": 0.0059, + "step": 48960 + }, + { + "grad_norm": 0.052459899336099625, + "learning_rate": 5.582514341902757e-05, + "loss": 0.0093, + "step": 48970 + }, + { + "grad_norm": 0.04263072833418846, + "learning_rate": 5.580872099805713e-05, + "loss": 0.0072, + "step": 48980 + }, + { + "grad_norm": 0.048541612923145294, + "learning_rate": 5.5792297941853674e-05, + "loss": 0.0062, + "step": 48990 + }, + { + "grad_norm": 0.0742202028632164, + "learning_rate": 5.5775874252213247e-05, + "loss": 0.0061, + "step": 49000 + }, + { + "grad_norm": 0.06117947772145271, + "learning_rate": 5.575944993093189e-05, + "loss": 0.0075, + "step": 49010 + }, + { + "grad_norm": 0.04783555120229721, + "learning_rate": 5.574302497980574e-05, + "loss": 0.0074, + "step": 49020 + }, + { + "grad_norm": 0.033019717782735825, + "learning_rate": 5.5726599400631005e-05, + "loss": 0.0064, + "step": 49030 + }, + { + "grad_norm": 0.04908577352762222, + "learning_rate": 5.571017319520399e-05, + "loss": 0.0079, + "step": 49040 + }, + { + "grad_norm": 0.048758525401353836, + "learning_rate": 5.5693746365320984e-05, + "loss": 0.0046, + "step": 49050 + }, + { + "grad_norm": 0.06113271787762642, + "learning_rate": 5.567731891277848e-05, + "loss": 0.0053, + "step": 49060 + }, + { + "grad_norm": 0.06139109283685684, + "learning_rate": 5.566089083937288e-05, + "loss": 0.0064, + "step": 49070 + }, + { + "grad_norm": 0.0691591203212738, + "learning_rate": 5.564446214690079e-05, + "loss": 0.0084, + "step": 49080 + }, + { + "grad_norm": 0.05487705394625664, + "learning_rate": 5.56280328371588e-05, + "loss": 0.0062, + "step": 49090 + }, + { + "grad_norm": 0.07698244601488113, + "learning_rate": 5.5611602911943596e-05, + "loss": 0.0057, + "step": 49100 + }, + { + "grad_norm": 0.056778643280267715, + "learning_rate": 5.559517237305194e-05, + "loss": 0.0052, + "step": 49110 + }, + { + "grad_norm": 0.04331815242767334, + "learning_rate": 5.557874122228064e-05, + "loss": 0.0075, + "step": 49120 + }, + { + "grad_norm": 0.0584476999938488, + "learning_rate": 5.55623094614266e-05, + "loss": 0.006, + "step": 49130 + }, + { + "grad_norm": 0.0938376784324646, + "learning_rate": 5.5545877092286743e-05, + "loss": 0.0063, + "step": 49140 + }, + { + "grad_norm": 0.055697135627269745, + "learning_rate": 5.552944411665811e-05, + "loss": 0.0058, + "step": 49150 + }, + { + "grad_norm": 0.05711755156517029, + "learning_rate": 5.5513010536337795e-05, + "loss": 0.0066, + "step": 49160 + }, + { + "grad_norm": 0.07311534881591797, + "learning_rate": 5.5496576353122934e-05, + "loss": 0.0083, + "step": 49170 + }, + { + "grad_norm": 0.037001609802246094, + "learning_rate": 5.548014156881074e-05, + "loss": 0.0058, + "step": 49180 + }, + { + "grad_norm": 0.06255196034908295, + "learning_rate": 5.5463706185198494e-05, + "loss": 0.0073, + "step": 49190 + }, + { + "grad_norm": 0.045953311026096344, + "learning_rate": 5.544727020408358e-05, + "loss": 0.0064, + "step": 49200 + }, + { + "grad_norm": 0.04723098874092102, + "learning_rate": 5.5430833627263366e-05, + "loss": 0.006, + "step": 49210 + }, + { + "grad_norm": 0.04993175342679024, + "learning_rate": 5.5414396456535354e-05, + "loss": 0.0063, + "step": 49220 + }, + { + "grad_norm": 0.062304552644491196, + "learning_rate": 5.539795869369709e-05, + "loss": 0.007, + "step": 49230 + }, + { + "grad_norm": 0.04107928276062012, + "learning_rate": 5.538152034054618e-05, + "loss": 0.0063, + "step": 49240 + }, + { + "grad_norm": 0.06405100226402283, + "learning_rate": 5.536508139888028e-05, + "loss": 0.0052, + "step": 49250 + }, + { + "grad_norm": 0.07036936283111572, + "learning_rate": 5.534864187049716e-05, + "loss": 0.0063, + "step": 49260 + }, + { + "grad_norm": 0.05879493057727814, + "learning_rate": 5.5332201757194615e-05, + "loss": 0.0065, + "step": 49270 + }, + { + "grad_norm": 0.05369265750050545, + "learning_rate": 5.531576106077049e-05, + "loss": 0.0055, + "step": 49280 + }, + { + "grad_norm": 0.046304017305374146, + "learning_rate": 5.529931978302272e-05, + "loss": 0.0049, + "step": 49290 + }, + { + "grad_norm": 0.04195438325405121, + "learning_rate": 5.528287792574932e-05, + "loss": 0.0058, + "step": 49300 + }, + { + "grad_norm": 0.07068608701229095, + "learning_rate": 5.5266435490748324e-05, + "loss": 0.0052, + "step": 49310 + }, + { + "grad_norm": 0.04697553813457489, + "learning_rate": 5.524999247981787e-05, + "loss": 0.0055, + "step": 49320 + }, + { + "grad_norm": 0.04968760907649994, + "learning_rate": 5.523354889475613e-05, + "loss": 0.0054, + "step": 49330 + }, + { + "grad_norm": 0.04200851544737816, + "learning_rate": 5.521710473736134e-05, + "loss": 0.0081, + "step": 49340 + }, + { + "grad_norm": 0.054486602544784546, + "learning_rate": 5.5200660009431826e-05, + "loss": 0.0054, + "step": 49350 + }, + { + "grad_norm": 0.02937142550945282, + "learning_rate": 5.518421471276596e-05, + "loss": 0.0056, + "step": 49360 + }, + { + "grad_norm": 0.048145730048418045, + "learning_rate": 5.5167768849162174e-05, + "loss": 0.0062, + "step": 49370 + }, + { + "grad_norm": 0.05323381721973419, + "learning_rate": 5.515132242041893e-05, + "loss": 0.0073, + "step": 49380 + }, + { + "grad_norm": 0.03869276121258736, + "learning_rate": 5.513487542833483e-05, + "loss": 0.0055, + "step": 49390 + }, + { + "grad_norm": 0.054044246673583984, + "learning_rate": 5.5118427874708466e-05, + "loss": 0.0072, + "step": 49400 + }, + { + "grad_norm": 0.07164681702852249, + "learning_rate": 5.510197976133853e-05, + "loss": 0.0074, + "step": 49410 + }, + { + "grad_norm": 0.03916405886411667, + "learning_rate": 5.508553109002376e-05, + "loss": 0.0055, + "step": 49420 + }, + { + "grad_norm": 0.04457986727356911, + "learning_rate": 5.5069081862562957e-05, + "loss": 0.0061, + "step": 49430 + }, + { + "grad_norm": 0.04458441957831383, + "learning_rate": 5.5052632080754965e-05, + "loss": 0.0064, + "step": 49440 + }, + { + "grad_norm": 0.05546971410512924, + "learning_rate": 5.503618174639874e-05, + "loss": 0.0074, + "step": 49450 + }, + { + "grad_norm": 0.04627033695578575, + "learning_rate": 5.501973086129325e-05, + "loss": 0.0089, + "step": 49460 + }, + { + "grad_norm": 0.048672307282686234, + "learning_rate": 5.500327942723753e-05, + "loss": 0.0074, + "step": 49470 + }, + { + "grad_norm": 0.06258601695299149, + "learning_rate": 5.498682744603071e-05, + "loss": 0.0057, + "step": 49480 + }, + { + "grad_norm": 0.05422459542751312, + "learning_rate": 5.497037491947194e-05, + "loss": 0.0058, + "step": 49490 + }, + { + "grad_norm": 0.05054600164294243, + "learning_rate": 5.4953921849360424e-05, + "loss": 0.0077, + "step": 49500 + }, + { + "grad_norm": 0.052357811480760574, + "learning_rate": 5.493746823749547e-05, + "loss": 0.0056, + "step": 49510 + }, + { + "grad_norm": 0.06060272455215454, + "learning_rate": 5.49210140856764e-05, + "loss": 0.0072, + "step": 49520 + }, + { + "grad_norm": 0.04712158441543579, + "learning_rate": 5.4904559395702647e-05, + "loss": 0.0054, + "step": 49530 + }, + { + "grad_norm": 0.058220550417900085, + "learning_rate": 5.488810416937364e-05, + "loss": 0.0052, + "step": 49540 + }, + { + "grad_norm": 0.06787390261888504, + "learning_rate": 5.4871648408488926e-05, + "loss": 0.0059, + "step": 49550 + }, + { + "grad_norm": 0.04938188195228577, + "learning_rate": 5.485519211484807e-05, + "loss": 0.0069, + "step": 49560 + }, + { + "grad_norm": 0.05876767262816429, + "learning_rate": 5.4838735290250676e-05, + "loss": 0.0056, + "step": 49570 + }, + { + "grad_norm": 0.03977654501795769, + "learning_rate": 5.482227793649649e-05, + "loss": 0.005, + "step": 49580 + }, + { + "grad_norm": 0.06992412358522415, + "learning_rate": 5.480582005538524e-05, + "loss": 0.0063, + "step": 49590 + }, + { + "grad_norm": 0.05827942118048668, + "learning_rate": 5.478936164871671e-05, + "loss": 0.005, + "step": 49600 + }, + { + "grad_norm": 0.050281766802072525, + "learning_rate": 5.477290271829081e-05, + "loss": 0.0057, + "step": 49610 + }, + { + "grad_norm": 0.06382951885461807, + "learning_rate": 5.475644326590744e-05, + "loss": 0.0058, + "step": 49620 + }, + { + "grad_norm": 0.04959910735487938, + "learning_rate": 5.473998329336658e-05, + "loss": 0.0054, + "step": 49630 + }, + { + "grad_norm": 0.05614354461431503, + "learning_rate": 5.4723522802468286e-05, + "loss": 0.0081, + "step": 49640 + }, + { + "grad_norm": 0.043525803834199905, + "learning_rate": 5.470706179501264e-05, + "loss": 0.0046, + "step": 49650 + }, + { + "grad_norm": 0.060662154108285904, + "learning_rate": 5.4690600272799776e-05, + "loss": 0.0069, + "step": 49660 + }, + { + "grad_norm": 0.0514117032289505, + "learning_rate": 5.467413823762993e-05, + "loss": 0.0066, + "step": 49670 + }, + { + "grad_norm": 0.04650215804576874, + "learning_rate": 5.465767569130335e-05, + "loss": 0.0052, + "step": 49680 + }, + { + "grad_norm": 0.04615739732980728, + "learning_rate": 5.464121263562036e-05, + "loss": 0.0052, + "step": 49690 + }, + { + "grad_norm": 0.04379734396934509, + "learning_rate": 5.4624749072381343e-05, + "loss": 0.0053, + "step": 49700 + }, + { + "grad_norm": 0.046347107738256454, + "learning_rate": 5.460828500338672e-05, + "loss": 0.0058, + "step": 49710 + }, + { + "grad_norm": 0.0402761809527874, + "learning_rate": 5.459182043043698e-05, + "loss": 0.0054, + "step": 49720 + }, + { + "grad_norm": 0.05101783201098442, + "learning_rate": 5.457535535533265e-05, + "loss": 0.0058, + "step": 49730 + }, + { + "grad_norm": 0.04203540086746216, + "learning_rate": 5.4558889779874334e-05, + "loss": 0.0051, + "step": 49740 + }, + { + "grad_norm": 0.05480260029435158, + "learning_rate": 5.454242370586269e-05, + "loss": 0.0057, + "step": 49750 + }, + { + "grad_norm": 0.07596385478973389, + "learning_rate": 5.452595713509843e-05, + "loss": 0.0075, + "step": 49760 + }, + { + "grad_norm": 0.08924591541290283, + "learning_rate": 5.4509490069382274e-05, + "loss": 0.0055, + "step": 49770 + }, + { + "grad_norm": 0.0625491663813591, + "learning_rate": 5.449302251051509e-05, + "loss": 0.0072, + "step": 49780 + }, + { + "grad_norm": 0.05395881459116936, + "learning_rate": 5.44765544602977e-05, + "loss": 0.0066, + "step": 49790 + }, + { + "grad_norm": 0.07159490138292313, + "learning_rate": 5.446008592053107e-05, + "loss": 0.0062, + "step": 49800 + }, + { + "grad_norm": 0.05961599573493004, + "learning_rate": 5.4443616893016135e-05, + "loss": 0.0051, + "step": 49810 + }, + { + "grad_norm": 0.05147682875394821, + "learning_rate": 5.4427147379553934e-05, + "loss": 0.0065, + "step": 49820 + }, + { + "grad_norm": 0.052031293511390686, + "learning_rate": 5.441067738194556e-05, + "loss": 0.0066, + "step": 49830 + }, + { + "grad_norm": 0.0643131211400032, + "learning_rate": 5.439420690199214e-05, + "loss": 0.0062, + "step": 49840 + }, + { + "grad_norm": 0.04289890080690384, + "learning_rate": 5.437773594149484e-05, + "loss": 0.006, + "step": 49850 + }, + { + "grad_norm": 0.0624978169798851, + "learning_rate": 5.436126450225495e-05, + "loss": 0.0069, + "step": 49860 + }, + { + "grad_norm": 0.0354384146630764, + "learning_rate": 5.434479258607371e-05, + "loss": 0.0071, + "step": 49870 + }, + { + "grad_norm": 0.045881181955337524, + "learning_rate": 5.43283201947525e-05, + "loss": 0.0066, + "step": 49880 + }, + { + "grad_norm": 0.03769691661000252, + "learning_rate": 5.431184733009268e-05, + "loss": 0.0055, + "step": 49890 + }, + { + "grad_norm": 0.04297332838177681, + "learning_rate": 5.4295373993895736e-05, + "loss": 0.0073, + "step": 49900 + }, + { + "grad_norm": 0.03318938612937927, + "learning_rate": 5.4278900187963157e-05, + "loss": 0.0066, + "step": 49910 + }, + { + "grad_norm": 0.034715380519628525, + "learning_rate": 5.426242591409646e-05, + "loss": 0.0055, + "step": 49920 + }, + { + "grad_norm": 0.03865255042910576, + "learning_rate": 5.42459511740973e-05, + "loss": 0.0049, + "step": 49930 + }, + { + "grad_norm": 0.04750094190239906, + "learning_rate": 5.422947596976729e-05, + "loss": 0.0072, + "step": 49940 + }, + { + "grad_norm": 0.048306312412023544, + "learning_rate": 5.4213000302908134e-05, + "loss": 0.0071, + "step": 49950 + }, + { + "grad_norm": 0.05744267255067825, + "learning_rate": 5.419652417532162e-05, + "loss": 0.0086, + "step": 49960 + }, + { + "grad_norm": 0.05701657384634018, + "learning_rate": 5.4180047588809534e-05, + "loss": 0.0065, + "step": 49970 + }, + { + "grad_norm": 0.05617230385541916, + "learning_rate": 5.4163570545173704e-05, + "loss": 0.005, + "step": 49980 + }, + { + "grad_norm": 0.04248267039656639, + "learning_rate": 5.414709304621608e-05, + "loss": 0.0071, + "step": 49990 + }, + { + "grad_norm": 0.038865067064762115, + "learning_rate": 5.413061509373858e-05, + "loss": 0.0066, + "step": 50000 + }, + { + "grad_norm": 0.05118846520781517, + "learning_rate": 5.411413668954324e-05, + "loss": 0.0079, + "step": 50010 + }, + { + "grad_norm": 0.03210863843560219, + "learning_rate": 5.4097657835432083e-05, + "loss": 0.0049, + "step": 50020 + }, + { + "grad_norm": 0.042266055941581726, + "learning_rate": 5.408117853320723e-05, + "loss": 0.0073, + "step": 50030 + }, + { + "grad_norm": 0.04268176481127739, + "learning_rate": 5.406469878467084e-05, + "loss": 0.0061, + "step": 50040 + }, + { + "grad_norm": 0.05768989399075508, + "learning_rate": 5.404821859162509e-05, + "loss": 0.0067, + "step": 50050 + }, + { + "grad_norm": 0.03782631829380989, + "learning_rate": 5.403173795587225e-05, + "loss": 0.0049, + "step": 50060 + }, + { + "grad_norm": 0.06502310186624527, + "learning_rate": 5.4015256879214606e-05, + "loss": 0.0072, + "step": 50070 + }, + { + "grad_norm": 0.05569373816251755, + "learning_rate": 5.39987753634545e-05, + "loss": 0.0054, + "step": 50080 + }, + { + "grad_norm": 0.08761947602033615, + "learning_rate": 5.3982293410394335e-05, + "loss": 0.0072, + "step": 50090 + }, + { + "grad_norm": 0.05065275728702545, + "learning_rate": 5.396581102183655e-05, + "loss": 0.0069, + "step": 50100 + }, + { + "grad_norm": 0.044751934707164764, + "learning_rate": 5.3949328199583634e-05, + "loss": 0.007, + "step": 50110 + }, + { + "grad_norm": 0.057691074907779694, + "learning_rate": 5.393284494543813e-05, + "loss": 0.0061, + "step": 50120 + }, + { + "grad_norm": 0.060163334012031555, + "learning_rate": 5.391636126120262e-05, + "loss": 0.0058, + "step": 50130 + }, + { + "grad_norm": 0.04754204303026199, + "learning_rate": 5.389987714867971e-05, + "loss": 0.0084, + "step": 50140 + }, + { + "grad_norm": 0.04549044743180275, + "learning_rate": 5.3883392609672114e-05, + "loss": 0.005, + "step": 50150 + }, + { + "grad_norm": 0.050459787249565125, + "learning_rate": 5.3866907645982545e-05, + "loss": 0.0069, + "step": 50160 + }, + { + "grad_norm": 0.03739247843623161, + "learning_rate": 5.385042225941375e-05, + "loss": 0.0094, + "step": 50170 + }, + { + "grad_norm": 0.055173613131046295, + "learning_rate": 5.3833936451768585e-05, + "loss": 0.006, + "step": 50180 + }, + { + "grad_norm": 0.04065895453095436, + "learning_rate": 5.3817450224849884e-05, + "loss": 0.0051, + "step": 50190 + }, + { + "grad_norm": 0.049635667353868484, + "learning_rate": 5.380096358046056e-05, + "loss": 0.0105, + "step": 50200 + }, + { + "grad_norm": 0.05964174121618271, + "learning_rate": 5.378447652040359e-05, + "loss": 0.007, + "step": 50210 + }, + { + "grad_norm": 0.06068511679768562, + "learning_rate": 5.376798904648195e-05, + "loss": 0.007, + "step": 50220 + }, + { + "grad_norm": 0.026945538818836212, + "learning_rate": 5.375150116049869e-05, + "loss": 0.0065, + "step": 50230 + }, + { + "grad_norm": 0.06294409185647964, + "learning_rate": 5.373501286425691e-05, + "loss": 0.007, + "step": 50240 + }, + { + "grad_norm": 0.04923069849610329, + "learning_rate": 5.3718524159559726e-05, + "loss": 0.0062, + "step": 50250 + }, + { + "grad_norm": 0.05472206696867943, + "learning_rate": 5.370203504821034e-05, + "loss": 0.0051, + "step": 50260 + }, + { + "grad_norm": 0.03645586967468262, + "learning_rate": 5.368554553201196e-05, + "loss": 0.0062, + "step": 50270 + }, + { + "grad_norm": 0.06405065953731537, + "learning_rate": 5.3669055612767874e-05, + "loss": 0.0057, + "step": 50280 + }, + { + "grad_norm": 0.04198393598198891, + "learning_rate": 5.3652565292281376e-05, + "loss": 0.005, + "step": 50290 + }, + { + "grad_norm": 0.06819850206375122, + "learning_rate": 5.3636074572355824e-05, + "loss": 0.0078, + "step": 50300 + }, + { + "grad_norm": 0.06456124037504196, + "learning_rate": 5.361958345479463e-05, + "loss": 0.0058, + "step": 50310 + }, + { + "grad_norm": 0.04361245036125183, + "learning_rate": 5.360309194140123e-05, + "loss": 0.0055, + "step": 50320 + }, + { + "grad_norm": 0.051889289170503616, + "learning_rate": 5.3586600033979106e-05, + "loss": 0.0064, + "step": 50330 + }, + { + "grad_norm": 0.047036223113536835, + "learning_rate": 5.357010773433181e-05, + "loss": 0.0066, + "step": 50340 + }, + { + "grad_norm": 0.029034582898020744, + "learning_rate": 5.3553615044262895e-05, + "loss": 0.0049, + "step": 50350 + }, + { + "grad_norm": 0.0474880076944828, + "learning_rate": 5.3537121965575964e-05, + "loss": 0.0054, + "step": 50360 + }, + { + "grad_norm": 0.033310666680336, + "learning_rate": 5.352062850007471e-05, + "loss": 0.0064, + "step": 50370 + }, + { + "grad_norm": 0.04000009223818779, + "learning_rate": 5.350413464956282e-05, + "loss": 0.0066, + "step": 50380 + }, + { + "grad_norm": 0.0817638412117958, + "learning_rate": 5.348764041584403e-05, + "loss": 0.0057, + "step": 50390 + }, + { + "grad_norm": 0.05064260587096214, + "learning_rate": 5.347114580072212e-05, + "loss": 0.0096, + "step": 50400 + }, + { + "grad_norm": 0.04752817749977112, + "learning_rate": 5.345465080600093e-05, + "loss": 0.0048, + "step": 50410 + }, + { + "grad_norm": 0.07118037343025208, + "learning_rate": 5.3438155433484326e-05, + "loss": 0.0114, + "step": 50420 + }, + { + "grad_norm": 0.06013160198926926, + "learning_rate": 5.3421659684976197e-05, + "loss": 0.0076, + "step": 50430 + }, + { + "grad_norm": 0.05712509527802467, + "learning_rate": 5.340516356228052e-05, + "loss": 0.0062, + "step": 50440 + }, + { + "grad_norm": 0.05293245613574982, + "learning_rate": 5.338866706720128e-05, + "loss": 0.0054, + "step": 50450 + }, + { + "grad_norm": 0.04629657417535782, + "learning_rate": 5.337217020154249e-05, + "loss": 0.0053, + "step": 50460 + }, + { + "grad_norm": 0.053386665880680084, + "learning_rate": 5.335567296710825e-05, + "loss": 0.0051, + "step": 50470 + }, + { + "grad_norm": 0.0769769474864006, + "learning_rate": 5.333917536570265e-05, + "loss": 0.0069, + "step": 50480 + }, + { + "grad_norm": 0.07899194955825806, + "learning_rate": 5.332267739912986e-05, + "loss": 0.0054, + "step": 50490 + }, + { + "grad_norm": 0.051954835653305054, + "learning_rate": 5.330617906919405e-05, + "loss": 0.0058, + "step": 50500 + }, + { + "grad_norm": 0.055398110300302505, + "learning_rate": 5.328968037769949e-05, + "loss": 0.0065, + "step": 50510 + }, + { + "grad_norm": 0.0668981596827507, + "learning_rate": 5.327318132645042e-05, + "loss": 0.0082, + "step": 50520 + }, + { + "grad_norm": 0.04352225363254547, + "learning_rate": 5.325668191725116e-05, + "loss": 0.0058, + "step": 50530 + }, + { + "grad_norm": 0.0575515516102314, + "learning_rate": 5.324018215190606e-05, + "loss": 0.0055, + "step": 50540 + }, + { + "grad_norm": 0.046297457069158554, + "learning_rate": 5.3223682032219515e-05, + "loss": 0.0077, + "step": 50550 + }, + { + "grad_norm": 0.0360596738755703, + "learning_rate": 5.320718155999595e-05, + "loss": 0.0052, + "step": 50560 + }, + { + "grad_norm": 0.058395709842443466, + "learning_rate": 5.3190680737039835e-05, + "loss": 0.0056, + "step": 50570 + }, + { + "grad_norm": 0.04096156358718872, + "learning_rate": 5.317417956515567e-05, + "loss": 0.0056, + "step": 50580 + }, + { + "grad_norm": 0.05936264246702194, + "learning_rate": 5.315767804614798e-05, + "loss": 0.0073, + "step": 50590 + }, + { + "grad_norm": 0.07497719675302505, + "learning_rate": 5.3141176181821395e-05, + "loss": 0.006, + "step": 50600 + }, + { + "grad_norm": 0.04450860247015953, + "learning_rate": 5.31246739739805e-05, + "loss": 0.0071, + "step": 50610 + }, + { + "grad_norm": 0.058038730174303055, + "learning_rate": 5.310817142442994e-05, + "loss": 0.0059, + "step": 50620 + }, + { + "grad_norm": 0.05824095383286476, + "learning_rate": 5.309166853497445e-05, + "loss": 0.0076, + "step": 50630 + }, + { + "grad_norm": 0.06889133155345917, + "learning_rate": 5.307516530741873e-05, + "loss": 0.0052, + "step": 50640 + }, + { + "grad_norm": 0.04085616022348404, + "learning_rate": 5.305866174356754e-05, + "loss": 0.0063, + "step": 50650 + }, + { + "grad_norm": 0.05413263291120529, + "learning_rate": 5.304215784522571e-05, + "loss": 0.0054, + "step": 50660 + }, + { + "grad_norm": 0.03663495182991028, + "learning_rate": 5.302565361419808e-05, + "loss": 0.0046, + "step": 50670 + }, + { + "grad_norm": 0.05717622861266136, + "learning_rate": 5.3009149052289507e-05, + "loss": 0.0069, + "step": 50680 + }, + { + "grad_norm": 0.09741855412721634, + "learning_rate": 5.299264416130493e-05, + "loss": 0.0078, + "step": 50690 + }, + { + "grad_norm": 0.12018623948097229, + "learning_rate": 5.297613894304928e-05, + "loss": 0.0061, + "step": 50700 + }, + { + "grad_norm": 0.05254814773797989, + "learning_rate": 5.2959633399327534e-05, + "loss": 0.007, + "step": 50710 + }, + { + "grad_norm": 0.04330886900424957, + "learning_rate": 5.294312753194476e-05, + "loss": 0.0068, + "step": 50720 + }, + { + "grad_norm": 0.0419699102640152, + "learning_rate": 5.292662134270596e-05, + "loss": 0.006, + "step": 50730 + }, + { + "grad_norm": 0.048703450709581375, + "learning_rate": 5.291011483341626e-05, + "loss": 0.0058, + "step": 50740 + }, + { + "grad_norm": 0.03162194415926933, + "learning_rate": 5.2893608005880767e-05, + "loss": 0.0065, + "step": 50750 + }, + { + "grad_norm": 0.06003190577030182, + "learning_rate": 5.287710086190467e-05, + "loss": 0.006, + "step": 50760 + }, + { + "grad_norm": 0.04502379521727562, + "learning_rate": 5.286059340329314e-05, + "loss": 0.0057, + "step": 50770 + }, + { + "grad_norm": 0.04784243926405907, + "learning_rate": 5.284408563185141e-05, + "loss": 0.0061, + "step": 50780 + }, + { + "grad_norm": 0.05710354447364807, + "learning_rate": 5.282757754938476e-05, + "loss": 0.0052, + "step": 50790 + }, + { + "grad_norm": 0.06354282796382904, + "learning_rate": 5.281106915769849e-05, + "loss": 0.0055, + "step": 50800 + }, + { + "grad_norm": 0.05113953724503517, + "learning_rate": 5.2794560458597897e-05, + "loss": 0.0051, + "step": 50810 + }, + { + "grad_norm": 0.04635072872042656, + "learning_rate": 5.27780514538884e-05, + "loss": 0.0065, + "step": 50820 + }, + { + "grad_norm": 0.0561801940202713, + "learning_rate": 5.2761542145375365e-05, + "loss": 0.0055, + "step": 50830 + }, + { + "grad_norm": 0.06659267097711563, + "learning_rate": 5.274503253486421e-05, + "loss": 0.0065, + "step": 50840 + }, + { + "grad_norm": 0.07580709457397461, + "learning_rate": 5.272852262416046e-05, + "loss": 0.0059, + "step": 50850 + }, + { + "grad_norm": 0.04192926362156868, + "learning_rate": 5.2712012415069555e-05, + "loss": 0.0056, + "step": 50860 + }, + { + "grad_norm": 0.05524737015366554, + "learning_rate": 5.269550190939705e-05, + "loss": 0.0073, + "step": 50870 + }, + { + "grad_norm": 0.04543115571141243, + "learning_rate": 5.267899110894852e-05, + "loss": 0.0059, + "step": 50880 + }, + { + "grad_norm": 0.05907884240150452, + "learning_rate": 5.266248001552955e-05, + "loss": 0.0057, + "step": 50890 + }, + { + "grad_norm": 0.04404085502028465, + "learning_rate": 5.264596863094575e-05, + "loss": 0.0052, + "step": 50900 + }, + { + "grad_norm": 0.06148587539792061, + "learning_rate": 5.262945695700282e-05, + "loss": 0.0054, + "step": 50910 + }, + { + "grad_norm": 0.049589186906814575, + "learning_rate": 5.261294499550643e-05, + "loss": 0.007, + "step": 50920 + }, + { + "grad_norm": 0.04530829191207886, + "learning_rate": 5.25964327482623e-05, + "loss": 0.0067, + "step": 50930 + }, + { + "grad_norm": 0.06216636300086975, + "learning_rate": 5.257992021707617e-05, + "loss": 0.0053, + "step": 50940 + }, + { + "grad_norm": 0.04186026379466057, + "learning_rate": 5.256340740375387e-05, + "loss": 0.0078, + "step": 50950 + }, + { + "grad_norm": 0.04101523756980896, + "learning_rate": 5.254689431010117e-05, + "loss": 0.0059, + "step": 50960 + }, + { + "grad_norm": 0.05001840740442276, + "learning_rate": 5.253038093792395e-05, + "loss": 0.0073, + "step": 50970 + }, + { + "grad_norm": 0.06057055667042732, + "learning_rate": 5.251386728902806e-05, + "loss": 0.0053, + "step": 50980 + }, + { + "grad_norm": 0.04962975159287453, + "learning_rate": 5.2497353365219446e-05, + "loss": 0.0068, + "step": 50990 + }, + { + "grad_norm": 0.06156635284423828, + "learning_rate": 5.2480839168304e-05, + "loss": 0.0079, + "step": 51000 + }, + { + "grad_norm": 0.05438970774412155, + "learning_rate": 5.246432470008773e-05, + "loss": 0.0066, + "step": 51010 + }, + { + "grad_norm": 0.0459996722638607, + "learning_rate": 5.2447809962376616e-05, + "loss": 0.0063, + "step": 51020 + }, + { + "grad_norm": 0.059565283358097076, + "learning_rate": 5.243129495697666e-05, + "loss": 0.006, + "step": 51030 + }, + { + "grad_norm": 0.045709528028964996, + "learning_rate": 5.241477968569397e-05, + "loss": 0.0055, + "step": 51040 + }, + { + "grad_norm": 0.04547249898314476, + "learning_rate": 5.23982641503346e-05, + "loss": 0.0072, + "step": 51050 + }, + { + "grad_norm": 0.04972587898373604, + "learning_rate": 5.238174835270465e-05, + "loss": 0.0068, + "step": 51060 + }, + { + "grad_norm": 0.048763956874608994, + "learning_rate": 5.23652322946103e-05, + "loss": 0.005, + "step": 51070 + }, + { + "grad_norm": 0.06975303590297699, + "learning_rate": 5.234871597785771e-05, + "loss": 0.0073, + "step": 51080 + }, + { + "grad_norm": 0.0520402155816555, + "learning_rate": 5.233219940425306e-05, + "loss": 0.0056, + "step": 51090 + }, + { + "grad_norm": 0.07262666523456573, + "learning_rate": 5.2315682575602576e-05, + "loss": 0.0061, + "step": 51100 + }, + { + "grad_norm": 0.06104672700166702, + "learning_rate": 5.229916549371254e-05, + "loss": 0.0056, + "step": 51110 + }, + { + "grad_norm": 0.061569832265377045, + "learning_rate": 5.228264816038924e-05, + "loss": 0.0061, + "step": 51120 + }, + { + "grad_norm": 0.03876776620745659, + "learning_rate": 5.2266130577438935e-05, + "loss": 0.0071, + "step": 51130 + }, + { + "grad_norm": 0.046570632606744766, + "learning_rate": 5.224961274666802e-05, + "loss": 0.0067, + "step": 51140 + }, + { + "grad_norm": 0.06041542813181877, + "learning_rate": 5.223309466988283e-05, + "loss": 0.0056, + "step": 51150 + }, + { + "grad_norm": 0.04598666727542877, + "learning_rate": 5.221657634888976e-05, + "loss": 0.0044, + "step": 51160 + }, + { + "grad_norm": 0.05004498362541199, + "learning_rate": 5.2200057785495235e-05, + "loss": 0.005, + "step": 51170 + }, + { + "grad_norm": 0.04268229380249977, + "learning_rate": 5.2183538981505706e-05, + "loss": 0.0075, + "step": 51180 + }, + { + "grad_norm": 0.040373753756284714, + "learning_rate": 5.216701993872762e-05, + "loss": 0.005, + "step": 51190 + }, + { + "grad_norm": 0.04083222895860672, + "learning_rate": 5.215050065896752e-05, + "loss": 0.0062, + "step": 51200 + }, + { + "grad_norm": 0.057301685214042664, + "learning_rate": 5.2133981144031875e-05, + "loss": 0.0068, + "step": 51210 + }, + { + "grad_norm": 0.07209085673093796, + "learning_rate": 5.211746139572727e-05, + "loss": 0.0055, + "step": 51220 + }, + { + "grad_norm": 0.0418047159910202, + "learning_rate": 5.210094141586026e-05, + "loss": 0.0063, + "step": 51230 + }, + { + "grad_norm": 0.061248887330293655, + "learning_rate": 5.2084421206237476e-05, + "loss": 0.0071, + "step": 51240 + }, + { + "grad_norm": 0.0485665425658226, + "learning_rate": 5.20679007686655e-05, + "loss": 0.0057, + "step": 51250 + }, + { + "grad_norm": 0.0551641620695591, + "learning_rate": 5.205138010495102e-05, + "loss": 0.0052, + "step": 51260 + }, + { + "grad_norm": 0.049401458352804184, + "learning_rate": 5.20348592169007e-05, + "loss": 0.0089, + "step": 51270 + }, + { + "grad_norm": 0.0572861023247242, + "learning_rate": 5.201833810632123e-05, + "loss": 0.0052, + "step": 51280 + }, + { + "grad_norm": 0.04326453059911728, + "learning_rate": 5.200181677501933e-05, + "loss": 0.0064, + "step": 51290 + }, + { + "grad_norm": 0.04268388822674751, + "learning_rate": 5.1985295224801777e-05, + "loss": 0.0055, + "step": 51300 + }, + { + "grad_norm": 0.05296429991722107, + "learning_rate": 5.196877345747531e-05, + "loss": 0.0062, + "step": 51310 + }, + { + "grad_norm": 0.05419914424419403, + "learning_rate": 5.195225147484674e-05, + "loss": 0.0059, + "step": 51320 + }, + { + "grad_norm": 0.042077530175447464, + "learning_rate": 5.193572927872288e-05, + "loss": 0.0079, + "step": 51330 + }, + { + "grad_norm": 0.05014668405056, + "learning_rate": 5.191920687091059e-05, + "loss": 0.0058, + "step": 51340 + }, + { + "grad_norm": 0.063189797103405, + "learning_rate": 5.1902684253216695e-05, + "loss": 0.005, + "step": 51350 + }, + { + "grad_norm": 0.055021341890096664, + "learning_rate": 5.1886161427448134e-05, + "loss": 0.0061, + "step": 51360 + }, + { + "grad_norm": 0.07327453792095184, + "learning_rate": 5.186963839541179e-05, + "loss": 0.0057, + "step": 51370 + }, + { + "grad_norm": 0.053304992616176605, + "learning_rate": 5.185311515891459e-05, + "loss": 0.0046, + "step": 51380 + }, + { + "grad_norm": 0.051758769899606705, + "learning_rate": 5.1836591719763516e-05, + "loss": 0.0058, + "step": 51390 + }, + { + "grad_norm": 0.05997127667069435, + "learning_rate": 5.182006807976554e-05, + "loss": 0.0055, + "step": 51400 + }, + { + "grad_norm": 0.04548224061727524, + "learning_rate": 5.180354424072763e-05, + "loss": 0.0054, + "step": 51410 + }, + { + "grad_norm": 0.03986069932579994, + "learning_rate": 5.178702020445686e-05, + "loss": 0.0054, + "step": 51420 + }, + { + "grad_norm": 0.06314125657081604, + "learning_rate": 5.177049597276024e-05, + "loss": 0.0067, + "step": 51430 + }, + { + "grad_norm": 0.06252748519182205, + "learning_rate": 5.175397154744482e-05, + "loss": 0.0051, + "step": 51440 + }, + { + "grad_norm": 0.05212031304836273, + "learning_rate": 5.1737446930317736e-05, + "loss": 0.0059, + "step": 51450 + }, + { + "grad_norm": 0.05161076411604881, + "learning_rate": 5.172092212318604e-05, + "loss": 0.0058, + "step": 51460 + }, + { + "grad_norm": 0.04007463902235031, + "learning_rate": 5.170439712785692e-05, + "loss": 0.0048, + "step": 51470 + }, + { + "grad_norm": 0.028396770358085632, + "learning_rate": 5.1687871946137467e-05, + "loss": 0.0055, + "step": 51480 + }, + { + "grad_norm": 0.03719332441687584, + "learning_rate": 5.16713465798349e-05, + "loss": 0.0082, + "step": 51490 + }, + { + "grad_norm": 0.07426763325929642, + "learning_rate": 5.165482103075637e-05, + "loss": 0.0061, + "step": 51500 + }, + { + "grad_norm": 0.054356493055820465, + "learning_rate": 5.163829530070909e-05, + "loss": 0.0056, + "step": 51510 + }, + { + "grad_norm": 0.05500281974673271, + "learning_rate": 5.162176939150032e-05, + "loss": 0.0065, + "step": 51520 + }, + { + "grad_norm": 0.06033024936914444, + "learning_rate": 5.160524330493728e-05, + "loss": 0.006, + "step": 51530 + }, + { + "grad_norm": 0.06569260358810425, + "learning_rate": 5.158871704282724e-05, + "loss": 0.0064, + "step": 51540 + }, + { + "grad_norm": 0.043516188859939575, + "learning_rate": 5.15721906069775e-05, + "loss": 0.0052, + "step": 51550 + }, + { + "grad_norm": 0.043931249529123306, + "learning_rate": 5.155566399919536e-05, + "loss": 0.0066, + "step": 51560 + }, + { + "grad_norm": 0.05033990368247032, + "learning_rate": 5.153913722128813e-05, + "loss": 0.0056, + "step": 51570 + }, + { + "grad_norm": 0.04453851655125618, + "learning_rate": 5.1522610275063176e-05, + "loss": 0.0066, + "step": 51580 + }, + { + "grad_norm": 0.047853682190179825, + "learning_rate": 5.150608316232786e-05, + "loss": 0.0064, + "step": 51590 + }, + { + "grad_norm": 0.05423213168978691, + "learning_rate": 5.148955588488955e-05, + "loss": 0.005, + "step": 51600 + }, + { + "grad_norm": 0.05546017363667488, + "learning_rate": 5.147302844455564e-05, + "loss": 0.0042, + "step": 51610 + }, + { + "grad_norm": 0.045785777270793915, + "learning_rate": 5.145650084313356e-05, + "loss": 0.0059, + "step": 51620 + }, + { + "grad_norm": 0.04065697267651558, + "learning_rate": 5.1439973082430734e-05, + "loss": 0.0056, + "step": 51630 + }, + { + "grad_norm": 0.060030050575733185, + "learning_rate": 5.142344516425461e-05, + "loss": 0.0068, + "step": 51640 + }, + { + "grad_norm": 0.03281380608677864, + "learning_rate": 5.1406917090412673e-05, + "loss": 0.0041, + "step": 51650 + }, + { + "grad_norm": 0.04936134070158005, + "learning_rate": 5.1390388862712404e-05, + "loss": 0.005, + "step": 51660 + }, + { + "grad_norm": 0.04752502962946892, + "learning_rate": 5.137386048296129e-05, + "loss": 0.0064, + "step": 51670 + }, + { + "grad_norm": 0.06275102496147156, + "learning_rate": 5.135733195296686e-05, + "loss": 0.0059, + "step": 51680 + }, + { + "grad_norm": 0.04153867065906525, + "learning_rate": 5.134080327453664e-05, + "loss": 0.0054, + "step": 51690 + }, + { + "grad_norm": 0.0359196737408638, + "learning_rate": 5.1324274449478204e-05, + "loss": 0.0049, + "step": 51700 + }, + { + "grad_norm": 0.04460769519209862, + "learning_rate": 5.130774547959909e-05, + "loss": 0.0053, + "step": 51710 + }, + { + "grad_norm": 0.03904309123754501, + "learning_rate": 5.129121636670692e-05, + "loss": 0.0058, + "step": 51720 + }, + { + "grad_norm": 0.05572271719574928, + "learning_rate": 5.1274687112609246e-05, + "loss": 0.0052, + "step": 51730 + }, + { + "grad_norm": 0.09736446291208267, + "learning_rate": 5.125815771911372e-05, + "loss": 0.0074, + "step": 51740 + }, + { + "grad_norm": 0.05256358161568642, + "learning_rate": 5.1241628188027956e-05, + "loss": 0.0054, + "step": 51750 + }, + { + "grad_norm": 0.05796360224485397, + "learning_rate": 5.1225098521159585e-05, + "loss": 0.0058, + "step": 51760 + }, + { + "grad_norm": 0.06262940168380737, + "learning_rate": 5.12085687203163e-05, + "loss": 0.006, + "step": 51770 + }, + { + "grad_norm": 0.03873581066727638, + "learning_rate": 5.119203878730575e-05, + "loss": 0.0048, + "step": 51780 + }, + { + "grad_norm": 0.04654510319232941, + "learning_rate": 5.117550872393564e-05, + "loss": 0.0053, + "step": 51790 + }, + { + "grad_norm": 0.08198217302560806, + "learning_rate": 5.1158978532013645e-05, + "loss": 0.0062, + "step": 51800 + }, + { + "grad_norm": 0.0835876315832138, + "learning_rate": 5.114244821334751e-05, + "loss": 0.0048, + "step": 51810 + }, + { + "grad_norm": 0.038914721459150314, + "learning_rate": 5.112591776974497e-05, + "loss": 0.0054, + "step": 51820 + }, + { + "grad_norm": 0.04877805709838867, + "learning_rate": 5.110938720301374e-05, + "loss": 0.0051, + "step": 51830 + }, + { + "grad_norm": 0.049966976046562195, + "learning_rate": 5.1092856514961595e-05, + "loss": 0.0066, + "step": 51840 + }, + { + "grad_norm": 0.056559935212135315, + "learning_rate": 5.107632570739631e-05, + "loss": 0.0051, + "step": 51850 + }, + { + "grad_norm": 0.052778664976358414, + "learning_rate": 5.1059794782125645e-05, + "loss": 0.0077, + "step": 51860 + }, + { + "grad_norm": 0.05472363531589508, + "learning_rate": 5.104326374095744e-05, + "loss": 0.0056, + "step": 51870 + }, + { + "grad_norm": 0.053279053419828415, + "learning_rate": 5.102673258569947e-05, + "loss": 0.0066, + "step": 51880 + }, + { + "grad_norm": 0.06347572803497314, + "learning_rate": 5.101020131815956e-05, + "loss": 0.0061, + "step": 51890 + }, + { + "grad_norm": 0.04689297452569008, + "learning_rate": 5.099366994014556e-05, + "loss": 0.0065, + "step": 51900 + }, + { + "grad_norm": 0.06105000153183937, + "learning_rate": 5.097713845346532e-05, + "loss": 0.0044, + "step": 51910 + }, + { + "grad_norm": 0.04364243522286415, + "learning_rate": 5.096060685992668e-05, + "loss": 0.0051, + "step": 51920 + }, + { + "grad_norm": 0.04080557823181152, + "learning_rate": 5.0944075161337514e-05, + "loss": 0.0041, + "step": 51930 + }, + { + "grad_norm": 0.04367987811565399, + "learning_rate": 5.09275433595057e-05, + "loss": 0.0056, + "step": 51940 + }, + { + "grad_norm": 0.052453137934207916, + "learning_rate": 5.0911011456239157e-05, + "loss": 0.0051, + "step": 51950 + }, + { + "grad_norm": 0.0348891019821167, + "learning_rate": 5.089447945334574e-05, + "loss": 0.0049, + "step": 51960 + }, + { + "grad_norm": 0.05567196011543274, + "learning_rate": 5.087794735263343e-05, + "loss": 0.0056, + "step": 51970 + }, + { + "grad_norm": 0.041933026164770126, + "learning_rate": 5.086141515591011e-05, + "loss": 0.0058, + "step": 51980 + }, + { + "grad_norm": 0.0730002373456955, + "learning_rate": 5.084488286498371e-05, + "loss": 0.01, + "step": 51990 + }, + { + "grad_norm": 0.0733826756477356, + "learning_rate": 5.08283504816622e-05, + "loss": 0.0058, + "step": 52000 + }, + { + "grad_norm": 0.05662541463971138, + "learning_rate": 5.081181800775353e-05, + "loss": 0.0061, + "step": 52010 + }, + { + "grad_norm": 0.034513868391513824, + "learning_rate": 5.079528544506565e-05, + "loss": 0.0072, + "step": 52020 + }, + { + "grad_norm": 0.052348166704177856, + "learning_rate": 5.077875279540657e-05, + "loss": 0.0069, + "step": 52030 + }, + { + "grad_norm": 0.04378015547990799, + "learning_rate": 5.076222006058424e-05, + "loss": 0.0053, + "step": 52040 + }, + { + "grad_norm": 0.08464865386486053, + "learning_rate": 5.074568724240667e-05, + "loss": 0.0063, + "step": 52050 + }, + { + "grad_norm": 0.03797619044780731, + "learning_rate": 5.072915434268187e-05, + "loss": 0.0058, + "step": 52060 + }, + { + "grad_norm": 0.06476757675409317, + "learning_rate": 5.071262136321785e-05, + "loss": 0.0068, + "step": 52070 + }, + { + "grad_norm": 0.04334505274891853, + "learning_rate": 5.069608830582262e-05, + "loss": 0.0054, + "step": 52080 + }, + { + "grad_norm": 0.05116177722811699, + "learning_rate": 5.067955517230423e-05, + "loss": 0.0055, + "step": 52090 + }, + { + "grad_norm": 0.05710606649518013, + "learning_rate": 5.06630219644707e-05, + "loss": 0.0054, + "step": 52100 + }, + { + "grad_norm": 0.054009970277547836, + "learning_rate": 5.064648868413008e-05, + "loss": 0.0057, + "step": 52110 + }, + { + "grad_norm": 0.037380002439022064, + "learning_rate": 5.062995533309044e-05, + "loss": 0.0063, + "step": 52120 + }, + { + "grad_norm": 0.058695919811725616, + "learning_rate": 5.061342191315983e-05, + "loss": 0.0058, + "step": 52130 + }, + { + "grad_norm": 0.05100831016898155, + "learning_rate": 5.0596888426146325e-05, + "loss": 0.0057, + "step": 52140 + }, + { + "grad_norm": 0.06811552494764328, + "learning_rate": 5.058035487385798e-05, + "loss": 0.0064, + "step": 52150 + }, + { + "grad_norm": 0.0483434721827507, + "learning_rate": 5.0563821258102915e-05, + "loss": 0.0062, + "step": 52160 + }, + { + "grad_norm": 0.049105316400527954, + "learning_rate": 5.05472875806892e-05, + "loss": 0.0069, + "step": 52170 + }, + { + "grad_norm": 0.06014179810881615, + "learning_rate": 5.053075384342494e-05, + "loss": 0.0065, + "step": 52180 + }, + { + "grad_norm": 0.04020949825644493, + "learning_rate": 5.0514220048118234e-05, + "loss": 0.0045, + "step": 52190 + }, + { + "grad_norm": 0.042876340448856354, + "learning_rate": 5.049768619657721e-05, + "loss": 0.0062, + "step": 52200 + }, + { + "grad_norm": 0.038095127791166306, + "learning_rate": 5.048115229060996e-05, + "loss": 0.0066, + "step": 52210 + }, + { + "grad_norm": 0.04459884762763977, + "learning_rate": 5.046461833202462e-05, + "loss": 0.0062, + "step": 52220 + }, + { + "grad_norm": 0.05117637291550636, + "learning_rate": 5.044808432262932e-05, + "loss": 0.0053, + "step": 52230 + }, + { + "grad_norm": 0.04813631996512413, + "learning_rate": 5.043155026423219e-05, + "loss": 0.0077, + "step": 52240 + }, + { + "grad_norm": 0.0400603711605072, + "learning_rate": 5.041501615864137e-05, + "loss": 0.0065, + "step": 52250 + }, + { + "grad_norm": 0.04574385657906532, + "learning_rate": 5.039848200766502e-05, + "loss": 0.0074, + "step": 52260 + }, + { + "grad_norm": 0.04089655727148056, + "learning_rate": 5.038194781311125e-05, + "loss": 0.0065, + "step": 52270 + }, + { + "grad_norm": 0.0398847721517086, + "learning_rate": 5.0365413576788264e-05, + "loss": 0.0061, + "step": 52280 + }, + { + "grad_norm": 0.03731732815504074, + "learning_rate": 5.0348879300504194e-05, + "loss": 0.0044, + "step": 52290 + }, + { + "grad_norm": 0.043355993926525116, + "learning_rate": 5.033234498606721e-05, + "loss": 0.0058, + "step": 52300 + }, + { + "grad_norm": 0.06417091935873032, + "learning_rate": 5.0315810635285476e-05, + "loss": 0.0057, + "step": 52310 + }, + { + "grad_norm": 0.0652875006198883, + "learning_rate": 5.0299276249967164e-05, + "loss": 0.0057, + "step": 52320 + }, + { + "grad_norm": 0.037437960505485535, + "learning_rate": 5.0282741831920454e-05, + "loss": 0.0069, + "step": 52330 + }, + { + "grad_norm": 0.07652504742145538, + "learning_rate": 5.026620738295351e-05, + "loss": 0.0073, + "step": 52340 + }, + { + "grad_norm": 0.062059540301561356, + "learning_rate": 5.024967290487455e-05, + "loss": 0.0053, + "step": 52350 + }, + { + "grad_norm": 0.06649911403656006, + "learning_rate": 5.023313839949172e-05, + "loss": 0.0062, + "step": 52360 + }, + { + "grad_norm": 0.10093656182289124, + "learning_rate": 5.0216603868613224e-05, + "loss": 0.0072, + "step": 52370 + }, + { + "grad_norm": 0.06463578343391418, + "learning_rate": 5.020006931404726e-05, + "loss": 0.0068, + "step": 52380 + }, + { + "grad_norm": 0.05216199532151222, + "learning_rate": 5.0183534737602014e-05, + "loss": 0.0054, + "step": 52390 + }, + { + "grad_norm": 0.04007330909371376, + "learning_rate": 5.0167000141085686e-05, + "loss": 0.005, + "step": 52400 + }, + { + "grad_norm": 0.046751078218221664, + "learning_rate": 5.015046552630647e-05, + "loss": 0.0044, + "step": 52410 + }, + { + "grad_norm": 0.05720505118370056, + "learning_rate": 5.013393089507258e-05, + "loss": 0.0045, + "step": 52420 + }, + { + "grad_norm": 0.0439041368663311, + "learning_rate": 5.011739624919219e-05, + "loss": 0.0054, + "step": 52430 + }, + { + "grad_norm": 0.030721230432391167, + "learning_rate": 5.0100861590473545e-05, + "loss": 0.0041, + "step": 52440 + }, + { + "grad_norm": 0.035984303802251816, + "learning_rate": 5.008432692072482e-05, + "loss": 0.0051, + "step": 52450 + }, + { + "grad_norm": 0.05262266471982002, + "learning_rate": 5.006779224175423e-05, + "loss": 0.0045, + "step": 52460 + }, + { + "grad_norm": 0.05562397465109825, + "learning_rate": 5.005125755536998e-05, + "loss": 0.0047, + "step": 52470 + }, + { + "grad_norm": 0.033916838467121124, + "learning_rate": 5.003472286338029e-05, + "loss": 0.0048, + "step": 52480 + }, + { + "grad_norm": 0.04148591682314873, + "learning_rate": 5.001818816759335e-05, + "loss": 0.0056, + "step": 52490 + }, + { + "grad_norm": 0.0520707368850708, + "learning_rate": 5.000165346981738e-05, + "loss": 0.0068, + "step": 52500 + }, + { + "grad_norm": 0.05041169002652168, + "learning_rate": 4.9985118771860595e-05, + "loss": 0.006, + "step": 52510 + }, + { + "grad_norm": 0.045507073402404785, + "learning_rate": 4.996858407553119e-05, + "loss": 0.0045, + "step": 52520 + }, + { + "grad_norm": 0.04779840633273125, + "learning_rate": 4.99520493826374e-05, + "loss": 0.0061, + "step": 52530 + }, + { + "grad_norm": 0.05107004940509796, + "learning_rate": 4.9935514694987384e-05, + "loss": 0.0046, + "step": 52540 + }, + { + "grad_norm": 0.045917849987745285, + "learning_rate": 4.991898001438941e-05, + "loss": 0.0057, + "step": 52550 + }, + { + "grad_norm": 0.06482759118080139, + "learning_rate": 4.990244534265166e-05, + "loss": 0.0079, + "step": 52560 + }, + { + "grad_norm": 0.056565895676612854, + "learning_rate": 4.988591068158234e-05, + "loss": 0.0055, + "step": 52570 + }, + { + "grad_norm": 0.06593350321054459, + "learning_rate": 4.986937603298965e-05, + "loss": 0.0076, + "step": 52580 + }, + { + "grad_norm": 0.05538654699921608, + "learning_rate": 4.985284139868181e-05, + "loss": 0.005, + "step": 52590 + }, + { + "grad_norm": 0.040484920144081116, + "learning_rate": 4.9836306780467e-05, + "loss": 0.0053, + "step": 52600 + }, + { + "grad_norm": 0.04224611073732376, + "learning_rate": 4.981977218015343e-05, + "loss": 0.0074, + "step": 52610 + }, + { + "grad_norm": 0.05615030974149704, + "learning_rate": 4.980323759954932e-05, + "loss": 0.0077, + "step": 52620 + }, + { + "grad_norm": 0.05520949885249138, + "learning_rate": 4.978670304046284e-05, + "loss": 0.0065, + "step": 52630 + }, + { + "grad_norm": 0.050535865128040314, + "learning_rate": 4.9770168504702195e-05, + "loss": 0.0056, + "step": 52640 + }, + { + "grad_norm": 0.04602331295609474, + "learning_rate": 4.975363399407559e-05, + "loss": 0.0049, + "step": 52650 + }, + { + "grad_norm": 0.046721167862415314, + "learning_rate": 4.9737099510391194e-05, + "loss": 0.0055, + "step": 52660 + }, + { + "grad_norm": 0.05407117307186127, + "learning_rate": 4.972056505545719e-05, + "loss": 0.0052, + "step": 52670 + }, + { + "grad_norm": 0.042796701192855835, + "learning_rate": 4.9704030631081775e-05, + "loss": 0.0056, + "step": 52680 + }, + { + "grad_norm": 0.07539059966802597, + "learning_rate": 4.968749623907313e-05, + "loss": 0.005, + "step": 52690 + }, + { + "grad_norm": 0.05479622632265091, + "learning_rate": 4.967096188123943e-05, + "loss": 0.0051, + "step": 52700 + }, + { + "grad_norm": 0.03507046774029732, + "learning_rate": 4.965442755938884e-05, + "loss": 0.008, + "step": 52710 + }, + { + "grad_norm": 0.04669608920812607, + "learning_rate": 4.963789327532954e-05, + "loss": 0.0054, + "step": 52720 + }, + { + "grad_norm": 0.06603379547595978, + "learning_rate": 4.962135903086968e-05, + "loss": 0.0065, + "step": 52730 + }, + { + "grad_norm": 0.051843754947185516, + "learning_rate": 4.960482482781741e-05, + "loss": 0.0061, + "step": 52740 + }, + { + "grad_norm": 0.04422800987958908, + "learning_rate": 4.958829066798092e-05, + "loss": 0.0044, + "step": 52750 + }, + { + "grad_norm": 0.04187363386154175, + "learning_rate": 4.957175655316835e-05, + "loss": 0.0052, + "step": 52760 + }, + { + "grad_norm": 0.05534370243549347, + "learning_rate": 4.9555222485187834e-05, + "loss": 0.0045, + "step": 52770 + }, + { + "grad_norm": 0.05061338096857071, + "learning_rate": 4.953868846584752e-05, + "loss": 0.0061, + "step": 52780 + }, + { + "grad_norm": 0.0556032694876194, + "learning_rate": 4.9522154496955544e-05, + "loss": 0.0059, + "step": 52790 + }, + { + "grad_norm": 0.06510380655527115, + "learning_rate": 4.950562058032001e-05, + "loss": 0.0079, + "step": 52800 + }, + { + "grad_norm": 0.07738206535577774, + "learning_rate": 4.948908671774909e-05, + "loss": 0.006, + "step": 52810 + }, + { + "grad_norm": 0.04922802373766899, + "learning_rate": 4.947255291105087e-05, + "loss": 0.0043, + "step": 52820 + }, + { + "grad_norm": 0.03729414939880371, + "learning_rate": 4.945601916203348e-05, + "loss": 0.0066, + "step": 52830 + }, + { + "grad_norm": 0.050797153264284134, + "learning_rate": 4.9439485472505016e-05, + "loss": 0.0057, + "step": 52840 + }, + { + "grad_norm": 0.04167822003364563, + "learning_rate": 4.942295184427356e-05, + "loss": 0.0052, + "step": 52850 + }, + { + "grad_norm": 0.035621654242277145, + "learning_rate": 4.9406418279147234e-05, + "loss": 0.0058, + "step": 52860 + }, + { + "grad_norm": 0.035040080547332764, + "learning_rate": 4.9389884778934116e-05, + "loss": 0.0057, + "step": 52870 + }, + { + "grad_norm": 0.040900979191064835, + "learning_rate": 4.937335134544228e-05, + "loss": 0.0058, + "step": 52880 + }, + { + "grad_norm": 0.0460117906332016, + "learning_rate": 4.935681798047979e-05, + "loss": 0.0064, + "step": 52890 + }, + { + "grad_norm": 0.039822518825531006, + "learning_rate": 4.934028468585473e-05, + "loss": 0.006, + "step": 52900 + }, + { + "grad_norm": 0.05649850144982338, + "learning_rate": 4.932375146337514e-05, + "loss": 0.0051, + "step": 52910 + }, + { + "grad_norm": 0.047539159655570984, + "learning_rate": 4.930721831484908e-05, + "loss": 0.0066, + "step": 52920 + }, + { + "grad_norm": 0.05622768774628639, + "learning_rate": 4.929068524208456e-05, + "loss": 0.0056, + "step": 52930 + }, + { + "grad_norm": 0.053024519234895706, + "learning_rate": 4.927415224688965e-05, + "loss": 0.0057, + "step": 52940 + }, + { + "grad_norm": 0.04410995915532112, + "learning_rate": 4.9257619331072375e-05, + "loss": 0.0049, + "step": 52950 + }, + { + "grad_norm": 0.03812269866466522, + "learning_rate": 4.924108649644072e-05, + "loss": 0.0059, + "step": 52960 + }, + { + "grad_norm": 0.04829426482319832, + "learning_rate": 4.922455374480271e-05, + "loss": 0.0055, + "step": 52970 + }, + { + "grad_norm": 0.04133943095803261, + "learning_rate": 4.920802107796634e-05, + "loss": 0.0056, + "step": 52980 + }, + { + "grad_norm": 0.03556700795888901, + "learning_rate": 4.9191488497739564e-05, + "loss": 0.0055, + "step": 52990 + }, + { + "grad_norm": 0.062101636081933975, + "learning_rate": 4.917495600593042e-05, + "loss": 0.0062, + "step": 53000 + }, + { + "grad_norm": 0.04956737160682678, + "learning_rate": 4.915842360434684e-05, + "loss": 0.0051, + "step": 53010 + }, + { + "grad_norm": 0.04147820547223091, + "learning_rate": 4.914189129479679e-05, + "loss": 0.0061, + "step": 53020 + }, + { + "grad_norm": 0.06656470149755478, + "learning_rate": 4.912535907908822e-05, + "loss": 0.0054, + "step": 53030 + }, + { + "grad_norm": 0.04442166909575462, + "learning_rate": 4.910882695902907e-05, + "loss": 0.0054, + "step": 53040 + }, + { + "grad_norm": 0.05116906017065048, + "learning_rate": 4.9092294936427234e-05, + "loss": 0.0058, + "step": 53050 + }, + { + "grad_norm": 0.041887346655130386, + "learning_rate": 4.9075763013090685e-05, + "loss": 0.0062, + "step": 53060 + }, + { + "grad_norm": 0.04510653018951416, + "learning_rate": 4.9059231190827305e-05, + "loss": 0.0058, + "step": 53070 + }, + { + "grad_norm": 0.05264848470687866, + "learning_rate": 4.904269947144499e-05, + "loss": 0.0057, + "step": 53080 + }, + { + "grad_norm": 0.04351793974637985, + "learning_rate": 4.902616785675161e-05, + "loss": 0.0054, + "step": 53090 + }, + { + "grad_norm": 0.04219987243413925, + "learning_rate": 4.900963634855505e-05, + "loss": 0.0046, + "step": 53100 + }, + { + "grad_norm": 0.04615088552236557, + "learning_rate": 4.8993104948663173e-05, + "loss": 0.0053, + "step": 53110 + }, + { + "grad_norm": 0.03956177458167076, + "learning_rate": 4.8976573658883823e-05, + "loss": 0.0077, + "step": 53120 + }, + { + "grad_norm": 0.050292640924453735, + "learning_rate": 4.896004248102484e-05, + "loss": 0.0048, + "step": 53130 + }, + { + "grad_norm": 0.04824615642428398, + "learning_rate": 4.8943511416894045e-05, + "loss": 0.0046, + "step": 53140 + }, + { + "grad_norm": 0.038917239755392075, + "learning_rate": 4.892698046829926e-05, + "loss": 0.0055, + "step": 53150 + }, + { + "grad_norm": 0.045090481638908386, + "learning_rate": 4.8910449637048275e-05, + "loss": 0.0045, + "step": 53160 + }, + { + "grad_norm": 0.047121185809373856, + "learning_rate": 4.8893918924948884e-05, + "loss": 0.0056, + "step": 53170 + }, + { + "grad_norm": 0.05726943537592888, + "learning_rate": 4.8877388333808835e-05, + "loss": 0.0062, + "step": 53180 + }, + { + "grad_norm": 0.04199058189988136, + "learning_rate": 4.8860857865435925e-05, + "loss": 0.0048, + "step": 53190 + }, + { + "grad_norm": 0.03404055908322334, + "learning_rate": 4.884432752163789e-05, + "loss": 0.0044, + "step": 53200 + }, + { + "grad_norm": 0.031610555946826935, + "learning_rate": 4.8827797304222466e-05, + "loss": 0.006, + "step": 53210 + }, + { + "grad_norm": 0.04065784066915512, + "learning_rate": 4.8811267214997366e-05, + "loss": 0.0059, + "step": 53220 + }, + { + "grad_norm": 0.0621301643550396, + "learning_rate": 4.879473725577029e-05, + "loss": 0.0056, + "step": 53230 + }, + { + "grad_norm": 0.0529235415160656, + "learning_rate": 4.877820742834891e-05, + "loss": 0.0058, + "step": 53240 + }, + { + "grad_norm": 0.05247807875275612, + "learning_rate": 4.8761677734540956e-05, + "loss": 0.0064, + "step": 53250 + }, + { + "grad_norm": 0.057757455855607986, + "learning_rate": 4.874514817615407e-05, + "loss": 0.0061, + "step": 53260 + }, + { + "grad_norm": 0.06457018852233887, + "learning_rate": 4.872861875499588e-05, + "loss": 0.0067, + "step": 53270 + }, + { + "grad_norm": 0.033243440091609955, + "learning_rate": 4.871208947287404e-05, + "loss": 0.0045, + "step": 53280 + }, + { + "grad_norm": 0.04957910254597664, + "learning_rate": 4.869556033159615e-05, + "loss": 0.0055, + "step": 53290 + }, + { + "grad_norm": 0.04530039057135582, + "learning_rate": 4.8679031332969835e-05, + "loss": 0.0062, + "step": 53300 + }, + { + "grad_norm": 0.08108033239841461, + "learning_rate": 4.8662502478802635e-05, + "loss": 0.0068, + "step": 53310 + }, + { + "grad_norm": 0.06872209161520004, + "learning_rate": 4.8645973770902176e-05, + "loss": 0.0061, + "step": 53320 + }, + { + "grad_norm": 0.0479188933968544, + "learning_rate": 4.862944521107599e-05, + "loss": 0.0062, + "step": 53330 + }, + { + "grad_norm": 0.03626694157719612, + "learning_rate": 4.861291680113162e-05, + "loss": 0.0045, + "step": 53340 + }, + { + "grad_norm": 0.045969776809215546, + "learning_rate": 4.8596388542876563e-05, + "loss": 0.0058, + "step": 53350 + }, + { + "grad_norm": 0.036241792142391205, + "learning_rate": 4.857986043811836e-05, + "loss": 0.0048, + "step": 53360 + }, + { + "grad_norm": 0.05033997818827629, + "learning_rate": 4.856333248866448e-05, + "loss": 0.0049, + "step": 53370 + }, + { + "grad_norm": 0.047482509166002274, + "learning_rate": 4.85468046963224e-05, + "loss": 0.0063, + "step": 53380 + }, + { + "grad_norm": 0.05606669932603836, + "learning_rate": 4.853027706289957e-05, + "loss": 0.0054, + "step": 53390 + }, + { + "grad_norm": 0.033986639231443405, + "learning_rate": 4.851374959020344e-05, + "loss": 0.0057, + "step": 53400 + }, + { + "grad_norm": 0.04302208870649338, + "learning_rate": 4.849722228004141e-05, + "loss": 0.005, + "step": 53410 + }, + { + "grad_norm": 0.04387088119983673, + "learning_rate": 4.848069513422091e-05, + "loss": 0.006, + "step": 53420 + }, + { + "grad_norm": 0.05683594569563866, + "learning_rate": 4.8464168154549294e-05, + "loss": 0.0052, + "step": 53430 + }, + { + "grad_norm": 0.05461166054010391, + "learning_rate": 4.844764134283392e-05, + "loss": 0.0053, + "step": 53440 + }, + { + "grad_norm": 0.06701746582984924, + "learning_rate": 4.843111470088218e-05, + "loss": 0.0059, + "step": 53450 + }, + { + "grad_norm": 0.04920516908168793, + "learning_rate": 4.841458823050137e-05, + "loss": 0.0054, + "step": 53460 + }, + { + "grad_norm": 0.07035821676254272, + "learning_rate": 4.8398061933498816e-05, + "loss": 0.0067, + "step": 53470 + }, + { + "grad_norm": 0.048588793724775314, + "learning_rate": 4.83815358116818e-05, + "loss": 0.0052, + "step": 53480 + }, + { + "grad_norm": 0.035947177559137344, + "learning_rate": 4.8365009866857584e-05, + "loss": 0.0046, + "step": 53490 + }, + { + "grad_norm": 0.05514584481716156, + "learning_rate": 4.834848410083342e-05, + "loss": 0.0058, + "step": 53500 + }, + { + "grad_norm": 0.055941082537174225, + "learning_rate": 4.833195851541657e-05, + "loss": 0.0054, + "step": 53510 + }, + { + "grad_norm": 0.04569251090288162, + "learning_rate": 4.831543311241423e-05, + "loss": 0.006, + "step": 53520 + }, + { + "grad_norm": 0.0538824237883091, + "learning_rate": 4.829890789363359e-05, + "loss": 0.0058, + "step": 53530 + }, + { + "grad_norm": 0.05475272238254547, + "learning_rate": 4.828238286088182e-05, + "loss": 0.0067, + "step": 53540 + }, + { + "grad_norm": 0.04970400407910347, + "learning_rate": 4.826585801596609e-05, + "loss": 0.0061, + "step": 53550 + }, + { + "grad_norm": 0.05907004326581955, + "learning_rate": 4.8249333360693486e-05, + "loss": 0.0054, + "step": 53560 + }, + { + "grad_norm": 0.04340185970067978, + "learning_rate": 4.8232808896871185e-05, + "loss": 0.0055, + "step": 53570 + }, + { + "grad_norm": 0.07633959501981735, + "learning_rate": 4.821628462630624e-05, + "loss": 0.0054, + "step": 53580 + }, + { + "grad_norm": 0.04183162748813629, + "learning_rate": 4.8199760550805735e-05, + "loss": 0.0064, + "step": 53590 + }, + { + "grad_norm": 0.06644944846630096, + "learning_rate": 4.818323667217669e-05, + "loss": 0.0065, + "step": 53600 + }, + { + "grad_norm": 0.046743787825107574, + "learning_rate": 4.8166712992226165e-05, + "loss": 0.0044, + "step": 53610 + }, + { + "grad_norm": 0.05511579290032387, + "learning_rate": 4.8150189512761154e-05, + "loss": 0.0045, + "step": 53620 + }, + { + "grad_norm": 0.04294634982943535, + "learning_rate": 4.813366623558862e-05, + "loss": 0.0042, + "step": 53630 + }, + { + "grad_norm": 0.052301764488220215, + "learning_rate": 4.811714316251554e-05, + "loss": 0.0049, + "step": 53640 + }, + { + "grad_norm": 0.036942072212696075, + "learning_rate": 4.810062029534886e-05, + "loss": 0.0052, + "step": 53650 + }, + { + "grad_norm": 0.03311761096119881, + "learning_rate": 4.8084097635895505e-05, + "loss": 0.005, + "step": 53660 + }, + { + "grad_norm": 0.04520956426858902, + "learning_rate": 4.8067575185962335e-05, + "loss": 0.0056, + "step": 53670 + }, + { + "grad_norm": 0.0421261303126812, + "learning_rate": 4.805105294735625e-05, + "loss": 0.0047, + "step": 53680 + }, + { + "grad_norm": 0.025741957128047943, + "learning_rate": 4.803453092188406e-05, + "loss": 0.0057, + "step": 53690 + }, + { + "grad_norm": 0.05715545266866684, + "learning_rate": 4.801800911135263e-05, + "loss": 0.0054, + "step": 53700 + }, + { + "grad_norm": 0.05475128814578056, + "learning_rate": 4.8001487517568744e-05, + "loss": 0.0061, + "step": 53710 + }, + { + "grad_norm": 0.062300194054841995, + "learning_rate": 4.798496614233918e-05, + "loss": 0.0056, + "step": 53720 + }, + { + "grad_norm": 0.04489002749323845, + "learning_rate": 4.796844498747069e-05, + "loss": 0.0058, + "step": 53730 + }, + { + "grad_norm": 0.04452187567949295, + "learning_rate": 4.795192405477e-05, + "loss": 0.0082, + "step": 53740 + }, + { + "grad_norm": 0.06913432478904724, + "learning_rate": 4.79354033460438e-05, + "loss": 0.0063, + "step": 53750 + }, + { + "grad_norm": 0.04911046102643013, + "learning_rate": 4.79188828630988e-05, + "loss": 0.0049, + "step": 53760 + }, + { + "grad_norm": 0.06197443976998329, + "learning_rate": 4.790236260774165e-05, + "loss": 0.006, + "step": 53770 + }, + { + "grad_norm": 0.0658547654747963, + "learning_rate": 4.788584258177896e-05, + "loss": 0.0052, + "step": 53780 + }, + { + "grad_norm": 0.07917845249176025, + "learning_rate": 4.7869322787017356e-05, + "loss": 0.0056, + "step": 53790 + }, + { + "grad_norm": 0.062361083924770355, + "learning_rate": 4.78528032252634e-05, + "loss": 0.0046, + "step": 53800 + }, + { + "grad_norm": 0.059005070477724075, + "learning_rate": 4.783628389832366e-05, + "loss": 0.0048, + "step": 53810 + }, + { + "grad_norm": 0.043413907289505005, + "learning_rate": 4.7819764808004644e-05, + "loss": 0.0062, + "step": 53820 + }, + { + "grad_norm": 0.051456376910209656, + "learning_rate": 4.780324595611289e-05, + "loss": 0.0048, + "step": 53830 + }, + { + "grad_norm": 0.045168157666921616, + "learning_rate": 4.7786727344454854e-05, + "loss": 0.0064, + "step": 53840 + }, + { + "grad_norm": 0.047397349029779434, + "learning_rate": 4.777020897483697e-05, + "loss": 0.0058, + "step": 53850 + }, + { + "grad_norm": 0.05859621241688728, + "learning_rate": 4.775369084906569e-05, + "loss": 0.0074, + "step": 53860 + }, + { + "grad_norm": 0.054090313613414764, + "learning_rate": 4.773717296894742e-05, + "loss": 0.0053, + "step": 53870 + }, + { + "grad_norm": 0.04337581992149353, + "learning_rate": 4.772065533628848e-05, + "loss": 0.0055, + "step": 53880 + }, + { + "grad_norm": 0.03135571628808975, + "learning_rate": 4.7704137952895265e-05, + "loss": 0.004, + "step": 53890 + }, + { + "grad_norm": 0.05283987894654274, + "learning_rate": 4.768762082057407e-05, + "loss": 0.0045, + "step": 53900 + }, + { + "grad_norm": 0.057143308222293854, + "learning_rate": 4.7671103941131186e-05, + "loss": 0.0065, + "step": 53910 + }, + { + "grad_norm": 0.030468177050352097, + "learning_rate": 4.765458731637287e-05, + "loss": 0.0051, + "step": 53920 + }, + { + "grad_norm": 0.05133320763707161, + "learning_rate": 4.763807094810537e-05, + "loss": 0.0048, + "step": 53930 + }, + { + "grad_norm": 0.053281717002391815, + "learning_rate": 4.762155483813487e-05, + "loss": 0.0063, + "step": 53940 + }, + { + "grad_norm": 0.05026394873857498, + "learning_rate": 4.760503898826754e-05, + "loss": 0.0068, + "step": 53950 + }, + { + "grad_norm": 0.06799668818712234, + "learning_rate": 4.758852340030957e-05, + "loss": 0.006, + "step": 53960 + }, + { + "grad_norm": 0.04221362993121147, + "learning_rate": 4.757200807606705e-05, + "loss": 0.0057, + "step": 53970 + }, + { + "grad_norm": 0.0692463144659996, + "learning_rate": 4.755549301734607e-05, + "loss": 0.0064, + "step": 53980 + }, + { + "grad_norm": 0.048715125769376755, + "learning_rate": 4.753897822595271e-05, + "loss": 0.0047, + "step": 53990 + }, + { + "grad_norm": 0.06388130784034729, + "learning_rate": 4.7522463703692994e-05, + "loss": 0.0057, + "step": 54000 + }, + { + "grad_norm": 0.042853619903326035, + "learning_rate": 4.75059494523729e-05, + "loss": 0.0069, + "step": 54010 + }, + { + "grad_norm": 0.029736706987023354, + "learning_rate": 4.748943547379843e-05, + "loss": 0.0043, + "step": 54020 + }, + { + "grad_norm": 0.045578114688396454, + "learning_rate": 4.7472921769775535e-05, + "loss": 0.0057, + "step": 54030 + }, + { + "grad_norm": 0.048274531960487366, + "learning_rate": 4.745640834211011e-05, + "loss": 0.006, + "step": 54040 + }, + { + "grad_norm": 0.054422006011009216, + "learning_rate": 4.7439895192608054e-05, + "loss": 0.005, + "step": 54050 + }, + { + "grad_norm": 0.059110093861818314, + "learning_rate": 4.7423382323075186e-05, + "loss": 0.0057, + "step": 54060 + }, + { + "grad_norm": 0.035360150039196014, + "learning_rate": 4.740686973531737e-05, + "loss": 0.0053, + "step": 54070 + }, + { + "grad_norm": 0.040134966373443604, + "learning_rate": 4.7390357431140375e-05, + "loss": 0.0046, + "step": 54080 + }, + { + "grad_norm": 0.044960394501686096, + "learning_rate": 4.737384541234998e-05, + "loss": 0.0062, + "step": 54090 + }, + { + "grad_norm": 0.04706403613090515, + "learning_rate": 4.735733368075189e-05, + "loss": 0.0063, + "step": 54100 + }, + { + "grad_norm": 0.0656152218580246, + "learning_rate": 4.734082223815182e-05, + "loss": 0.0046, + "step": 54110 + }, + { + "grad_norm": 0.04331245645880699, + "learning_rate": 4.7324311086355436e-05, + "loss": 0.0049, + "step": 54120 + }, + { + "grad_norm": 0.04385824874043465, + "learning_rate": 4.730780022716836e-05, + "loss": 0.0052, + "step": 54130 + }, + { + "grad_norm": 0.05429671332240105, + "learning_rate": 4.729128966239619e-05, + "loss": 0.0059, + "step": 54140 + }, + { + "grad_norm": 0.059555985033512115, + "learning_rate": 4.7274779393844536e-05, + "loss": 0.0055, + "step": 54150 + }, + { + "grad_norm": 0.06339988857507706, + "learning_rate": 4.725826942331891e-05, + "loss": 0.0053, + "step": 54160 + }, + { + "grad_norm": 0.06040172651410103, + "learning_rate": 4.724175975262481e-05, + "loss": 0.0063, + "step": 54170 + }, + { + "grad_norm": 0.05248741805553436, + "learning_rate": 4.722525038356772e-05, + "loss": 0.005, + "step": 54180 + }, + { + "grad_norm": 0.08453844487667084, + "learning_rate": 4.7208741317953075e-05, + "loss": 0.0064, + "step": 54190 + }, + { + "grad_norm": 0.05473635345697403, + "learning_rate": 4.7192232557586264e-05, + "loss": 0.0069, + "step": 54200 + }, + { + "grad_norm": 0.05792779102921486, + "learning_rate": 4.717572410427269e-05, + "loss": 0.0061, + "step": 54210 + }, + { + "grad_norm": 0.09100078791379929, + "learning_rate": 4.71592159598177e-05, + "loss": 0.0059, + "step": 54220 + }, + { + "grad_norm": 0.03645209223031998, + "learning_rate": 4.714270812602657e-05, + "loss": 0.0057, + "step": 54230 + }, + { + "grad_norm": 0.0573062002658844, + "learning_rate": 4.712620060470458e-05, + "loss": 0.006, + "step": 54240 + }, + { + "grad_norm": 0.0395195372402668, + "learning_rate": 4.7109693397656986e-05, + "loss": 0.0049, + "step": 54250 + }, + { + "grad_norm": 0.05683515965938568, + "learning_rate": 4.709318650668894e-05, + "loss": 0.0056, + "step": 54260 + }, + { + "grad_norm": 0.04417264461517334, + "learning_rate": 4.7076679933605676e-05, + "loss": 0.0064, + "step": 54270 + }, + { + "grad_norm": 0.03787734732031822, + "learning_rate": 4.70601736802123e-05, + "loss": 0.0046, + "step": 54280 + }, + { + "grad_norm": 0.03961440175771713, + "learning_rate": 4.704366774831391e-05, + "loss": 0.0058, + "step": 54290 + }, + { + "grad_norm": 0.03656534478068352, + "learning_rate": 4.702716213971557e-05, + "loss": 0.0044, + "step": 54300 + }, + { + "grad_norm": 0.044930100440979004, + "learning_rate": 4.70106568562223e-05, + "loss": 0.0046, + "step": 54310 + }, + { + "grad_norm": 0.07733253389596939, + "learning_rate": 4.6994151899639105e-05, + "loss": 0.0076, + "step": 54320 + }, + { + "grad_norm": 0.03548881411552429, + "learning_rate": 4.697764727177093e-05, + "loss": 0.0047, + "step": 54330 + }, + { + "grad_norm": 0.061647024005651474, + "learning_rate": 4.696114297442272e-05, + "loss": 0.0078, + "step": 54340 + }, + { + "grad_norm": 0.04313695803284645, + "learning_rate": 4.694463900939933e-05, + "loss": 0.0086, + "step": 54350 + }, + { + "grad_norm": 0.06820658594369888, + "learning_rate": 4.692813537850564e-05, + "loss": 0.0054, + "step": 54360 + }, + { + "grad_norm": 0.05256088823080063, + "learning_rate": 4.6911632083546445e-05, + "loss": 0.0066, + "step": 54370 + }, + { + "grad_norm": 0.05180799961090088, + "learning_rate": 4.6895129126326525e-05, + "loss": 0.0051, + "step": 54380 + }, + { + "grad_norm": 0.04559874162077904, + "learning_rate": 4.687862650865059e-05, + "loss": 0.0062, + "step": 54390 + }, + { + "grad_norm": 0.06435247510671616, + "learning_rate": 4.686212423232339e-05, + "loss": 0.0055, + "step": 54400 + }, + { + "grad_norm": 0.0289948508143425, + "learning_rate": 4.6845622299149565e-05, + "loss": 0.0053, + "step": 54410 + }, + { + "grad_norm": 0.04051930829882622, + "learning_rate": 4.682912071093374e-05, + "loss": 0.0047, + "step": 54420 + }, + { + "grad_norm": 0.036266349256038666, + "learning_rate": 4.681261946948052e-05, + "loss": 0.0049, + "step": 54430 + }, + { + "grad_norm": 0.060992252081632614, + "learning_rate": 4.679611857659443e-05, + "loss": 0.0054, + "step": 54440 + }, + { + "grad_norm": 0.05300331488251686, + "learning_rate": 4.6779618034079975e-05, + "loss": 0.0058, + "step": 54450 + }, + { + "grad_norm": 0.04346650838851929, + "learning_rate": 4.6763117843741675e-05, + "loss": 0.0063, + "step": 54460 + }, + { + "grad_norm": 0.029479846358299255, + "learning_rate": 4.674661800738394e-05, + "loss": 0.0045, + "step": 54470 + }, + { + "grad_norm": 0.07195316255092621, + "learning_rate": 4.673011852681117e-05, + "loss": 0.0087, + "step": 54480 + }, + { + "grad_norm": 0.04566960781812668, + "learning_rate": 4.671361940382772e-05, + "loss": 0.0059, + "step": 54490 + }, + { + "grad_norm": 0.0471191480755806, + "learning_rate": 4.669712064023791e-05, + "loss": 0.006, + "step": 54500 + }, + { + "grad_norm": 0.06227409839630127, + "learning_rate": 4.668062223784602e-05, + "loss": 0.0049, + "step": 54510 + }, + { + "grad_norm": 0.039410170167684555, + "learning_rate": 4.6664124198456274e-05, + "loss": 0.0046, + "step": 54520 + }, + { + "grad_norm": 0.057695671916007996, + "learning_rate": 4.6647626523872905e-05, + "loss": 0.0072, + "step": 54530 + }, + { + "grad_norm": 0.05156411603093147, + "learning_rate": 4.6631129215900066e-05, + "loss": 0.0073, + "step": 54540 + }, + { + "grad_norm": 0.05971852317452431, + "learning_rate": 4.661463227634187e-05, + "loss": 0.0081, + "step": 54550 + }, + { + "grad_norm": 0.06280889362096786, + "learning_rate": 4.659813570700238e-05, + "loss": 0.0066, + "step": 54560 + }, + { + "grad_norm": 0.05693546682596207, + "learning_rate": 4.658163950968568e-05, + "loss": 0.0055, + "step": 54570 + }, + { + "grad_norm": 0.039098285138607025, + "learning_rate": 4.6565143686195726e-05, + "loss": 0.0049, + "step": 54580 + }, + { + "grad_norm": 0.0586441233754158, + "learning_rate": 4.654864823833649e-05, + "loss": 0.0056, + "step": 54590 + }, + { + "grad_norm": 0.04212287440896034, + "learning_rate": 4.65321531679119e-05, + "loss": 0.0044, + "step": 54600 + }, + { + "grad_norm": 0.07088125497102737, + "learning_rate": 4.6515658476725834e-05, + "loss": 0.005, + "step": 54610 + }, + { + "grad_norm": 0.034848809242248535, + "learning_rate": 4.649916416658212e-05, + "loss": 0.0045, + "step": 54620 + }, + { + "grad_norm": 0.047551289200782776, + "learning_rate": 4.648267023928455e-05, + "loss": 0.0052, + "step": 54630 + }, + { + "grad_norm": 0.04063768312335014, + "learning_rate": 4.646617669663687e-05, + "loss": 0.0061, + "step": 54640 + }, + { + "grad_norm": 0.04033050313591957, + "learning_rate": 4.644968354044277e-05, + "loss": 0.0055, + "step": 54650 + }, + { + "grad_norm": 0.06492388993501663, + "learning_rate": 4.643319077250596e-05, + "loss": 0.0056, + "step": 54660 + }, + { + "grad_norm": 0.04565870389342308, + "learning_rate": 4.6416698394630055e-05, + "loss": 0.0054, + "step": 54670 + }, + { + "grad_norm": 0.04006074368953705, + "learning_rate": 4.640020640861862e-05, + "loss": 0.0058, + "step": 54680 + }, + { + "grad_norm": 0.04742893949151039, + "learning_rate": 4.6383714816275195e-05, + "loss": 0.0057, + "step": 54690 + }, + { + "grad_norm": 0.06540261209011078, + "learning_rate": 4.636722361940329e-05, + "loss": 0.0052, + "step": 54700 + }, + { + "grad_norm": 0.048874009400606155, + "learning_rate": 4.635073281980632e-05, + "loss": 0.0058, + "step": 54710 + }, + { + "grad_norm": 0.05059355869889259, + "learning_rate": 4.633424241928775e-05, + "loss": 0.0046, + "step": 54720 + }, + { + "grad_norm": 0.05270537734031677, + "learning_rate": 4.6317752419650914e-05, + "loss": 0.0057, + "step": 54730 + }, + { + "grad_norm": 0.05744932219386101, + "learning_rate": 4.630126282269914e-05, + "loss": 0.0065, + "step": 54740 + }, + { + "grad_norm": 0.05547177791595459, + "learning_rate": 4.628477363023569e-05, + "loss": 0.0071, + "step": 54750 + }, + { + "grad_norm": 0.04437410831451416, + "learning_rate": 4.626828484406383e-05, + "loss": 0.0053, + "step": 54760 + }, + { + "grad_norm": 0.055782075971364975, + "learning_rate": 4.625179646598669e-05, + "loss": 0.0053, + "step": 54770 + }, + { + "grad_norm": 0.05950287729501724, + "learning_rate": 4.6235308497807487e-05, + "loss": 0.0045, + "step": 54780 + }, + { + "grad_norm": 0.06230619177222252, + "learning_rate": 4.6218820941329276e-05, + "loss": 0.0053, + "step": 54790 + }, + { + "grad_norm": 0.057171065360307693, + "learning_rate": 4.620233379835513e-05, + "loss": 0.0048, + "step": 54800 + }, + { + "grad_norm": 0.05161285027861595, + "learning_rate": 4.618584707068804e-05, + "loss": 0.0061, + "step": 54810 + }, + { + "grad_norm": 0.06533448398113251, + "learning_rate": 4.616936076013099e-05, + "loss": 0.0068, + "step": 54820 + }, + { + "grad_norm": 0.06425833702087402, + "learning_rate": 4.615287486848688e-05, + "loss": 0.0051, + "step": 54830 + }, + { + "grad_norm": 0.06733601540327072, + "learning_rate": 4.6136389397558584e-05, + "loss": 0.0051, + "step": 54840 + }, + { + "grad_norm": 0.05799771845340729, + "learning_rate": 4.6119904349148935e-05, + "loss": 0.0046, + "step": 54850 + }, + { + "grad_norm": 0.05322619900107384, + "learning_rate": 4.6103419725060726e-05, + "loss": 0.0053, + "step": 54860 + }, + { + "grad_norm": 0.037931881844997406, + "learning_rate": 4.608693552709667e-05, + "loss": 0.006, + "step": 54870 + }, + { + "grad_norm": 0.043577972799539566, + "learning_rate": 4.607045175705947e-05, + "loss": 0.0048, + "step": 54880 + }, + { + "grad_norm": 0.07262901216745377, + "learning_rate": 4.605396841675176e-05, + "loss": 0.007, + "step": 54890 + }, + { + "grad_norm": 0.045729655772447586, + "learning_rate": 4.6037485507976094e-05, + "loss": 0.0051, + "step": 54900 + }, + { + "grad_norm": 0.047365907579660416, + "learning_rate": 4.602100303253509e-05, + "loss": 0.0045, + "step": 54910 + }, + { + "grad_norm": 0.055722687393426895, + "learning_rate": 4.600452099223122e-05, + "loss": 0.006, + "step": 54920 + }, + { + "grad_norm": 0.06420458853244781, + "learning_rate": 4.598803938886691e-05, + "loss": 0.0048, + "step": 54930 + }, + { + "grad_norm": 0.061094481498003006, + "learning_rate": 4.597155822424459e-05, + "loss": 0.0054, + "step": 54940 + }, + { + "grad_norm": 0.04964573681354523, + "learning_rate": 4.59550775001666e-05, + "loss": 0.005, + "step": 54950 + }, + { + "grad_norm": 0.05202203989028931, + "learning_rate": 4.5938597218435234e-05, + "loss": 0.005, + "step": 54960 + }, + { + "grad_norm": 0.04113601893186569, + "learning_rate": 4.592211738085279e-05, + "loss": 0.0051, + "step": 54970 + }, + { + "grad_norm": 0.0403980016708374, + "learning_rate": 4.590563798922146e-05, + "loss": 0.0065, + "step": 54980 + }, + { + "grad_norm": 0.0289996936917305, + "learning_rate": 4.5889159045343404e-05, + "loss": 0.0042, + "step": 54990 + }, + { + "grad_norm": 0.045575644820928574, + "learning_rate": 4.5872680551020734e-05, + "loss": 0.0055, + "step": 55000 + }, + { + "grad_norm": 0.04208865016698837, + "learning_rate": 4.585620250805552e-05, + "loss": 0.0052, + "step": 55010 + }, + { + "grad_norm": 0.04341801628470421, + "learning_rate": 4.583972491824975e-05, + "loss": 0.0054, + "step": 55020 + }, + { + "grad_norm": 0.05028342455625534, + "learning_rate": 4.582324778340541e-05, + "loss": 0.0055, + "step": 55030 + }, + { + "grad_norm": 0.04555366933345795, + "learning_rate": 4.5806771105324424e-05, + "loss": 0.0064, + "step": 55040 + }, + { + "grad_norm": 0.05850183218717575, + "learning_rate": 4.579029488580865e-05, + "loss": 0.0056, + "step": 55050 + }, + { + "grad_norm": 0.036759935319423676, + "learning_rate": 4.5773819126659876e-05, + "loss": 0.006, + "step": 55060 + }, + { + "grad_norm": 0.057965584099292755, + "learning_rate": 4.575734382967991e-05, + "loss": 0.0047, + "step": 55070 + }, + { + "grad_norm": 0.04413274675607681, + "learning_rate": 4.574086899667044e-05, + "loss": 0.0075, + "step": 55080 + }, + { + "grad_norm": 0.04692094400525093, + "learning_rate": 4.5724394629433106e-05, + "loss": 0.0059, + "step": 55090 + }, + { + "grad_norm": 0.04771324619650841, + "learning_rate": 4.570792072976958e-05, + "loss": 0.0055, + "step": 55100 + }, + { + "grad_norm": 0.05947808921337128, + "learning_rate": 4.569144729948138e-05, + "loss": 0.0055, + "step": 55110 + }, + { + "grad_norm": 0.045767512172460556, + "learning_rate": 4.5674974340370033e-05, + "loss": 0.0052, + "step": 55120 + }, + { + "grad_norm": 0.0521734319627285, + "learning_rate": 4.565850185423699e-05, + "loss": 0.0051, + "step": 55130 + }, + { + "grad_norm": 0.05652609094977379, + "learning_rate": 4.564202984288365e-05, + "loss": 0.0045, + "step": 55140 + }, + { + "grad_norm": 0.0582248829305172, + "learning_rate": 4.5625558308111356e-05, + "loss": 0.0061, + "step": 55150 + }, + { + "grad_norm": 0.041762229055166245, + "learning_rate": 4.560908725172145e-05, + "loss": 0.0053, + "step": 55160 + }, + { + "grad_norm": 0.04267338663339615, + "learning_rate": 4.559261667551516e-05, + "loss": 0.0057, + "step": 55170 + }, + { + "grad_norm": 0.058311909437179565, + "learning_rate": 4.5576146581293685e-05, + "loss": 0.0055, + "step": 55180 + }, + { + "grad_norm": 0.05677356943488121, + "learning_rate": 4.5559676970858164e-05, + "loss": 0.0069, + "step": 55190 + }, + { + "grad_norm": 0.03708640858530998, + "learning_rate": 4.5543207846009705e-05, + "loss": 0.0058, + "step": 55200 + }, + { + "grad_norm": 0.0469210222363472, + "learning_rate": 4.552673920854933e-05, + "loss": 0.0073, + "step": 55210 + }, + { + "grad_norm": 0.05903126671910286, + "learning_rate": 4.5510271060278006e-05, + "loss": 0.0045, + "step": 55220 + }, + { + "grad_norm": 0.05530810356140137, + "learning_rate": 4.5493803402996704e-05, + "loss": 0.0076, + "step": 55230 + }, + { + "grad_norm": 0.04658038541674614, + "learning_rate": 4.5477336238506295e-05, + "loss": 0.0051, + "step": 55240 + }, + { + "grad_norm": 0.04710359498858452, + "learning_rate": 4.546086956860759e-05, + "loss": 0.0088, + "step": 55250 + }, + { + "grad_norm": 0.029348133131861687, + "learning_rate": 4.5444403395101374e-05, + "loss": 0.0044, + "step": 55260 + }, + { + "grad_norm": 0.06372889131307602, + "learning_rate": 4.542793771978834e-05, + "loss": 0.0049, + "step": 55270 + }, + { + "grad_norm": 0.07245787978172302, + "learning_rate": 4.541147254446916e-05, + "loss": 0.0065, + "step": 55280 + }, + { + "grad_norm": 0.027574701234698296, + "learning_rate": 4.539500787094445e-05, + "loss": 0.005, + "step": 55290 + }, + { + "grad_norm": 0.057387977838516235, + "learning_rate": 4.537854370101475e-05, + "loss": 0.0061, + "step": 55300 + }, + { + "grad_norm": 0.040081556886434555, + "learning_rate": 4.5362080036480566e-05, + "loss": 0.0065, + "step": 55310 + }, + { + "grad_norm": 0.051510006189346313, + "learning_rate": 4.534561687914235e-05, + "loss": 0.0061, + "step": 55320 + }, + { + "grad_norm": 0.08649349957704544, + "learning_rate": 4.5329154230800466e-05, + "loss": 0.0068, + "step": 55330 + }, + { + "grad_norm": 0.057323988527059555, + "learning_rate": 4.531269209325525e-05, + "loss": 0.0063, + "step": 55340 + }, + { + "grad_norm": 0.05074109137058258, + "learning_rate": 4.529623046830696e-05, + "loss": 0.0056, + "step": 55350 + }, + { + "grad_norm": 0.05125992000102997, + "learning_rate": 4.5279769357755866e-05, + "loss": 0.0053, + "step": 55360 + }, + { + "grad_norm": 0.04256371408700943, + "learning_rate": 4.5263308763402084e-05, + "loss": 0.0053, + "step": 55370 + }, + { + "grad_norm": 0.03976164758205414, + "learning_rate": 4.524684868704574e-05, + "loss": 0.0058, + "step": 55380 + }, + { + "grad_norm": 0.0446786992251873, + "learning_rate": 4.523038913048688e-05, + "loss": 0.0067, + "step": 55390 + }, + { + "grad_norm": 0.05366141349077225, + "learning_rate": 4.521393009552549e-05, + "loss": 0.0056, + "step": 55400 + }, + { + "grad_norm": 0.03911447525024414, + "learning_rate": 4.5197471583961484e-05, + "loss": 0.0053, + "step": 55410 + }, + { + "grad_norm": 0.041795071214437485, + "learning_rate": 4.51810135975948e-05, + "loss": 0.0052, + "step": 55420 + }, + { + "grad_norm": 0.05283055454492569, + "learning_rate": 4.516455613822521e-05, + "loss": 0.0062, + "step": 55430 + }, + { + "grad_norm": 0.060996491461992264, + "learning_rate": 4.514809920765249e-05, + "loss": 0.0048, + "step": 55440 + }, + { + "grad_norm": 0.05216295272111893, + "learning_rate": 4.5131642807676356e-05, + "loss": 0.0038, + "step": 55450 + }, + { + "grad_norm": 0.04342640936374664, + "learning_rate": 4.511518694009644e-05, + "loss": 0.0054, + "step": 55460 + }, + { + "grad_norm": 0.051563818007707596, + "learning_rate": 4.509873160671231e-05, + "loss": 0.0078, + "step": 55470 + }, + { + "grad_norm": 0.032034263014793396, + "learning_rate": 4.508227680932356e-05, + "loss": 0.005, + "step": 55480 + }, + { + "grad_norm": 0.025843216106295586, + "learning_rate": 4.5065822549729605e-05, + "loss": 0.0053, + "step": 55490 + }, + { + "grad_norm": 0.05905138701200485, + "learning_rate": 4.504936882972989e-05, + "loss": 0.0064, + "step": 55500 + }, + { + "grad_norm": 0.061292506754398346, + "learning_rate": 4.503291565112374e-05, + "loss": 0.0056, + "step": 55510 + }, + { + "grad_norm": 0.04670180752873421, + "learning_rate": 4.501646301571048e-05, + "loss": 0.0061, + "step": 55520 + }, + { + "grad_norm": 0.040374837815761566, + "learning_rate": 4.500001092528933e-05, + "loss": 0.0044, + "step": 55530 + }, + { + "grad_norm": 0.05953873693943024, + "learning_rate": 4.4983559381659455e-05, + "loss": 0.0067, + "step": 55540 + }, + { + "grad_norm": 0.04297894611954689, + "learning_rate": 4.496710838661999e-05, + "loss": 0.0057, + "step": 55550 + }, + { + "grad_norm": 0.041753727942705154, + "learning_rate": 4.495065794196999e-05, + "loss": 0.0065, + "step": 55560 + }, + { + "grad_norm": 0.052079737186431885, + "learning_rate": 4.4934208049508455e-05, + "loss": 0.0056, + "step": 55570 + }, + { + "grad_norm": 0.03753305971622467, + "learning_rate": 4.4917758711034303e-05, + "loss": 0.0062, + "step": 55580 + }, + { + "grad_norm": 0.04449488967657089, + "learning_rate": 4.490130992834642e-05, + "loss": 0.0053, + "step": 55590 + }, + { + "grad_norm": 0.05089171603322029, + "learning_rate": 4.488486170324359e-05, + "loss": 0.0046, + "step": 55600 + }, + { + "grad_norm": 0.04198687523603439, + "learning_rate": 4.486841403752463e-05, + "loss": 0.0061, + "step": 55610 + }, + { + "grad_norm": 0.059168215841054916, + "learning_rate": 4.4851966932988174e-05, + "loss": 0.0059, + "step": 55620 + }, + { + "grad_norm": 0.0621398501098156, + "learning_rate": 4.4835520391432894e-05, + "loss": 0.0062, + "step": 55630 + }, + { + "grad_norm": 0.055582351982593536, + "learning_rate": 4.481907441465733e-05, + "loss": 0.0063, + "step": 55640 + }, + { + "grad_norm": 0.03831036388874054, + "learning_rate": 4.480262900446e-05, + "loss": 0.0053, + "step": 55650 + }, + { + "grad_norm": 0.05678832158446312, + "learning_rate": 4.478618416263933e-05, + "loss": 0.0053, + "step": 55660 + }, + { + "grad_norm": 0.03923865035176277, + "learning_rate": 4.476973989099373e-05, + "loss": 0.0051, + "step": 55670 + }, + { + "grad_norm": 0.043656010180711746, + "learning_rate": 4.475329619132153e-05, + "loss": 0.0057, + "step": 55680 + }, + { + "grad_norm": 0.04699769243597984, + "learning_rate": 4.4736853065420956e-05, + "loss": 0.0066, + "step": 55690 + }, + { + "grad_norm": 0.048300061374902725, + "learning_rate": 4.472041051509023e-05, + "loss": 0.0047, + "step": 55700 + }, + { + "grad_norm": 0.05423293262720108, + "learning_rate": 4.470396854212746e-05, + "loss": 0.005, + "step": 55710 + }, + { + "grad_norm": 0.03457451984286308, + "learning_rate": 4.468752714833072e-05, + "loss": 0.0055, + "step": 55720 + }, + { + "grad_norm": 0.03147746995091438, + "learning_rate": 4.467108633549803e-05, + "loss": 0.0043, + "step": 55730 + }, + { + "grad_norm": 0.036751922219991684, + "learning_rate": 4.4654646105427335e-05, + "loss": 0.0042, + "step": 55740 + }, + { + "grad_norm": 0.036786798387765884, + "learning_rate": 4.463820645991651e-05, + "loss": 0.0044, + "step": 55750 + }, + { + "grad_norm": 0.048806410282850266, + "learning_rate": 4.462176740076334e-05, + "loss": 0.0064, + "step": 55760 + }, + { + "grad_norm": 0.047303199768066406, + "learning_rate": 4.4605328929765624e-05, + "loss": 0.005, + "step": 55770 + }, + { + "grad_norm": 0.06891866773366928, + "learning_rate": 4.458889104872102e-05, + "loss": 0.0073, + "step": 55780 + }, + { + "grad_norm": 0.05165500193834305, + "learning_rate": 4.457245375942715e-05, + "loss": 0.005, + "step": 55790 + }, + { + "grad_norm": 0.03385063633322716, + "learning_rate": 4.455601706368157e-05, + "loss": 0.0067, + "step": 55800 + }, + { + "grad_norm": 0.04260464012622833, + "learning_rate": 4.453958096328179e-05, + "loss": 0.0051, + "step": 55810 + }, + { + "grad_norm": 0.05364413186907768, + "learning_rate": 4.452314546002523e-05, + "loss": 0.0043, + "step": 55820 + }, + { + "grad_norm": 0.04726731777191162, + "learning_rate": 4.4506710555709246e-05, + "loss": 0.0046, + "step": 55830 + }, + { + "grad_norm": 0.052985165268182755, + "learning_rate": 4.449027625213114e-05, + "loss": 0.0056, + "step": 55840 + }, + { + "grad_norm": 0.047210026532411575, + "learning_rate": 4.4473842551088135e-05, + "loss": 0.0046, + "step": 55850 + }, + { + "grad_norm": 0.05323554947972298, + "learning_rate": 4.445740945437738e-05, + "loss": 0.0063, + "step": 55860 + }, + { + "grad_norm": 0.05551661550998688, + "learning_rate": 4.444097696379602e-05, + "loss": 0.0053, + "step": 55870 + }, + { + "grad_norm": 0.04559353366494179, + "learning_rate": 4.442454508114106e-05, + "loss": 0.0054, + "step": 55880 + }, + { + "grad_norm": 0.052493929862976074, + "learning_rate": 4.4408113808209465e-05, + "loss": 0.0065, + "step": 55890 + }, + { + "grad_norm": 0.04500303417444229, + "learning_rate": 4.439168314679813e-05, + "loss": 0.0065, + "step": 55900 + }, + { + "grad_norm": 0.06263414770364761, + "learning_rate": 4.43752530987039e-05, + "loss": 0.0056, + "step": 55910 + }, + { + "grad_norm": 0.06170671805739403, + "learning_rate": 4.4358823665723515e-05, + "loss": 0.0047, + "step": 55920 + }, + { + "grad_norm": 0.05387067794799805, + "learning_rate": 4.434239484965371e-05, + "loss": 0.0048, + "step": 55930 + }, + { + "grad_norm": 0.05839439854025841, + "learning_rate": 4.4325966652291103e-05, + "loss": 0.0077, + "step": 55940 + }, + { + "grad_norm": 0.056360986083745956, + "learning_rate": 4.430953907543225e-05, + "loss": 0.0052, + "step": 55950 + }, + { + "grad_norm": 0.05385112389922142, + "learning_rate": 4.4293112120873645e-05, + "loss": 0.0048, + "step": 55960 + }, + { + "grad_norm": 0.0818788930773735, + "learning_rate": 4.427668579041172e-05, + "loss": 0.0057, + "step": 55970 + }, + { + "grad_norm": 0.050629764795303345, + "learning_rate": 4.4260260085842816e-05, + "loss": 0.0041, + "step": 55980 + }, + { + "grad_norm": 0.05018036067485809, + "learning_rate": 4.4243835008963256e-05, + "loss": 0.0072, + "step": 55990 + }, + { + "grad_norm": 0.054218191653490067, + "learning_rate": 4.422741056156925e-05, + "loss": 0.0052, + "step": 56000 + }, + { + "grad_norm": 0.04346352815628052, + "learning_rate": 4.421098674545693e-05, + "loss": 0.0041, + "step": 56010 + }, + { + "grad_norm": 0.05477886274456978, + "learning_rate": 4.419456356242241e-05, + "loss": 0.0055, + "step": 56020 + }, + { + "grad_norm": 0.05596620962023735, + "learning_rate": 4.4178141014261685e-05, + "loss": 0.0056, + "step": 56030 + }, + { + "grad_norm": 0.043927546590566635, + "learning_rate": 4.416171910277071e-05, + "loss": 0.0046, + "step": 56040 + }, + { + "grad_norm": 0.037298791110515594, + "learning_rate": 4.414529782974533e-05, + "loss": 0.0053, + "step": 56050 + }, + { + "grad_norm": 0.05860438942909241, + "learning_rate": 4.41288771969814e-05, + "loss": 0.0045, + "step": 56060 + }, + { + "grad_norm": 0.044780850410461426, + "learning_rate": 4.411245720627464e-05, + "loss": 0.0043, + "step": 56070 + }, + { + "grad_norm": 0.045089878141880035, + "learning_rate": 4.40960378594207e-05, + "loss": 0.006, + "step": 56080 + }, + { + "grad_norm": 0.04090734198689461, + "learning_rate": 4.407961915821519e-05, + "loss": 0.0044, + "step": 56090 + }, + { + "grad_norm": 0.04108116775751114, + "learning_rate": 4.406320110445362e-05, + "loss": 0.0048, + "step": 56100 + }, + { + "grad_norm": 0.03377703204751015, + "learning_rate": 4.404678369993144e-05, + "loss": 0.0043, + "step": 56110 + }, + { + "grad_norm": 0.052075184881687164, + "learning_rate": 4.403036694644406e-05, + "loss": 0.0071, + "step": 56120 + }, + { + "grad_norm": 0.041691217571496964, + "learning_rate": 4.4013950845786764e-05, + "loss": 0.0059, + "step": 56130 + }, + { + "grad_norm": 0.041191354393959045, + "learning_rate": 4.399753539975482e-05, + "loss": 0.0056, + "step": 56140 + }, + { + "grad_norm": 0.04936007037758827, + "learning_rate": 4.398112061014337e-05, + "loss": 0.0052, + "step": 56150 + }, + { + "grad_norm": 0.04021817073225975, + "learning_rate": 4.396470647874753e-05, + "loss": 0.004, + "step": 56160 + }, + { + "grad_norm": 0.06307690590620041, + "learning_rate": 4.394829300736229e-05, + "loss": 0.0069, + "step": 56170 + }, + { + "grad_norm": 0.04464741796255112, + "learning_rate": 4.393188019778265e-05, + "loss": 0.0069, + "step": 56180 + }, + { + "grad_norm": 0.04202074185013771, + "learning_rate": 4.391546805180347e-05, + "loss": 0.0071, + "step": 56190 + }, + { + "grad_norm": 0.050489090383052826, + "learning_rate": 4.389905657121955e-05, + "loss": 0.0047, + "step": 56200 + }, + { + "grad_norm": 0.04624494910240173, + "learning_rate": 4.3882645757825635e-05, + "loss": 0.0044, + "step": 56210 + }, + { + "grad_norm": 0.03719725087285042, + "learning_rate": 4.386623561341637e-05, + "loss": 0.005, + "step": 56220 + }, + { + "grad_norm": 0.05549690127372742, + "learning_rate": 4.384982613978637e-05, + "loss": 0.0062, + "step": 56230 + }, + { + "grad_norm": 0.04419654607772827, + "learning_rate": 4.383341733873012e-05, + "loss": 0.0043, + "step": 56240 + }, + { + "grad_norm": 0.05121549218893051, + "learning_rate": 4.381700921204209e-05, + "loss": 0.0075, + "step": 56250 + }, + { + "grad_norm": 0.06862151622772217, + "learning_rate": 4.380060176151663e-05, + "loss": 0.0047, + "step": 56260 + }, + { + "grad_norm": 0.04314696043729782, + "learning_rate": 4.378419498894805e-05, + "loss": 0.0053, + "step": 56270 + }, + { + "grad_norm": 0.0649251714348793, + "learning_rate": 4.376778889613056e-05, + "loss": 0.0055, + "step": 56280 + }, + { + "grad_norm": 0.03029860183596611, + "learning_rate": 4.375138348485831e-05, + "loss": 0.0045, + "step": 56290 + }, + { + "grad_norm": 0.04941508546471596, + "learning_rate": 4.3734978756925346e-05, + "loss": 0.0051, + "step": 56300 + }, + { + "grad_norm": 0.04289114847779274, + "learning_rate": 4.3718574714125706e-05, + "loss": 0.005, + "step": 56310 + }, + { + "grad_norm": 0.06666120141744614, + "learning_rate": 4.370217135825329e-05, + "loss": 0.0066, + "step": 56320 + }, + { + "grad_norm": 0.05199588090181351, + "learning_rate": 4.368576869110194e-05, + "loss": 0.0044, + "step": 56330 + }, + { + "grad_norm": 0.05527530983090401, + "learning_rate": 4.366936671446544e-05, + "loss": 0.0059, + "step": 56340 + }, + { + "grad_norm": 0.05640670284628868, + "learning_rate": 4.3652965430137474e-05, + "loss": 0.0071, + "step": 56350 + }, + { + "grad_norm": 0.05083804577589035, + "learning_rate": 4.3636564839911646e-05, + "loss": 0.0044, + "step": 56360 + }, + { + "grad_norm": 0.04957341402769089, + "learning_rate": 4.3620164945581545e-05, + "loss": 0.0063, + "step": 56370 + }, + { + "grad_norm": 0.05353671684861183, + "learning_rate": 4.360376574894061e-05, + "loss": 0.0072, + "step": 56380 + }, + { + "grad_norm": 0.04227617755532265, + "learning_rate": 4.358736725178224e-05, + "loss": 0.0054, + "step": 56390 + }, + { + "grad_norm": 0.03503485769033432, + "learning_rate": 4.357096945589974e-05, + "loss": 0.0044, + "step": 56400 + }, + { + "grad_norm": 0.03753906860947609, + "learning_rate": 4.355457236308636e-05, + "loss": 0.0042, + "step": 56410 + }, + { + "grad_norm": 0.047319892793893814, + "learning_rate": 4.353817597513526e-05, + "loss": 0.004, + "step": 56420 + }, + { + "grad_norm": 0.05157320946455002, + "learning_rate": 4.352178029383948e-05, + "loss": 0.0062, + "step": 56430 + }, + { + "grad_norm": 0.032766662538051605, + "learning_rate": 4.3505385320992105e-05, + "loss": 0.0048, + "step": 56440 + }, + { + "grad_norm": 0.059391554445028305, + "learning_rate": 4.348899105838602e-05, + "loss": 0.0058, + "step": 56450 + }, + { + "grad_norm": 0.030995503067970276, + "learning_rate": 4.3472597507814087e-05, + "loss": 0.0046, + "step": 56460 + }, + { + "grad_norm": 0.06417325884103775, + "learning_rate": 4.3456204671069066e-05, + "loss": 0.0058, + "step": 56470 + }, + { + "grad_norm": 0.05911616235971451, + "learning_rate": 4.343981254994367e-05, + "loss": 0.0051, + "step": 56480 + }, + { + "grad_norm": 0.03521348536014557, + "learning_rate": 4.34234211462305e-05, + "loss": 0.0061, + "step": 56490 + }, + { + "grad_norm": 0.03664993867278099, + "learning_rate": 4.340703046172213e-05, + "loss": 0.0057, + "step": 56500 + }, + { + "grad_norm": 0.04093744978308678, + "learning_rate": 4.339064049821097e-05, + "loss": 0.0039, + "step": 56510 + }, + { + "grad_norm": 0.08195864409208298, + "learning_rate": 4.3374251257489446e-05, + "loss": 0.0047, + "step": 56520 + }, + { + "grad_norm": 0.03808389976620674, + "learning_rate": 4.335786274134984e-05, + "loss": 0.0058, + "step": 56530 + }, + { + "grad_norm": 0.045212551951408386, + "learning_rate": 4.334147495158438e-05, + "loss": 0.0053, + "step": 56540 + }, + { + "grad_norm": 0.05622844770550728, + "learning_rate": 4.33250878899852e-05, + "loss": 0.0053, + "step": 56550 + }, + { + "grad_norm": 0.052942607551813126, + "learning_rate": 4.330870155834437e-05, + "loss": 0.0045, + "step": 56560 + }, + { + "grad_norm": 0.0489395409822464, + "learning_rate": 4.3292315958453884e-05, + "loss": 0.0067, + "step": 56570 + }, + { + "grad_norm": 0.0820709764957428, + "learning_rate": 4.3275931092105646e-05, + "loss": 0.0056, + "step": 56580 + }, + { + "grad_norm": 0.060450900346040726, + "learning_rate": 4.325954696109147e-05, + "loss": 0.0043, + "step": 56590 + }, + { + "grad_norm": 0.03783496096730232, + "learning_rate": 4.32431635672031e-05, + "loss": 0.0062, + "step": 56600 + }, + { + "grad_norm": 0.048003118485212326, + "learning_rate": 4.3226780912232214e-05, + "loss": 0.005, + "step": 56610 + }, + { + "grad_norm": 0.06786059588193893, + "learning_rate": 4.321039899797036e-05, + "loss": 0.005, + "step": 56620 + }, + { + "grad_norm": 0.04211663082242012, + "learning_rate": 4.319401782620908e-05, + "loss": 0.0051, + "step": 56630 + }, + { + "grad_norm": 0.04647449031472206, + "learning_rate": 4.317763739873978e-05, + "loss": 0.0054, + "step": 56640 + }, + { + "grad_norm": 0.04081985354423523, + "learning_rate": 4.31612577173538e-05, + "loss": 0.005, + "step": 56650 + }, + { + "grad_norm": 0.036252301186323166, + "learning_rate": 4.314487878384239e-05, + "loss": 0.0043, + "step": 56660 + }, + { + "grad_norm": 0.05023207888007164, + "learning_rate": 4.3128500599996736e-05, + "loss": 0.0046, + "step": 56670 + }, + { + "grad_norm": 0.03585376963019371, + "learning_rate": 4.31121231676079e-05, + "loss": 0.0044, + "step": 56680 + }, + { + "grad_norm": 0.045443255454301834, + "learning_rate": 4.309574648846694e-05, + "loss": 0.0047, + "step": 56690 + }, + { + "grad_norm": 0.04657849669456482, + "learning_rate": 4.3079370564364755e-05, + "loss": 0.0062, + "step": 56700 + }, + { + "grad_norm": 0.03845299780368805, + "learning_rate": 4.306299539709221e-05, + "loss": 0.0044, + "step": 56710 + }, + { + "grad_norm": 0.05520975962281227, + "learning_rate": 4.3046620988440046e-05, + "loss": 0.0038, + "step": 56720 + }, + { + "grad_norm": 0.05447414889931679, + "learning_rate": 4.303024734019897e-05, + "loss": 0.0053, + "step": 56730 + }, + { + "grad_norm": 0.05771076679229736, + "learning_rate": 4.3013874454159565e-05, + "loss": 0.0057, + "step": 56740 + }, + { + "grad_norm": 0.04860151186585426, + "learning_rate": 4.299750233211233e-05, + "loss": 0.0047, + "step": 56750 + }, + { + "grad_norm": 0.053604014217853546, + "learning_rate": 4.2981130975847715e-05, + "loss": 0.0047, + "step": 56760 + }, + { + "grad_norm": 0.047693513333797455, + "learning_rate": 4.2964760387156076e-05, + "loss": 0.0067, + "step": 56770 + }, + { + "grad_norm": 0.04870401695370674, + "learning_rate": 4.294839056782767e-05, + "loss": 0.0044, + "step": 56780 + }, + { + "grad_norm": 0.06174197420477867, + "learning_rate": 4.293202151965266e-05, + "loss": 0.0049, + "step": 56790 + }, + { + "grad_norm": 0.038085587322711945, + "learning_rate": 4.291565324442115e-05, + "loss": 0.0041, + "step": 56800 + }, + { + "grad_norm": 0.04718107730150223, + "learning_rate": 4.2899285743923135e-05, + "loss": 0.0046, + "step": 56810 + }, + { + "grad_norm": 0.04318517819046974, + "learning_rate": 4.288291901994857e-05, + "loss": 0.0045, + "step": 56820 + }, + { + "grad_norm": 0.032695163041353226, + "learning_rate": 4.286655307428729e-05, + "loss": 0.0043, + "step": 56830 + }, + { + "grad_norm": 0.04356015473604202, + "learning_rate": 4.285018790872903e-05, + "loss": 0.0045, + "step": 56840 + }, + { + "grad_norm": 0.037386663258075714, + "learning_rate": 4.283382352506347e-05, + "loss": 0.0053, + "step": 56850 + }, + { + "grad_norm": 0.04229297116398811, + "learning_rate": 4.2817459925080205e-05, + "loss": 0.0054, + "step": 56860 + }, + { + "grad_norm": 0.043174926191568375, + "learning_rate": 4.28010971105687e-05, + "loss": 0.0058, + "step": 56870 + }, + { + "grad_norm": 0.03988739103078842, + "learning_rate": 4.278473508331841e-05, + "loss": 0.0056, + "step": 56880 + }, + { + "grad_norm": 0.04435160756111145, + "learning_rate": 4.276837384511864e-05, + "loss": 0.0055, + "step": 56890 + }, + { + "grad_norm": 0.034157197922468185, + "learning_rate": 4.275201339775864e-05, + "loss": 0.0041, + "step": 56900 + }, + { + "grad_norm": 0.04424736648797989, + "learning_rate": 4.273565374302756e-05, + "loss": 0.0058, + "step": 56910 + }, + { + "grad_norm": 0.041695401072502136, + "learning_rate": 4.2719294882714454e-05, + "loss": 0.0042, + "step": 56920 + }, + { + "grad_norm": 0.05425066500902176, + "learning_rate": 4.270293681860831e-05, + "loss": 0.004, + "step": 56930 + }, + { + "grad_norm": 0.0596252866089344, + "learning_rate": 4.268657955249802e-05, + "loss": 0.0067, + "step": 56940 + }, + { + "grad_norm": 0.047609779983758926, + "learning_rate": 4.267022308617241e-05, + "loss": 0.0067, + "step": 56950 + }, + { + "grad_norm": 0.04675968736410141, + "learning_rate": 4.265386742142018e-05, + "loss": 0.005, + "step": 56960 + }, + { + "grad_norm": 0.04106135293841362, + "learning_rate": 4.263751256002996e-05, + "loss": 0.0041, + "step": 56970 + }, + { + "grad_norm": 0.05024043470621109, + "learning_rate": 4.26211585037903e-05, + "loss": 0.0062, + "step": 56980 + }, + { + "grad_norm": 0.04488452523946762, + "learning_rate": 4.2604805254489656e-05, + "loss": 0.0046, + "step": 56990 + }, + { + "grad_norm": 0.045738231390714645, + "learning_rate": 4.258845281391638e-05, + "loss": 0.0051, + "step": 57000 + }, + { + "grad_norm": 0.047946859151124954, + "learning_rate": 4.257210118385876e-05, + "loss": 0.0054, + "step": 57010 + }, + { + "grad_norm": 0.03529362007975578, + "learning_rate": 4.2555750366104994e-05, + "loss": 0.004, + "step": 57020 + }, + { + "grad_norm": 0.043490320444107056, + "learning_rate": 4.2539400362443184e-05, + "loss": 0.0048, + "step": 57030 + }, + { + "grad_norm": 0.03530341386795044, + "learning_rate": 4.252305117466133e-05, + "loss": 0.0052, + "step": 57040 + }, + { + "grad_norm": 0.0459720678627491, + "learning_rate": 4.2506702804547356e-05, + "loss": 0.0049, + "step": 57050 + }, + { + "grad_norm": 0.05603521317243576, + "learning_rate": 4.249035525388907e-05, + "loss": 0.0047, + "step": 57060 + }, + { + "grad_norm": 0.05807958170771599, + "learning_rate": 4.247400852447427e-05, + "loss": 0.0047, + "step": 57070 + }, + { + "grad_norm": 0.04553433507680893, + "learning_rate": 4.245766261809059e-05, + "loss": 0.0048, + "step": 57080 + }, + { + "grad_norm": 0.05365695804357529, + "learning_rate": 4.244131753652558e-05, + "loss": 0.005, + "step": 57090 + }, + { + "grad_norm": 0.0713738277554512, + "learning_rate": 4.242497328156672e-05, + "loss": 0.0077, + "step": 57100 + }, + { + "grad_norm": 0.04814737290143967, + "learning_rate": 4.240862985500139e-05, + "loss": 0.0046, + "step": 57110 + }, + { + "grad_norm": 0.06372030824422836, + "learning_rate": 4.2392287258616894e-05, + "loss": 0.0059, + "step": 57120 + }, + { + "grad_norm": 0.049790360033512115, + "learning_rate": 4.2375945494200396e-05, + "loss": 0.0066, + "step": 57130 + }, + { + "grad_norm": 0.0472116582095623, + "learning_rate": 4.2359604563539054e-05, + "loss": 0.005, + "step": 57140 + }, + { + "grad_norm": 0.04462441802024841, + "learning_rate": 4.2343264468419885e-05, + "loss": 0.0056, + "step": 57150 + }, + { + "grad_norm": 0.05022360011935234, + "learning_rate": 4.232692521062979e-05, + "loss": 0.0049, + "step": 57160 + }, + { + "grad_norm": 0.0621505007147789, + "learning_rate": 4.231058679195562e-05, + "loss": 0.0064, + "step": 57170 + }, + { + "grad_norm": 0.04729113727807999, + "learning_rate": 4.229424921418411e-05, + "loss": 0.0046, + "step": 57180 + }, + { + "grad_norm": 0.06279563158750534, + "learning_rate": 4.2277912479101916e-05, + "loss": 0.0059, + "step": 57190 + }, + { + "grad_norm": 0.044517695903778076, + "learning_rate": 4.226157658849562e-05, + "loss": 0.0035, + "step": 57200 + }, + { + "grad_norm": 0.03581937775015831, + "learning_rate": 4.2245241544151664e-05, + "loss": 0.0064, + "step": 57210 + }, + { + "grad_norm": 0.040201619267463684, + "learning_rate": 4.2228907347856425e-05, + "loss": 0.0046, + "step": 57220 + }, + { + "grad_norm": 0.035522788763046265, + "learning_rate": 4.2212574001396214e-05, + "loss": 0.005, + "step": 57230 + }, + { + "grad_norm": 0.03922980651259422, + "learning_rate": 4.21962415065572e-05, + "loss": 0.0046, + "step": 57240 + }, + { + "grad_norm": 0.04217612370848656, + "learning_rate": 4.217990986512548e-05, + "loss": 0.0047, + "step": 57250 + }, + { + "grad_norm": 0.04537534341216087, + "learning_rate": 4.216357907888704e-05, + "loss": 0.0044, + "step": 57260 + }, + { + "grad_norm": 0.044626299291849136, + "learning_rate": 4.2147249149627824e-05, + "loss": 0.0064, + "step": 57270 + }, + { + "grad_norm": 0.039926670491695404, + "learning_rate": 4.213092007913364e-05, + "loss": 0.0039, + "step": 57280 + }, + { + "grad_norm": 0.041618362069129944, + "learning_rate": 4.2114591869190205e-05, + "loss": 0.0035, + "step": 57290 + }, + { + "grad_norm": 0.0363912470638752, + "learning_rate": 4.209826452158315e-05, + "loss": 0.004, + "step": 57300 + }, + { + "grad_norm": 0.0379476360976696, + "learning_rate": 4.2081938038098e-05, + "loss": 0.0056, + "step": 57310 + }, + { + "grad_norm": 0.03661586344242096, + "learning_rate": 4.2065612420520185e-05, + "loss": 0.0043, + "step": 57320 + }, + { + "grad_norm": 0.05972154438495636, + "learning_rate": 4.204928767063509e-05, + "loss": 0.0064, + "step": 57330 + }, + { + "grad_norm": 0.0621824637055397, + "learning_rate": 4.2032963790227944e-05, + "loss": 0.0044, + "step": 57340 + }, + { + "grad_norm": 0.05445227771997452, + "learning_rate": 4.2016640781083887e-05, + "loss": 0.0046, + "step": 57350 + }, + { + "grad_norm": 0.03294886276125908, + "learning_rate": 4.2000318644988004e-05, + "loss": 0.0046, + "step": 57360 + }, + { + "grad_norm": 0.042276524007320404, + "learning_rate": 4.198399738372524e-05, + "loss": 0.0061, + "step": 57370 + }, + { + "grad_norm": 0.07998199015855789, + "learning_rate": 4.196767699908045e-05, + "loss": 0.0068, + "step": 57380 + }, + { + "grad_norm": 0.04354117810726166, + "learning_rate": 4.195135749283845e-05, + "loss": 0.0046, + "step": 57390 + }, + { + "grad_norm": 0.043422963470220566, + "learning_rate": 4.1935038866783894e-05, + "loss": 0.0039, + "step": 57400 + }, + { + "grad_norm": 0.04815424233675003, + "learning_rate": 4.1918721122701355e-05, + "loss": 0.0041, + "step": 57410 + }, + { + "grad_norm": 0.04562702402472496, + "learning_rate": 4.190240426237533e-05, + "loss": 0.0051, + "step": 57420 + }, + { + "grad_norm": 0.06229659542441368, + "learning_rate": 4.188608828759019e-05, + "loss": 0.0062, + "step": 57430 + }, + { + "grad_norm": 0.037699129432439804, + "learning_rate": 4.1869773200130246e-05, + "loss": 0.0042, + "step": 57440 + }, + { + "grad_norm": 0.04598093777894974, + "learning_rate": 4.185345900177967e-05, + "loss": 0.0036, + "step": 57450 + }, + { + "grad_norm": 0.02750852331519127, + "learning_rate": 4.183714569432258e-05, + "loss": 0.0047, + "step": 57460 + }, + { + "grad_norm": 0.034146543592214584, + "learning_rate": 4.182083327954295e-05, + "loss": 0.0069, + "step": 57470 + }, + { + "grad_norm": 0.04155842587351799, + "learning_rate": 4.1804521759224714e-05, + "loss": 0.0059, + "step": 57480 + }, + { + "grad_norm": 0.04333706945180893, + "learning_rate": 4.178821113515165e-05, + "loss": 0.0048, + "step": 57490 + }, + { + "grad_norm": 0.03796115517616272, + "learning_rate": 4.1771901409107476e-05, + "loss": 0.0048, + "step": 57500 + }, + { + "grad_norm": 0.041530415415763855, + "learning_rate": 4.175559258287577e-05, + "loss": 0.0046, + "step": 57510 + }, + { + "grad_norm": 0.03976341709494591, + "learning_rate": 4.173928465824008e-05, + "loss": 0.0045, + "step": 57520 + }, + { + "grad_norm": 0.05301005765795708, + "learning_rate": 4.17229776369838e-05, + "loss": 0.0043, + "step": 57530 + }, + { + "grad_norm": 0.04197834059596062, + "learning_rate": 4.170667152089025e-05, + "loss": 0.0046, + "step": 57540 + }, + { + "grad_norm": 0.047813814133405685, + "learning_rate": 4.1690366311742626e-05, + "loss": 0.0039, + "step": 57550 + }, + { + "grad_norm": 0.041106730699539185, + "learning_rate": 4.1674062011324055e-05, + "loss": 0.0047, + "step": 57560 + }, + { + "grad_norm": 0.054258059710264206, + "learning_rate": 4.165775862141752e-05, + "loss": 0.007, + "step": 57570 + }, + { + "grad_norm": 0.04970682039856911, + "learning_rate": 4.164145614380598e-05, + "loss": 0.0069, + "step": 57580 + }, + { + "grad_norm": 0.03542061150074005, + "learning_rate": 4.162515458027223e-05, + "loss": 0.0045, + "step": 57590 + }, + { + "grad_norm": 0.051592689007520676, + "learning_rate": 4.160885393259899e-05, + "loss": 0.0064, + "step": 57600 + }, + { + "grad_norm": 0.05365845561027527, + "learning_rate": 4.159255420256886e-05, + "loss": 0.0057, + "step": 57610 + }, + { + "grad_norm": 0.06731456518173218, + "learning_rate": 4.157625539196436e-05, + "loss": 0.006, + "step": 57620 + }, + { + "grad_norm": 0.04095650091767311, + "learning_rate": 4.155995750256791e-05, + "loss": 0.0042, + "step": 57630 + }, + { + "grad_norm": 0.04985041171312332, + "learning_rate": 4.15436605361618e-05, + "loss": 0.0046, + "step": 57640 + }, + { + "grad_norm": 0.04887353628873825, + "learning_rate": 4.152736449452827e-05, + "loss": 0.0051, + "step": 57650 + }, + { + "grad_norm": 0.043134476989507675, + "learning_rate": 4.151106937944943e-05, + "loss": 0.006, + "step": 57660 + }, + { + "grad_norm": 0.05047963932156563, + "learning_rate": 4.149477519270727e-05, + "loss": 0.0045, + "step": 57670 + }, + { + "grad_norm": 0.039122212678194046, + "learning_rate": 4.147848193608371e-05, + "loss": 0.0053, + "step": 57680 + }, + { + "grad_norm": 0.041314948350191116, + "learning_rate": 4.146218961136056e-05, + "loss": 0.005, + "step": 57690 + }, + { + "grad_norm": 0.03594615310430527, + "learning_rate": 4.14458982203195e-05, + "loss": 0.0059, + "step": 57700 + }, + { + "grad_norm": 0.02823358215391636, + "learning_rate": 4.1429607764742165e-05, + "loss": 0.0035, + "step": 57710 + }, + { + "grad_norm": 0.055686142295598984, + "learning_rate": 4.141331824641003e-05, + "loss": 0.0047, + "step": 57720 + }, + { + "grad_norm": 0.044421445578336716, + "learning_rate": 4.139702966710452e-05, + "loss": 0.0042, + "step": 57730 + }, + { + "grad_norm": 0.06351495534181595, + "learning_rate": 4.13807420286069e-05, + "loss": 0.0055, + "step": 57740 + }, + { + "grad_norm": 0.026185546070337296, + "learning_rate": 4.136445533269837e-05, + "loss": 0.0033, + "step": 57750 + }, + { + "grad_norm": 0.0482933335006237, + "learning_rate": 4.134816958116003e-05, + "loss": 0.0051, + "step": 57760 + }, + { + "grad_norm": 0.05873281881213188, + "learning_rate": 4.133188477577283e-05, + "loss": 0.005, + "step": 57770 + }, + { + "grad_norm": 0.040700361132621765, + "learning_rate": 4.131560091831771e-05, + "loss": 0.0043, + "step": 57780 + }, + { + "grad_norm": 0.052103444933891296, + "learning_rate": 4.1299318010575405e-05, + "loss": 0.0056, + "step": 57790 + }, + { + "grad_norm": 0.04962530359625816, + "learning_rate": 4.128303605432661e-05, + "loss": 0.0074, + "step": 57800 + }, + { + "grad_norm": 0.06261499971151352, + "learning_rate": 4.126675505135188e-05, + "loss": 0.0045, + "step": 57810 + }, + { + "grad_norm": 0.05111030489206314, + "learning_rate": 4.125047500343169e-05, + "loss": 0.0058, + "step": 57820 + }, + { + "grad_norm": 0.06187315657734871, + "learning_rate": 4.1234195912346376e-05, + "loss": 0.0041, + "step": 57830 + }, + { + "grad_norm": 0.03484303131699562, + "learning_rate": 4.1217917779876235e-05, + "loss": 0.0042, + "step": 57840 + }, + { + "grad_norm": 0.06906978040933609, + "learning_rate": 4.120164060780141e-05, + "loss": 0.0056, + "step": 57850 + }, + { + "grad_norm": 0.037724412977695465, + "learning_rate": 4.1185364397901935e-05, + "loss": 0.0063, + "step": 57860 + }, + { + "grad_norm": 0.04939967021346092, + "learning_rate": 4.116908915195776e-05, + "loss": 0.0042, + "step": 57870 + }, + { + "grad_norm": 0.034528568387031555, + "learning_rate": 4.115281487174872e-05, + "loss": 0.0048, + "step": 57880 + }, + { + "grad_norm": 0.03993901610374451, + "learning_rate": 4.113654155905452e-05, + "loss": 0.0037, + "step": 57890 + }, + { + "grad_norm": 0.045058950781822205, + "learning_rate": 4.112026921565483e-05, + "loss": 0.0052, + "step": 57900 + }, + { + "grad_norm": 0.03734549507498741, + "learning_rate": 4.1103997843329147e-05, + "loss": 0.0051, + "step": 57910 + }, + { + "grad_norm": 0.0493190661072731, + "learning_rate": 4.108772744385689e-05, + "loss": 0.0056, + "step": 57920 + }, + { + "grad_norm": 0.04190921038389206, + "learning_rate": 4.1071458019017346e-05, + "loss": 0.0051, + "step": 57930 + }, + { + "grad_norm": 0.05705275014042854, + "learning_rate": 4.1055189570589734e-05, + "loss": 0.0067, + "step": 57940 + }, + { + "grad_norm": 0.05132841691374779, + "learning_rate": 4.103892210035315e-05, + "loss": 0.0056, + "step": 57950 + }, + { + "grad_norm": 0.05347636714577675, + "learning_rate": 4.102265561008656e-05, + "loss": 0.0047, + "step": 57960 + }, + { + "grad_norm": 0.049733977764844894, + "learning_rate": 4.100639010156885e-05, + "loss": 0.0059, + "step": 57970 + }, + { + "grad_norm": 0.03730655834078789, + "learning_rate": 4.099012557657882e-05, + "loss": 0.0045, + "step": 57980 + }, + { + "grad_norm": 0.04093192517757416, + "learning_rate": 4.09738620368951e-05, + "loss": 0.0044, + "step": 57990 + }, + { + "grad_norm": 0.03621453791856766, + "learning_rate": 4.095759948429626e-05, + "loss": 0.0047, + "step": 58000 + }, + { + "grad_norm": 0.03860553354024887, + "learning_rate": 4.094133792056075e-05, + "loss": 0.0037, + "step": 58010 + }, + { + "grad_norm": 0.046041540801525116, + "learning_rate": 4.092507734746688e-05, + "loss": 0.0064, + "step": 58020 + }, + { + "grad_norm": 0.0414419025182724, + "learning_rate": 4.090881776679293e-05, + "loss": 0.0044, + "step": 58030 + }, + { + "grad_norm": 0.07107242196798325, + "learning_rate": 4.0892559180316994e-05, + "loss": 0.0048, + "step": 58040 + }, + { + "grad_norm": 0.043402113020420074, + "learning_rate": 4.08763015898171e-05, + "loss": 0.0049, + "step": 58050 + }, + { + "grad_norm": 0.0683019831776619, + "learning_rate": 4.0860044997071145e-05, + "loss": 0.0049, + "step": 58060 + }, + { + "grad_norm": 0.04942673072218895, + "learning_rate": 4.084378940385693e-05, + "loss": 0.0042, + "step": 58070 + }, + { + "grad_norm": 0.061228036880493164, + "learning_rate": 4.082753481195211e-05, + "loss": 0.0072, + "step": 58080 + }, + { + "grad_norm": 0.03165076673030853, + "learning_rate": 4.0811281223134324e-05, + "loss": 0.0043, + "step": 58090 + }, + { + "grad_norm": 0.03327783942222595, + "learning_rate": 4.0795028639181e-05, + "loss": 0.0039, + "step": 58100 + }, + { + "grad_norm": 0.0491664782166481, + "learning_rate": 4.077877706186951e-05, + "loss": 0.0058, + "step": 58110 + }, + { + "grad_norm": 0.06641201674938202, + "learning_rate": 4.076252649297709e-05, + "loss": 0.0057, + "step": 58120 + }, + { + "grad_norm": 0.07295121997594833, + "learning_rate": 4.07462769342809e-05, + "loss": 0.0068, + "step": 58130 + }, + { + "grad_norm": 0.038157038390636444, + "learning_rate": 4.073002838755793e-05, + "loss": 0.0048, + "step": 58140 + }, + { + "grad_norm": 0.06264602392911911, + "learning_rate": 4.0713780854585115e-05, + "loss": 0.0053, + "step": 58150 + }, + { + "grad_norm": 0.036989353597164154, + "learning_rate": 4.069753433713927e-05, + "loss": 0.0048, + "step": 58160 + }, + { + "grad_norm": 0.03242993727326393, + "learning_rate": 4.068128883699709e-05, + "loss": 0.0037, + "step": 58170 + }, + { + "grad_norm": 0.048118773847818375, + "learning_rate": 4.0665044355935155e-05, + "loss": 0.007, + "step": 58180 + }, + { + "grad_norm": 0.03361798822879791, + "learning_rate": 4.064880089572993e-05, + "loss": 0.0057, + "step": 58190 + }, + { + "grad_norm": 0.04230092093348503, + "learning_rate": 4.063255845815779e-05, + "loss": 0.0053, + "step": 58200 + }, + { + "grad_norm": 0.04676361382007599, + "learning_rate": 4.0616317044994945e-05, + "loss": 0.0045, + "step": 58210 + }, + { + "grad_norm": 0.04327785223722458, + "learning_rate": 4.0600076658017585e-05, + "loss": 0.0052, + "step": 58220 + }, + { + "grad_norm": 0.048585761338472366, + "learning_rate": 4.0583837299001706e-05, + "loss": 0.0067, + "step": 58230 + }, + { + "grad_norm": 0.06111232936382294, + "learning_rate": 4.056759896972322e-05, + "loss": 0.0058, + "step": 58240 + }, + { + "grad_norm": 0.06206374615430832, + "learning_rate": 4.055136167195793e-05, + "loss": 0.0047, + "step": 58250 + }, + { + "grad_norm": 0.03832334652543068, + "learning_rate": 4.053512540748152e-05, + "loss": 0.0046, + "step": 58260 + }, + { + "grad_norm": 0.03721402585506439, + "learning_rate": 4.051889017806955e-05, + "loss": 0.0037, + "step": 58270 + }, + { + "grad_norm": 0.03686711937189102, + "learning_rate": 4.0502655985497516e-05, + "loss": 0.0041, + "step": 58280 + }, + { + "grad_norm": 0.04809102788567543, + "learning_rate": 4.048642283154074e-05, + "loss": 0.005, + "step": 58290 + }, + { + "grad_norm": 0.043523095548152924, + "learning_rate": 4.0470190717974464e-05, + "loss": 0.0045, + "step": 58300 + }, + { + "grad_norm": 0.05076419189572334, + "learning_rate": 4.045395964657381e-05, + "loss": 0.0047, + "step": 58310 + }, + { + "grad_norm": 0.027019986882805824, + "learning_rate": 4.043772961911376e-05, + "loss": 0.0044, + "step": 58320 + }, + { + "grad_norm": 0.05580185726284981, + "learning_rate": 4.042150063736924e-05, + "loss": 0.0053, + "step": 58330 + }, + { + "grad_norm": 0.04156503826379776, + "learning_rate": 4.0405272703114976e-05, + "loss": 0.0047, + "step": 58340 + }, + { + "grad_norm": 0.038561929017305374, + "learning_rate": 4.038904581812569e-05, + "loss": 0.005, + "step": 58350 + }, + { + "grad_norm": 0.04282127320766449, + "learning_rate": 4.03728199841759e-05, + "loss": 0.0038, + "step": 58360 + }, + { + "grad_norm": 0.0375804677605629, + "learning_rate": 4.0356595203040044e-05, + "loss": 0.0056, + "step": 58370 + }, + { + "grad_norm": 0.04138028621673584, + "learning_rate": 4.0340371476492426e-05, + "loss": 0.005, + "step": 58380 + }, + { + "grad_norm": 0.04174933582544327, + "learning_rate": 4.032414880630727e-05, + "loss": 0.006, + "step": 58390 + }, + { + "grad_norm": 0.040562793612480164, + "learning_rate": 4.0307927194258646e-05, + "loss": 0.0051, + "step": 58400 + }, + { + "grad_norm": 0.03707405552268028, + "learning_rate": 4.029170664212054e-05, + "loss": 0.005, + "step": 58410 + }, + { + "grad_norm": 0.029625849798321724, + "learning_rate": 4.0275487151666794e-05, + "loss": 0.0043, + "step": 58420 + }, + { + "grad_norm": 0.0460079163312912, + "learning_rate": 4.025926872467116e-05, + "loss": 0.0061, + "step": 58430 + }, + { + "grad_norm": 0.04128903150558472, + "learning_rate": 4.024305136290725e-05, + "loss": 0.0055, + "step": 58440 + }, + { + "grad_norm": 0.05591029301285744, + "learning_rate": 4.022683506814858e-05, + "loss": 0.0061, + "step": 58450 + }, + { + "grad_norm": 0.0657714456319809, + "learning_rate": 4.021061984216853e-05, + "loss": 0.0064, + "step": 58460 + }, + { + "grad_norm": 0.0317985899746418, + "learning_rate": 4.019440568674035e-05, + "loss": 0.0046, + "step": 58470 + }, + { + "grad_norm": 0.05474330857396126, + "learning_rate": 4.017819260363724e-05, + "loss": 0.0049, + "step": 58480 + }, + { + "grad_norm": 0.05051977559924126, + "learning_rate": 4.016198059463223e-05, + "loss": 0.0077, + "step": 58490 + }, + { + "grad_norm": 0.04403461888432503, + "learning_rate": 4.0145769661498233e-05, + "loss": 0.0061, + "step": 58500 + }, + { + "grad_norm": 0.04669764265418053, + "learning_rate": 4.012955980600804e-05, + "loss": 0.0057, + "step": 58510 + }, + { + "grad_norm": 0.05662816762924194, + "learning_rate": 4.0113351029934355e-05, + "loss": 0.0051, + "step": 58520 + }, + { + "grad_norm": 0.0516149178147316, + "learning_rate": 4.0097143335049714e-05, + "loss": 0.0046, + "step": 58530 + }, + { + "grad_norm": 0.07265264540910721, + "learning_rate": 4.0080936723126606e-05, + "loss": 0.0065, + "step": 58540 + }, + { + "grad_norm": 0.03410162776708603, + "learning_rate": 4.006473119593735e-05, + "loss": 0.005, + "step": 58550 + }, + { + "grad_norm": 0.04147952049970627, + "learning_rate": 4.004852675525415e-05, + "loss": 0.0049, + "step": 58560 + }, + { + "grad_norm": 0.02894536405801773, + "learning_rate": 4.00323234028491e-05, + "loss": 0.0064, + "step": 58570 + }, + { + "grad_norm": 0.05048177391290665, + "learning_rate": 4.0016121140494186e-05, + "loss": 0.0061, + "step": 58580 + }, + { + "grad_norm": 0.06030404940247536, + "learning_rate": 3.999991996996122e-05, + "loss": 0.0054, + "step": 58590 + }, + { + "grad_norm": 0.034962497651576996, + "learning_rate": 3.9983719893022e-05, + "loss": 0.0042, + "step": 58600 + }, + { + "grad_norm": 0.05588829517364502, + "learning_rate": 3.996752091144812e-05, + "loss": 0.0051, + "step": 58610 + }, + { + "grad_norm": 0.04479563981294632, + "learning_rate": 3.9951323027011055e-05, + "loss": 0.006, + "step": 58620 + }, + { + "grad_norm": 0.03553848713636398, + "learning_rate": 3.993512624148219e-05, + "loss": 0.0045, + "step": 58630 + }, + { + "grad_norm": 0.046932581812143326, + "learning_rate": 3.99189305566328e-05, + "loss": 0.005, + "step": 58640 + }, + { + "grad_norm": 0.04030916839838028, + "learning_rate": 3.990273597423401e-05, + "loss": 0.0041, + "step": 58650 + }, + { + "grad_norm": 0.051982030272483826, + "learning_rate": 3.988654249605681e-05, + "loss": 0.0055, + "step": 58660 + }, + { + "grad_norm": 0.04963288828730583, + "learning_rate": 3.987035012387213e-05, + "loss": 0.0044, + "step": 58670 + }, + { + "grad_norm": 0.05290551111102104, + "learning_rate": 3.9854158859450735e-05, + "loss": 0.0051, + "step": 58680 + }, + { + "grad_norm": 0.049060702323913574, + "learning_rate": 3.983796870456328e-05, + "loss": 0.0053, + "step": 58690 + }, + { + "grad_norm": 0.047093749046325684, + "learning_rate": 3.982177966098028e-05, + "loss": 0.0047, + "step": 58700 + }, + { + "grad_norm": 0.05177822709083557, + "learning_rate": 3.980559173047216e-05, + "loss": 0.0055, + "step": 58710 + }, + { + "grad_norm": 0.034679822623729706, + "learning_rate": 3.9789404914809176e-05, + "loss": 0.0058, + "step": 58720 + }, + { + "grad_norm": 0.043636027723550797, + "learning_rate": 3.9773219215761544e-05, + "loss": 0.0045, + "step": 58730 + }, + { + "grad_norm": 0.034261323511600494, + "learning_rate": 3.975703463509928e-05, + "loss": 0.0039, + "step": 58740 + }, + { + "grad_norm": 0.05238258093595505, + "learning_rate": 3.974085117459231e-05, + "loss": 0.0058, + "step": 58750 + }, + { + "grad_norm": 0.04733629524707794, + "learning_rate": 3.972466883601043e-05, + "loss": 0.0055, + "step": 58760 + }, + { + "grad_norm": 0.04675266146659851, + "learning_rate": 3.9708487621123316e-05, + "loss": 0.005, + "step": 58770 + }, + { + "grad_norm": 0.04086301103234291, + "learning_rate": 3.969230753170051e-05, + "loss": 0.0045, + "step": 58780 + }, + { + "grad_norm": 0.05303901433944702, + "learning_rate": 3.967612856951146e-05, + "loss": 0.0061, + "step": 58790 + }, + { + "grad_norm": 0.04599476978182793, + "learning_rate": 3.9659950736325476e-05, + "loss": 0.0051, + "step": 58800 + }, + { + "grad_norm": 0.05278865993022919, + "learning_rate": 3.964377403391174e-05, + "loss": 0.006, + "step": 58810 + }, + { + "grad_norm": 0.048592422157526016, + "learning_rate": 3.9627598464039303e-05, + "loss": 0.0058, + "step": 58820 + }, + { + "grad_norm": 0.04363634064793587, + "learning_rate": 3.9611424028477096e-05, + "loss": 0.0043, + "step": 58830 + }, + { + "grad_norm": 0.03942609578371048, + "learning_rate": 3.959525072899394e-05, + "loss": 0.0039, + "step": 58840 + }, + { + "grad_norm": 0.04114594683051109, + "learning_rate": 3.9579078567358506e-05, + "loss": 0.0045, + "step": 58850 + }, + { + "grad_norm": 0.04243140295147896, + "learning_rate": 3.956290754533939e-05, + "loss": 0.0061, + "step": 58860 + }, + { + "grad_norm": 0.04790183901786804, + "learning_rate": 3.954673766470501e-05, + "loss": 0.0045, + "step": 58870 + }, + { + "grad_norm": 0.0688650831580162, + "learning_rate": 3.9530568927223674e-05, + "loss": 0.0064, + "step": 58880 + }, + { + "grad_norm": 0.04236884415149689, + "learning_rate": 3.95144013346636e-05, + "loss": 0.0058, + "step": 58890 + }, + { + "grad_norm": 0.05514984577894211, + "learning_rate": 3.949823488879282e-05, + "loss": 0.0061, + "step": 58900 + }, + { + "grad_norm": 0.06062682718038559, + "learning_rate": 3.9482069591379276e-05, + "loss": 0.006, + "step": 58910 + }, + { + "grad_norm": 0.044849954545497894, + "learning_rate": 3.9465905444190784e-05, + "loss": 0.0053, + "step": 58920 + }, + { + "grad_norm": 0.037277814000844955, + "learning_rate": 3.944974244899505e-05, + "loss": 0.0057, + "step": 58930 + }, + { + "grad_norm": 0.07353886216878891, + "learning_rate": 3.9433580607559616e-05, + "loss": 0.007, + "step": 58940 + }, + { + "grad_norm": 0.04723146930336952, + "learning_rate": 3.941741992165193e-05, + "loss": 0.004, + "step": 58950 + }, + { + "grad_norm": 0.05206465721130371, + "learning_rate": 3.940126039303928e-05, + "loss": 0.0048, + "step": 58960 + }, + { + "grad_norm": 0.037541087716817856, + "learning_rate": 3.938510202348886e-05, + "loss": 0.0045, + "step": 58970 + }, + { + "grad_norm": 0.03792622685432434, + "learning_rate": 3.93689448147677e-05, + "loss": 0.0044, + "step": 58980 + }, + { + "grad_norm": 0.03510300815105438, + "learning_rate": 3.935278876864278e-05, + "loss": 0.0059, + "step": 58990 + }, + { + "grad_norm": 0.029569489881396294, + "learning_rate": 3.933663388688087e-05, + "loss": 0.0052, + "step": 59000 + }, + { + "grad_norm": 0.02879188023507595, + "learning_rate": 3.9320480171248645e-05, + "loss": 0.0029, + "step": 59010 + }, + { + "grad_norm": 0.03914906829595566, + "learning_rate": 3.930432762351265e-05, + "loss": 0.005, + "step": 59020 + }, + { + "grad_norm": 0.06900253891944885, + "learning_rate": 3.928817624543931e-05, + "loss": 0.0066, + "step": 59030 + }, + { + "grad_norm": 0.042984675616025925, + "learning_rate": 3.9272026038794885e-05, + "loss": 0.0046, + "step": 59040 + }, + { + "grad_norm": 0.028548037633299828, + "learning_rate": 3.9255877005345586e-05, + "loss": 0.0036, + "step": 59050 + }, + { + "grad_norm": 0.04252579063177109, + "learning_rate": 3.923972914685743e-05, + "loss": 0.0052, + "step": 59060 + }, + { + "grad_norm": 0.023040352389216423, + "learning_rate": 3.922358246509631e-05, + "loss": 0.0045, + "step": 59070 + }, + { + "grad_norm": 0.04034537076950073, + "learning_rate": 3.920743696182801e-05, + "loss": 0.0039, + "step": 59080 + }, + { + "grad_norm": 0.02808697149157524, + "learning_rate": 3.9191292638818164e-05, + "loss": 0.0037, + "step": 59090 + }, + { + "grad_norm": 0.039078183472156525, + "learning_rate": 3.9175149497832316e-05, + "loss": 0.0038, + "step": 59100 + }, + { + "grad_norm": 0.03971266746520996, + "learning_rate": 3.915900754063584e-05, + "loss": 0.0051, + "step": 59110 + }, + { + "grad_norm": 0.055707961320877075, + "learning_rate": 3.9142866768994014e-05, + "loss": 0.0063, + "step": 59120 + }, + { + "grad_norm": 0.04434347525238991, + "learning_rate": 3.912672718467193e-05, + "loss": 0.0037, + "step": 59130 + }, + { + "grad_norm": 0.04346618801355362, + "learning_rate": 3.911058878943463e-05, + "loss": 0.0045, + "step": 59140 + }, + { + "grad_norm": 0.041898760944604874, + "learning_rate": 3.9094451585046974e-05, + "loss": 0.0036, + "step": 59150 + }, + { + "grad_norm": 0.026951085776090622, + "learning_rate": 3.90783155732737e-05, + "loss": 0.0044, + "step": 59160 + }, + { + "grad_norm": 0.04477783665060997, + "learning_rate": 3.9062180755879373e-05, + "loss": 0.0052, + "step": 59170 + }, + { + "grad_norm": 0.042390357702970505, + "learning_rate": 3.904604713462855e-05, + "loss": 0.0054, + "step": 59180 + }, + { + "grad_norm": 0.047496408224105835, + "learning_rate": 3.902991471128554e-05, + "loss": 0.0064, + "step": 59190 + }, + { + "grad_norm": 0.03972230106592178, + "learning_rate": 3.901378348761456e-05, + "loss": 0.0051, + "step": 59200 + }, + { + "grad_norm": 0.043422143906354904, + "learning_rate": 3.899765346537971e-05, + "loss": 0.0047, + "step": 59210 + }, + { + "grad_norm": 0.03100794553756714, + "learning_rate": 3.898152464634493e-05, + "loss": 0.0057, + "step": 59220 + }, + { + "grad_norm": 0.04482386261224747, + "learning_rate": 3.896539703227402e-05, + "loss": 0.0041, + "step": 59230 + }, + { + "grad_norm": 0.055074840784072876, + "learning_rate": 3.894927062493073e-05, + "loss": 0.0075, + "step": 59240 + }, + { + "grad_norm": 0.07365264743566513, + "learning_rate": 3.893314542607859e-05, + "loss": 0.0054, + "step": 59250 + }, + { + "grad_norm": 0.03865774720907211, + "learning_rate": 3.891702143748103e-05, + "loss": 0.0055, + "step": 59260 + }, + { + "grad_norm": 0.03972572460770607, + "learning_rate": 3.890089866090133e-05, + "loss": 0.0038, + "step": 59270 + }, + { + "grad_norm": 0.06303388625383377, + "learning_rate": 3.888477709810267e-05, + "loss": 0.0053, + "step": 59280 + }, + { + "grad_norm": 0.05302983149886131, + "learning_rate": 3.886865675084806e-05, + "loss": 0.0055, + "step": 59290 + }, + { + "grad_norm": 0.06152153015136719, + "learning_rate": 3.885253762090043e-05, + "loss": 0.0053, + "step": 59300 + }, + { + "grad_norm": 0.05090470612049103, + "learning_rate": 3.8836419710022534e-05, + "loss": 0.0042, + "step": 59310 + }, + { + "grad_norm": 0.045648977160453796, + "learning_rate": 3.882030301997698e-05, + "loss": 0.0065, + "step": 59320 + }, + { + "grad_norm": 0.044015638530254364, + "learning_rate": 3.880418755252629e-05, + "loss": 0.0058, + "step": 59330 + }, + { + "grad_norm": 0.04040941223502159, + "learning_rate": 3.87880733094328e-05, + "loss": 0.0045, + "step": 59340 + }, + { + "grad_norm": 0.03974924609065056, + "learning_rate": 3.877196029245877e-05, + "loss": 0.0047, + "step": 59350 + }, + { + "grad_norm": 0.05028626322746277, + "learning_rate": 3.875584850336627e-05, + "loss": 0.0043, + "step": 59360 + }, + { + "grad_norm": 0.042713478207588196, + "learning_rate": 3.873973794391728e-05, + "loss": 0.0047, + "step": 59370 + }, + { + "grad_norm": 0.057080239057540894, + "learning_rate": 3.872362861587361e-05, + "loss": 0.0046, + "step": 59380 + }, + { + "grad_norm": 0.04963266849517822, + "learning_rate": 3.870752052099698e-05, + "loss": 0.006, + "step": 59390 + }, + { + "grad_norm": 0.031433816999197006, + "learning_rate": 3.8691413661048916e-05, + "loss": 0.004, + "step": 59400 + }, + { + "grad_norm": 0.02429521270096302, + "learning_rate": 3.867530803779085e-05, + "loss": 0.0035, + "step": 59410 + }, + { + "grad_norm": 0.056537140160799026, + "learning_rate": 3.8659203652984054e-05, + "loss": 0.0054, + "step": 59420 + }, + { + "grad_norm": 0.047358665615320206, + "learning_rate": 3.864310050838972e-05, + "loss": 0.0046, + "step": 59430 + }, + { + "grad_norm": 0.031828880310058594, + "learning_rate": 3.8626998605768835e-05, + "loss": 0.0045, + "step": 59440 + }, + { + "grad_norm": 0.0517512708902359, + "learning_rate": 3.8610897946882294e-05, + "loss": 0.0042, + "step": 59450 + }, + { + "grad_norm": 0.04037023335695267, + "learning_rate": 3.859479853349082e-05, + "loss": 0.0045, + "step": 59460 + }, + { + "grad_norm": 0.035730279982089996, + "learning_rate": 3.857870036735503e-05, + "loss": 0.0046, + "step": 59470 + }, + { + "grad_norm": 0.06811237335205078, + "learning_rate": 3.856260345023538e-05, + "loss": 0.0058, + "step": 59480 + }, + { + "grad_norm": 0.06625557690858841, + "learning_rate": 3.854650778389224e-05, + "loss": 0.0043, + "step": 59490 + }, + { + "grad_norm": 0.030357258394360542, + "learning_rate": 3.8530413370085786e-05, + "loss": 0.004, + "step": 59500 + }, + { + "grad_norm": 0.05432690680027008, + "learning_rate": 3.851432021057608e-05, + "loss": 0.0069, + "step": 59510 + }, + { + "grad_norm": 0.04086485132575035, + "learning_rate": 3.849822830712306e-05, + "loss": 0.0034, + "step": 59520 + }, + { + "grad_norm": 0.04191877692937851, + "learning_rate": 3.848213766148649e-05, + "loss": 0.007, + "step": 59530 + }, + { + "grad_norm": 0.03427337855100632, + "learning_rate": 3.846604827542602e-05, + "loss": 0.0038, + "step": 59540 + }, + { + "grad_norm": 0.03862573206424713, + "learning_rate": 3.844996015070117e-05, + "loss": 0.0044, + "step": 59550 + }, + { + "grad_norm": 0.054625820368528366, + "learning_rate": 3.843387328907132e-05, + "loss": 0.0046, + "step": 59560 + }, + { + "grad_norm": 0.04243886098265648, + "learning_rate": 3.8417787692295696e-05, + "loss": 0.0038, + "step": 59570 + }, + { + "grad_norm": 0.0396660715341568, + "learning_rate": 3.84017033621334e-05, + "loss": 0.0042, + "step": 59580 + }, + { + "grad_norm": 0.02976328693330288, + "learning_rate": 3.838562030034338e-05, + "loss": 0.0046, + "step": 59590 + }, + { + "grad_norm": 0.03130350634455681, + "learning_rate": 3.836953850868447e-05, + "loss": 0.0045, + "step": 59600 + }, + { + "grad_norm": 0.04443301260471344, + "learning_rate": 3.835345798891533e-05, + "loss": 0.0065, + "step": 59610 + }, + { + "grad_norm": 0.03452388942241669, + "learning_rate": 3.833737874279452e-05, + "loss": 0.0046, + "step": 59620 + }, + { + "grad_norm": 0.038604985922575, + "learning_rate": 3.8321300772080433e-05, + "loss": 0.0051, + "step": 59630 + }, + { + "grad_norm": 0.05703853815793991, + "learning_rate": 3.8305224078531345e-05, + "loss": 0.0049, + "step": 59640 + }, + { + "grad_norm": 0.05203726887702942, + "learning_rate": 3.828914866390537e-05, + "loss": 0.0051, + "step": 59650 + }, + { + "grad_norm": 0.04522424936294556, + "learning_rate": 3.827307452996048e-05, + "loss": 0.0042, + "step": 59660 + }, + { + "grad_norm": 0.02958996780216694, + "learning_rate": 3.825700167845454e-05, + "loss": 0.0049, + "step": 59670 + }, + { + "grad_norm": 0.04380454123020172, + "learning_rate": 3.8240930111145214e-05, + "loss": 0.0056, + "step": 59680 + }, + { + "grad_norm": 0.057676371186971664, + "learning_rate": 3.8224859829790116e-05, + "loss": 0.007, + "step": 59690 + }, + { + "grad_norm": 0.0324331633746624, + "learning_rate": 3.820879083614664e-05, + "loss": 0.0035, + "step": 59700 + }, + { + "grad_norm": 0.04113291576504707, + "learning_rate": 3.819272313197208e-05, + "loss": 0.0043, + "step": 59710 + }, + { + "grad_norm": 0.04846169427037239, + "learning_rate": 3.817665671902355e-05, + "loss": 0.0065, + "step": 59720 + }, + { + "grad_norm": 0.060511648654937744, + "learning_rate": 3.816059159905808e-05, + "loss": 0.005, + "step": 59730 + }, + { + "grad_norm": 0.06861376017332077, + "learning_rate": 3.8144527773832476e-05, + "loss": 0.0051, + "step": 59740 + }, + { + "grad_norm": 0.06269536912441254, + "learning_rate": 3.812846524510352e-05, + "loss": 0.0064, + "step": 59750 + }, + { + "grad_norm": 0.06545861065387726, + "learning_rate": 3.811240401462775e-05, + "loss": 0.0055, + "step": 59760 + }, + { + "grad_norm": 0.05853552743792534, + "learning_rate": 3.809634408416162e-05, + "loss": 0.0038, + "step": 59770 + }, + { + "grad_norm": 0.03354111686348915, + "learning_rate": 3.8080285455461394e-05, + "loss": 0.0061, + "step": 59780 + }, + { + "grad_norm": 0.04491143673658371, + "learning_rate": 3.806422813028323e-05, + "loss": 0.0061, + "step": 59790 + }, + { + "grad_norm": 0.04513964802026749, + "learning_rate": 3.8048172110383114e-05, + "loss": 0.0044, + "step": 59800 + }, + { + "grad_norm": 0.044651105999946594, + "learning_rate": 3.8032117397516944e-05, + "loss": 0.0038, + "step": 59810 + }, + { + "grad_norm": 0.05872320383787155, + "learning_rate": 3.801606399344042e-05, + "loss": 0.0062, + "step": 59820 + }, + { + "grad_norm": 0.03813955932855606, + "learning_rate": 3.800001189990913e-05, + "loss": 0.0038, + "step": 59830 + }, + { + "grad_norm": 0.040709659457206726, + "learning_rate": 3.798396111867847e-05, + "loss": 0.0048, + "step": 59840 + }, + { + "grad_norm": 0.03740047290921211, + "learning_rate": 3.796791165150378e-05, + "loss": 0.0033, + "step": 59850 + }, + { + "grad_norm": 0.03669922426342964, + "learning_rate": 3.795186350014018e-05, + "loss": 0.005, + "step": 59860 + }, + { + "grad_norm": 0.062433090060949326, + "learning_rate": 3.793581666634266e-05, + "loss": 0.0047, + "step": 59870 + }, + { + "grad_norm": 0.054951246827840805, + "learning_rate": 3.791977115186609e-05, + "loss": 0.0043, + "step": 59880 + }, + { + "grad_norm": 0.04476064816117287, + "learning_rate": 3.79037269584652e-05, + "loss": 0.0042, + "step": 59890 + }, + { + "grad_norm": 0.0394880585372448, + "learning_rate": 3.7887684087894545e-05, + "loss": 0.0031, + "step": 59900 + }, + { + "grad_norm": 0.055569618940353394, + "learning_rate": 3.787164254190854e-05, + "loss": 0.0046, + "step": 59910 + }, + { + "grad_norm": 0.034013304859399796, + "learning_rate": 3.7855602322261473e-05, + "loss": 0.0056, + "step": 59920 + }, + { + "grad_norm": 0.039013274013996124, + "learning_rate": 3.783956343070746e-05, + "loss": 0.0049, + "step": 59930 + }, + { + "grad_norm": 0.055693935602903366, + "learning_rate": 3.7823525869000526e-05, + "loss": 0.0049, + "step": 59940 + }, + { + "grad_norm": 0.05871972441673279, + "learning_rate": 3.78074896388945e-05, + "loss": 0.0055, + "step": 59950 + }, + { + "grad_norm": 0.0670410767197609, + "learning_rate": 3.7791454742143075e-05, + "loss": 0.004, + "step": 59960 + }, + { + "grad_norm": 0.05811981484293938, + "learning_rate": 3.7775421180499795e-05, + "loss": 0.0047, + "step": 59970 + }, + { + "grad_norm": 0.040486134588718414, + "learning_rate": 3.7759388955718086e-05, + "loss": 0.004, + "step": 59980 + }, + { + "grad_norm": 0.031569529324769974, + "learning_rate": 3.774335806955117e-05, + "loss": 0.0052, + "step": 59990 + }, + { + "grad_norm": 0.03956465795636177, + "learning_rate": 3.7727328523752215e-05, + "loss": 0.0046, + "step": 60000 + }, + { + "grad_norm": 0.040213216096162796, + "learning_rate": 3.771130032007415e-05, + "loss": 0.0045, + "step": 60010 + }, + { + "grad_norm": 0.038041774183511734, + "learning_rate": 3.769527346026982e-05, + "loss": 0.0043, + "step": 60020 + }, + { + "grad_norm": 0.03733532875776291, + "learning_rate": 3.767924794609188e-05, + "loss": 0.0045, + "step": 60030 + }, + { + "grad_norm": 0.029196754097938538, + "learning_rate": 3.7663223779292854e-05, + "loss": 0.0043, + "step": 60040 + }, + { + "grad_norm": 0.0285493154078722, + "learning_rate": 3.764720096162512e-05, + "loss": 0.0043, + "step": 60050 + }, + { + "grad_norm": 0.035489026457071304, + "learning_rate": 3.763117949484092e-05, + "loss": 0.0036, + "step": 60060 + }, + { + "grad_norm": 0.03456069529056549, + "learning_rate": 3.7615159380692344e-05, + "loss": 0.0058, + "step": 60070 + }, + { + "grad_norm": 0.04859314486384392, + "learning_rate": 3.759914062093132e-05, + "loss": 0.0057, + "step": 60080 + }, + { + "grad_norm": 0.0402318499982357, + "learning_rate": 3.7583123217309615e-05, + "loss": 0.0064, + "step": 60090 + }, + { + "grad_norm": 0.0470009483397007, + "learning_rate": 3.7567107171578904e-05, + "loss": 0.0037, + "step": 60100 + }, + { + "grad_norm": 0.04639000445604324, + "learning_rate": 3.755109248549066e-05, + "loss": 0.0038, + "step": 60110 + }, + { + "grad_norm": 0.0367952436208725, + "learning_rate": 3.75350791607962e-05, + "loss": 0.0053, + "step": 60120 + }, + { + "grad_norm": 0.03525960072875023, + "learning_rate": 3.751906719924676e-05, + "loss": 0.0056, + "step": 60130 + }, + { + "grad_norm": 0.028345193713903427, + "learning_rate": 3.750305660259337e-05, + "loss": 0.0045, + "step": 60140 + }, + { + "grad_norm": 0.034559570252895355, + "learning_rate": 3.748704737258693e-05, + "loss": 0.0035, + "step": 60150 + }, + { + "grad_norm": 0.05867484211921692, + "learning_rate": 3.747103951097816e-05, + "loss": 0.0043, + "step": 60160 + }, + { + "grad_norm": 0.028278591111302376, + "learning_rate": 3.745503301951768e-05, + "loss": 0.0031, + "step": 60170 + }, + { + "grad_norm": 0.032544247806072235, + "learning_rate": 3.743902789995592e-05, + "loss": 0.0047, + "step": 60180 + }, + { + "grad_norm": 0.028660636395215988, + "learning_rate": 3.7423024154043183e-05, + "loss": 0.005, + "step": 60190 + }, + { + "grad_norm": 0.05228028818964958, + "learning_rate": 3.740702178352963e-05, + "loss": 0.0049, + "step": 60200 + }, + { + "grad_norm": 0.039497122168540955, + "learning_rate": 3.739102079016523e-05, + "loss": 0.0041, + "step": 60210 + }, + { + "grad_norm": 0.0331181101500988, + "learning_rate": 3.7375021175699846e-05, + "loss": 0.0036, + "step": 60220 + }, + { + "grad_norm": 0.03802760690450668, + "learning_rate": 3.735902294188317e-05, + "loss": 0.0054, + "step": 60230 + }, + { + "grad_norm": 0.04259781166911125, + "learning_rate": 3.734302609046473e-05, + "loss": 0.0042, + "step": 60240 + }, + { + "grad_norm": 0.07072918862104416, + "learning_rate": 3.732703062319391e-05, + "loss": 0.0061, + "step": 60250 + }, + { + "grad_norm": 0.039997391402721405, + "learning_rate": 3.731103654181999e-05, + "loss": 0.0034, + "step": 60260 + }, + { + "grad_norm": 0.04137143865227699, + "learning_rate": 3.729504384809203e-05, + "loss": 0.0044, + "step": 60270 + }, + { + "grad_norm": 0.05288896709680557, + "learning_rate": 3.727905254375898e-05, + "loss": 0.0048, + "step": 60280 + }, + { + "grad_norm": 0.025226866826415062, + "learning_rate": 3.7263062630569614e-05, + "loss": 0.0031, + "step": 60290 + }, + { + "grad_norm": 0.04008148983120918, + "learning_rate": 3.724707411027256e-05, + "loss": 0.0052, + "step": 60300 + }, + { + "grad_norm": 0.0648597851395607, + "learning_rate": 3.723108698461631e-05, + "loss": 0.0041, + "step": 60310 + }, + { + "grad_norm": 0.05890063941478729, + "learning_rate": 3.72151012553492e-05, + "loss": 0.0044, + "step": 60320 + }, + { + "grad_norm": 0.044960081577301025, + "learning_rate": 3.719911692421939e-05, + "loss": 0.0043, + "step": 60330 + }, + { + "grad_norm": 0.03785533457994461, + "learning_rate": 3.71831339929749e-05, + "loss": 0.0054, + "step": 60340 + }, + { + "grad_norm": 0.04550829529762268, + "learning_rate": 3.716715246336362e-05, + "loss": 0.0044, + "step": 60350 + }, + { + "grad_norm": 0.041165489703416824, + "learning_rate": 3.715117233713324e-05, + "loss": 0.0054, + "step": 60360 + }, + { + "grad_norm": 0.03970750421285629, + "learning_rate": 3.713519361603135e-05, + "loss": 0.0039, + "step": 60370 + }, + { + "grad_norm": 0.04334504157304764, + "learning_rate": 3.711921630180532e-05, + "loss": 0.0043, + "step": 60380 + }, + { + "grad_norm": 0.0708802193403244, + "learning_rate": 3.710324039620245e-05, + "loss": 0.0051, + "step": 60390 + }, + { + "grad_norm": 0.04477208852767944, + "learning_rate": 3.708726590096982e-05, + "loss": 0.0056, + "step": 60400 + }, + { + "grad_norm": 0.03965991735458374, + "learning_rate": 3.707129281785437e-05, + "loss": 0.0042, + "step": 60410 + }, + { + "grad_norm": 0.032109715044498444, + "learning_rate": 3.705532114860291e-05, + "loss": 0.0059, + "step": 60420 + }, + { + "grad_norm": 0.04442542791366577, + "learning_rate": 3.703935089496207e-05, + "loss": 0.005, + "step": 60430 + }, + { + "grad_norm": 0.04040280729532242, + "learning_rate": 3.702338205867829e-05, + "loss": 0.0038, + "step": 60440 + }, + { + "grad_norm": 0.04722007364034653, + "learning_rate": 3.7007414641497976e-05, + "loss": 0.0044, + "step": 60450 + }, + { + "grad_norm": 0.04431620612740517, + "learning_rate": 3.699144864516726e-05, + "loss": 0.0046, + "step": 60460 + }, + { + "grad_norm": 0.055193185806274414, + "learning_rate": 3.6975484071432153e-05, + "loss": 0.0041, + "step": 60470 + }, + { + "grad_norm": 0.06489575654268265, + "learning_rate": 3.695952092203854e-05, + "loss": 0.0047, + "step": 60480 + }, + { + "grad_norm": 0.0245117899030447, + "learning_rate": 3.69435591987321e-05, + "loss": 0.0047, + "step": 60490 + }, + { + "grad_norm": 0.03371835872530937, + "learning_rate": 3.6927598903258374e-05, + "loss": 0.0056, + "step": 60500 + }, + { + "grad_norm": 0.04460887238383293, + "learning_rate": 3.691164003736281e-05, + "loss": 0.0051, + "step": 60510 + }, + { + "grad_norm": 0.04063641279935837, + "learning_rate": 3.689568260279061e-05, + "loss": 0.0046, + "step": 60520 + }, + { + "grad_norm": 0.05319428816437721, + "learning_rate": 3.687972660128686e-05, + "loss": 0.0059, + "step": 60530 + }, + { + "grad_norm": 0.07257069647312164, + "learning_rate": 3.686377203459648e-05, + "loss": 0.0057, + "step": 60540 + }, + { + "grad_norm": 0.052177757024765015, + "learning_rate": 3.6847818904464226e-05, + "loss": 0.0055, + "step": 60550 + }, + { + "grad_norm": 0.04063575714826584, + "learning_rate": 3.683186721263474e-05, + "loss": 0.0039, + "step": 60560 + }, + { + "grad_norm": 0.04021511226892471, + "learning_rate": 3.6815916960852435e-05, + "loss": 0.0056, + "step": 60570 + }, + { + "grad_norm": 0.039398908615112305, + "learning_rate": 3.679996815086165e-05, + "loss": 0.0045, + "step": 60580 + }, + { + "grad_norm": 0.044173069298267365, + "learning_rate": 3.678402078440649e-05, + "loss": 0.0041, + "step": 60590 + }, + { + "grad_norm": 0.03881504386663437, + "learning_rate": 3.676807486323096e-05, + "loss": 0.0048, + "step": 60600 + }, + { + "grad_norm": 0.06334041059017181, + "learning_rate": 3.6752130389078864e-05, + "loss": 0.0057, + "step": 60610 + }, + { + "grad_norm": 0.05109920725226402, + "learning_rate": 3.673618736369388e-05, + "loss": 0.0043, + "step": 60620 + }, + { + "grad_norm": 0.05530121177434921, + "learning_rate": 3.6720245788819474e-05, + "loss": 0.0049, + "step": 60630 + }, + { + "grad_norm": 0.05480258911848068, + "learning_rate": 3.6704305666199044e-05, + "loss": 0.0044, + "step": 60640 + }, + { + "grad_norm": 0.05664514750242233, + "learning_rate": 3.668836699757576e-05, + "loss": 0.005, + "step": 60650 + }, + { + "grad_norm": 0.049552615731954575, + "learning_rate": 3.667242978469265e-05, + "loss": 0.0051, + "step": 60660 + }, + { + "grad_norm": 0.043848179280757904, + "learning_rate": 3.6656494029292575e-05, + "loss": 0.0053, + "step": 60670 + }, + { + "grad_norm": 0.03190197795629501, + "learning_rate": 3.664055973311825e-05, + "loss": 0.0056, + "step": 60680 + }, + { + "grad_norm": 0.05328253284096718, + "learning_rate": 3.662462689791221e-05, + "loss": 0.0038, + "step": 60690 + }, + { + "grad_norm": 0.05062934383749962, + "learning_rate": 3.660869552541689e-05, + "loss": 0.0039, + "step": 60700 + }, + { + "grad_norm": 0.035633206367492676, + "learning_rate": 3.659276561737448e-05, + "loss": 0.0032, + "step": 60710 + }, + { + "grad_norm": 0.04054466634988785, + "learning_rate": 3.657683717552708e-05, + "loss": 0.0045, + "step": 60720 + }, + { + "grad_norm": 0.03887081891298294, + "learning_rate": 3.6560910201616574e-05, + "loss": 0.0042, + "step": 60730 + }, + { + "grad_norm": 0.0298873633146286, + "learning_rate": 3.654498469738472e-05, + "loss": 0.0051, + "step": 60740 + }, + { + "grad_norm": 0.034950945526361465, + "learning_rate": 3.6529060664573126e-05, + "loss": 0.0043, + "step": 60750 + }, + { + "grad_norm": 0.040629565715789795, + "learning_rate": 3.6513138104923176e-05, + "loss": 0.0058, + "step": 60760 + }, + { + "grad_norm": 0.04137476533651352, + "learning_rate": 3.6497217020176176e-05, + "loss": 0.0044, + "step": 60770 + }, + { + "grad_norm": 0.05753645673394203, + "learning_rate": 3.648129741207323e-05, + "loss": 0.0043, + "step": 60780 + }, + { + "grad_norm": 0.03155408799648285, + "learning_rate": 3.646537928235527e-05, + "loss": 0.0065, + "step": 60790 + }, + { + "grad_norm": 0.03767716512084007, + "learning_rate": 3.6449462632763075e-05, + "loss": 0.0071, + "step": 60800 + }, + { + "grad_norm": 0.04583902657032013, + "learning_rate": 3.643354746503729e-05, + "loss": 0.004, + "step": 60810 + }, + { + "grad_norm": 0.05109759047627449, + "learning_rate": 3.6417633780918345e-05, + "loss": 0.0054, + "step": 60820 + }, + { + "grad_norm": 0.06561187654733658, + "learning_rate": 3.640172158214655e-05, + "loss": 0.0054, + "step": 60830 + }, + { + "grad_norm": 0.06253642588853836, + "learning_rate": 3.6385810870462035e-05, + "loss": 0.0057, + "step": 60840 + }, + { + "grad_norm": 0.03830816596746445, + "learning_rate": 3.636990164760479e-05, + "loss": 0.0051, + "step": 60850 + }, + { + "grad_norm": 0.03687747195363045, + "learning_rate": 3.6353993915314596e-05, + "loss": 0.0044, + "step": 60860 + }, + { + "grad_norm": 0.04347711428999901, + "learning_rate": 3.6338087675331124e-05, + "loss": 0.0074, + "step": 60870 + }, + { + "grad_norm": 0.05748448520898819, + "learning_rate": 3.632218292939383e-05, + "loss": 0.0051, + "step": 60880 + }, + { + "grad_norm": 0.04139869287610054, + "learning_rate": 3.6306279679242033e-05, + "loss": 0.0049, + "step": 60890 + }, + { + "grad_norm": 0.057649459689855576, + "learning_rate": 3.629037792661492e-05, + "loss": 0.0062, + "step": 60900 + }, + { + "grad_norm": 0.03953621909022331, + "learning_rate": 3.627447767325147e-05, + "loss": 0.0058, + "step": 60910 + }, + { + "grad_norm": 0.05246294289827347, + "learning_rate": 3.62585789208905e-05, + "loss": 0.0051, + "step": 60920 + }, + { + "grad_norm": 0.03835666552186012, + "learning_rate": 3.624268167127068e-05, + "loss": 0.0053, + "step": 60930 + }, + { + "grad_norm": 0.04355904459953308, + "learning_rate": 3.622678592613051e-05, + "loss": 0.0046, + "step": 60940 + }, + { + "grad_norm": 0.04848463460803032, + "learning_rate": 3.6210891687208304e-05, + "loss": 0.0048, + "step": 60950 + }, + { + "grad_norm": 0.05264909565448761, + "learning_rate": 3.619499895624228e-05, + "loss": 0.0048, + "step": 60960 + }, + { + "grad_norm": 0.03572649881243706, + "learning_rate": 3.61791077349704e-05, + "loss": 0.0039, + "step": 60970 + }, + { + "grad_norm": 0.04183599352836609, + "learning_rate": 3.616321802513053e-05, + "loss": 0.0047, + "step": 60980 + }, + { + "grad_norm": 0.056588463485240936, + "learning_rate": 3.6147329828460325e-05, + "loss": 0.0053, + "step": 60990 + }, + { + "grad_norm": 0.045318253338336945, + "learning_rate": 3.6131443146697306e-05, + "loss": 0.0057, + "step": 61000 + }, + { + "grad_norm": 0.04182187840342522, + "learning_rate": 3.611555798157879e-05, + "loss": 0.0038, + "step": 61010 + }, + { + "grad_norm": 0.04943569004535675, + "learning_rate": 3.6099674334842e-05, + "loss": 0.0053, + "step": 61020 + }, + { + "grad_norm": 0.06290163099765778, + "learning_rate": 3.6083792208223934e-05, + "loss": 0.0061, + "step": 61030 + }, + { + "grad_norm": 0.04107559844851494, + "learning_rate": 3.606791160346142e-05, + "loss": 0.0032, + "step": 61040 + }, + { + "grad_norm": 0.04797002673149109, + "learning_rate": 3.6052032522291134e-05, + "loss": 0.0036, + "step": 61050 + }, + { + "grad_norm": 0.042699895799160004, + "learning_rate": 3.603615496644962e-05, + "loss": 0.0045, + "step": 61060 + }, + { + "grad_norm": 0.02969600073993206, + "learning_rate": 3.60202789376732e-05, + "loss": 0.0043, + "step": 61070 + }, + { + "grad_norm": 0.04282989725470543, + "learning_rate": 3.6004404437698036e-05, + "loss": 0.0047, + "step": 61080 + }, + { + "grad_norm": 0.0457172617316246, + "learning_rate": 3.5988531468260184e-05, + "loss": 0.004, + "step": 61090 + }, + { + "grad_norm": 0.03722141683101654, + "learning_rate": 3.597266003109546e-05, + "loss": 0.0035, + "step": 61100 + }, + { + "grad_norm": 0.031431496143341064, + "learning_rate": 3.5956790127939557e-05, + "loss": 0.0042, + "step": 61110 + }, + { + "grad_norm": 0.042368333786726, + "learning_rate": 3.594092176052796e-05, + "loss": 0.0073, + "step": 61120 + }, + { + "grad_norm": 0.0401054210960865, + "learning_rate": 3.592505493059603e-05, + "loss": 0.0035, + "step": 61130 + }, + { + "grad_norm": 0.03652023896574974, + "learning_rate": 3.5909189639878905e-05, + "loss": 0.0039, + "step": 61140 + }, + { + "grad_norm": 0.06073926016688347, + "learning_rate": 3.5893325890111626e-05, + "loss": 0.005, + "step": 61150 + }, + { + "grad_norm": 0.03775735944509506, + "learning_rate": 3.5877463683029034e-05, + "loss": 0.0053, + "step": 61160 + }, + { + "grad_norm": 0.02967497520148754, + "learning_rate": 3.586160302036578e-05, + "loss": 0.003, + "step": 61170 + }, + { + "grad_norm": 0.047532640397548676, + "learning_rate": 3.584574390385636e-05, + "loss": 0.0043, + "step": 61180 + }, + { + "grad_norm": 0.03881906718015671, + "learning_rate": 3.582988633523511e-05, + "loss": 0.0047, + "step": 61190 + }, + { + "grad_norm": 0.04333930462598801, + "learning_rate": 3.581403031623616e-05, + "loss": 0.0066, + "step": 61200 + }, + { + "grad_norm": 0.05288408696651459, + "learning_rate": 3.579817584859354e-05, + "loss": 0.0064, + "step": 61210 + }, + { + "grad_norm": 0.04251338914036751, + "learning_rate": 3.578232293404107e-05, + "loss": 0.0048, + "step": 61220 + }, + { + "grad_norm": 0.034222401678562164, + "learning_rate": 3.5766471574312385e-05, + "loss": 0.0055, + "step": 61230 + }, + { + "grad_norm": 0.04274336248636246, + "learning_rate": 3.5750621771140964e-05, + "loss": 0.0059, + "step": 61240 + }, + { + "grad_norm": 0.03469308465719223, + "learning_rate": 3.5734773526260115e-05, + "loss": 0.0039, + "step": 61250 + }, + { + "grad_norm": 0.031702782958745956, + "learning_rate": 3.5718926841402996e-05, + "loss": 0.0044, + "step": 61260 + }, + { + "grad_norm": 0.037446487694978714, + "learning_rate": 3.5703081718302555e-05, + "loss": 0.005, + "step": 61270 + }, + { + "grad_norm": 0.051390018314123154, + "learning_rate": 3.568723815869162e-05, + "loss": 0.0062, + "step": 61280 + }, + { + "grad_norm": 0.04515952616930008, + "learning_rate": 3.5671396164302777e-05, + "loss": 0.0046, + "step": 61290 + }, + { + "grad_norm": 0.04063678905367851, + "learning_rate": 3.565555573686853e-05, + "loss": 0.0048, + "step": 61300 + }, + { + "grad_norm": 0.03296680003404617, + "learning_rate": 3.563971687812113e-05, + "loss": 0.0043, + "step": 61310 + }, + { + "grad_norm": 0.031122684478759766, + "learning_rate": 3.5623879589792706e-05, + "loss": 0.0042, + "step": 61320 + }, + { + "grad_norm": 0.04352227970957756, + "learning_rate": 3.5608043873615185e-05, + "loss": 0.0037, + "step": 61330 + }, + { + "grad_norm": 0.037820953875780106, + "learning_rate": 3.559220973132035e-05, + "loss": 0.004, + "step": 61340 + }, + { + "grad_norm": 0.051559239625930786, + "learning_rate": 3.557637716463981e-05, + "loss": 0.0044, + "step": 61350 + }, + { + "grad_norm": 0.03786667808890343, + "learning_rate": 3.556054617530497e-05, + "loss": 0.0045, + "step": 61360 + }, + { + "grad_norm": 0.05933374539017677, + "learning_rate": 3.5544716765047106e-05, + "loss": 0.0052, + "step": 61370 + }, + { + "grad_norm": 0.03574123978614807, + "learning_rate": 3.5528888935597274e-05, + "loss": 0.0041, + "step": 61380 + }, + { + "grad_norm": 0.058982573449611664, + "learning_rate": 3.551306268868636e-05, + "loss": 0.0064, + "step": 61390 + }, + { + "grad_norm": 0.03811804577708244, + "learning_rate": 3.5497238026045166e-05, + "loss": 0.0042, + "step": 61400 + }, + { + "grad_norm": 0.042496196925640106, + "learning_rate": 3.548141494940421e-05, + "loss": 0.0044, + "step": 61410 + }, + { + "grad_norm": 0.03422178700566292, + "learning_rate": 3.546559346049388e-05, + "loss": 0.0039, + "step": 61420 + }, + { + "grad_norm": 0.030794696882367134, + "learning_rate": 3.544977356104441e-05, + "loss": 0.0053, + "step": 61430 + }, + { + "grad_norm": 0.04600292444229126, + "learning_rate": 3.543395525278582e-05, + "loss": 0.004, + "step": 61440 + }, + { + "grad_norm": 0.056924931704998016, + "learning_rate": 3.5418138537447985e-05, + "loss": 0.005, + "step": 61450 + }, + { + "grad_norm": 0.028201235458254814, + "learning_rate": 3.5402323416760574e-05, + "loss": 0.0036, + "step": 61460 + }, + { + "grad_norm": 0.029756909236311913, + "learning_rate": 3.538650989245316e-05, + "loss": 0.0039, + "step": 61470 + }, + { + "grad_norm": 0.04209349304437637, + "learning_rate": 3.537069796625504e-05, + "loss": 0.0036, + "step": 61480 + }, + { + "grad_norm": 0.05506128445267677, + "learning_rate": 3.53548876398954e-05, + "loss": 0.0052, + "step": 61490 + }, + { + "grad_norm": 0.04106053337454796, + "learning_rate": 3.5339078915103214e-05, + "loss": 0.0036, + "step": 61500 + }, + { + "grad_norm": 0.040005747228860855, + "learning_rate": 3.532327179360733e-05, + "loss": 0.0033, + "step": 61510 + }, + { + "grad_norm": 0.04591141268610954, + "learning_rate": 3.530746627713636e-05, + "loss": 0.0055, + "step": 61520 + }, + { + "grad_norm": 0.047880619764328, + "learning_rate": 3.5291662367418805e-05, + "loss": 0.0062, + "step": 61530 + }, + { + "grad_norm": 0.04157937690615654, + "learning_rate": 3.527586006618293e-05, + "loss": 0.0046, + "step": 61540 + }, + { + "grad_norm": 0.04104522988200188, + "learning_rate": 3.526005937515687e-05, + "loss": 0.0034, + "step": 61550 + }, + { + "grad_norm": 0.03719865158200264, + "learning_rate": 3.524426029606856e-05, + "loss": 0.0049, + "step": 61560 + }, + { + "grad_norm": 0.026061682030558586, + "learning_rate": 3.5228462830645746e-05, + "loss": 0.0038, + "step": 61570 + }, + { + "grad_norm": 0.045582365244627, + "learning_rate": 3.5212666980616036e-05, + "loss": 0.0045, + "step": 61580 + }, + { + "grad_norm": 0.052956804633140564, + "learning_rate": 3.519687274770681e-05, + "loss": 0.0063, + "step": 61590 + }, + { + "grad_norm": 0.052448056638240814, + "learning_rate": 3.518108013364535e-05, + "loss": 0.0054, + "step": 61600 + }, + { + "grad_norm": 0.041566070169210434, + "learning_rate": 3.516528914015868e-05, + "loss": 0.0041, + "step": 61610 + }, + { + "grad_norm": 0.045692648738622665, + "learning_rate": 3.5149499768973694e-05, + "loss": 0.0043, + "step": 61620 + }, + { + "grad_norm": 0.06714235991239548, + "learning_rate": 3.513371202181708e-05, + "loss": 0.0066, + "step": 61630 + }, + { + "grad_norm": 0.05945965275168419, + "learning_rate": 3.511792590041537e-05, + "loss": 0.0032, + "step": 61640 + }, + { + "grad_norm": 0.03350082412362099, + "learning_rate": 3.510214140649488e-05, + "loss": 0.0061, + "step": 61650 + }, + { + "grad_norm": 0.04818723350763321, + "learning_rate": 3.508635854178184e-05, + "loss": 0.0051, + "step": 61660 + }, + { + "grad_norm": 0.036083608865737915, + "learning_rate": 3.50705773080022e-05, + "loss": 0.0046, + "step": 61670 + }, + { + "grad_norm": 0.03968985378742218, + "learning_rate": 3.505479770688178e-05, + "loss": 0.0048, + "step": 61680 + }, + { + "grad_norm": 0.05177226662635803, + "learning_rate": 3.503901974014622e-05, + "loss": 0.0044, + "step": 61690 + }, + { + "grad_norm": 0.05951535329222679, + "learning_rate": 3.502324340952095e-05, + "loss": 0.0054, + "step": 61700 + }, + { + "grad_norm": 0.04118197783827782, + "learning_rate": 3.500746871673126e-05, + "loss": 0.0048, + "step": 61710 + }, + { + "grad_norm": 0.036790765821933746, + "learning_rate": 3.499169566350227e-05, + "loss": 0.0044, + "step": 61720 + }, + { + "grad_norm": 0.040267884731292725, + "learning_rate": 3.4975924251558876e-05, + "loss": 0.0048, + "step": 61730 + }, + { + "grad_norm": 0.028881005942821503, + "learning_rate": 3.496015448262582e-05, + "loss": 0.004, + "step": 61740 + }, + { + "grad_norm": 0.0587877556681633, + "learning_rate": 3.494438635842764e-05, + "loss": 0.0044, + "step": 61750 + }, + { + "grad_norm": 0.038552507758140564, + "learning_rate": 3.492861988068874e-05, + "loss": 0.0049, + "step": 61760 + }, + { + "grad_norm": 0.04352891072630882, + "learning_rate": 3.491285505113332e-05, + "loss": 0.006, + "step": 61770 + }, + { + "grad_norm": 0.058485329151153564, + "learning_rate": 3.4897091871485376e-05, + "loss": 0.0054, + "step": 61780 + }, + { + "grad_norm": 0.050489623099565506, + "learning_rate": 3.488133034346875e-05, + "loss": 0.0057, + "step": 61790 + }, + { + "grad_norm": 0.04315273463726044, + "learning_rate": 3.486557046880713e-05, + "loss": 0.0039, + "step": 61800 + }, + { + "grad_norm": 0.050520073622465134, + "learning_rate": 3.484981224922395e-05, + "loss": 0.0045, + "step": 61810 + }, + { + "grad_norm": 0.03468826040625572, + "learning_rate": 3.4834055686442544e-05, + "loss": 0.0049, + "step": 61820 + }, + { + "grad_norm": 0.052294399589300156, + "learning_rate": 3.4818300782186e-05, + "loss": 0.0055, + "step": 61830 + }, + { + "grad_norm": 0.03253164514899254, + "learning_rate": 3.4802547538177236e-05, + "loss": 0.0044, + "step": 61840 + }, + { + "grad_norm": 0.03478061780333519, + "learning_rate": 3.478679595613905e-05, + "loss": 0.0049, + "step": 61850 + }, + { + "grad_norm": 0.04683801904320717, + "learning_rate": 3.4771046037793975e-05, + "loss": 0.0037, + "step": 61860 + }, + { + "grad_norm": 0.0423881858587265, + "learning_rate": 3.4755297784864416e-05, + "loss": 0.0036, + "step": 61870 + }, + { + "grad_norm": 0.03368721902370453, + "learning_rate": 3.473955119907257e-05, + "loss": 0.0035, + "step": 61880 + }, + { + "grad_norm": 0.036223363131284714, + "learning_rate": 3.472380628214046e-05, + "loss": 0.0055, + "step": 61890 + }, + { + "grad_norm": 0.03949484974145889, + "learning_rate": 3.4708063035789906e-05, + "loss": 0.0044, + "step": 61900 + }, + { + "grad_norm": 0.08221707493066788, + "learning_rate": 3.4692321461742614e-05, + "loss": 0.006, + "step": 61910 + }, + { + "grad_norm": 0.04503154754638672, + "learning_rate": 3.4676581561720023e-05, + "loss": 0.0042, + "step": 61920 + }, + { + "grad_norm": 0.02771582640707493, + "learning_rate": 3.466084333744344e-05, + "loss": 0.0054, + "step": 61930 + }, + { + "grad_norm": 0.05242162197828293, + "learning_rate": 3.4645106790633964e-05, + "loss": 0.0041, + "step": 61940 + }, + { + "grad_norm": 0.038483377546072006, + "learning_rate": 3.4629371923012526e-05, + "loss": 0.004, + "step": 61950 + }, + { + "grad_norm": 0.04821980744600296, + "learning_rate": 3.4613638736299856e-05, + "loss": 0.0037, + "step": 61960 + }, + { + "grad_norm": 0.03880773112177849, + "learning_rate": 3.459790723221652e-05, + "loss": 0.0039, + "step": 61970 + }, + { + "grad_norm": 0.0334661528468132, + "learning_rate": 3.45821774124829e-05, + "loss": 0.0041, + "step": 61980 + }, + { + "grad_norm": 0.03282367065548897, + "learning_rate": 3.456644927881918e-05, + "loss": 0.0035, + "step": 61990 + }, + { + "grad_norm": 0.03113705851137638, + "learning_rate": 3.4550722832945354e-05, + "loss": 0.0043, + "step": 62000 + }, + { + "grad_norm": 0.032702092081308365, + "learning_rate": 3.453499807658126e-05, + "loss": 0.0044, + "step": 62010 + }, + { + "grad_norm": 0.04082479700446129, + "learning_rate": 3.451927501144653e-05, + "loss": 0.0054, + "step": 62020 + }, + { + "grad_norm": 0.049357131123542786, + "learning_rate": 3.450355363926059e-05, + "loss": 0.0042, + "step": 62030 + }, + { + "grad_norm": 0.037304218858480453, + "learning_rate": 3.448783396174273e-05, + "loss": 0.0056, + "step": 62040 + }, + { + "grad_norm": 0.04908269643783569, + "learning_rate": 3.4472115980612034e-05, + "loss": 0.0048, + "step": 62050 + }, + { + "grad_norm": 0.06329723447561264, + "learning_rate": 3.4456399697587396e-05, + "loss": 0.0056, + "step": 62060 + }, + { + "grad_norm": 0.03634044528007507, + "learning_rate": 3.4440685114387506e-05, + "loss": 0.004, + "step": 62070 + }, + { + "grad_norm": 0.03897253796458244, + "learning_rate": 3.442497223273091e-05, + "loss": 0.0048, + "step": 62080 + }, + { + "grad_norm": 0.062367696315050125, + "learning_rate": 3.4409261054335926e-05, + "loss": 0.0041, + "step": 62090 + }, + { + "grad_norm": 0.03503970801830292, + "learning_rate": 3.439355158092069e-05, + "loss": 0.004, + "step": 62100 + }, + { + "grad_norm": 0.04676772654056549, + "learning_rate": 3.437784381420322e-05, + "loss": 0.0041, + "step": 62110 + }, + { + "grad_norm": 0.07255205512046814, + "learning_rate": 3.436213775590126e-05, + "loss": 0.005, + "step": 62120 + }, + { + "grad_norm": 0.043530166149139404, + "learning_rate": 3.4346433407732405e-05, + "loss": 0.0065, + "step": 62130 + }, + { + "grad_norm": 0.02801923081278801, + "learning_rate": 3.433073077141405e-05, + "loss": 0.0044, + "step": 62140 + }, + { + "grad_norm": 0.09000331908464432, + "learning_rate": 3.431502984866343e-05, + "loss": 0.0037, + "step": 62150 + }, + { + "grad_norm": 0.05424405634403229, + "learning_rate": 3.429933064119753e-05, + "loss": 0.0057, + "step": 62160 + }, + { + "grad_norm": 0.042181145399808884, + "learning_rate": 3.4283633150733255e-05, + "loss": 0.0041, + "step": 62170 + }, + { + "grad_norm": 0.045107513666152954, + "learning_rate": 3.4267937378987225e-05, + "loss": 0.0052, + "step": 62180 + }, + { + "grad_norm": 0.0304301455616951, + "learning_rate": 3.425224332767592e-05, + "loss": 0.003, + "step": 62190 + }, + { + "grad_norm": 0.023574884980916977, + "learning_rate": 3.4236550998515595e-05, + "loss": 0.0042, + "step": 62200 + }, + { + "grad_norm": 0.03686148673295975, + "learning_rate": 3.422086039322235e-05, + "loss": 0.0047, + "step": 62210 + }, + { + "grad_norm": 0.0389518141746521, + "learning_rate": 3.420517151351209e-05, + "loss": 0.0035, + "step": 62220 + }, + { + "grad_norm": 0.04644334688782692, + "learning_rate": 3.418948436110053e-05, + "loss": 0.0053, + "step": 62230 + }, + { + "grad_norm": 0.034839730709791183, + "learning_rate": 3.417379893770319e-05, + "loss": 0.0041, + "step": 62240 + }, + { + "grad_norm": 0.034317053854465485, + "learning_rate": 3.41581152450354e-05, + "loss": 0.0045, + "step": 62250 + }, + { + "grad_norm": 0.05771823599934578, + "learning_rate": 3.414243328481231e-05, + "loss": 0.0046, + "step": 62260 + }, + { + "grad_norm": 0.030510248616337776, + "learning_rate": 3.412675305874888e-05, + "loss": 0.0042, + "step": 62270 + }, + { + "grad_norm": 0.0319267138838768, + "learning_rate": 3.4111074568559876e-05, + "loss": 0.0046, + "step": 62280 + }, + { + "grad_norm": 0.04145492985844612, + "learning_rate": 3.4095397815959846e-05, + "loss": 0.004, + "step": 62290 + }, + { + "grad_norm": 0.05242200195789337, + "learning_rate": 3.407972280266322e-05, + "loss": 0.0038, + "step": 62300 + }, + { + "grad_norm": 0.04562915861606598, + "learning_rate": 3.4064049530384166e-05, + "loss": 0.0036, + "step": 62310 + }, + { + "grad_norm": 0.03474844619631767, + "learning_rate": 3.40483780008367e-05, + "loss": 0.0033, + "step": 62320 + }, + { + "grad_norm": 0.026565000414848328, + "learning_rate": 3.4032708215734635e-05, + "loss": 0.0039, + "step": 62330 + }, + { + "grad_norm": 0.0430649034678936, + "learning_rate": 3.4017040176791584e-05, + "loss": 0.0052, + "step": 62340 + }, + { + "grad_norm": 0.05601256713271141, + "learning_rate": 3.4001373885720976e-05, + "loss": 0.0052, + "step": 62350 + }, + { + "grad_norm": 0.04020481929183006, + "learning_rate": 3.398570934423608e-05, + "loss": 0.0044, + "step": 62360 + }, + { + "grad_norm": 0.054118793457746506, + "learning_rate": 3.397004655404994e-05, + "loss": 0.0059, + "step": 62370 + }, + { + "grad_norm": 0.029807399958372116, + "learning_rate": 3.395438551687541e-05, + "loss": 0.0058, + "step": 62380 + }, + { + "grad_norm": 0.05925764888525009, + "learning_rate": 3.393872623442515e-05, + "loss": 0.0043, + "step": 62390 + }, + { + "grad_norm": 0.04936780780553818, + "learning_rate": 3.392306870841164e-05, + "loss": 0.0051, + "step": 62400 + }, + { + "grad_norm": 0.053734831511974335, + "learning_rate": 3.390741294054715e-05, + "loss": 0.0075, + "step": 62410 + }, + { + "grad_norm": 0.025525202974677086, + "learning_rate": 3.38917589325438e-05, + "loss": 0.0054, + "step": 62420 + }, + { + "grad_norm": 0.02780316211283207, + "learning_rate": 3.387610668611349e-05, + "loss": 0.0033, + "step": 62430 + }, + { + "grad_norm": 0.05004413053393364, + "learning_rate": 3.38604562029679e-05, + "loss": 0.0036, + "step": 62440 + }, + { + "grad_norm": 0.04280316084623337, + "learning_rate": 3.384480748481855e-05, + "loss": 0.0045, + "step": 62450 + }, + { + "grad_norm": 0.04022075608372688, + "learning_rate": 3.382916053337676e-05, + "loss": 0.0046, + "step": 62460 + }, + { + "grad_norm": 0.026144806295633316, + "learning_rate": 3.381351535035366e-05, + "loss": 0.0029, + "step": 62470 + }, + { + "grad_norm": 0.03374773636460304, + "learning_rate": 3.379787193746019e-05, + "loss": 0.0035, + "step": 62480 + }, + { + "grad_norm": 0.04096795246005058, + "learning_rate": 3.378223029640709e-05, + "loss": 0.0044, + "step": 62490 + }, + { + "grad_norm": 0.038249336183071136, + "learning_rate": 3.376659042890489e-05, + "loss": 0.0061, + "step": 62500 + }, + { + "grad_norm": 0.05239435285329819, + "learning_rate": 3.375095233666397e-05, + "loss": 0.0051, + "step": 62510 + }, + { + "grad_norm": 0.04370329901576042, + "learning_rate": 3.373531602139447e-05, + "loss": 0.0057, + "step": 62520 + }, + { + "grad_norm": 0.051722049713134766, + "learning_rate": 3.371968148480635e-05, + "loss": 0.0063, + "step": 62530 + }, + { + "grad_norm": 0.058978330343961716, + "learning_rate": 3.370404872860937e-05, + "loss": 0.0045, + "step": 62540 + }, + { + "grad_norm": 0.05379229784011841, + "learning_rate": 3.3688417754513145e-05, + "loss": 0.0046, + "step": 62550 + }, + { + "grad_norm": 0.03688269481062889, + "learning_rate": 3.367278856422703e-05, + "loss": 0.0061, + "step": 62560 + }, + { + "grad_norm": 0.04216654598712921, + "learning_rate": 3.365716115946021e-05, + "loss": 0.0046, + "step": 62570 + }, + { + "grad_norm": 0.03982079029083252, + "learning_rate": 3.3641535541921666e-05, + "loss": 0.0049, + "step": 62580 + }, + { + "grad_norm": 0.036116715520620346, + "learning_rate": 3.36259117133202e-05, + "loss": 0.0057, + "step": 62590 + }, + { + "grad_norm": 0.04162757843732834, + "learning_rate": 3.36102896753644e-05, + "loss": 0.0067, + "step": 62600 + }, + { + "grad_norm": 0.045933548361063004, + "learning_rate": 3.35946694297627e-05, + "loss": 0.0042, + "step": 62610 + }, + { + "grad_norm": 0.051838260143995285, + "learning_rate": 3.357905097822328e-05, + "loss": 0.0051, + "step": 62620 + }, + { + "grad_norm": 0.051639389246702194, + "learning_rate": 3.356343432245416e-05, + "loss": 0.0047, + "step": 62630 + }, + { + "grad_norm": 0.04671590402722359, + "learning_rate": 3.354781946416315e-05, + "loss": 0.0036, + "step": 62640 + }, + { + "grad_norm": 0.048213787376880646, + "learning_rate": 3.3532206405057864e-05, + "loss": 0.0053, + "step": 62650 + }, + { + "grad_norm": 0.053717803210020065, + "learning_rate": 3.351659514684573e-05, + "loss": 0.0049, + "step": 62660 + }, + { + "grad_norm": 0.03869478777050972, + "learning_rate": 3.350098569123395e-05, + "loss": 0.0042, + "step": 62670 + }, + { + "grad_norm": 0.043494127690792084, + "learning_rate": 3.348537803992958e-05, + "loss": 0.004, + "step": 62680 + }, + { + "grad_norm": 0.05078434944152832, + "learning_rate": 3.346977219463945e-05, + "loss": 0.0048, + "step": 62690 + }, + { + "grad_norm": 0.030866701155900955, + "learning_rate": 3.345416815707017e-05, + "loss": 0.0036, + "step": 62700 + }, + { + "grad_norm": 0.028343012556433678, + "learning_rate": 3.3438565928928185e-05, + "loss": 0.0039, + "step": 62710 + }, + { + "grad_norm": 0.04045509919524193, + "learning_rate": 3.3422965511919736e-05, + "loss": 0.0052, + "step": 62720 + }, + { + "grad_norm": 0.0390552319586277, + "learning_rate": 3.3407366907750846e-05, + "loss": 0.0038, + "step": 62730 + }, + { + "grad_norm": 0.0302121564745903, + "learning_rate": 3.339177011812737e-05, + "loss": 0.0044, + "step": 62740 + }, + { + "grad_norm": 0.04769182950258255, + "learning_rate": 3.3376175144754935e-05, + "loss": 0.0043, + "step": 62750 + }, + { + "grad_norm": 0.05473422631621361, + "learning_rate": 3.3360581989339004e-05, + "loss": 0.0061, + "step": 62760 + }, + { + "grad_norm": 0.05063485726714134, + "learning_rate": 3.3344990653584804e-05, + "loss": 0.0032, + "step": 62770 + }, + { + "grad_norm": 0.045301757752895355, + "learning_rate": 3.332940113919739e-05, + "loss": 0.0033, + "step": 62780 + }, + { + "grad_norm": 0.04904527962207794, + "learning_rate": 3.331381344788161e-05, + "loss": 0.0043, + "step": 62790 + }, + { + "grad_norm": 0.046499721705913544, + "learning_rate": 3.3298227581342076e-05, + "loss": 0.0046, + "step": 62800 + }, + { + "grad_norm": 0.0451543964445591, + "learning_rate": 3.328264354128329e-05, + "loss": 0.0042, + "step": 62810 + }, + { + "grad_norm": 0.04687187448143959, + "learning_rate": 3.3267061329409465e-05, + "loss": 0.0048, + "step": 62820 + }, + { + "grad_norm": 0.055553458631038666, + "learning_rate": 3.325148094742466e-05, + "loss": 0.0047, + "step": 62830 + }, + { + "grad_norm": 0.04188048094511032, + "learning_rate": 3.323590239703271e-05, + "loss": 0.0048, + "step": 62840 + }, + { + "grad_norm": 0.06183715909719467, + "learning_rate": 3.322032567993728e-05, + "loss": 0.0041, + "step": 62850 + }, + { + "grad_norm": 0.04944014176726341, + "learning_rate": 3.3204750797841774e-05, + "loss": 0.0052, + "step": 62860 + }, + { + "grad_norm": 0.024950847029685974, + "learning_rate": 3.31891777524495e-05, + "loss": 0.0048, + "step": 62870 + }, + { + "grad_norm": 0.05101829767227173, + "learning_rate": 3.317360654546347e-05, + "loss": 0.0058, + "step": 62880 + }, + { + "grad_norm": 0.040085792541503906, + "learning_rate": 3.3158037178586534e-05, + "loss": 0.0048, + "step": 62890 + }, + { + "grad_norm": 0.057581689208745956, + "learning_rate": 3.3142469653521316e-05, + "loss": 0.0037, + "step": 62900 + }, + { + "grad_norm": 0.03399064391851425, + "learning_rate": 3.3126903971970284e-05, + "loss": 0.0032, + "step": 62910 + }, + { + "grad_norm": 0.027206383645534515, + "learning_rate": 3.311134013563564e-05, + "loss": 0.0039, + "step": 62920 + }, + { + "grad_norm": 0.05112271010875702, + "learning_rate": 3.309577814621947e-05, + "loss": 0.0051, + "step": 62930 + }, + { + "grad_norm": 0.04008191451430321, + "learning_rate": 3.308021800542359e-05, + "loss": 0.0055, + "step": 62940 + }, + { + "grad_norm": 0.055963609367609024, + "learning_rate": 3.306465971494963e-05, + "loss": 0.0045, + "step": 62950 + }, + { + "grad_norm": 0.0465196929872036, + "learning_rate": 3.304910327649901e-05, + "loss": 0.0041, + "step": 62960 + }, + { + "grad_norm": 0.027304453775286674, + "learning_rate": 3.303354869177297e-05, + "loss": 0.0043, + "step": 62970 + }, + { + "grad_norm": 0.03596306964755058, + "learning_rate": 3.301799596247255e-05, + "loss": 0.004, + "step": 62980 + }, + { + "grad_norm": 0.035879261791706085, + "learning_rate": 3.300244509029854e-05, + "loss": 0.005, + "step": 62990 + }, + { + "grad_norm": 0.05681006610393524, + "learning_rate": 3.2986896076951585e-05, + "loss": 0.005, + "step": 63000 + }, + { + "grad_norm": 0.04283149540424347, + "learning_rate": 3.297134892413211e-05, + "loss": 0.0049, + "step": 63010 + }, + { + "grad_norm": 0.038373637944459915, + "learning_rate": 3.295580363354031e-05, + "loss": 0.0046, + "step": 63020 + }, + { + "grad_norm": 0.07506546378135681, + "learning_rate": 3.2940260206876194e-05, + "loss": 0.0055, + "step": 63030 + }, + { + "grad_norm": 0.042130377143621445, + "learning_rate": 3.292471864583958e-05, + "loss": 0.003, + "step": 63040 + }, + { + "grad_norm": 0.05857369303703308, + "learning_rate": 3.290917895213004e-05, + "loss": 0.0055, + "step": 63050 + }, + { + "grad_norm": 0.04551183432340622, + "learning_rate": 3.289364112744701e-05, + "loss": 0.005, + "step": 63060 + }, + { + "grad_norm": 0.05138049274682999, + "learning_rate": 3.287810517348967e-05, + "loss": 0.0049, + "step": 63070 + }, + { + "grad_norm": 0.0399809405207634, + "learning_rate": 3.2862571091957014e-05, + "loss": 0.0039, + "step": 63080 + }, + { + "grad_norm": 0.04100637137889862, + "learning_rate": 3.284703888454781e-05, + "loss": 0.0044, + "step": 63090 + }, + { + "grad_norm": 0.06843768060207367, + "learning_rate": 3.2831508552960646e-05, + "loss": 0.0065, + "step": 63100 + }, + { + "grad_norm": 0.042711637914180756, + "learning_rate": 3.281598009889387e-05, + "loss": 0.0048, + "step": 63110 + }, + { + "grad_norm": 0.03784080594778061, + "learning_rate": 3.28004535240457e-05, + "loss": 0.0038, + "step": 63120 + }, + { + "grad_norm": 0.046687427908182144, + "learning_rate": 3.278492883011407e-05, + "loss": 0.0041, + "step": 63130 + }, + { + "grad_norm": 0.031760405749082565, + "learning_rate": 3.276940601879675e-05, + "loss": 0.0039, + "step": 63140 + }, + { + "grad_norm": 0.034348975867033005, + "learning_rate": 3.275388509179127e-05, + "loss": 0.0039, + "step": 63150 + }, + { + "grad_norm": 0.0327066034078598, + "learning_rate": 3.2738366050794985e-05, + "loss": 0.0034, + "step": 63160 + }, + { + "grad_norm": 0.038009148091077805, + "learning_rate": 3.2722848897505034e-05, + "loss": 0.0047, + "step": 63170 + }, + { + "grad_norm": 0.05675942450761795, + "learning_rate": 3.2707333633618356e-05, + "loss": 0.0054, + "step": 63180 + }, + { + "grad_norm": 0.06413514912128448, + "learning_rate": 3.269182026083167e-05, + "loss": 0.005, + "step": 63190 + }, + { + "grad_norm": 0.05944152548909187, + "learning_rate": 3.2676308780841516e-05, + "loss": 0.0069, + "step": 63200 + }, + { + "grad_norm": 0.03605165332555771, + "learning_rate": 3.266079919534416e-05, + "loss": 0.0037, + "step": 63210 + }, + { + "grad_norm": 0.03694603964686394, + "learning_rate": 3.264529150603575e-05, + "loss": 0.0038, + "step": 63220 + }, + { + "grad_norm": 0.052981797605752945, + "learning_rate": 3.262978571461217e-05, + "loss": 0.0036, + "step": 63230 + }, + { + "grad_norm": 0.04664256423711777, + "learning_rate": 3.26142818227691e-05, + "loss": 0.0051, + "step": 63240 + }, + { + "grad_norm": 0.08114799112081528, + "learning_rate": 3.259877983220203e-05, + "loss": 0.0047, + "step": 63250 + }, + { + "grad_norm": 0.04213344305753708, + "learning_rate": 3.2583279744606246e-05, + "loss": 0.0037, + "step": 63260 + }, + { + "grad_norm": 0.037156954407691956, + "learning_rate": 3.25677815616768e-05, + "loss": 0.0036, + "step": 63270 + }, + { + "grad_norm": 0.03980008512735367, + "learning_rate": 3.255228528510856e-05, + "loss": 0.0046, + "step": 63280 + }, + { + "grad_norm": 0.03343170881271362, + "learning_rate": 3.253679091659617e-05, + "loss": 0.0036, + "step": 63290 + }, + { + "grad_norm": 0.03925672173500061, + "learning_rate": 3.252129845783404e-05, + "loss": 0.0035, + "step": 63300 + }, + { + "grad_norm": 0.02345498837530613, + "learning_rate": 3.2505807910516456e-05, + "loss": 0.0043, + "step": 63310 + }, + { + "grad_norm": 0.03913407400250435, + "learning_rate": 3.249031927633742e-05, + "loss": 0.0049, + "step": 63320 + }, + { + "grad_norm": 0.030353160575032234, + "learning_rate": 3.2474832556990737e-05, + "loss": 0.0042, + "step": 63330 + }, + { + "grad_norm": 0.03861727938055992, + "learning_rate": 3.245934775417002e-05, + "loss": 0.0038, + "step": 63340 + }, + { + "grad_norm": 0.037186458706855774, + "learning_rate": 3.244386486956866e-05, + "loss": 0.0047, + "step": 63350 + }, + { + "grad_norm": 0.045492984354496, + "learning_rate": 3.2428383904879846e-05, + "loss": 0.0053, + "step": 63360 + }, + { + "grad_norm": 0.04503960534930229, + "learning_rate": 3.241290486179652e-05, + "loss": 0.0032, + "step": 63370 + }, + { + "grad_norm": 0.04975968226790428, + "learning_rate": 3.239742774201151e-05, + "loss": 0.0042, + "step": 63380 + }, + { + "grad_norm": 0.042666245251894, + "learning_rate": 3.238195254721733e-05, + "loss": 0.003, + "step": 63390 + }, + { + "grad_norm": 0.05619681626558304, + "learning_rate": 3.236647927910633e-05, + "loss": 0.0044, + "step": 63400 + }, + { + "grad_norm": 0.03616129234433174, + "learning_rate": 3.2351007939370646e-05, + "loss": 0.0045, + "step": 63410 + }, + { + "grad_norm": 0.03769892826676369, + "learning_rate": 3.2335538529702184e-05, + "loss": 0.0045, + "step": 63420 + }, + { + "grad_norm": 0.05556995049118996, + "learning_rate": 3.232007105179268e-05, + "loss": 0.0047, + "step": 63430 + }, + { + "grad_norm": 0.07276339083909988, + "learning_rate": 3.230460550733363e-05, + "loss": 0.0049, + "step": 63440 + }, + { + "grad_norm": 0.07130671292543411, + "learning_rate": 3.2289141898016315e-05, + "loss": 0.0047, + "step": 63450 + }, + { + "grad_norm": 0.030901290476322174, + "learning_rate": 3.227368022553181e-05, + "loss": 0.0037, + "step": 63460 + }, + { + "grad_norm": 0.06641732156276703, + "learning_rate": 3.225822049157099e-05, + "loss": 0.0049, + "step": 63470 + }, + { + "grad_norm": 0.04182238131761551, + "learning_rate": 3.2242762697824494e-05, + "loss": 0.0056, + "step": 63480 + }, + { + "grad_norm": 0.045643649995326996, + "learning_rate": 3.222730684598279e-05, + "loss": 0.005, + "step": 63490 + }, + { + "grad_norm": 0.04895049333572388, + "learning_rate": 3.221185293773605e-05, + "loss": 0.0056, + "step": 63500 + }, + { + "grad_norm": 0.04931146651506424, + "learning_rate": 3.2196400974774354e-05, + "loss": 0.0038, + "step": 63510 + }, + { + "grad_norm": 0.04239499941468239, + "learning_rate": 3.2180950958787485e-05, + "loss": 0.0046, + "step": 63520 + }, + { + "grad_norm": 0.03526632860302925, + "learning_rate": 3.2165502891465024e-05, + "loss": 0.0034, + "step": 63530 + }, + { + "grad_norm": 0.05257919058203697, + "learning_rate": 3.215005677449636e-05, + "loss": 0.0041, + "step": 63540 + }, + { + "grad_norm": 0.0621112659573555, + "learning_rate": 3.2134612609570644e-05, + "loss": 0.0044, + "step": 63550 + }, + { + "grad_norm": 0.06351479142904282, + "learning_rate": 3.211917039837682e-05, + "loss": 0.0045, + "step": 63560 + }, + { + "grad_norm": 0.04916322976350784, + "learning_rate": 3.210373014260365e-05, + "loss": 0.0041, + "step": 63570 + }, + { + "grad_norm": 0.04517946392297745, + "learning_rate": 3.208829184393965e-05, + "loss": 0.0037, + "step": 63580 + }, + { + "grad_norm": 0.05384998023509979, + "learning_rate": 3.207285550407312e-05, + "loss": 0.0039, + "step": 63590 + }, + { + "grad_norm": 0.02122698351740837, + "learning_rate": 3.205742112469217e-05, + "loss": 0.0055, + "step": 63600 + }, + { + "grad_norm": 0.047140590846538544, + "learning_rate": 3.204198870748465e-05, + "loss": 0.005, + "step": 63610 + }, + { + "grad_norm": 0.0554514080286026, + "learning_rate": 3.202655825413824e-05, + "loss": 0.0045, + "step": 63620 + }, + { + "grad_norm": 0.049632951617240906, + "learning_rate": 3.2011129766340416e-05, + "loss": 0.0039, + "step": 63630 + }, + { + "grad_norm": 0.053267043083906174, + "learning_rate": 3.1995703245778384e-05, + "loss": 0.0075, + "step": 63640 + }, + { + "grad_norm": 0.044597964733839035, + "learning_rate": 3.1980278694139186e-05, + "loss": 0.0038, + "step": 63650 + }, + { + "grad_norm": 0.0671522244811058, + "learning_rate": 3.196485611310962e-05, + "loss": 0.0062, + "step": 63660 + }, + { + "grad_norm": 0.043491799384355545, + "learning_rate": 3.1949435504376257e-05, + "loss": 0.0046, + "step": 63670 + }, + { + "grad_norm": 0.02936355583369732, + "learning_rate": 3.1934016869625505e-05, + "loss": 0.0056, + "step": 63680 + }, + { + "grad_norm": 0.047607775777578354, + "learning_rate": 3.191860021054349e-05, + "loss": 0.0044, + "step": 63690 + }, + { + "grad_norm": 0.03515061363577843, + "learning_rate": 3.190318552881618e-05, + "loss": 0.0044, + "step": 63700 + }, + { + "grad_norm": 0.04476558789610863, + "learning_rate": 3.1887772826129285e-05, + "loss": 0.0057, + "step": 63710 + }, + { + "grad_norm": 0.05922915041446686, + "learning_rate": 3.1872362104168335e-05, + "loss": 0.004, + "step": 63720 + }, + { + "grad_norm": 0.04979357868432999, + "learning_rate": 3.185695336461861e-05, + "loss": 0.0047, + "step": 63730 + }, + { + "grad_norm": 0.03946122154593468, + "learning_rate": 3.184154660916519e-05, + "loss": 0.0053, + "step": 63740 + }, + { + "grad_norm": 0.02748391032218933, + "learning_rate": 3.1826141839492906e-05, + "loss": 0.0039, + "step": 63750 + }, + { + "grad_norm": 0.03142695873975754, + "learning_rate": 3.181073905728644e-05, + "loss": 0.0039, + "step": 63760 + }, + { + "grad_norm": 0.028866369277238846, + "learning_rate": 3.1795338264230214e-05, + "loss": 0.0045, + "step": 63770 + }, + { + "grad_norm": 0.041893720626831055, + "learning_rate": 3.1779939462008426e-05, + "loss": 0.0034, + "step": 63780 + }, + { + "grad_norm": 0.0683414489030838, + "learning_rate": 3.1764542652305066e-05, + "loss": 0.0074, + "step": 63790 + }, + { + "grad_norm": 0.040469370782375336, + "learning_rate": 3.17491478368039e-05, + "loss": 0.0043, + "step": 63800 + }, + { + "grad_norm": 0.03542159125208855, + "learning_rate": 3.173375501718847e-05, + "loss": 0.0032, + "step": 63810 + }, + { + "grad_norm": 0.046878162771463394, + "learning_rate": 3.171836419514215e-05, + "loss": 0.0049, + "step": 63820 + }, + { + "grad_norm": 0.044986069202423096, + "learning_rate": 3.170297537234803e-05, + "loss": 0.0029, + "step": 63830 + }, + { + "grad_norm": 0.04461934417486191, + "learning_rate": 3.168758855048901e-05, + "loss": 0.005, + "step": 63840 + }, + { + "grad_norm": 0.045607615262269974, + "learning_rate": 3.1672203731247785e-05, + "loss": 0.0056, + "step": 63850 + }, + { + "grad_norm": 0.03099573403596878, + "learning_rate": 3.16568209163068e-05, + "loss": 0.0034, + "step": 63860 + }, + { + "grad_norm": 0.03849051147699356, + "learning_rate": 3.1641440107348295e-05, + "loss": 0.0038, + "step": 63870 + }, + { + "grad_norm": 0.04075758159160614, + "learning_rate": 3.162606130605429e-05, + "loss": 0.0073, + "step": 63880 + }, + { + "grad_norm": 0.05196652188897133, + "learning_rate": 3.16106845141066e-05, + "loss": 0.0064, + "step": 63890 + }, + { + "grad_norm": 0.056756701320409775, + "learning_rate": 3.159530973318682e-05, + "loss": 0.0057, + "step": 63900 + }, + { + "grad_norm": 0.05311927571892738, + "learning_rate": 3.1579936964976286e-05, + "loss": 0.0057, + "step": 63910 + }, + { + "grad_norm": 0.04687805101275444, + "learning_rate": 3.156456621115615e-05, + "loss": 0.0048, + "step": 63920 + }, + { + "grad_norm": 0.03787863254547119, + "learning_rate": 3.154919747340734e-05, + "loss": 0.0042, + "step": 63930 + }, + { + "grad_norm": 0.048415910452604294, + "learning_rate": 3.153383075341054e-05, + "loss": 0.0047, + "step": 63940 + }, + { + "grad_norm": 0.05444295331835747, + "learning_rate": 3.1518466052846264e-05, + "loss": 0.0055, + "step": 63950 + }, + { + "grad_norm": 0.058011770248413086, + "learning_rate": 3.150310337339474e-05, + "loss": 0.0034, + "step": 63960 + }, + { + "grad_norm": 0.05870078504085541, + "learning_rate": 3.148774271673603e-05, + "loss": 0.005, + "step": 63970 + }, + { + "grad_norm": 0.04093289002776146, + "learning_rate": 3.147238408454995e-05, + "loss": 0.0043, + "step": 63980 + }, + { + "grad_norm": 0.034478649497032166, + "learning_rate": 3.1457027478516086e-05, + "loss": 0.0056, + "step": 63990 + }, + { + "grad_norm": 0.04027858376502991, + "learning_rate": 3.144167290031382e-05, + "loss": 0.0037, + "step": 64000 + }, + { + "grad_norm": 0.04665840417146683, + "learning_rate": 3.142632035162227e-05, + "loss": 0.0047, + "step": 64010 + }, + { + "grad_norm": 0.04733370617032051, + "learning_rate": 3.141096983412044e-05, + "loss": 0.0058, + "step": 64020 + }, + { + "grad_norm": 0.040651835501194, + "learning_rate": 3.139562134948699e-05, + "loss": 0.0043, + "step": 64030 + }, + { + "grad_norm": 0.04169111326336861, + "learning_rate": 3.1380274899400424e-05, + "loss": 0.0043, + "step": 64040 + }, + { + "grad_norm": 0.03128991648554802, + "learning_rate": 3.1364930485539e-05, + "loss": 0.0038, + "step": 64050 + }, + { + "grad_norm": 0.0357796773314476, + "learning_rate": 3.134958810958076e-05, + "loss": 0.0039, + "step": 64060 + }, + { + "grad_norm": 0.03871915861964226, + "learning_rate": 3.13342477732035e-05, + "loss": 0.003, + "step": 64070 + }, + { + "grad_norm": 0.05369843542575836, + "learning_rate": 3.131890947808487e-05, + "loss": 0.0047, + "step": 64080 + }, + { + "grad_norm": 0.04678254947066307, + "learning_rate": 3.13035732259022e-05, + "loss": 0.0045, + "step": 64090 + }, + { + "grad_norm": 0.043366771191358566, + "learning_rate": 3.1288239018332656e-05, + "loss": 0.0033, + "step": 64100 + }, + { + "grad_norm": 0.03675946965813637, + "learning_rate": 3.1272906857053164e-05, + "loss": 0.0044, + "step": 64110 + }, + { + "grad_norm": 0.029330715537071228, + "learning_rate": 3.125757674374042e-05, + "loss": 0.0051, + "step": 64120 + }, + { + "grad_norm": 0.02916182018816471, + "learning_rate": 3.124224868007088e-05, + "loss": 0.0038, + "step": 64130 + }, + { + "grad_norm": 0.040054671466350555, + "learning_rate": 3.1226922667720854e-05, + "loss": 0.0038, + "step": 64140 + }, + { + "grad_norm": 0.02711053192615509, + "learning_rate": 3.121159870836633e-05, + "loss": 0.0048, + "step": 64150 + }, + { + "grad_norm": 0.032963793724775314, + "learning_rate": 3.119627680368312e-05, + "loss": 0.0051, + "step": 64160 + }, + { + "grad_norm": 0.040024951100349426, + "learning_rate": 3.1180956955346816e-05, + "loss": 0.0035, + "step": 64170 + }, + { + "grad_norm": 0.033038314431905746, + "learning_rate": 3.116563916503276e-05, + "loss": 0.0038, + "step": 64180 + }, + { + "grad_norm": 0.03551550954580307, + "learning_rate": 3.1150323434416095e-05, + "loss": 0.0039, + "step": 64190 + }, + { + "grad_norm": 0.033697742968797684, + "learning_rate": 3.1135009765171696e-05, + "loss": 0.0042, + "step": 64200 + }, + { + "grad_norm": 0.03313601016998291, + "learning_rate": 3.111969815897429e-05, + "loss": 0.0038, + "step": 64210 + }, + { + "grad_norm": 0.034465864300727844, + "learning_rate": 3.1104388617498295e-05, + "loss": 0.0041, + "step": 64220 + }, + { + "grad_norm": 0.03210853412747383, + "learning_rate": 3.1089081142417966e-05, + "loss": 0.0044, + "step": 64230 + }, + { + "grad_norm": 0.03803788125514984, + "learning_rate": 3.1073775735407285e-05, + "loss": 0.0043, + "step": 64240 + }, + { + "grad_norm": 0.027904225513339043, + "learning_rate": 3.1058472398140035e-05, + "loss": 0.0039, + "step": 64250 + }, + { + "grad_norm": 0.034758955240249634, + "learning_rate": 3.1043171132289736e-05, + "loss": 0.0045, + "step": 64260 + }, + { + "grad_norm": 0.035574156790971756, + "learning_rate": 3.1027871939529774e-05, + "loss": 0.0055, + "step": 64270 + }, + { + "grad_norm": 0.03966132923960686, + "learning_rate": 3.1012574821533214e-05, + "loss": 0.0035, + "step": 64280 + }, + { + "grad_norm": 0.029170146211981773, + "learning_rate": 3.099727977997291e-05, + "loss": 0.0039, + "step": 64290 + }, + { + "grad_norm": 0.04216257482767105, + "learning_rate": 3.098198681652154e-05, + "loss": 0.0051, + "step": 64300 + }, + { + "grad_norm": 0.042243171483278275, + "learning_rate": 3.0966695932851475e-05, + "loss": 0.0036, + "step": 64310 + }, + { + "grad_norm": 0.052581217139959335, + "learning_rate": 3.0951407130634926e-05, + "loss": 0.005, + "step": 64320 + }, + { + "grad_norm": 0.04736572504043579, + "learning_rate": 3.0936120411543864e-05, + "loss": 0.0061, + "step": 64330 + }, + { + "grad_norm": 0.041199274361133575, + "learning_rate": 3.0920835777250015e-05, + "loss": 0.0046, + "step": 64340 + }, + { + "grad_norm": 0.042282626032829285, + "learning_rate": 3.090555322942488e-05, + "loss": 0.0048, + "step": 64350 + }, + { + "grad_norm": 0.03488540276885033, + "learning_rate": 3.089027276973973e-05, + "loss": 0.0045, + "step": 64360 + }, + { + "grad_norm": 0.033720389008522034, + "learning_rate": 3.0874994399865615e-05, + "loss": 0.0042, + "step": 64370 + }, + { + "grad_norm": 0.049881964921951294, + "learning_rate": 3.085971812147337e-05, + "loss": 0.0046, + "step": 64380 + }, + { + "grad_norm": 0.04253743216395378, + "learning_rate": 3.0844443936233557e-05, + "loss": 0.0045, + "step": 64390 + }, + { + "grad_norm": 0.028307702392339706, + "learning_rate": 3.082917184581657e-05, + "loss": 0.0041, + "step": 64400 + }, + { + "grad_norm": 0.03254227712750435, + "learning_rate": 3.0813901851892513e-05, + "loss": 0.0036, + "step": 64410 + }, + { + "grad_norm": 0.03757401183247566, + "learning_rate": 3.0798633956131316e-05, + "loss": 0.0036, + "step": 64420 + }, + { + "grad_norm": 0.0236307792365551, + "learning_rate": 3.078336816020264e-05, + "loss": 0.0032, + "step": 64430 + }, + { + "grad_norm": 0.043913278728723526, + "learning_rate": 3.0768104465775935e-05, + "loss": 0.0036, + "step": 64440 + }, + { + "grad_norm": 0.04250115156173706, + "learning_rate": 3.0752842874520396e-05, + "loss": 0.0032, + "step": 64450 + }, + { + "grad_norm": 0.049628131091594696, + "learning_rate": 3.073758338810503e-05, + "loss": 0.004, + "step": 64460 + }, + { + "grad_norm": 0.046839676797389984, + "learning_rate": 3.072232600819859e-05, + "loss": 0.0049, + "step": 64470 + }, + { + "grad_norm": 0.03784501180052757, + "learning_rate": 3.070707073646959e-05, + "loss": 0.004, + "step": 64480 + }, + { + "grad_norm": 0.040332965552806854, + "learning_rate": 3.069181757458633e-05, + "loss": 0.0037, + "step": 64490 + }, + { + "grad_norm": 0.03519829735159874, + "learning_rate": 3.067656652421687e-05, + "loss": 0.0044, + "step": 64500 + }, + { + "grad_norm": 0.04013363644480705, + "learning_rate": 3.0661317587029025e-05, + "loss": 0.0049, + "step": 64510 + }, + { + "grad_norm": 0.03917133808135986, + "learning_rate": 3.064607076469043e-05, + "loss": 0.0048, + "step": 64520 + }, + { + "grad_norm": 0.025975720956921577, + "learning_rate": 3.063082605886845e-05, + "loss": 0.0046, + "step": 64530 + }, + { + "grad_norm": 0.03391822427511215, + "learning_rate": 3.061558347123019e-05, + "loss": 0.0034, + "step": 64540 + }, + { + "grad_norm": 0.03775259479880333, + "learning_rate": 3.060034300344259e-05, + "loss": 0.0051, + "step": 64550 + }, + { + "grad_norm": 0.041319724172353745, + "learning_rate": 3.058510465717231e-05, + "loss": 0.0039, + "step": 64560 + }, + { + "grad_norm": 0.03533966466784477, + "learning_rate": 3.0569868434085804e-05, + "loss": 0.0051, + "step": 64570 + }, + { + "grad_norm": 0.0430835522711277, + "learning_rate": 3.055463433584924e-05, + "loss": 0.0046, + "step": 64580 + }, + { + "grad_norm": 0.026928214356303215, + "learning_rate": 3.0539402364128656e-05, + "loss": 0.0033, + "step": 64590 + }, + { + "grad_norm": 0.04270255193114281, + "learning_rate": 3.052417252058977e-05, + "loss": 0.0038, + "step": 64600 + }, + { + "grad_norm": 0.032426778227090836, + "learning_rate": 3.0508944806898092e-05, + "loss": 0.0032, + "step": 64610 + }, + { + "grad_norm": 0.06750599294900894, + "learning_rate": 3.04937192247189e-05, + "loss": 0.004, + "step": 64620 + }, + { + "grad_norm": 0.049002889543771744, + "learning_rate": 3.047849577571725e-05, + "loss": 0.0047, + "step": 64630 + }, + { + "grad_norm": 0.04082095995545387, + "learning_rate": 3.046327446155795e-05, + "loss": 0.0036, + "step": 64640 + }, + { + "grad_norm": 0.04337603598833084, + "learning_rate": 3.0448055283905587e-05, + "loss": 0.0053, + "step": 64650 + }, + { + "grad_norm": 0.05250680074095726, + "learning_rate": 3.0432838244424494e-05, + "loss": 0.0054, + "step": 64660 + }, + { + "grad_norm": 0.044039711356163025, + "learning_rate": 3.04176233447788e-05, + "loss": 0.004, + "step": 64670 + }, + { + "grad_norm": 0.05255868658423424, + "learning_rate": 3.040241058663238e-05, + "loss": 0.0054, + "step": 64680 + }, + { + "grad_norm": 0.048782169818878174, + "learning_rate": 3.038719997164887e-05, + "loss": 0.0059, + "step": 64690 + }, + { + "grad_norm": 0.054037757217884064, + "learning_rate": 3.0371991501491683e-05, + "loss": 0.0037, + "step": 64700 + }, + { + "grad_norm": 0.04757939279079437, + "learning_rate": 3.0356785177823975e-05, + "loss": 0.0045, + "step": 64710 + }, + { + "grad_norm": 0.042796541005373, + "learning_rate": 3.0341581002308726e-05, + "loss": 0.0044, + "step": 64720 + }, + { + "grad_norm": 0.040984202176332474, + "learning_rate": 3.0326378976608627e-05, + "loss": 0.0064, + "step": 64730 + }, + { + "grad_norm": 0.047069430351257324, + "learning_rate": 3.0311179102386146e-05, + "loss": 0.0039, + "step": 64740 + }, + { + "grad_norm": 0.04008230194449425, + "learning_rate": 3.0295981381303513e-05, + "loss": 0.0057, + "step": 64750 + }, + { + "grad_norm": 0.03668077662587166, + "learning_rate": 3.0280785815022727e-05, + "loss": 0.0035, + "step": 64760 + }, + { + "grad_norm": 0.047428473830223083, + "learning_rate": 3.0265592405205544e-05, + "loss": 0.0048, + "step": 64770 + }, + { + "grad_norm": 0.02823125757277012, + "learning_rate": 3.0250401153513517e-05, + "loss": 0.004, + "step": 64780 + }, + { + "grad_norm": 0.03983530402183533, + "learning_rate": 3.0235212061607936e-05, + "loss": 0.0041, + "step": 64790 + }, + { + "grad_norm": 0.04699871316552162, + "learning_rate": 3.0220025131149842e-05, + "loss": 0.0034, + "step": 64800 + }, + { + "grad_norm": 0.04928473010659218, + "learning_rate": 3.020484036380007e-05, + "loss": 0.0033, + "step": 64810 + }, + { + "grad_norm": 0.04253136366605759, + "learning_rate": 3.0189657761219188e-05, + "loss": 0.0047, + "step": 64820 + }, + { + "grad_norm": 0.04791880398988724, + "learning_rate": 3.017447732506753e-05, + "loss": 0.004, + "step": 64830 + }, + { + "grad_norm": 0.03941759467124939, + "learning_rate": 3.015929905700524e-05, + "loss": 0.0045, + "step": 64840 + }, + { + "grad_norm": 0.03777531906962395, + "learning_rate": 3.014412295869218e-05, + "loss": 0.0048, + "step": 64850 + }, + { + "grad_norm": 0.04825013503432274, + "learning_rate": 3.0128949031787977e-05, + "loss": 0.0048, + "step": 64860 + }, + { + "grad_norm": 0.04891957342624664, + "learning_rate": 3.0113777277952022e-05, + "loss": 0.0044, + "step": 64870 + }, + { + "grad_norm": 0.04235240817070007, + "learning_rate": 3.0098607698843496e-05, + "loss": 0.0035, + "step": 64880 + }, + { + "grad_norm": 0.03793158382177353, + "learning_rate": 3.0083440296121312e-05, + "loss": 0.0043, + "step": 64890 + }, + { + "grad_norm": 0.03421254828572273, + "learning_rate": 3.0068275071444142e-05, + "loss": 0.0034, + "step": 64900 + }, + { + "grad_norm": 0.043024126440286636, + "learning_rate": 3.0053112026470442e-05, + "loss": 0.0048, + "step": 64910 + }, + { + "grad_norm": 0.036231204867362976, + "learning_rate": 3.0037951162858434e-05, + "loss": 0.0049, + "step": 64920 + }, + { + "grad_norm": 0.034737441688776016, + "learning_rate": 3.002279248226607e-05, + "loss": 0.0043, + "step": 64930 + }, + { + "grad_norm": 0.0308972354978323, + "learning_rate": 3.000763598635109e-05, + "loss": 0.0044, + "step": 64940 + }, + { + "grad_norm": 0.04302547127008438, + "learning_rate": 2.999248167677098e-05, + "loss": 0.0043, + "step": 64950 + }, + { + "grad_norm": 0.03640067204833031, + "learning_rate": 2.9977329555182976e-05, + "loss": 0.004, + "step": 64960 + }, + { + "grad_norm": 0.04165857657790184, + "learning_rate": 2.9962179623244124e-05, + "loss": 0.0043, + "step": 64970 + }, + { + "grad_norm": 0.027839135378599167, + "learning_rate": 2.9947031882611175e-05, + "loss": 0.0045, + "step": 64980 + }, + { + "grad_norm": 0.04082689806818962, + "learning_rate": 2.9931886334940685e-05, + "loss": 0.0057, + "step": 64990 + }, + { + "grad_norm": 0.05389329046010971, + "learning_rate": 2.9916742981888923e-05, + "loss": 0.0041, + "step": 65000 + }, + { + "grad_norm": 0.0529741607606411, + "learning_rate": 2.990160182511196e-05, + "loss": 0.005, + "step": 65010 + }, + { + "grad_norm": 0.03655575215816498, + "learning_rate": 2.9886462866265574e-05, + "loss": 0.0038, + "step": 65020 + }, + { + "grad_norm": 0.044002413749694824, + "learning_rate": 2.9871326107005392e-05, + "loss": 0.0035, + "step": 65030 + }, + { + "grad_norm": 0.049586765468120575, + "learning_rate": 2.9856191548986735e-05, + "loss": 0.0048, + "step": 65040 + }, + { + "grad_norm": 0.03687158599495888, + "learning_rate": 2.9841059193864672e-05, + "loss": 0.0029, + "step": 65050 + }, + { + "grad_norm": 0.03222857415676117, + "learning_rate": 2.982592904329407e-05, + "loss": 0.0034, + "step": 65060 + }, + { + "grad_norm": 0.02734816074371338, + "learning_rate": 2.981080109892954e-05, + "loss": 0.0042, + "step": 65070 + }, + { + "grad_norm": 0.026577528566122055, + "learning_rate": 2.9795675362425434e-05, + "loss": 0.0046, + "step": 65080 + }, + { + "grad_norm": 0.027128687128424644, + "learning_rate": 2.97805518354359e-05, + "loss": 0.0028, + "step": 65090 + }, + { + "grad_norm": 0.061891477555036545, + "learning_rate": 2.9765430519614825e-05, + "loss": 0.0046, + "step": 65100 + }, + { + "grad_norm": 0.022587094455957413, + "learning_rate": 2.975031141661584e-05, + "loss": 0.0032, + "step": 65110 + }, + { + "grad_norm": 0.049025993794202805, + "learning_rate": 2.9735194528092347e-05, + "loss": 0.0058, + "step": 65120 + }, + { + "grad_norm": 0.036505505442619324, + "learning_rate": 2.9720079855697525e-05, + "loss": 0.0048, + "step": 65130 + }, + { + "grad_norm": 0.04194823279976845, + "learning_rate": 2.970496740108427e-05, + "loss": 0.0033, + "step": 65140 + }, + { + "grad_norm": 0.044213131070137024, + "learning_rate": 2.968985716590527e-05, + "loss": 0.0042, + "step": 65150 + }, + { + "grad_norm": 0.021083667874336243, + "learning_rate": 2.9674749151812953e-05, + "loss": 0.005, + "step": 65160 + }, + { + "grad_norm": 0.04816821590065956, + "learning_rate": 2.965964336045951e-05, + "loss": 0.0041, + "step": 65170 + }, + { + "grad_norm": 0.03752179816365242, + "learning_rate": 2.9644539793496894e-05, + "loss": 0.005, + "step": 65180 + }, + { + "grad_norm": 0.02888570725917816, + "learning_rate": 2.9629438452576807e-05, + "loss": 0.0037, + "step": 65190 + }, + { + "grad_norm": 0.03530872240662575, + "learning_rate": 2.9614339339350698e-05, + "loss": 0.005, + "step": 65200 + }, + { + "grad_norm": 0.04987562075257301, + "learning_rate": 2.959924245546979e-05, + "loss": 0.0031, + "step": 65210 + }, + { + "grad_norm": 0.043164461851119995, + "learning_rate": 2.9584147802585038e-05, + "loss": 0.0038, + "step": 65220 + }, + { + "grad_norm": 0.038400620222091675, + "learning_rate": 2.956905538234721e-05, + "loss": 0.0036, + "step": 65230 + }, + { + "grad_norm": 0.0415305532515049, + "learning_rate": 2.9553965196406764e-05, + "loss": 0.0042, + "step": 65240 + }, + { + "grad_norm": 0.047127410769462585, + "learning_rate": 2.9538877246413943e-05, + "loss": 0.0028, + "step": 65250 + }, + { + "grad_norm": 0.04482370242476463, + "learning_rate": 2.952379153401874e-05, + "loss": 0.0045, + "step": 65260 + }, + { + "grad_norm": 0.030465558171272278, + "learning_rate": 2.9508708060870914e-05, + "loss": 0.0045, + "step": 65270 + }, + { + "grad_norm": 0.03929575905203819, + "learning_rate": 2.9493626828619948e-05, + "loss": 0.0036, + "step": 65280 + }, + { + "grad_norm": 0.0213788952678442, + "learning_rate": 2.9478547838915137e-05, + "loss": 0.0033, + "step": 65290 + }, + { + "grad_norm": 0.04013752564787865, + "learning_rate": 2.946347109340548e-05, + "loss": 0.0034, + "step": 65300 + }, + { + "grad_norm": 0.04005611315369606, + "learning_rate": 2.944839659373976e-05, + "loss": 0.0041, + "step": 65310 + }, + { + "grad_norm": 0.05898970365524292, + "learning_rate": 2.943332434156647e-05, + "loss": 0.0043, + "step": 65320 + }, + { + "grad_norm": 0.06567224115133286, + "learning_rate": 2.9418254338533912e-05, + "loss": 0.0037, + "step": 65330 + }, + { + "grad_norm": 0.06012042984366417, + "learning_rate": 2.940318658629011e-05, + "loss": 0.0039, + "step": 65340 + }, + { + "grad_norm": 0.04922141134738922, + "learning_rate": 2.938812108648287e-05, + "loss": 0.0036, + "step": 65350 + }, + { + "grad_norm": 0.054313644766807556, + "learning_rate": 2.937305784075972e-05, + "loss": 0.0049, + "step": 65360 + }, + { + "grad_norm": 0.04640129581093788, + "learning_rate": 2.9357996850767937e-05, + "loss": 0.0053, + "step": 65370 + }, + { + "grad_norm": 0.055792514234781265, + "learning_rate": 2.9342938118154595e-05, + "loss": 0.0043, + "step": 65380 + }, + { + "grad_norm": 0.03697536140680313, + "learning_rate": 2.9327881644566485e-05, + "loss": 0.0039, + "step": 65390 + }, + { + "grad_norm": 0.04214378818869591, + "learning_rate": 2.9312827431650157e-05, + "loss": 0.0032, + "step": 65400 + }, + { + "grad_norm": 0.0692359209060669, + "learning_rate": 2.92977754810519e-05, + "loss": 0.0058, + "step": 65410 + }, + { + "grad_norm": 0.04512036591768265, + "learning_rate": 2.928272579441781e-05, + "loss": 0.0047, + "step": 65420 + }, + { + "grad_norm": 0.048276275396347046, + "learning_rate": 2.9267678373393676e-05, + "loss": 0.0039, + "step": 65430 + }, + { + "grad_norm": 0.058070071041584015, + "learning_rate": 2.925263321962507e-05, + "loss": 0.0053, + "step": 65440 + }, + { + "grad_norm": 0.043257590383291245, + "learning_rate": 2.9237590334757292e-05, + "loss": 0.0048, + "step": 65450 + }, + { + "grad_norm": 0.03157200291752815, + "learning_rate": 2.922254972043543e-05, + "loss": 0.0035, + "step": 65460 + }, + { + "grad_norm": 0.04384966567158699, + "learning_rate": 2.9207511378304265e-05, + "loss": 0.0045, + "step": 65470 + }, + { + "grad_norm": 0.04578528180718422, + "learning_rate": 2.9192475310008428e-05, + "loss": 0.0046, + "step": 65480 + }, + { + "grad_norm": 0.035422082990407944, + "learning_rate": 2.917744151719219e-05, + "loss": 0.0035, + "step": 65490 + }, + { + "grad_norm": 0.031091418117284775, + "learning_rate": 2.916241000149964e-05, + "loss": 0.004, + "step": 65500 + }, + { + "grad_norm": 0.03222375363111496, + "learning_rate": 2.9147380764574622e-05, + "loss": 0.0028, + "step": 65510 + }, + { + "grad_norm": 0.03554492071270943, + "learning_rate": 2.9132353808060674e-05, + "loss": 0.0056, + "step": 65520 + }, + { + "grad_norm": 0.026898304000496864, + "learning_rate": 2.9117329133601134e-05, + "loss": 0.0033, + "step": 65530 + }, + { + "grad_norm": 0.0371001735329628, + "learning_rate": 2.9102306742839093e-05, + "loss": 0.0032, + "step": 65540 + }, + { + "grad_norm": 0.032883718609809875, + "learning_rate": 2.9087286637417375e-05, + "loss": 0.0051, + "step": 65550 + }, + { + "grad_norm": 0.037453215569257736, + "learning_rate": 2.9072268818978537e-05, + "loss": 0.003, + "step": 65560 + }, + { + "grad_norm": 0.06699896603822708, + "learning_rate": 2.905725328916493e-05, + "loss": 0.0044, + "step": 65570 + }, + { + "grad_norm": 0.04813608527183533, + "learning_rate": 2.9042240049618608e-05, + "loss": 0.0043, + "step": 65580 + }, + { + "grad_norm": 0.030267832800745964, + "learning_rate": 2.9027229101981413e-05, + "loss": 0.0043, + "step": 65590 + }, + { + "grad_norm": 0.05329272896051407, + "learning_rate": 2.901222044789488e-05, + "loss": 0.0063, + "step": 65600 + }, + { + "grad_norm": 0.04112730547785759, + "learning_rate": 2.89972140890004e-05, + "loss": 0.0048, + "step": 65610 + }, + { + "grad_norm": 0.05684660002589226, + "learning_rate": 2.8982210026938988e-05, + "loss": 0.0064, + "step": 65620 + }, + { + "grad_norm": 0.049122411757707596, + "learning_rate": 2.896720826335151e-05, + "loss": 0.0038, + "step": 65630 + }, + { + "grad_norm": 0.03922000527381897, + "learning_rate": 2.8952208799878493e-05, + "loss": 0.0066, + "step": 65640 + }, + { + "grad_norm": 0.039980821311473846, + "learning_rate": 2.89372116381603e-05, + "loss": 0.0049, + "step": 65650 + }, + { + "grad_norm": 0.039266377687454224, + "learning_rate": 2.8922216779836932e-05, + "loss": 0.0036, + "step": 65660 + }, + { + "grad_norm": 0.055783092975616455, + "learning_rate": 2.8907224226548285e-05, + "loss": 0.0058, + "step": 65670 + }, + { + "grad_norm": 0.037755921483039856, + "learning_rate": 2.8892233979933868e-05, + "loss": 0.003, + "step": 65680 + }, + { + "grad_norm": 0.03862232714891434, + "learning_rate": 2.8877246041633026e-05, + "loss": 0.0057, + "step": 65690 + }, + { + "grad_norm": 0.04383091256022453, + "learning_rate": 2.886226041328478e-05, + "loss": 0.0045, + "step": 65700 + }, + { + "grad_norm": 0.0374910905957222, + "learning_rate": 2.8847277096527957e-05, + "loss": 0.0057, + "step": 65710 + }, + { + "grad_norm": 0.03390152007341385, + "learning_rate": 2.8832296093001105e-05, + "loss": 0.0029, + "step": 65720 + }, + { + "grad_norm": 0.044696319848299026, + "learning_rate": 2.8817317404342547e-05, + "loss": 0.0039, + "step": 65730 + }, + { + "grad_norm": 0.03428943082690239, + "learning_rate": 2.880234103219029e-05, + "loss": 0.0039, + "step": 65740 + }, + { + "grad_norm": 0.03620893508195877, + "learning_rate": 2.878736697818215e-05, + "loss": 0.0039, + "step": 65750 + }, + { + "grad_norm": 0.039943091571331024, + "learning_rate": 2.877239524395568e-05, + "loss": 0.0043, + "step": 65760 + }, + { + "grad_norm": 0.033139683306217194, + "learning_rate": 2.8757425831148134e-05, + "loss": 0.0033, + "step": 65770 + }, + { + "grad_norm": 0.03582308813929558, + "learning_rate": 2.874245874139658e-05, + "loss": 0.0033, + "step": 65780 + }, + { + "grad_norm": 0.045226600021123886, + "learning_rate": 2.8727493976337737e-05, + "loss": 0.0054, + "step": 65790 + }, + { + "grad_norm": 0.035157304257154465, + "learning_rate": 2.8712531537608218e-05, + "loss": 0.0039, + "step": 65800 + }, + { + "grad_norm": 0.03702333942055702, + "learning_rate": 2.8697571426844217e-05, + "loss": 0.0051, + "step": 65810 + }, + { + "grad_norm": 0.0317205972969532, + "learning_rate": 2.86826136456818e-05, + "loss": 0.003, + "step": 65820 + }, + { + "grad_norm": 0.0362372063100338, + "learning_rate": 2.8667658195756685e-05, + "loss": 0.0042, + "step": 65830 + }, + { + "grad_norm": 0.04029519483447075, + "learning_rate": 2.8652705078704412e-05, + "loss": 0.005, + "step": 65840 + }, + { + "grad_norm": 0.03196307271718979, + "learning_rate": 2.863775429616018e-05, + "loss": 0.004, + "step": 65850 + }, + { + "grad_norm": 0.027989236637949944, + "learning_rate": 2.862280584975906e-05, + "loss": 0.0057, + "step": 65860 + }, + { + "grad_norm": 0.04068707674741745, + "learning_rate": 2.8607859741135735e-05, + "loss": 0.0045, + "step": 65870 + }, + { + "grad_norm": 0.050092633813619614, + "learning_rate": 2.8592915971924723e-05, + "loss": 0.0054, + "step": 65880 + }, + { + "grad_norm": 0.02709057554602623, + "learning_rate": 2.8577974543760212e-05, + "loss": 0.0045, + "step": 65890 + }, + { + "grad_norm": 0.06899118423461914, + "learning_rate": 2.8563035458276222e-05, + "loss": 0.0058, + "step": 65900 + }, + { + "grad_norm": 0.04118320718407631, + "learning_rate": 2.8548098717106424e-05, + "loss": 0.003, + "step": 65910 + }, + { + "grad_norm": 0.03407575562596321, + "learning_rate": 2.85331643218843e-05, + "loss": 0.003, + "step": 65920 + }, + { + "grad_norm": 0.05826601758599281, + "learning_rate": 2.8518232274243046e-05, + "loss": 0.0061, + "step": 65930 + }, + { + "grad_norm": 0.06422383338212967, + "learning_rate": 2.8503302575815638e-05, + "loss": 0.0034, + "step": 65940 + }, + { + "grad_norm": 0.03190331161022186, + "learning_rate": 2.8488375228234716e-05, + "loss": 0.0038, + "step": 65950 + }, + { + "grad_norm": 0.045138951390981674, + "learning_rate": 2.8473450233132738e-05, + "loss": 0.0043, + "step": 65960 + }, + { + "grad_norm": 0.0465497225522995, + "learning_rate": 2.8458527592141898e-05, + "loss": 0.0036, + "step": 65970 + }, + { + "grad_norm": 0.04150128364562988, + "learning_rate": 2.8443607306894073e-05, + "loss": 0.0039, + "step": 65980 + }, + { + "grad_norm": 0.04050072655081749, + "learning_rate": 2.8428689379020946e-05, + "loss": 0.0036, + "step": 65990 + }, + { + "grad_norm": 0.03431962803006172, + "learning_rate": 2.841377381015391e-05, + "loss": 0.0046, + "step": 66000 + }, + { + "grad_norm": 0.024364225566387177, + "learning_rate": 2.8398860601924144e-05, + "loss": 0.0033, + "step": 66010 + }, + { + "grad_norm": 0.04599660634994507, + "learning_rate": 2.838394975596248e-05, + "loss": 0.004, + "step": 66020 + }, + { + "grad_norm": 0.04460862651467323, + "learning_rate": 2.83690412738996e-05, + "loss": 0.0053, + "step": 66030 + }, + { + "grad_norm": 0.0450311154127121, + "learning_rate": 2.835413515736579e-05, + "loss": 0.0051, + "step": 66040 + }, + { + "grad_norm": 0.04443848133087158, + "learning_rate": 2.8339231407991267e-05, + "loss": 0.0039, + "step": 66050 + }, + { + "grad_norm": 0.030568299815058708, + "learning_rate": 2.832433002740581e-05, + "loss": 0.003, + "step": 66060 + }, + { + "grad_norm": 0.05425732210278511, + "learning_rate": 2.8309431017239047e-05, + "loss": 0.0047, + "step": 66070 + }, + { + "grad_norm": 0.04095695912837982, + "learning_rate": 2.829453437912028e-05, + "loss": 0.0043, + "step": 66080 + }, + { + "grad_norm": 0.04825190454721451, + "learning_rate": 2.8279640114678617e-05, + "loss": 0.0048, + "step": 66090 + }, + { + "grad_norm": 0.04799070209264755, + "learning_rate": 2.8264748225542836e-05, + "loss": 0.0052, + "step": 66100 + }, + { + "grad_norm": 0.046096429228782654, + "learning_rate": 2.8249858713341516e-05, + "loss": 0.0053, + "step": 66110 + }, + { + "grad_norm": 0.04126433655619621, + "learning_rate": 2.823497157970294e-05, + "loss": 0.0043, + "step": 66120 + }, + { + "grad_norm": 0.037394702434539795, + "learning_rate": 2.8220086826255164e-05, + "loss": 0.0042, + "step": 66130 + }, + { + "grad_norm": 0.03342019394040108, + "learning_rate": 2.8205204454625934e-05, + "loss": 0.0032, + "step": 66140 + }, + { + "grad_norm": 0.04215827211737633, + "learning_rate": 2.8190324466442798e-05, + "loss": 0.0064, + "step": 66150 + }, + { + "grad_norm": 0.05273037403821945, + "learning_rate": 2.8175446863332965e-05, + "loss": 0.0048, + "step": 66160 + }, + { + "grad_norm": 0.034385550767183304, + "learning_rate": 2.816057164692345e-05, + "loss": 0.0037, + "step": 66170 + }, + { + "grad_norm": 0.028545871376991272, + "learning_rate": 2.8145698818840992e-05, + "loss": 0.0027, + "step": 66180 + }, + { + "grad_norm": 0.06342946738004684, + "learning_rate": 2.8130828380712065e-05, + "loss": 0.0053, + "step": 66190 + }, + { + "grad_norm": 0.03781837224960327, + "learning_rate": 2.811596033416285e-05, + "loss": 0.0038, + "step": 66200 + }, + { + "grad_norm": 0.04342251643538475, + "learning_rate": 2.8101094680819307e-05, + "loss": 0.0038, + "step": 66210 + }, + { + "grad_norm": 0.03198975324630737, + "learning_rate": 2.808623142230715e-05, + "loss": 0.0041, + "step": 66220 + }, + { + "grad_norm": 0.04126502200961113, + "learning_rate": 2.8071370560251757e-05, + "loss": 0.0033, + "step": 66230 + }, + { + "grad_norm": 0.03994597867131233, + "learning_rate": 2.805651209627831e-05, + "loss": 0.0064, + "step": 66240 + }, + { + "grad_norm": 0.035007789731025696, + "learning_rate": 2.804165603201171e-05, + "loss": 0.0059, + "step": 66250 + }, + { + "grad_norm": 0.053089629858732224, + "learning_rate": 2.802680236907661e-05, + "loss": 0.0046, + "step": 66260 + }, + { + "grad_norm": 0.052784863859415054, + "learning_rate": 2.8011951109097344e-05, + "loss": 0.0046, + "step": 66270 + }, + { + "grad_norm": 0.0537596121430397, + "learning_rate": 2.7997102253698072e-05, + "loss": 0.0045, + "step": 66280 + }, + { + "grad_norm": 0.04196944460272789, + "learning_rate": 2.7982255804502588e-05, + "loss": 0.0041, + "step": 66290 + }, + { + "grad_norm": 0.03670016676187515, + "learning_rate": 2.7967411763134516e-05, + "loss": 0.0053, + "step": 66300 + }, + { + "grad_norm": 0.0576372966170311, + "learning_rate": 2.7952570131217166e-05, + "loss": 0.006, + "step": 66310 + }, + { + "grad_norm": 0.03862733021378517, + "learning_rate": 2.7937730910373618e-05, + "loss": 0.0046, + "step": 66320 + }, + { + "grad_norm": 0.06729298830032349, + "learning_rate": 2.7922894102226628e-05, + "loss": 0.0041, + "step": 66330 + }, + { + "grad_norm": 0.04327411204576492, + "learning_rate": 2.790805970839876e-05, + "loss": 0.0059, + "step": 66340 + }, + { + "grad_norm": 0.03786587342619896, + "learning_rate": 2.789322773051225e-05, + "loss": 0.0035, + "step": 66350 + }, + { + "grad_norm": 0.030447855591773987, + "learning_rate": 2.7878398170189124e-05, + "loss": 0.0036, + "step": 66360 + }, + { + "grad_norm": 0.03774154558777809, + "learning_rate": 2.7863571029051112e-05, + "loss": 0.004, + "step": 66370 + }, + { + "grad_norm": 0.03496064245700836, + "learning_rate": 2.784874630871971e-05, + "loss": 0.003, + "step": 66380 + }, + { + "grad_norm": 0.046665776520967484, + "learning_rate": 2.7833924010816086e-05, + "loss": 0.0038, + "step": 66390 + }, + { + "grad_norm": 0.0649498924612999, + "learning_rate": 2.7819104136961226e-05, + "loss": 0.0053, + "step": 66400 + }, + { + "grad_norm": 0.03893500939011574, + "learning_rate": 2.780428668877577e-05, + "loss": 0.0039, + "step": 66410 + }, + { + "grad_norm": 0.06080934777855873, + "learning_rate": 2.7789471667880152e-05, + "loss": 0.0055, + "step": 66420 + }, + { + "grad_norm": 0.03247353807091713, + "learning_rate": 2.7774659075894523e-05, + "loss": 0.0037, + "step": 66430 + }, + { + "grad_norm": 0.030233757570385933, + "learning_rate": 2.7759848914438778e-05, + "loss": 0.0039, + "step": 66440 + }, + { + "grad_norm": 0.03745598718523979, + "learning_rate": 2.7745041185132496e-05, + "loss": 0.0047, + "step": 66450 + }, + { + "grad_norm": 0.03452708199620247, + "learning_rate": 2.773023588959506e-05, + "loss": 0.003, + "step": 66460 + }, + { + "grad_norm": 0.03702197223901749, + "learning_rate": 2.7715433029445555e-05, + "loss": 0.0053, + "step": 66470 + }, + { + "grad_norm": 0.04073164612054825, + "learning_rate": 2.7700632606302766e-05, + "loss": 0.0031, + "step": 66480 + }, + { + "grad_norm": 0.036330901086330414, + "learning_rate": 2.768583462178528e-05, + "loss": 0.0056, + "step": 66490 + }, + { + "grad_norm": 0.05136467143893242, + "learning_rate": 2.767103907751136e-05, + "loss": 0.0036, + "step": 66500 + }, + { + "grad_norm": 0.042482890188694, + "learning_rate": 2.7656245975099065e-05, + "loss": 0.0037, + "step": 66510 + }, + { + "grad_norm": 0.05293336510658264, + "learning_rate": 2.7641455316166093e-05, + "loss": 0.0043, + "step": 66520 + }, + { + "grad_norm": 0.026512520387768745, + "learning_rate": 2.7626667102329973e-05, + "loss": 0.0032, + "step": 66530 + }, + { + "grad_norm": 0.0328996367752552, + "learning_rate": 2.761188133520788e-05, + "loss": 0.0037, + "step": 66540 + }, + { + "grad_norm": 0.02872333489358425, + "learning_rate": 2.7597098016416784e-05, + "loss": 0.0045, + "step": 66550 + }, + { + "grad_norm": 0.03182058036327362, + "learning_rate": 2.7582317147573368e-05, + "loss": 0.0037, + "step": 66560 + }, + { + "grad_norm": 0.05634007230401039, + "learning_rate": 2.7567538730294064e-05, + "loss": 0.0041, + "step": 66570 + }, + { + "grad_norm": 0.022910015657544136, + "learning_rate": 2.7552762766194973e-05, + "loss": 0.0041, + "step": 66580 + }, + { + "grad_norm": 0.04456500709056854, + "learning_rate": 2.753798925689202e-05, + "loss": 0.0044, + "step": 66590 + }, + { + "grad_norm": 0.0429668165743351, + "learning_rate": 2.7523218204000774e-05, + "loss": 0.0038, + "step": 66600 + }, + { + "grad_norm": 0.03607070446014404, + "learning_rate": 2.7508449609136612e-05, + "loss": 0.003, + "step": 66610 + }, + { + "grad_norm": 0.041173290461301804, + "learning_rate": 2.749368347391454e-05, + "loss": 0.0043, + "step": 66620 + }, + { + "grad_norm": 0.03625732287764549, + "learning_rate": 2.7478919799949443e-05, + "loss": 0.0038, + "step": 66630 + }, + { + "grad_norm": 0.04007142037153244, + "learning_rate": 2.7464158588855803e-05, + "loss": 0.0049, + "step": 66640 + }, + { + "grad_norm": 0.026033073663711548, + "learning_rate": 2.7449399842247918e-05, + "loss": 0.0063, + "step": 66650 + }, + { + "grad_norm": 0.04942150413990021, + "learning_rate": 2.7434643561739736e-05, + "loss": 0.0042, + "step": 66660 + }, + { + "grad_norm": 0.06190791726112366, + "learning_rate": 2.7419889748945016e-05, + "loss": 0.0043, + "step": 66670 + }, + { + "grad_norm": 0.047238387167453766, + "learning_rate": 2.7405138405477203e-05, + "loss": 0.0036, + "step": 66680 + }, + { + "grad_norm": 0.02762617915868759, + "learning_rate": 2.73903895329495e-05, + "loss": 0.004, + "step": 66690 + }, + { + "grad_norm": 0.04027896746993065, + "learning_rate": 2.737564313297478e-05, + "loss": 0.0036, + "step": 66700 + }, + { + "grad_norm": 0.03029545582830906, + "learning_rate": 2.736089920716572e-05, + "loss": 0.005, + "step": 66710 + }, + { + "grad_norm": 0.04578825831413269, + "learning_rate": 2.7346157757134706e-05, + "loss": 0.0041, + "step": 66720 + }, + { + "grad_norm": 0.03973783552646637, + "learning_rate": 2.733141878449379e-05, + "loss": 0.0039, + "step": 66730 + }, + { + "grad_norm": 0.028992850333452225, + "learning_rate": 2.7316682290854844e-05, + "loss": 0.0031, + "step": 66740 + }, + { + "grad_norm": 0.03123476170003414, + "learning_rate": 2.7301948277829414e-05, + "loss": 0.0035, + "step": 66750 + }, + { + "grad_norm": 0.05598260462284088, + "learning_rate": 2.7287216747028814e-05, + "loss": 0.004, + "step": 66760 + }, + { + "grad_norm": 0.02953089401125908, + "learning_rate": 2.7272487700064025e-05, + "loss": 0.003, + "step": 66770 + }, + { + "grad_norm": 0.033837251365184784, + "learning_rate": 2.725776113854583e-05, + "loss": 0.0039, + "step": 66780 + }, + { + "grad_norm": 0.03145357593894005, + "learning_rate": 2.7243037064084664e-05, + "loss": 0.0028, + "step": 66790 + }, + { + "grad_norm": 0.027161646634340286, + "learning_rate": 2.7228315478290766e-05, + "loss": 0.0052, + "step": 66800 + }, + { + "grad_norm": 0.02598663419485092, + "learning_rate": 2.721359638277402e-05, + "loss": 0.0053, + "step": 66810 + }, + { + "grad_norm": 0.03331747651100159, + "learning_rate": 2.7198879779144148e-05, + "loss": 0.003, + "step": 66820 + }, + { + "grad_norm": 0.03935544565320015, + "learning_rate": 2.7184165669010485e-05, + "loss": 0.0037, + "step": 66830 + }, + { + "grad_norm": 0.031101945787668228, + "learning_rate": 2.716945405398218e-05, + "loss": 0.0031, + "step": 66840 + }, + { + "grad_norm": 0.06394801288843155, + "learning_rate": 2.715474493566803e-05, + "loss": 0.005, + "step": 66850 + }, + { + "grad_norm": 0.02271532081067562, + "learning_rate": 2.7140038315676652e-05, + "loss": 0.0026, + "step": 66860 + }, + { + "grad_norm": 0.055027712136507034, + "learning_rate": 2.7125334195616275e-05, + "loss": 0.0039, + "step": 66870 + }, + { + "grad_norm": 0.04264559969305992, + "learning_rate": 2.7110632577094997e-05, + "loss": 0.0039, + "step": 66880 + }, + { + "grad_norm": 0.04964181035757065, + "learning_rate": 2.709593346172051e-05, + "loss": 0.0033, + "step": 66890 + }, + { + "grad_norm": 0.03224323317408562, + "learning_rate": 2.708123685110032e-05, + "loss": 0.0039, + "step": 66900 + }, + { + "grad_norm": 0.030337294563651085, + "learning_rate": 2.7066542746841588e-05, + "loss": 0.0029, + "step": 66910 + }, + { + "grad_norm": 0.040496256202459335, + "learning_rate": 2.7051851150551267e-05, + "loss": 0.0042, + "step": 66920 + }, + { + "grad_norm": 0.04374242201447487, + "learning_rate": 2.7037162063835996e-05, + "loss": 0.0042, + "step": 66930 + }, + { + "grad_norm": 0.022593239322304726, + "learning_rate": 2.702247548830218e-05, + "loss": 0.0039, + "step": 66940 + }, + { + "grad_norm": 0.036121029406785965, + "learning_rate": 2.7007791425555883e-05, + "loss": 0.0035, + "step": 66950 + }, + { + "grad_norm": 0.03255005553364754, + "learning_rate": 2.6993109877202945e-05, + "loss": 0.0048, + "step": 66960 + }, + { + "grad_norm": 0.04405402019619942, + "learning_rate": 2.6978430844848935e-05, + "loss": 0.0047, + "step": 66970 + }, + { + "grad_norm": 0.03792135417461395, + "learning_rate": 2.69637543300991e-05, + "loss": 0.0044, + "step": 66980 + }, + { + "grad_norm": 0.045989420264959335, + "learning_rate": 2.694908033455848e-05, + "loss": 0.0041, + "step": 66990 + }, + { + "grad_norm": 0.029246384277939796, + "learning_rate": 2.6934408859831733e-05, + "loss": 0.0038, + "step": 67000 + }, + { + "grad_norm": 0.03876793384552002, + "learning_rate": 2.69197399075234e-05, + "loss": 0.0029, + "step": 67010 + }, + { + "grad_norm": 0.04376818984746933, + "learning_rate": 2.6905073479237584e-05, + "loss": 0.0047, + "step": 67020 + }, + { + "grad_norm": 0.028850365430116653, + "learning_rate": 2.6890409576578235e-05, + "loss": 0.0036, + "step": 67030 + }, + { + "grad_norm": 0.04125627502799034, + "learning_rate": 2.687574820114892e-05, + "loss": 0.0035, + "step": 67040 + }, + { + "grad_norm": 0.036552783101797104, + "learning_rate": 2.686108935455305e-05, + "loss": 0.0037, + "step": 67050 + }, + { + "grad_norm": 0.04243968054652214, + "learning_rate": 2.6846433038393603e-05, + "loss": 0.0033, + "step": 67060 + }, + { + "grad_norm": 0.0430532805621624, + "learning_rate": 2.6831779254273482e-05, + "loss": 0.0037, + "step": 67070 + }, + { + "grad_norm": 0.02498021349310875, + "learning_rate": 2.6817128003795122e-05, + "loss": 0.0034, + "step": 67080 + }, + { + "grad_norm": 0.052589450031518936, + "learning_rate": 2.680247928856081e-05, + "loss": 0.0049, + "step": 67090 + }, + { + "grad_norm": 0.0349850170314312, + "learning_rate": 2.6787833110172466e-05, + "loss": 0.0037, + "step": 67100 + }, + { + "grad_norm": 0.041278623044490814, + "learning_rate": 2.6773189470231807e-05, + "loss": 0.0058, + "step": 67110 + }, + { + "grad_norm": 0.031013159081339836, + "learning_rate": 2.675854837034022e-05, + "loss": 0.0037, + "step": 67120 + }, + { + "grad_norm": 0.03292890638113022, + "learning_rate": 2.6743909812098833e-05, + "loss": 0.0031, + "step": 67130 + }, + { + "grad_norm": 0.021951405331492424, + "learning_rate": 2.67292737971085e-05, + "loss": 0.0028, + "step": 67140 + }, + { + "grad_norm": 0.03404933214187622, + "learning_rate": 2.671464032696982e-05, + "loss": 0.0047, + "step": 67150 + }, + { + "grad_norm": 0.03804114833474159, + "learning_rate": 2.6700009403283055e-05, + "loss": 0.0028, + "step": 67160 + }, + { + "grad_norm": 0.04174738749861717, + "learning_rate": 2.6685381027648214e-05, + "loss": 0.0047, + "step": 67170 + }, + { + "grad_norm": 0.042754486203193665, + "learning_rate": 2.6670755201665086e-05, + "loss": 0.0035, + "step": 67180 + }, + { + "grad_norm": 0.04929684102535248, + "learning_rate": 2.6656131926933066e-05, + "loss": 0.005, + "step": 67190 + }, + { + "grad_norm": 0.03961510956287384, + "learning_rate": 2.664151120505136e-05, + "loss": 0.004, + "step": 67200 + }, + { + "grad_norm": 0.02666153572499752, + "learning_rate": 2.6626893037618873e-05, + "loss": 0.0037, + "step": 67210 + }, + { + "grad_norm": 0.04860461875796318, + "learning_rate": 2.6612277426234243e-05, + "loss": 0.0047, + "step": 67220 + }, + { + "grad_norm": 0.04884359985589981, + "learning_rate": 2.659766437249577e-05, + "loss": 0.0048, + "step": 67230 + }, + { + "grad_norm": 0.03589958697557449, + "learning_rate": 2.6583053878001557e-05, + "loss": 0.0043, + "step": 67240 + }, + { + "grad_norm": 0.03431377559900284, + "learning_rate": 2.6568445944349323e-05, + "loss": 0.0066, + "step": 67250 + }, + { + "grad_norm": 0.038042888045310974, + "learning_rate": 2.6553840573136657e-05, + "loss": 0.0023, + "step": 67260 + }, + { + "grad_norm": 0.03327340632677078, + "learning_rate": 2.6539237765960706e-05, + "loss": 0.0032, + "step": 67270 + }, + { + "grad_norm": 0.022937379777431488, + "learning_rate": 2.6524637524418465e-05, + "loss": 0.0038, + "step": 67280 + }, + { + "grad_norm": 0.026365872472524643, + "learning_rate": 2.651003985010655e-05, + "loss": 0.0027, + "step": 67290 + }, + { + "grad_norm": 0.04551827907562256, + "learning_rate": 2.649544474462138e-05, + "loss": 0.0043, + "step": 67300 + }, + { + "grad_norm": 0.04587486758828163, + "learning_rate": 2.6480852209559015e-05, + "loss": 0.0047, + "step": 67310 + }, + { + "grad_norm": 0.02240944653749466, + "learning_rate": 2.6466262246515296e-05, + "loss": 0.0034, + "step": 67320 + }, + { + "grad_norm": 0.0335998460650444, + "learning_rate": 2.6451674857085746e-05, + "loss": 0.0047, + "step": 67330 + }, + { + "grad_norm": 0.05257783457636833, + "learning_rate": 2.6437090042865655e-05, + "loss": 0.0057, + "step": 67340 + }, + { + "grad_norm": 0.05565295368432999, + "learning_rate": 2.6422507805449947e-05, + "loss": 0.0045, + "step": 67350 + }, + { + "grad_norm": 0.048301342874765396, + "learning_rate": 2.6407928146433357e-05, + "loss": 0.004, + "step": 67360 + }, + { + "grad_norm": 0.035603463649749756, + "learning_rate": 2.639335106741026e-05, + "loss": 0.0035, + "step": 67370 + }, + { + "grad_norm": 0.02925901673734188, + "learning_rate": 2.6378776569974796e-05, + "loss": 0.0031, + "step": 67380 + }, + { + "grad_norm": 0.05032648891210556, + "learning_rate": 2.6364204655720814e-05, + "loss": 0.0046, + "step": 67390 + }, + { + "grad_norm": 0.02382754348218441, + "learning_rate": 2.6349635326241894e-05, + "loss": 0.0033, + "step": 67400 + }, + { + "grad_norm": 0.037693221122026443, + "learning_rate": 2.633506858313127e-05, + "loss": 0.0041, + "step": 67410 + }, + { + "grad_norm": 0.04911329224705696, + "learning_rate": 2.6320504427981975e-05, + "loss": 0.0036, + "step": 67420 + }, + { + "grad_norm": 0.04082232341170311, + "learning_rate": 2.6305942862386734e-05, + "loss": 0.0028, + "step": 67430 + }, + { + "grad_norm": 0.04429766908288002, + "learning_rate": 2.629138388793793e-05, + "loss": 0.0046, + "step": 67440 + }, + { + "grad_norm": 0.0640062764286995, + "learning_rate": 2.6276827506227743e-05, + "loss": 0.0041, + "step": 67450 + }, + { + "grad_norm": 0.053510818630456924, + "learning_rate": 2.6262273718848037e-05, + "loss": 0.0044, + "step": 67460 + }, + { + "grad_norm": 0.0457097589969635, + "learning_rate": 2.6247722527390407e-05, + "loss": 0.0035, + "step": 67470 + }, + { + "grad_norm": 0.0488361194729805, + "learning_rate": 2.6233173933446104e-05, + "loss": 0.0027, + "step": 67480 + }, + { + "grad_norm": 0.03935731574892998, + "learning_rate": 2.6218627938606188e-05, + "loss": 0.0037, + "step": 67490 + }, + { + "grad_norm": 0.036514367908239365, + "learning_rate": 2.620408454446135e-05, + "loss": 0.0043, + "step": 67500 + }, + { + "grad_norm": 0.028736842796206474, + "learning_rate": 2.6189543752602046e-05, + "loss": 0.0029, + "step": 67510 + }, + { + "grad_norm": 0.03204913064837456, + "learning_rate": 2.6175005564618444e-05, + "loss": 0.0028, + "step": 67520 + }, + { + "grad_norm": 0.03049914911389351, + "learning_rate": 2.6160469982100428e-05, + "loss": 0.0028, + "step": 67530 + }, + { + "grad_norm": 0.027732979506254196, + "learning_rate": 2.6145937006637555e-05, + "loss": 0.0026, + "step": 67540 + }, + { + "grad_norm": 0.05432756617665291, + "learning_rate": 2.6131406639819166e-05, + "loss": 0.0047, + "step": 67550 + }, + { + "grad_norm": 0.033348847180604935, + "learning_rate": 2.611687888323424e-05, + "loss": 0.0045, + "step": 67560 + }, + { + "grad_norm": 0.03366254270076752, + "learning_rate": 2.610235373847153e-05, + "loss": 0.0033, + "step": 67570 + }, + { + "grad_norm": 0.05405944585800171, + "learning_rate": 2.6087831207119494e-05, + "loss": 0.0046, + "step": 67580 + }, + { + "grad_norm": 0.03516167774796486, + "learning_rate": 2.60733112907663e-05, + "loss": 0.0035, + "step": 67590 + }, + { + "grad_norm": 0.03866519033908844, + "learning_rate": 2.6058793990999786e-05, + "loss": 0.0038, + "step": 67600 + }, + { + "grad_norm": 0.02932462841272354, + "learning_rate": 2.6044279309407593e-05, + "loss": 0.0029, + "step": 67610 + }, + { + "grad_norm": 0.02613035961985588, + "learning_rate": 2.6029767247576976e-05, + "loss": 0.0035, + "step": 67620 + }, + { + "grad_norm": 0.04896793141961098, + "learning_rate": 2.601525780709497e-05, + "loss": 0.0048, + "step": 67630 + }, + { + "grad_norm": 0.05062234401702881, + "learning_rate": 2.6000750989548318e-05, + "loss": 0.0034, + "step": 67640 + }, + { + "grad_norm": 0.03589087352156639, + "learning_rate": 2.5986246796523462e-05, + "loss": 0.0032, + "step": 67650 + }, + { + "grad_norm": 0.0439298078417778, + "learning_rate": 2.5971745229606546e-05, + "loss": 0.004, + "step": 67660 + }, + { + "grad_norm": 0.055816393345594406, + "learning_rate": 2.5957246290383442e-05, + "loss": 0.0047, + "step": 67670 + }, + { + "grad_norm": 0.053820330649614334, + "learning_rate": 2.594274998043975e-05, + "loss": 0.004, + "step": 67680 + }, + { + "grad_norm": 0.034188162535429, + "learning_rate": 2.5928256301360733e-05, + "loss": 0.0032, + "step": 67690 + }, + { + "grad_norm": 0.041930191218853, + "learning_rate": 2.591376525473142e-05, + "loss": 0.004, + "step": 67700 + }, + { + "grad_norm": 0.03001675195991993, + "learning_rate": 2.5899276842136523e-05, + "loss": 0.0042, + "step": 67710 + }, + { + "grad_norm": 0.04713624343276024, + "learning_rate": 2.5884791065160495e-05, + "loss": 0.0052, + "step": 67720 + }, + { + "grad_norm": 0.05154506117105484, + "learning_rate": 2.5870307925387436e-05, + "loss": 0.0052, + "step": 67730 + }, + { + "grad_norm": 0.037508051842451096, + "learning_rate": 2.585582742440125e-05, + "loss": 0.0033, + "step": 67740 + }, + { + "grad_norm": 0.05094493180513382, + "learning_rate": 2.5841349563785455e-05, + "loss": 0.0042, + "step": 67750 + }, + { + "grad_norm": 0.054401054978370667, + "learning_rate": 2.5826874345123355e-05, + "loss": 0.0052, + "step": 67760 + }, + { + "grad_norm": 0.03286585584282875, + "learning_rate": 2.581240176999792e-05, + "loss": 0.0041, + "step": 67770 + }, + { + "grad_norm": 0.0502723753452301, + "learning_rate": 2.579793183999189e-05, + "loss": 0.0057, + "step": 67780 + }, + { + "grad_norm": 0.047182440757751465, + "learning_rate": 2.5783464556687627e-05, + "loss": 0.0034, + "step": 67790 + }, + { + "grad_norm": 0.03621238097548485, + "learning_rate": 2.576899992166729e-05, + "loss": 0.0028, + "step": 67800 + }, + { + "grad_norm": 0.030726440250873566, + "learning_rate": 2.5754537936512668e-05, + "loss": 0.0032, + "step": 67810 + }, + { + "grad_norm": 0.03461480140686035, + "learning_rate": 2.574007860280535e-05, + "loss": 0.0042, + "step": 67820 + }, + { + "grad_norm": 0.055794499814510345, + "learning_rate": 2.5725621922126518e-05, + "loss": 0.0042, + "step": 67830 + }, + { + "grad_norm": 0.03733636811375618, + "learning_rate": 2.571116789605722e-05, + "loss": 0.0037, + "step": 67840 + }, + { + "grad_norm": 0.0543648935854435, + "learning_rate": 2.5696716526178054e-05, + "loss": 0.0036, + "step": 67850 + }, + { + "grad_norm": 0.050738923251628876, + "learning_rate": 2.568226781406946e-05, + "loss": 0.0045, + "step": 67860 + }, + { + "grad_norm": 0.04149547219276428, + "learning_rate": 2.5667821761311474e-05, + "loss": 0.0045, + "step": 67870 + }, + { + "grad_norm": 0.04557792469859123, + "learning_rate": 2.5653378369483917e-05, + "loss": 0.0037, + "step": 67880 + }, + { + "grad_norm": 0.04910953342914581, + "learning_rate": 2.5638937640166292e-05, + "loss": 0.0038, + "step": 67890 + }, + { + "grad_norm": 0.02977614663541317, + "learning_rate": 2.5624499574937842e-05, + "loss": 0.0029, + "step": 67900 + }, + { + "grad_norm": 0.02935045212507248, + "learning_rate": 2.5610064175377456e-05, + "loss": 0.0032, + "step": 67910 + }, + { + "grad_norm": 0.026675082743167877, + "learning_rate": 2.5595631443063782e-05, + "loss": 0.0037, + "step": 67920 + }, + { + "grad_norm": 0.023322399705648422, + "learning_rate": 2.5581201379575177e-05, + "loss": 0.0033, + "step": 67930 + }, + { + "grad_norm": 0.04273240268230438, + "learning_rate": 2.5566773986489655e-05, + "loss": 0.003, + "step": 67940 + }, + { + "grad_norm": 0.029162105172872543, + "learning_rate": 2.5552349265385e-05, + "loss": 0.0035, + "step": 67950 + }, + { + "grad_norm": 0.027351628988981247, + "learning_rate": 2.5537927217838675e-05, + "loss": 0.0033, + "step": 67960 + }, + { + "grad_norm": 0.03546740859746933, + "learning_rate": 2.5523507845427864e-05, + "loss": 0.0035, + "step": 67970 + }, + { + "grad_norm": 0.0396500900387764, + "learning_rate": 2.5509091149729413e-05, + "loss": 0.0042, + "step": 67980 + }, + { + "grad_norm": 0.03682414069771767, + "learning_rate": 2.5494677132319962e-05, + "loss": 0.0035, + "step": 67990 + }, + { + "grad_norm": 0.032631680369377136, + "learning_rate": 2.548026579477575e-05, + "loss": 0.0031, + "step": 68000 + }, + { + "grad_norm": 0.051527515053749084, + "learning_rate": 2.5465857138672834e-05, + "loss": 0.0038, + "step": 68010 + }, + { + "grad_norm": 0.052164770662784576, + "learning_rate": 2.5451451165586848e-05, + "loss": 0.0046, + "step": 68020 + }, + { + "grad_norm": 0.0466497465968132, + "learning_rate": 2.543704787709329e-05, + "loss": 0.0069, + "step": 68030 + }, + { + "grad_norm": 0.03962551802396774, + "learning_rate": 2.542264727476723e-05, + "loss": 0.0043, + "step": 68040 + }, + { + "grad_norm": 0.05190601572394371, + "learning_rate": 2.5408249360183535e-05, + "loss": 0.0039, + "step": 68050 + }, + { + "grad_norm": 0.04419935122132301, + "learning_rate": 2.5393854134916694e-05, + "loss": 0.005, + "step": 68060 + }, + { + "grad_norm": 0.04942954331636429, + "learning_rate": 2.5379461600540993e-05, + "loss": 0.0032, + "step": 68070 + }, + { + "grad_norm": 0.050268128514289856, + "learning_rate": 2.5365071758630317e-05, + "loss": 0.0041, + "step": 68080 + }, + { + "grad_norm": 0.029452580958604813, + "learning_rate": 2.535068461075839e-05, + "loss": 0.0031, + "step": 68090 + }, + { + "grad_norm": 0.04555730149149895, + "learning_rate": 2.5336300158498516e-05, + "loss": 0.0046, + "step": 68100 + }, + { + "grad_norm": 0.026848506182432175, + "learning_rate": 2.5321918403423793e-05, + "loss": 0.0029, + "step": 68110 + }, + { + "grad_norm": 0.03865323215723038, + "learning_rate": 2.5307539347106957e-05, + "loss": 0.0032, + "step": 68120 + }, + { + "grad_norm": 0.029803572222590446, + "learning_rate": 2.529316299112049e-05, + "loss": 0.0039, + "step": 68130 + }, + { + "grad_norm": 0.026532746851444244, + "learning_rate": 2.5278789337036567e-05, + "loss": 0.0031, + "step": 68140 + }, + { + "grad_norm": 0.029048241674900055, + "learning_rate": 2.52644183864271e-05, + "loss": 0.0039, + "step": 68150 + }, + { + "grad_norm": 0.02885257638990879, + "learning_rate": 2.525005014086363e-05, + "loss": 0.004, + "step": 68160 + }, + { + "grad_norm": 0.04485558345913887, + "learning_rate": 2.5235684601917465e-05, + "loss": 0.0032, + "step": 68170 + }, + { + "grad_norm": 0.031417690217494965, + "learning_rate": 2.5221321771159618e-05, + "loss": 0.0028, + "step": 68180 + }, + { + "grad_norm": 0.03583313897252083, + "learning_rate": 2.5206961650160743e-05, + "loss": 0.0035, + "step": 68190 + }, + { + "grad_norm": 0.034365057945251465, + "learning_rate": 2.519260424049129e-05, + "loss": 0.0038, + "step": 68200 + }, + { + "grad_norm": 0.05052126199007034, + "learning_rate": 2.5178249543721304e-05, + "loss": 0.0039, + "step": 68210 + }, + { + "grad_norm": 0.039615631103515625, + "learning_rate": 2.5163897561420668e-05, + "loss": 0.0044, + "step": 68220 + }, + { + "grad_norm": 0.04085669293999672, + "learning_rate": 2.514954829515883e-05, + "loss": 0.0037, + "step": 68230 + }, + { + "grad_norm": 0.03690318018198013, + "learning_rate": 2.513520174650505e-05, + "loss": 0.004, + "step": 68240 + }, + { + "grad_norm": 0.05644090846180916, + "learning_rate": 2.5120857917028207e-05, + "loss": 0.0045, + "step": 68250 + }, + { + "grad_norm": 0.02438068762421608, + "learning_rate": 2.5106516808296953e-05, + "loss": 0.0039, + "step": 68260 + }, + { + "grad_norm": 0.03564511612057686, + "learning_rate": 2.5092178421879552e-05, + "loss": 0.0043, + "step": 68270 + }, + { + "grad_norm": 0.0396217443048954, + "learning_rate": 2.5077842759344116e-05, + "loss": 0.0049, + "step": 68280 + }, + { + "grad_norm": 0.040992025285959244, + "learning_rate": 2.5063509822258304e-05, + "loss": 0.0049, + "step": 68290 + }, + { + "grad_norm": 0.04883980005979538, + "learning_rate": 2.5049179612189576e-05, + "loss": 0.0042, + "step": 68300 + }, + { + "grad_norm": 0.04371732473373413, + "learning_rate": 2.5034852130705043e-05, + "loss": 0.0032, + "step": 68310 + }, + { + "grad_norm": 0.03454647958278656, + "learning_rate": 2.5020527379371572e-05, + "loss": 0.0037, + "step": 68320 + }, + { + "grad_norm": 0.036428648978471756, + "learning_rate": 2.500620535975564e-05, + "loss": 0.003, + "step": 68330 + }, + { + "grad_norm": 0.0678696259856224, + "learning_rate": 2.4991886073423515e-05, + "loss": 0.0039, + "step": 68340 + }, + { + "grad_norm": 0.03728291764855385, + "learning_rate": 2.4977569521941134e-05, + "loss": 0.003, + "step": 68350 + }, + { + "grad_norm": 0.05495084822177887, + "learning_rate": 2.4963255706874143e-05, + "loss": 0.005, + "step": 68360 + }, + { + "grad_norm": 0.04751668870449066, + "learning_rate": 2.4948944629787853e-05, + "loss": 0.0047, + "step": 68370 + }, + { + "grad_norm": 0.0354316383600235, + "learning_rate": 2.493463629224731e-05, + "loss": 0.0035, + "step": 68380 + }, + { + "grad_norm": 0.04627299681305885, + "learning_rate": 2.492033069581728e-05, + "loss": 0.005, + "step": 68390 + }, + { + "grad_norm": 0.02387753315269947, + "learning_rate": 2.4906027842062137e-05, + "loss": 0.0038, + "step": 68400 + }, + { + "grad_norm": 0.022781943902373314, + "learning_rate": 2.48917277325461e-05, + "loss": 0.0032, + "step": 68410 + }, + { + "grad_norm": 0.03002871759235859, + "learning_rate": 2.487743036883295e-05, + "loss": 0.0033, + "step": 68420 + }, + { + "grad_norm": 0.0342613086104393, + "learning_rate": 2.486313575248626e-05, + "loss": 0.0035, + "step": 68430 + }, + { + "grad_norm": 0.04062996059656143, + "learning_rate": 2.4848843885069235e-05, + "loss": 0.0038, + "step": 68440 + }, + { + "grad_norm": 0.037780266255140305, + "learning_rate": 2.483455476814484e-05, + "loss": 0.0044, + "step": 68450 + }, + { + "grad_norm": 0.04882292076945305, + "learning_rate": 2.4820268403275672e-05, + "loss": 0.0036, + "step": 68460 + }, + { + "grad_norm": 0.055452827364206314, + "learning_rate": 2.480598479202413e-05, + "loss": 0.0044, + "step": 68470 + }, + { + "grad_norm": 0.023307668045163155, + "learning_rate": 2.4791703935952193e-05, + "loss": 0.0027, + "step": 68480 + }, + { + "grad_norm": 0.03165707737207413, + "learning_rate": 2.477742583662163e-05, + "loss": 0.0045, + "step": 68490 + }, + { + "grad_norm": 0.03150848671793938, + "learning_rate": 2.476315049559384e-05, + "loss": 0.0033, + "step": 68500 + }, + { + "grad_norm": 0.0384063720703125, + "learning_rate": 2.4748877914429987e-05, + "loss": 0.0042, + "step": 68510 + }, + { + "grad_norm": 0.0538603775203228, + "learning_rate": 2.4734608094690857e-05, + "loss": 0.0035, + "step": 68520 + }, + { + "grad_norm": 0.027741845697164536, + "learning_rate": 2.4720341037937006e-05, + "loss": 0.0046, + "step": 68530 + }, + { + "grad_norm": 0.04530772566795349, + "learning_rate": 2.4706076745728645e-05, + "loss": 0.0035, + "step": 68540 + }, + { + "grad_norm": 0.024925699457526207, + "learning_rate": 2.469181521962573e-05, + "loss": 0.0027, + "step": 68550 + }, + { + "grad_norm": 0.028896698728203773, + "learning_rate": 2.467755646118783e-05, + "loss": 0.003, + "step": 68560 + }, + { + "grad_norm": 0.023786453530192375, + "learning_rate": 2.46633004719743e-05, + "loss": 0.0035, + "step": 68570 + }, + { + "grad_norm": 0.05452103540301323, + "learning_rate": 2.4649047253544123e-05, + "loss": 0.0033, + "step": 68580 + }, + { + "grad_norm": 0.06075967475771904, + "learning_rate": 2.4634796807456016e-05, + "loss": 0.0042, + "step": 68590 + }, + { + "grad_norm": 0.0318974144756794, + "learning_rate": 2.46205491352684e-05, + "loss": 0.0029, + "step": 68600 + }, + { + "grad_norm": 0.04417508468031883, + "learning_rate": 2.4606304238539375e-05, + "loss": 0.0043, + "step": 68610 + }, + { + "grad_norm": 0.03799480199813843, + "learning_rate": 2.4592062118826753e-05, + "loss": 0.0032, + "step": 68620 + }, + { + "grad_norm": 0.03689911216497421, + "learning_rate": 2.4577822777688004e-05, + "loss": 0.0033, + "step": 68630 + }, + { + "grad_norm": 0.03033382073044777, + "learning_rate": 2.4563586216680347e-05, + "loss": 0.0041, + "step": 68640 + }, + { + "grad_norm": 0.03447786718606949, + "learning_rate": 2.4549352437360622e-05, + "loss": 0.0032, + "step": 68650 + }, + { + "grad_norm": 0.03511226177215576, + "learning_rate": 2.4535121441285493e-05, + "loss": 0.0054, + "step": 68660 + }, + { + "grad_norm": 0.0541437491774559, + "learning_rate": 2.4520893230011172e-05, + "loss": 0.0049, + "step": 68670 + }, + { + "grad_norm": 0.03605970740318298, + "learning_rate": 2.450666780509368e-05, + "loss": 0.0028, + "step": 68680 + }, + { + "grad_norm": 0.04312889277935028, + "learning_rate": 2.4492445168088656e-05, + "loss": 0.003, + "step": 68690 + }, + { + "grad_norm": 0.03762608394026756, + "learning_rate": 2.447822532055149e-05, + "loss": 0.0028, + "step": 68700 + }, + { + "grad_norm": 0.024466177448630333, + "learning_rate": 2.4464008264037212e-05, + "loss": 0.0022, + "step": 68710 + }, + { + "grad_norm": 0.028712043538689613, + "learning_rate": 2.4449794000100605e-05, + "loss": 0.0045, + "step": 68720 + }, + { + "grad_norm": 0.027648361399769783, + "learning_rate": 2.4435582530296108e-05, + "loss": 0.0029, + "step": 68730 + }, + { + "grad_norm": 0.04175199940800667, + "learning_rate": 2.4421373856177887e-05, + "loss": 0.0031, + "step": 68740 + }, + { + "grad_norm": 0.03458952531218529, + "learning_rate": 2.440716797929975e-05, + "loss": 0.0037, + "step": 68750 + }, + { + "grad_norm": 0.02805788628757, + "learning_rate": 2.439296490121526e-05, + "loss": 0.004, + "step": 68760 + }, + { + "grad_norm": 0.04097278416156769, + "learning_rate": 2.4378764623477608e-05, + "loss": 0.0038, + "step": 68770 + }, + { + "grad_norm": 0.03882456570863724, + "learning_rate": 2.436456714763974e-05, + "loss": 0.0033, + "step": 68780 + }, + { + "grad_norm": 0.032073233276605606, + "learning_rate": 2.4350372475254262e-05, + "loss": 0.0035, + "step": 68790 + }, + { + "grad_norm": 0.050606027245521545, + "learning_rate": 2.433618060787351e-05, + "loss": 0.0046, + "step": 68800 + }, + { + "grad_norm": 0.021429475396871567, + "learning_rate": 2.432199154704944e-05, + "loss": 0.0027, + "step": 68810 + }, + { + "grad_norm": 0.032712534070014954, + "learning_rate": 2.430780529433377e-05, + "loss": 0.0037, + "step": 68820 + }, + { + "grad_norm": 0.04669502004981041, + "learning_rate": 2.42936218512779e-05, + "loss": 0.0032, + "step": 68830 + }, + { + "grad_norm": 0.0504152774810791, + "learning_rate": 2.427944121943288e-05, + "loss": 0.0032, + "step": 68840 + }, + { + "grad_norm": 0.04121039807796478, + "learning_rate": 2.4265263400349497e-05, + "loss": 0.005, + "step": 68850 + }, + { + "grad_norm": 0.03246583417057991, + "learning_rate": 2.4251088395578213e-05, + "loss": 0.0052, + "step": 68860 + }, + { + "grad_norm": 0.04839794337749481, + "learning_rate": 2.4236916206669214e-05, + "loss": 0.0057, + "step": 68870 + }, + { + "grad_norm": 0.03958490490913391, + "learning_rate": 2.4222746835172304e-05, + "loss": 0.0049, + "step": 68880 + }, + { + "grad_norm": 0.049555618315935135, + "learning_rate": 2.4208580282637065e-05, + "loss": 0.0046, + "step": 68890 + }, + { + "grad_norm": 0.041808221489191055, + "learning_rate": 2.4194416550612687e-05, + "loss": 0.005, + "step": 68900 + }, + { + "grad_norm": 0.03201381862163544, + "learning_rate": 2.418025564064812e-05, + "loss": 0.0038, + "step": 68910 + }, + { + "grad_norm": 0.02581239677965641, + "learning_rate": 2.4166097554291978e-05, + "loss": 0.003, + "step": 68920 + }, + { + "grad_norm": 0.031713590025901794, + "learning_rate": 2.4151942293092583e-05, + "loss": 0.0038, + "step": 68930 + }, + { + "grad_norm": 0.04552645981311798, + "learning_rate": 2.41377898585979e-05, + "loss": 0.0038, + "step": 68940 + }, + { + "grad_norm": 0.040339283645153046, + "learning_rate": 2.4123640252355652e-05, + "loss": 0.0032, + "step": 68950 + }, + { + "grad_norm": 0.044813644140958786, + "learning_rate": 2.410949347591318e-05, + "loss": 0.0025, + "step": 68960 + }, + { + "grad_norm": 0.022644666954874992, + "learning_rate": 2.4095349530817578e-05, + "loss": 0.0036, + "step": 68970 + }, + { + "grad_norm": 0.029924841597676277, + "learning_rate": 2.4081208418615603e-05, + "loss": 0.0028, + "step": 68980 + }, + { + "grad_norm": 0.04029710590839386, + "learning_rate": 2.4067070140853725e-05, + "loss": 0.0034, + "step": 68990 + }, + { + "grad_norm": 0.04729044437408447, + "learning_rate": 2.405293469907805e-05, + "loss": 0.0041, + "step": 69000 + }, + { + "grad_norm": 0.038976795971393585, + "learning_rate": 2.4038802094834446e-05, + "loss": 0.0033, + "step": 69010 + }, + { + "grad_norm": 0.030960150063037872, + "learning_rate": 2.4024672329668395e-05, + "loss": 0.0044, + "step": 69020 + }, + { + "grad_norm": 0.04078123718500137, + "learning_rate": 2.4010545405125125e-05, + "loss": 0.003, + "step": 69030 + }, + { + "grad_norm": 0.05293715372681618, + "learning_rate": 2.3996421322749528e-05, + "loss": 0.0029, + "step": 69040 + }, + { + "grad_norm": 0.048552464693784714, + "learning_rate": 2.3982300084086224e-05, + "loss": 0.0048, + "step": 69050 + }, + { + "grad_norm": 0.04230061545968056, + "learning_rate": 2.396818169067944e-05, + "loss": 0.0033, + "step": 69060 + }, + { + "grad_norm": 0.039252012968063354, + "learning_rate": 2.3954066144073178e-05, + "loss": 0.0051, + "step": 69070 + }, + { + "grad_norm": 0.043899279087781906, + "learning_rate": 2.39399534458111e-05, + "loss": 0.0041, + "step": 69080 + }, + { + "grad_norm": 0.04618256166577339, + "learning_rate": 2.392584359743651e-05, + "loss": 0.0038, + "step": 69090 + }, + { + "grad_norm": 0.03676259145140648, + "learning_rate": 2.3911736600492463e-05, + "loss": 0.0024, + "step": 69100 + }, + { + "grad_norm": 0.03283039107918739, + "learning_rate": 2.3897632456521672e-05, + "loss": 0.0042, + "step": 69110 + }, + { + "grad_norm": 0.03297044336795807, + "learning_rate": 2.388353116706658e-05, + "loss": 0.0029, + "step": 69120 + }, + { + "grad_norm": 0.04899835214018822, + "learning_rate": 2.3869432733669232e-05, + "loss": 0.0042, + "step": 69130 + }, + { + "grad_norm": 0.02535175159573555, + "learning_rate": 2.385533715787145e-05, + "loss": 0.0029, + "step": 69140 + }, + { + "grad_norm": 0.04382670670747757, + "learning_rate": 2.3841244441214673e-05, + "loss": 0.0041, + "step": 69150 + }, + { + "grad_norm": 0.05711986497044563, + "learning_rate": 2.3827154585240073e-05, + "loss": 0.0036, + "step": 69160 + }, + { + "grad_norm": 0.05771061033010483, + "learning_rate": 2.3813067591488498e-05, + "loss": 0.0043, + "step": 69170 + }, + { + "grad_norm": 0.05083213001489639, + "learning_rate": 2.3798983461500494e-05, + "loss": 0.0079, + "step": 69180 + }, + { + "grad_norm": 0.043151840567588806, + "learning_rate": 2.3784902196816256e-05, + "loss": 0.0037, + "step": 69190 + }, + { + "grad_norm": 0.0326283760368824, + "learning_rate": 2.3770823798975716e-05, + "loss": 0.0034, + "step": 69200 + }, + { + "grad_norm": 0.04925896227359772, + "learning_rate": 2.3756748269518424e-05, + "loss": 0.0037, + "step": 69210 + }, + { + "grad_norm": 0.04104623571038246, + "learning_rate": 2.374267560998371e-05, + "loss": 0.003, + "step": 69220 + }, + { + "grad_norm": 0.03243548423051834, + "learning_rate": 2.3728605821910477e-05, + "loss": 0.0064, + "step": 69230 + }, + { + "grad_norm": 0.03747805953025818, + "learning_rate": 2.3714538906837453e-05, + "loss": 0.0041, + "step": 69240 + }, + { + "grad_norm": 0.017353495582938194, + "learning_rate": 2.3700474866302913e-05, + "loss": 0.0026, + "step": 69250 + }, + { + "grad_norm": 0.03682359680533409, + "learning_rate": 2.3686413701844917e-05, + "loss": 0.0041, + "step": 69260 + }, + { + "grad_norm": 0.02993498183786869, + "learning_rate": 2.367235541500114e-05, + "loss": 0.0053, + "step": 69270 + }, + { + "grad_norm": 0.03610438480973244, + "learning_rate": 2.3658300007308993e-05, + "loss": 0.004, + "step": 69280 + }, + { + "grad_norm": 0.04031316936016083, + "learning_rate": 2.364424748030555e-05, + "loss": 0.0033, + "step": 69290 + }, + { + "grad_norm": 0.032392989844083786, + "learning_rate": 2.3630197835527595e-05, + "loss": 0.0038, + "step": 69300 + }, + { + "grad_norm": 0.05546257644891739, + "learning_rate": 2.3616151074511546e-05, + "loss": 0.0033, + "step": 69310 + }, + { + "grad_norm": 0.047107357531785965, + "learning_rate": 2.3602107198793545e-05, + "loss": 0.0034, + "step": 69320 + }, + { + "grad_norm": 0.04978376254439354, + "learning_rate": 2.3588066209909427e-05, + "loss": 0.0038, + "step": 69330 + }, + { + "grad_norm": 0.03669743984937668, + "learning_rate": 2.3574028109394662e-05, + "loss": 0.0032, + "step": 69340 + }, + { + "grad_norm": 0.03221642225980759, + "learning_rate": 2.3559992898784454e-05, + "loss": 0.0038, + "step": 69350 + }, + { + "grad_norm": 0.05385454744100571, + "learning_rate": 2.354596057961367e-05, + "loss": 0.0044, + "step": 69360 + }, + { + "grad_norm": 0.04382467269897461, + "learning_rate": 2.353193115341688e-05, + "loss": 0.0031, + "step": 69370 + }, + { + "grad_norm": 0.03602490574121475, + "learning_rate": 2.3517904621728294e-05, + "loss": 0.0027, + "step": 69380 + }, + { + "grad_norm": 0.0421549528837204, + "learning_rate": 2.3503880986081855e-05, + "loss": 0.0042, + "step": 69390 + }, + { + "grad_norm": 0.03919006511569023, + "learning_rate": 2.3489860248011136e-05, + "loss": 0.0028, + "step": 69400 + }, + { + "grad_norm": 0.03416084498167038, + "learning_rate": 2.3475842409049464e-05, + "loss": 0.0037, + "step": 69410 + }, + { + "grad_norm": 0.045050397515296936, + "learning_rate": 2.346182747072975e-05, + "loss": 0.004, + "step": 69420 + }, + { + "grad_norm": 0.034244947135448456, + "learning_rate": 2.3447815434584718e-05, + "loss": 0.0044, + "step": 69430 + }, + { + "grad_norm": 0.03849903121590614, + "learning_rate": 2.3433806302146656e-05, + "loss": 0.0036, + "step": 69440 + }, + { + "grad_norm": 0.030644645914435387, + "learning_rate": 2.3419800074947612e-05, + "loss": 0.0036, + "step": 69450 + }, + { + "grad_norm": 0.039589717984199524, + "learning_rate": 2.3405796754519248e-05, + "loss": 0.0061, + "step": 69460 + }, + { + "grad_norm": 0.04525395482778549, + "learning_rate": 2.3391796342392992e-05, + "loss": 0.0047, + "step": 69470 + }, + { + "grad_norm": 0.03812089562416077, + "learning_rate": 2.337779884009984e-05, + "loss": 0.0041, + "step": 69480 + }, + { + "grad_norm": 0.035766251385211945, + "learning_rate": 2.3363804249170624e-05, + "loss": 0.0031, + "step": 69490 + }, + { + "grad_norm": 0.021151555702090263, + "learning_rate": 2.3349812571135703e-05, + "loss": 0.0032, + "step": 69500 + }, + { + "grad_norm": 0.045745186507701874, + "learning_rate": 2.333582380752523e-05, + "loss": 0.0044, + "step": 69510 + }, + { + "grad_norm": 0.02693183161318302, + "learning_rate": 2.3321837959868963e-05, + "loss": 0.0028, + "step": 69520 + }, + { + "grad_norm": 0.041763100773096085, + "learning_rate": 2.3307855029696383e-05, + "loss": 0.0041, + "step": 69530 + }, + { + "grad_norm": 0.02911369688808918, + "learning_rate": 2.3293875018536648e-05, + "loss": 0.0028, + "step": 69540 + }, + { + "grad_norm": 0.04470912739634514, + "learning_rate": 2.327989792791861e-05, + "loss": 0.0037, + "step": 69550 + }, + { + "grad_norm": 0.03391748294234276, + "learning_rate": 2.326592375937074e-05, + "loss": 0.0027, + "step": 69560 + }, + { + "grad_norm": 0.03635392338037491, + "learning_rate": 2.3251952514421248e-05, + "loss": 0.0035, + "step": 69570 + }, + { + "grad_norm": 0.03103363700211048, + "learning_rate": 2.323798419459804e-05, + "loss": 0.0027, + "step": 69580 + }, + { + "grad_norm": 0.05549479275941849, + "learning_rate": 2.3224018801428625e-05, + "loss": 0.0039, + "step": 69590 + }, + { + "grad_norm": 0.042786188423633575, + "learning_rate": 2.3210056336440273e-05, + "loss": 0.0051, + "step": 69600 + }, + { + "grad_norm": 0.04344868287444115, + "learning_rate": 2.3196096801159844e-05, + "loss": 0.0034, + "step": 69610 + }, + { + "grad_norm": 0.04655644670128822, + "learning_rate": 2.3182140197114012e-05, + "loss": 0.0034, + "step": 69620 + }, + { + "grad_norm": 0.0276240985840559, + "learning_rate": 2.3168186525828985e-05, + "loss": 0.0037, + "step": 69630 + }, + { + "grad_norm": 0.01862875372171402, + "learning_rate": 2.3154235788830757e-05, + "loss": 0.0037, + "step": 69640 + }, + { + "grad_norm": 0.034004341810941696, + "learning_rate": 2.3140287987644927e-05, + "loss": 0.0045, + "step": 69650 + }, + { + "grad_norm": 0.06600568443536758, + "learning_rate": 2.3126343123796833e-05, + "loss": 0.0035, + "step": 69660 + }, + { + "grad_norm": 0.041753754019737244, + "learning_rate": 2.3112401198811418e-05, + "loss": 0.0034, + "step": 69670 + }, + { + "grad_norm": 0.027212245389819145, + "learning_rate": 2.3098462214213418e-05, + "loss": 0.0045, + "step": 69680 + }, + { + "grad_norm": 0.04234969615936279, + "learning_rate": 2.308452617152712e-05, + "loss": 0.005, + "step": 69690 + }, + { + "grad_norm": 0.02771763503551483, + "learning_rate": 2.3070593072276603e-05, + "loss": 0.0034, + "step": 69700 + }, + { + "grad_norm": 0.035303469747304916, + "learning_rate": 2.3056662917985518e-05, + "loss": 0.0029, + "step": 69710 + }, + { + "grad_norm": 0.03234519809484482, + "learning_rate": 2.3042735710177283e-05, + "loss": 0.0036, + "step": 69720 + }, + { + "grad_norm": 0.03805732727050781, + "learning_rate": 2.302881145037492e-05, + "loss": 0.0026, + "step": 69730 + }, + { + "grad_norm": 0.02976806089282036, + "learning_rate": 2.3014890140101192e-05, + "loss": 0.004, + "step": 69740 + }, + { + "grad_norm": 0.035700760781764984, + "learning_rate": 2.30009717808785e-05, + "loss": 0.0034, + "step": 69750 + }, + { + "grad_norm": 0.04309411719441414, + "learning_rate": 2.2987056374228967e-05, + "loss": 0.0054, + "step": 69760 + }, + { + "grad_norm": 0.020390227437019348, + "learning_rate": 2.297314392167431e-05, + "loss": 0.004, + "step": 69770 + }, + { + "grad_norm": 0.03740667179226875, + "learning_rate": 2.295923442473601e-05, + "loss": 0.0034, + "step": 69780 + }, + { + "grad_norm": 0.04787907376885414, + "learning_rate": 2.294532788493519e-05, + "loss": 0.0035, + "step": 69790 + }, + { + "grad_norm": 0.04757307097315788, + "learning_rate": 2.293142430379262e-05, + "loss": 0.0034, + "step": 69800 + }, + { + "grad_norm": 0.037323519587516785, + "learning_rate": 2.291752368282879e-05, + "loss": 0.0038, + "step": 69810 + }, + { + "grad_norm": 0.03269372135400772, + "learning_rate": 2.290362602356385e-05, + "loss": 0.0042, + "step": 69820 + }, + { + "grad_norm": 0.05538884177803993, + "learning_rate": 2.288973132751765e-05, + "loss": 0.0046, + "step": 69830 + }, + { + "grad_norm": 0.0388767309486866, + "learning_rate": 2.287583959620965e-05, + "loss": 0.003, + "step": 69840 + }, + { + "grad_norm": 0.04900595173239708, + "learning_rate": 2.2861950831159074e-05, + "loss": 0.0052, + "step": 69850 + }, + { + "grad_norm": 0.032056666910648346, + "learning_rate": 2.284806503388472e-05, + "loss": 0.004, + "step": 69860 + }, + { + "grad_norm": 0.03059779666364193, + "learning_rate": 2.283418220590518e-05, + "loss": 0.0037, + "step": 69870 + }, + { + "grad_norm": 0.0465676486492157, + "learning_rate": 2.282030234873862e-05, + "loss": 0.0044, + "step": 69880 + }, + { + "grad_norm": 0.046426136046648026, + "learning_rate": 2.2806425463902948e-05, + "loss": 0.0036, + "step": 69890 + }, + { + "grad_norm": 0.02509789727628231, + "learning_rate": 2.279255155291568e-05, + "loss": 0.0043, + "step": 69900 + }, + { + "grad_norm": 0.041445307433605194, + "learning_rate": 2.277868061729409e-05, + "loss": 0.0063, + "step": 69910 + }, + { + "grad_norm": 0.04050598666071892, + "learning_rate": 2.276481265855504e-05, + "loss": 0.0034, + "step": 69920 + }, + { + "grad_norm": 0.01998807117342949, + "learning_rate": 2.2750947678215128e-05, + "loss": 0.0043, + "step": 69930 + }, + { + "grad_norm": 0.05424906313419342, + "learning_rate": 2.2737085677790616e-05, + "loss": 0.0048, + "step": 69940 + }, + { + "grad_norm": 0.030303524807095528, + "learning_rate": 2.2723226658797443e-05, + "loss": 0.0046, + "step": 69950 + }, + { + "grad_norm": 0.03428089618682861, + "learning_rate": 2.270937062275117e-05, + "loss": 0.0039, + "step": 69960 + }, + { + "grad_norm": 0.03963582217693329, + "learning_rate": 2.2695517571167113e-05, + "loss": 0.0059, + "step": 69970 + }, + { + "grad_norm": 0.034732796251773834, + "learning_rate": 2.268166750556019e-05, + "loss": 0.0031, + "step": 69980 + }, + { + "grad_norm": 0.03452399745583534, + "learning_rate": 2.266782042744503e-05, + "loss": 0.0044, + "step": 69990 + }, + { + "grad_norm": 0.023482374846935272, + "learning_rate": 2.2653976338335935e-05, + "loss": 0.0032, + "step": 70000 + }, + { + "grad_norm": 0.05041399598121643, + "learning_rate": 2.2640135239746895e-05, + "loss": 0.0048, + "step": 70010 + }, + { + "grad_norm": 0.028781471773982048, + "learning_rate": 2.2626297133191515e-05, + "loss": 0.005, + "step": 70020 + }, + { + "grad_norm": 0.04196140915155411, + "learning_rate": 2.261246202018312e-05, + "loss": 0.0036, + "step": 70030 + }, + { + "grad_norm": 0.046785324811935425, + "learning_rate": 2.2598629902234718e-05, + "loss": 0.005, + "step": 70040 + }, + { + "grad_norm": 0.03957848250865936, + "learning_rate": 2.258480078085894e-05, + "loss": 0.0038, + "step": 70050 + }, + { + "grad_norm": 0.059720706194639206, + "learning_rate": 2.2570974657568126e-05, + "loss": 0.003, + "step": 70060 + }, + { + "grad_norm": 0.04233299195766449, + "learning_rate": 2.2557151533874286e-05, + "loss": 0.0045, + "step": 70070 + }, + { + "grad_norm": 0.051318347454071045, + "learning_rate": 2.2543331411289116e-05, + "loss": 0.0055, + "step": 70080 + }, + { + "grad_norm": 0.040842533111572266, + "learning_rate": 2.252951429132392e-05, + "loss": 0.0032, + "step": 70090 + }, + { + "grad_norm": 0.043880730867385864, + "learning_rate": 2.2515700175489762e-05, + "loss": 0.0042, + "step": 70100 + }, + { + "grad_norm": 0.051471348851919174, + "learning_rate": 2.2501889065297288e-05, + "loss": 0.0043, + "step": 70110 + }, + { + "grad_norm": 0.04148077219724655, + "learning_rate": 2.2488080962256887e-05, + "loss": 0.0037, + "step": 70120 + }, + { + "grad_norm": 0.04141819477081299, + "learning_rate": 2.2474275867878585e-05, + "loss": 0.0052, + "step": 70130 + }, + { + "grad_norm": 0.033450011163949966, + "learning_rate": 2.246047378367211e-05, + "loss": 0.0035, + "step": 70140 + }, + { + "grad_norm": 0.02670850045979023, + "learning_rate": 2.2446674711146798e-05, + "loss": 0.0027, + "step": 70150 + }, + { + "grad_norm": 0.03546974062919617, + "learning_rate": 2.2432878651811734e-05, + "loss": 0.0039, + "step": 70160 + }, + { + "grad_norm": 0.03885864466428757, + "learning_rate": 2.2419085607175594e-05, + "loss": 0.0036, + "step": 70170 + }, + { + "grad_norm": 0.02695484831929207, + "learning_rate": 2.240529557874678e-05, + "loss": 0.003, + "step": 70180 + }, + { + "grad_norm": 0.037649817764759064, + "learning_rate": 2.2391508568033358e-05, + "loss": 0.0034, + "step": 70190 + }, + { + "grad_norm": 0.03291170299053192, + "learning_rate": 2.237772457654307e-05, + "loss": 0.0029, + "step": 70200 + }, + { + "grad_norm": 0.024051573127508163, + "learning_rate": 2.236394360578327e-05, + "loss": 0.0027, + "step": 70210 + }, + { + "grad_norm": 0.02891591750085354, + "learning_rate": 2.2350165657261073e-05, + "loss": 0.0031, + "step": 70220 + }, + { + "grad_norm": 0.043202709406614304, + "learning_rate": 2.2336390732483164e-05, + "loss": 0.0036, + "step": 70230 + }, + { + "grad_norm": 0.03576742485165596, + "learning_rate": 2.2322618832955976e-05, + "loss": 0.0041, + "step": 70240 + }, + { + "grad_norm": 0.024772416800260544, + "learning_rate": 2.2308849960185578e-05, + "loss": 0.0048, + "step": 70250 + }, + { + "grad_norm": 0.04696573689579964, + "learning_rate": 2.229508411567774e-05, + "loss": 0.0039, + "step": 70260 + }, + { + "grad_norm": 0.036909300833940506, + "learning_rate": 2.2281321300937823e-05, + "loss": 0.0029, + "step": 70270 + }, + { + "grad_norm": 0.040344737470149994, + "learning_rate": 2.2267561517470937e-05, + "loss": 0.0038, + "step": 70280 + }, + { + "grad_norm": 0.05649112910032272, + "learning_rate": 2.2253804766781845e-05, + "loss": 0.0029, + "step": 70290 + }, + { + "grad_norm": 0.032820187509059906, + "learning_rate": 2.2240051050374933e-05, + "loss": 0.0033, + "step": 70300 + }, + { + "grad_norm": 0.02778448536992073, + "learning_rate": 2.22263003697543e-05, + "loss": 0.0043, + "step": 70310 + }, + { + "grad_norm": 0.03906482458114624, + "learning_rate": 2.2212552726423702e-05, + "loss": 0.0033, + "step": 70320 + }, + { + "grad_norm": 0.030401958152651787, + "learning_rate": 2.2198808121886578e-05, + "loss": 0.0026, + "step": 70330 + }, + { + "grad_norm": 0.030450349673628807, + "learning_rate": 2.218506655764598e-05, + "loss": 0.003, + "step": 70340 + }, + { + "grad_norm": 0.029937028884887695, + "learning_rate": 2.2171328035204704e-05, + "loss": 0.0036, + "step": 70350 + }, + { + "grad_norm": 0.038307689130306244, + "learning_rate": 2.2157592556065133e-05, + "loss": 0.0037, + "step": 70360 + }, + { + "grad_norm": 0.027972323819994926, + "learning_rate": 2.2143860121729382e-05, + "loss": 0.003, + "step": 70370 + }, + { + "grad_norm": 0.05410797521471977, + "learning_rate": 2.2130130733699206e-05, + "loss": 0.0036, + "step": 70380 + }, + { + "grad_norm": 0.04358711466193199, + "learning_rate": 2.2116404393476052e-05, + "loss": 0.0026, + "step": 70390 + }, + { + "grad_norm": 0.022520242258906364, + "learning_rate": 2.2102681102560967e-05, + "loss": 0.0029, + "step": 70400 + }, + { + "grad_norm": 0.038133297115564346, + "learning_rate": 2.208896086245476e-05, + "loss": 0.0041, + "step": 70410 + }, + { + "grad_norm": 0.036095015704631805, + "learning_rate": 2.2075243674657808e-05, + "loss": 0.0036, + "step": 70420 + }, + { + "grad_norm": 0.02995947003364563, + "learning_rate": 2.206152954067024e-05, + "loss": 0.0035, + "step": 70430 + }, + { + "grad_norm": 0.028080571442842484, + "learning_rate": 2.2047818461991758e-05, + "loss": 0.0026, + "step": 70440 + }, + { + "grad_norm": 0.03131457045674324, + "learning_rate": 2.2034110440121864e-05, + "loss": 0.0039, + "step": 70450 + }, + { + "grad_norm": 0.02901502698659897, + "learning_rate": 2.2020405476559585e-05, + "loss": 0.0033, + "step": 70460 + }, + { + "grad_norm": 0.0340389646589756, + "learning_rate": 2.2006703572803722e-05, + "loss": 0.0049, + "step": 70470 + }, + { + "grad_norm": 0.03333403915166855, + "learning_rate": 2.1993004730352646e-05, + "loss": 0.0046, + "step": 70480 + }, + { + "grad_norm": 0.05657069385051727, + "learning_rate": 2.1979308950704464e-05, + "loss": 0.0038, + "step": 70490 + }, + { + "grad_norm": 0.030677003785967827, + "learning_rate": 2.1965616235356923e-05, + "loss": 0.0039, + "step": 70500 + }, + { + "grad_norm": 0.029670411720871925, + "learning_rate": 2.1951926585807465e-05, + "loss": 0.003, + "step": 70510 + }, + { + "grad_norm": 0.027285438030958176, + "learning_rate": 2.193824000355313e-05, + "loss": 0.0035, + "step": 70520 + }, + { + "grad_norm": 0.0361507311463356, + "learning_rate": 2.192455649009067e-05, + "loss": 0.003, + "step": 70530 + }, + { + "grad_norm": 0.028658736497163773, + "learning_rate": 2.1910876046916523e-05, + "loss": 0.0028, + "step": 70540 + }, + { + "grad_norm": 0.03332521766424179, + "learning_rate": 2.1897198675526725e-05, + "loss": 0.0027, + "step": 70550 + }, + { + "grad_norm": 0.04406587779521942, + "learning_rate": 2.1883524377417024e-05, + "loss": 0.0033, + "step": 70560 + }, + { + "grad_norm": 0.038199570029973984, + "learning_rate": 2.1869853154082825e-05, + "loss": 0.0033, + "step": 70570 + }, + { + "grad_norm": 0.0408095046877861, + "learning_rate": 2.1856185007019215e-05, + "loss": 0.0045, + "step": 70580 + }, + { + "grad_norm": 0.04024054855108261, + "learning_rate": 2.1842519937720874e-05, + "loss": 0.0037, + "step": 70590 + }, + { + "grad_norm": 0.04376133531332016, + "learning_rate": 2.182885794768224e-05, + "loss": 0.004, + "step": 70600 + }, + { + "grad_norm": 0.04603923112154007, + "learning_rate": 2.1815199038397326e-05, + "loss": 0.0029, + "step": 70610 + }, + { + "grad_norm": 0.05104438215494156, + "learning_rate": 2.1801543211359893e-05, + "loss": 0.0033, + "step": 70620 + }, + { + "grad_norm": 0.045301299542188644, + "learning_rate": 2.178789046806326e-05, + "loss": 0.0047, + "step": 70630 + }, + { + "grad_norm": 0.04330070689320564, + "learning_rate": 2.1774240810000547e-05, + "loss": 0.0044, + "step": 70640 + }, + { + "grad_norm": 0.04983311519026756, + "learning_rate": 2.17605942386644e-05, + "loss": 0.0032, + "step": 70650 + }, + { + "grad_norm": 0.065918929874897, + "learning_rate": 2.174695075554723e-05, + "loss": 0.0035, + "step": 70660 + }, + { + "grad_norm": 0.05063241720199585, + "learning_rate": 2.173331036214103e-05, + "loss": 0.0032, + "step": 70670 + }, + { + "grad_norm": 0.02559010311961174, + "learning_rate": 2.171967305993753e-05, + "loss": 0.0046, + "step": 70680 + }, + { + "grad_norm": 0.03281001001596451, + "learning_rate": 2.1706038850428024e-05, + "loss": 0.0028, + "step": 70690 + }, + { + "grad_norm": 0.03692471981048584, + "learning_rate": 2.169240773510361e-05, + "loss": 0.0036, + "step": 70700 + }, + { + "grad_norm": 0.045206159353256226, + "learning_rate": 2.1678779715454906e-05, + "loss": 0.0041, + "step": 70710 + }, + { + "grad_norm": 0.03838447853922844, + "learning_rate": 2.166515479297229e-05, + "loss": 0.0044, + "step": 70720 + }, + { + "grad_norm": 0.03247047960758209, + "learning_rate": 2.1651532969145732e-05, + "loss": 0.0029, + "step": 70730 + }, + { + "grad_norm": 0.027404988184571266, + "learning_rate": 2.1637914245464903e-05, + "loss": 0.003, + "step": 70740 + }, + { + "grad_norm": 0.03268512338399887, + "learning_rate": 2.1624298623419133e-05, + "loss": 0.0034, + "step": 70750 + }, + { + "grad_norm": 0.03239411115646362, + "learning_rate": 2.1610686104497413e-05, + "loss": 0.0043, + "step": 70760 + }, + { + "grad_norm": 0.04644099995493889, + "learning_rate": 2.1597076690188366e-05, + "loss": 0.0032, + "step": 70770 + }, + { + "grad_norm": 0.03715863823890686, + "learning_rate": 2.1583470381980315e-05, + "loss": 0.0032, + "step": 70780 + }, + { + "grad_norm": 0.044427476823329926, + "learning_rate": 2.156986718136123e-05, + "loss": 0.0053, + "step": 70790 + }, + { + "grad_norm": 0.024724192917346954, + "learning_rate": 2.155626708981871e-05, + "loss": 0.0027, + "step": 70800 + }, + { + "grad_norm": 0.029755206778645515, + "learning_rate": 2.1542670108840075e-05, + "loss": 0.0042, + "step": 70810 + }, + { + "grad_norm": 0.02963378094136715, + "learning_rate": 2.152907623991221e-05, + "loss": 0.0035, + "step": 70820 + }, + { + "grad_norm": 0.02887066826224327, + "learning_rate": 2.1515485484521802e-05, + "loss": 0.0039, + "step": 70830 + }, + { + "grad_norm": 0.02610844373703003, + "learning_rate": 2.150189784415506e-05, + "loss": 0.0048, + "step": 70840 + }, + { + "grad_norm": 0.026804283261299133, + "learning_rate": 2.148831332029794e-05, + "loss": 0.0044, + "step": 70850 + }, + { + "grad_norm": 0.047368865460157394, + "learning_rate": 2.147473191443599e-05, + "loss": 0.0037, + "step": 70860 + }, + { + "grad_norm": 0.028957920148968697, + "learning_rate": 2.1461153628054492e-05, + "loss": 0.0029, + "step": 70870 + }, + { + "grad_norm": 0.04903459921479225, + "learning_rate": 2.1447578462638284e-05, + "loss": 0.0033, + "step": 70880 + }, + { + "grad_norm": 0.04528622701764107, + "learning_rate": 2.143400641967201e-05, + "loss": 0.0041, + "step": 70890 + }, + { + "grad_norm": 0.03153287619352341, + "learning_rate": 2.1420437500639823e-05, + "loss": 0.0029, + "step": 70900 + }, + { + "grad_norm": 0.024356968700885773, + "learning_rate": 2.1406871707025646e-05, + "loss": 0.0029, + "step": 70910 + }, + { + "grad_norm": 0.01790609396994114, + "learning_rate": 2.1393309040312963e-05, + "loss": 0.0036, + "step": 70920 + }, + { + "grad_norm": 0.02831471711397171, + "learning_rate": 2.137974950198502e-05, + "loss": 0.0027, + "step": 70930 + }, + { + "grad_norm": 0.04246557503938675, + "learning_rate": 2.1366193093524617e-05, + "loss": 0.0033, + "step": 70940 + }, + { + "grad_norm": 0.02829654887318611, + "learning_rate": 2.1352639816414288e-05, + "loss": 0.0044, + "step": 70950 + }, + { + "grad_norm": 0.03631947934627533, + "learning_rate": 2.1339089672136202e-05, + "loss": 0.0036, + "step": 70960 + }, + { + "grad_norm": 0.041845232248306274, + "learning_rate": 2.1325542662172193e-05, + "loss": 0.0043, + "step": 70970 + }, + { + "grad_norm": 0.03882572427392006, + "learning_rate": 2.1311998788003712e-05, + "loss": 0.0042, + "step": 70980 + }, + { + "grad_norm": 0.03719785436987877, + "learning_rate": 2.1298458051111907e-05, + "loss": 0.0038, + "step": 70990 + }, + { + "grad_norm": 0.03324272111058235, + "learning_rate": 2.1284920452977602e-05, + "loss": 0.0048, + "step": 71000 + }, + { + "grad_norm": 0.03165978938341141, + "learning_rate": 2.12713859950812e-05, + "loss": 0.0037, + "step": 71010 + }, + { + "grad_norm": 0.039791788905858994, + "learning_rate": 2.1257854678902832e-05, + "loss": 0.0042, + "step": 71020 + }, + { + "grad_norm": 0.05306296795606613, + "learning_rate": 2.124432650592227e-05, + "loss": 0.003, + "step": 71030 + }, + { + "grad_norm": 0.038112372159957886, + "learning_rate": 2.1230801477618937e-05, + "loss": 0.0034, + "step": 71040 + }, + { + "grad_norm": 0.04138922318816185, + "learning_rate": 2.121727959547189e-05, + "loss": 0.0028, + "step": 71050 + }, + { + "grad_norm": 0.024926532059907913, + "learning_rate": 2.1203760860959886e-05, + "loss": 0.0031, + "step": 71060 + }, + { + "grad_norm": 0.05521383509039879, + "learning_rate": 2.119024527556127e-05, + "loss": 0.0044, + "step": 71070 + }, + { + "grad_norm": 0.034106917679309845, + "learning_rate": 2.117673284075415e-05, + "loss": 0.0042, + "step": 71080 + }, + { + "grad_norm": 0.04268515855073929, + "learning_rate": 2.1163223558016165e-05, + "loss": 0.0042, + "step": 71090 + }, + { + "grad_norm": 0.03635011985898018, + "learning_rate": 2.1149717428824722e-05, + "loss": 0.0054, + "step": 71100 + }, + { + "grad_norm": 0.05130597949028015, + "learning_rate": 2.1136214454656784e-05, + "loss": 0.0072, + "step": 71110 + }, + { + "grad_norm": 0.030850030481815338, + "learning_rate": 2.1122714636989056e-05, + "loss": 0.0031, + "step": 71120 + }, + { + "grad_norm": 0.03358350321650505, + "learning_rate": 2.1109217977297823e-05, + "loss": 0.0033, + "step": 71130 + }, + { + "grad_norm": 0.02401147037744522, + "learning_rate": 2.1095724477059077e-05, + "loss": 0.0047, + "step": 71140 + }, + { + "grad_norm": 0.03390485420823097, + "learning_rate": 2.1082234137748434e-05, + "loss": 0.0039, + "step": 71150 + }, + { + "grad_norm": 0.03989917039871216, + "learning_rate": 2.106874696084122e-05, + "loss": 0.0058, + "step": 71160 + }, + { + "grad_norm": 0.043634407222270966, + "learning_rate": 2.1055262947812315e-05, + "loss": 0.0036, + "step": 71170 + }, + { + "grad_norm": 0.03223537653684616, + "learning_rate": 2.104178210013636e-05, + "loss": 0.0039, + "step": 71180 + }, + { + "grad_norm": 0.023838797584176064, + "learning_rate": 2.1028304419287554e-05, + "loss": 0.0026, + "step": 71190 + }, + { + "grad_norm": 0.020164750516414642, + "learning_rate": 2.101482990673982e-05, + "loss": 0.0027, + "step": 71200 + }, + { + "grad_norm": 0.02621466852724552, + "learning_rate": 2.1001358563966707e-05, + "loss": 0.0031, + "step": 71210 + }, + { + "grad_norm": 0.05061144754290581, + "learning_rate": 2.098789039244144e-05, + "loss": 0.0056, + "step": 71220 + }, + { + "grad_norm": 0.03752436488866806, + "learning_rate": 2.097442539363685e-05, + "loss": 0.0052, + "step": 71230 + }, + { + "grad_norm": 0.06291015446186066, + "learning_rate": 2.0960963569025448e-05, + "loss": 0.0048, + "step": 71240 + }, + { + "grad_norm": 0.032237470149993896, + "learning_rate": 2.0947504920079435e-05, + "loss": 0.0031, + "step": 71250 + }, + { + "grad_norm": 0.05510765686631203, + "learning_rate": 2.093404944827059e-05, + "loss": 0.0048, + "step": 71260 + }, + { + "grad_norm": 0.06158433482050896, + "learning_rate": 2.0920597155070394e-05, + "loss": 0.0047, + "step": 71270 + }, + { + "grad_norm": 0.05121220648288727, + "learning_rate": 2.090714804194997e-05, + "loss": 0.005, + "step": 71280 + }, + { + "grad_norm": 0.077989861369133, + "learning_rate": 2.0893702110380114e-05, + "loss": 0.0046, + "step": 71290 + }, + { + "grad_norm": 0.07609426230192184, + "learning_rate": 2.0880259361831216e-05, + "loss": 0.0048, + "step": 71300 + }, + { + "grad_norm": 0.03830759599804878, + "learning_rate": 2.0866819797773397e-05, + "loss": 0.003, + "step": 71310 + }, + { + "grad_norm": 0.027091888710856438, + "learning_rate": 2.085338341967634e-05, + "loss": 0.0022, + "step": 71320 + }, + { + "grad_norm": 0.021391427144408226, + "learning_rate": 2.083995022900946e-05, + "loss": 0.0025, + "step": 71330 + }, + { + "grad_norm": 0.0358424186706543, + "learning_rate": 2.0826520227241768e-05, + "loss": 0.0052, + "step": 71340 + }, + { + "grad_norm": 0.05275844410061836, + "learning_rate": 2.081309341584199e-05, + "loss": 0.0037, + "step": 71350 + }, + { + "grad_norm": 0.039208415895700455, + "learning_rate": 2.0799669796278413e-05, + "loss": 0.0031, + "step": 71360 + }, + { + "grad_norm": 0.06274636834859848, + "learning_rate": 2.078624937001906e-05, + "loss": 0.0042, + "step": 71370 + }, + { + "grad_norm": 0.03780581057071686, + "learning_rate": 2.0772832138531544e-05, + "loss": 0.0029, + "step": 71380 + }, + { + "grad_norm": 0.045922089368104935, + "learning_rate": 2.0759418103283158e-05, + "loss": 0.0039, + "step": 71390 + }, + { + "grad_norm": 0.042825110256671906, + "learning_rate": 2.0746007265740846e-05, + "loss": 0.0044, + "step": 71400 + }, + { + "grad_norm": 0.03727151080965996, + "learning_rate": 2.0732599627371215e-05, + "loss": 0.0041, + "step": 71410 + }, + { + "grad_norm": 0.04779690504074097, + "learning_rate": 2.0719195189640468e-05, + "loss": 0.003, + "step": 71420 + }, + { + "grad_norm": 0.04375709220767021, + "learning_rate": 2.0705793954014528e-05, + "loss": 0.0034, + "step": 71430 + }, + { + "grad_norm": 0.034156642854213715, + "learning_rate": 2.06923959219589e-05, + "loss": 0.0032, + "step": 71440 + }, + { + "grad_norm": 0.05198074132204056, + "learning_rate": 2.0679001094938793e-05, + "loss": 0.0036, + "step": 71450 + }, + { + "grad_norm": 0.029319263994693756, + "learning_rate": 2.066560947441904e-05, + "loss": 0.0021, + "step": 71460 + }, + { + "grad_norm": 0.030629336833953857, + "learning_rate": 2.0652221061864152e-05, + "loss": 0.0028, + "step": 71470 + }, + { + "grad_norm": 0.03013657219707966, + "learning_rate": 2.0638835858738226e-05, + "loss": 0.0028, + "step": 71480 + }, + { + "grad_norm": 0.03413126617670059, + "learning_rate": 2.0625453866505068e-05, + "loss": 0.0025, + "step": 71490 + }, + { + "grad_norm": 0.04023457691073418, + "learning_rate": 2.0612075086628123e-05, + "loss": 0.0033, + "step": 71500 + }, + { + "grad_norm": 0.050412505865097046, + "learning_rate": 2.059869952057045e-05, + "loss": 0.0037, + "step": 71510 + }, + { + "grad_norm": 0.03771508112549782, + "learning_rate": 2.0585327169794793e-05, + "loss": 0.0034, + "step": 71520 + }, + { + "grad_norm": 0.030093137174844742, + "learning_rate": 2.0571958035763534e-05, + "loss": 0.003, + "step": 71530 + }, + { + "grad_norm": 0.02517816796898842, + "learning_rate": 2.0558592119938715e-05, + "loss": 0.0035, + "step": 71540 + }, + { + "grad_norm": 0.03619414195418358, + "learning_rate": 2.054522942378198e-05, + "loss": 0.0047, + "step": 71550 + }, + { + "grad_norm": 0.04315018281340599, + "learning_rate": 2.053186994875469e-05, + "loss": 0.003, + "step": 71560 + }, + { + "grad_norm": 0.03167184069752693, + "learning_rate": 2.0518513696317775e-05, + "loss": 0.0028, + "step": 71570 + }, + { + "grad_norm": 0.03583882376551628, + "learning_rate": 2.050516066793188e-05, + "loss": 0.0026, + "step": 71580 + }, + { + "grad_norm": 0.03151467815041542, + "learning_rate": 2.0491810865057276e-05, + "loss": 0.0039, + "step": 71590 + }, + { + "grad_norm": 0.02974812686443329, + "learning_rate": 2.0478464289153882e-05, + "loss": 0.0033, + "step": 71600 + }, + { + "grad_norm": 0.03875359147787094, + "learning_rate": 2.0465120941681232e-05, + "loss": 0.0025, + "step": 71610 + }, + { + "grad_norm": 0.017591135576367378, + "learning_rate": 2.0451780824098566e-05, + "loss": 0.0018, + "step": 71620 + }, + { + "grad_norm": 0.03728962689638138, + "learning_rate": 2.043844393786471e-05, + "loss": 0.0052, + "step": 71630 + }, + { + "grad_norm": 0.03639984130859375, + "learning_rate": 2.0425110284438193e-05, + "loss": 0.0028, + "step": 71640 + }, + { + "grad_norm": 0.047724079340696335, + "learning_rate": 2.041177986527712e-05, + "loss": 0.0036, + "step": 71650 + }, + { + "grad_norm": 0.028877735137939453, + "learning_rate": 2.0398452681839347e-05, + "loss": 0.0033, + "step": 71660 + }, + { + "grad_norm": 0.04382212460041046, + "learning_rate": 2.0385128735582272e-05, + "loss": 0.0039, + "step": 71670 + }, + { + "grad_norm": 0.061591677367687225, + "learning_rate": 2.0371808027963003e-05, + "loss": 0.0044, + "step": 71680 + }, + { + "grad_norm": 0.03825860098004341, + "learning_rate": 2.035849056043825e-05, + "loss": 0.0029, + "step": 71690 + }, + { + "grad_norm": 0.05010126158595085, + "learning_rate": 2.03451763344644e-05, + "loss": 0.0043, + "step": 71700 + }, + { + "grad_norm": 0.034077152609825134, + "learning_rate": 2.0331865351497482e-05, + "loss": 0.0033, + "step": 71710 + }, + { + "grad_norm": 0.03136353939771652, + "learning_rate": 2.0318557612993185e-05, + "loss": 0.0028, + "step": 71720 + }, + { + "grad_norm": 0.0458749495446682, + "learning_rate": 2.0305253120406782e-05, + "loss": 0.004, + "step": 71730 + }, + { + "grad_norm": 0.0361468531191349, + "learning_rate": 2.029195187519325e-05, + "loss": 0.0035, + "step": 71740 + }, + { + "grad_norm": 0.03115610033273697, + "learning_rate": 2.0278653878807217e-05, + "loss": 0.0019, + "step": 71750 + }, + { + "grad_norm": 0.037072546780109406, + "learning_rate": 2.0265359132702892e-05, + "loss": 0.0028, + "step": 71760 + }, + { + "grad_norm": 0.04979758709669113, + "learning_rate": 2.0252067638334182e-05, + "loss": 0.0034, + "step": 71770 + }, + { + "grad_norm": 0.032345980405807495, + "learning_rate": 2.023877939715464e-05, + "loss": 0.004, + "step": 71780 + }, + { + "grad_norm": 0.037674374878406525, + "learning_rate": 2.0225494410617448e-05, + "loss": 0.003, + "step": 71790 + }, + { + "grad_norm": 0.02395811676979065, + "learning_rate": 2.02122126801754e-05, + "loss": 0.0026, + "step": 71800 + }, + { + "grad_norm": 0.04970292001962662, + "learning_rate": 2.0198934207281018e-05, + "loss": 0.0031, + "step": 71810 + }, + { + "grad_norm": 0.040255140513181686, + "learning_rate": 2.0185658993386357e-05, + "loss": 0.0043, + "step": 71820 + }, + { + "grad_norm": 0.056485679000616074, + "learning_rate": 2.0172387039943226e-05, + "loss": 0.0038, + "step": 71830 + }, + { + "grad_norm": 0.0355655811727047, + "learning_rate": 2.0159118348402968e-05, + "loss": 0.0039, + "step": 71840 + }, + { + "grad_norm": 0.028621327131986618, + "learning_rate": 2.0145852920216697e-05, + "loss": 0.0038, + "step": 71850 + }, + { + "grad_norm": 0.027739141136407852, + "learning_rate": 2.0132590756835045e-05, + "loss": 0.0032, + "step": 71860 + }, + { + "grad_norm": 0.02184925228357315, + "learning_rate": 2.0119331859708386e-05, + "loss": 0.0035, + "step": 71870 + }, + { + "grad_norm": 0.04774085059762001, + "learning_rate": 2.0106076230286654e-05, + "loss": 0.0034, + "step": 71880 + }, + { + "grad_norm": 0.03312782570719719, + "learning_rate": 2.0092823870019488e-05, + "loss": 0.0022, + "step": 71890 + }, + { + "grad_norm": 0.03196477144956589, + "learning_rate": 2.0079574780356115e-05, + "loss": 0.0047, + "step": 71900 + }, + { + "grad_norm": 0.03690231591463089, + "learning_rate": 2.0066328962745486e-05, + "loss": 0.0034, + "step": 71910 + }, + { + "grad_norm": 0.029655206948518753, + "learning_rate": 2.00530864186361e-05, + "loss": 0.0024, + "step": 71920 + }, + { + "grad_norm": 0.04205987602472305, + "learning_rate": 2.003984714947618e-05, + "loss": 0.0031, + "step": 71930 + }, + { + "grad_norm": 0.0264968890696764, + "learning_rate": 2.0026611156713515e-05, + "loss": 0.0024, + "step": 71940 + }, + { + "grad_norm": 0.023684827610850334, + "learning_rate": 2.0013378441795584e-05, + "loss": 0.0036, + "step": 71950 + }, + { + "grad_norm": 0.03600889444351196, + "learning_rate": 2.0000149006169506e-05, + "loss": 0.0037, + "step": 71960 + }, + { + "grad_norm": 0.03311685845255852, + "learning_rate": 1.9986922851282048e-05, + "loss": 0.0038, + "step": 71970 + }, + { + "grad_norm": 0.029691074043512344, + "learning_rate": 1.997369997857956e-05, + "loss": 0.0026, + "step": 71980 + }, + { + "grad_norm": 0.03362475335597992, + "learning_rate": 1.9960480389508103e-05, + "loss": 0.0033, + "step": 71990 + }, + { + "grad_norm": 0.028343113139271736, + "learning_rate": 1.9947264085513363e-05, + "loss": 0.003, + "step": 72000 + }, + { + "grad_norm": 0.027550319209694862, + "learning_rate": 1.993405106804062e-05, + "loss": 0.0027, + "step": 72010 + }, + { + "grad_norm": 0.03136974200606346, + "learning_rate": 1.9920841338534867e-05, + "loss": 0.004, + "step": 72020 + }, + { + "grad_norm": 0.046035055071115494, + "learning_rate": 1.9907634898440646e-05, + "loss": 0.0028, + "step": 72030 + }, + { + "grad_norm": 0.04082230478525162, + "learning_rate": 1.989443174920227e-05, + "loss": 0.0039, + "step": 72040 + }, + { + "grad_norm": 0.02649669721722603, + "learning_rate": 1.988123189226355e-05, + "loss": 0.0026, + "step": 72050 + }, + { + "grad_norm": 0.04409874603152275, + "learning_rate": 1.986803532906805e-05, + "loss": 0.0031, + "step": 72060 + }, + { + "grad_norm": 0.040092989802360535, + "learning_rate": 1.985484206105889e-05, + "loss": 0.0021, + "step": 72070 + }, + { + "grad_norm": 0.034445371478796005, + "learning_rate": 1.9841652089678887e-05, + "loss": 0.0034, + "step": 72080 + }, + { + "grad_norm": 0.036357901990413666, + "learning_rate": 1.9828465416370435e-05, + "loss": 0.0043, + "step": 72090 + }, + { + "grad_norm": 0.034097690135240555, + "learning_rate": 1.9815282042575685e-05, + "loss": 0.0042, + "step": 72100 + }, + { + "grad_norm": 0.02534273825585842, + "learning_rate": 1.9802101969736285e-05, + "loss": 0.0043, + "step": 72110 + }, + { + "grad_norm": 0.031661953777074814, + "learning_rate": 1.9788925199293633e-05, + "loss": 0.003, + "step": 72120 + }, + { + "grad_norm": 0.03140539675951004, + "learning_rate": 1.977575173268868e-05, + "loss": 0.0052, + "step": 72130 + }, + { + "grad_norm": 0.03284488245844841, + "learning_rate": 1.9762581571362094e-05, + "loss": 0.0033, + "step": 72140 + }, + { + "grad_norm": 0.0435405895113945, + "learning_rate": 1.9749414716754112e-05, + "loss": 0.0032, + "step": 72150 + }, + { + "grad_norm": 0.025253232568502426, + "learning_rate": 1.973625117030465e-05, + "loss": 0.0026, + "step": 72160 + }, + { + "grad_norm": 0.029446903616189957, + "learning_rate": 1.972309093345326e-05, + "loss": 0.0029, + "step": 72170 + }, + { + "grad_norm": 0.0360327772796154, + "learning_rate": 1.970993400763915e-05, + "loss": 0.0034, + "step": 72180 + }, + { + "grad_norm": 0.03593166917562485, + "learning_rate": 1.969678039430109e-05, + "loss": 0.0066, + "step": 72190 + }, + { + "grad_norm": 0.03158038482069969, + "learning_rate": 1.9683630094877564e-05, + "loss": 0.0055, + "step": 72200 + }, + { + "grad_norm": 0.03681159019470215, + "learning_rate": 1.9670483110806696e-05, + "loss": 0.0037, + "step": 72210 + }, + { + "grad_norm": 0.042863667011260986, + "learning_rate": 1.9657339443526172e-05, + "loss": 0.003, + "step": 72220 + }, + { + "grad_norm": 0.03723262622952461, + "learning_rate": 1.964419909447339e-05, + "loss": 0.0037, + "step": 72230 + }, + { + "grad_norm": 0.02462133578956127, + "learning_rate": 1.963106206508536e-05, + "loss": 0.003, + "step": 72240 + }, + { + "grad_norm": 0.03325935825705528, + "learning_rate": 1.9617928356798738e-05, + "loss": 0.0033, + "step": 72250 + }, + { + "grad_norm": 0.0375698022544384, + "learning_rate": 1.960479797104977e-05, + "loss": 0.0036, + "step": 72260 + }, + { + "grad_norm": 0.033279601484537125, + "learning_rate": 1.9591670909274418e-05, + "loss": 0.0046, + "step": 72270 + }, + { + "grad_norm": 0.030780628323554993, + "learning_rate": 1.9578547172908184e-05, + "loss": 0.0032, + "step": 72280 + }, + { + "grad_norm": 0.03743116185069084, + "learning_rate": 1.9565426763386325e-05, + "loss": 0.0027, + "step": 72290 + }, + { + "grad_norm": 0.05124358832836151, + "learning_rate": 1.9552309682143622e-05, + "loss": 0.0031, + "step": 72300 + }, + { + "grad_norm": 0.04305484518408775, + "learning_rate": 1.9539195930614574e-05, + "loss": 0.0056, + "step": 72310 + }, + { + "grad_norm": 0.037119023501873016, + "learning_rate": 1.952608551023324e-05, + "loss": 0.0022, + "step": 72320 + }, + { + "grad_norm": 0.052073895931243896, + "learning_rate": 1.9512978422433397e-05, + "loss": 0.0051, + "step": 72330 + }, + { + "grad_norm": 0.03006785921752453, + "learning_rate": 1.9499874668648376e-05, + "loss": 0.0041, + "step": 72340 + }, + { + "grad_norm": 0.06355267763137817, + "learning_rate": 1.9486774250311213e-05, + "loss": 0.0036, + "step": 72350 + }, + { + "grad_norm": 0.042394138872623444, + "learning_rate": 1.9473677168854537e-05, + "loss": 0.0031, + "step": 72360 + }, + { + "grad_norm": 0.03502591699361801, + "learning_rate": 1.946058342571065e-05, + "loss": 0.0048, + "step": 72370 + }, + { + "grad_norm": 0.03970660641789436, + "learning_rate": 1.9447493022311424e-05, + "loss": 0.0056, + "step": 72380 + }, + { + "grad_norm": 0.03932706639170647, + "learning_rate": 1.9434405960088442e-05, + "loss": 0.0034, + "step": 72390 + }, + { + "grad_norm": 0.03588807210326195, + "learning_rate": 1.9421322240472857e-05, + "loss": 0.0022, + "step": 72400 + }, + { + "grad_norm": 0.03085373155772686, + "learning_rate": 1.9408241864895494e-05, + "loss": 0.003, + "step": 72410 + }, + { + "grad_norm": 0.041942890733480453, + "learning_rate": 1.939516483478681e-05, + "loss": 0.003, + "step": 72420 + }, + { + "grad_norm": 0.0493505597114563, + "learning_rate": 1.9382091151576902e-05, + "loss": 0.0039, + "step": 72430 + }, + { + "grad_norm": 0.036474715918302536, + "learning_rate": 1.9369020816695454e-05, + "loss": 0.0053, + "step": 72440 + }, + { + "grad_norm": 0.03461694344878197, + "learning_rate": 1.935595383157184e-05, + "loss": 0.0039, + "step": 72450 + }, + { + "grad_norm": 0.04044966399669647, + "learning_rate": 1.9342890197635057e-05, + "loss": 0.0027, + "step": 72460 + }, + { + "grad_norm": 0.03589116781949997, + "learning_rate": 1.9329829916313685e-05, + "loss": 0.0028, + "step": 72470 + }, + { + "grad_norm": 0.03896504268050194, + "learning_rate": 1.9316772989036013e-05, + "loss": 0.0043, + "step": 72480 + }, + { + "grad_norm": 0.04480453208088875, + "learning_rate": 1.930371941722991e-05, + "loss": 0.003, + "step": 72490 + }, + { + "grad_norm": 0.029981547966599464, + "learning_rate": 1.929066920232292e-05, + "loss": 0.0036, + "step": 72500 + }, + { + "grad_norm": 0.03788768872618675, + "learning_rate": 1.9277622345742152e-05, + "loss": 0.0031, + "step": 72510 + }, + { + "grad_norm": 0.04824627563357353, + "learning_rate": 1.926457884891444e-05, + "loss": 0.003, + "step": 72520 + }, + { + "grad_norm": 0.04303891211748123, + "learning_rate": 1.9251538713266147e-05, + "loss": 0.0054, + "step": 72530 + }, + { + "grad_norm": 0.035458289086818695, + "learning_rate": 1.9238501940223358e-05, + "loss": 0.0031, + "step": 72540 + }, + { + "grad_norm": 0.028899697586894035, + "learning_rate": 1.922546853121175e-05, + "loss": 0.0038, + "step": 72550 + }, + { + "grad_norm": 0.06490504741668701, + "learning_rate": 1.9212438487656647e-05, + "loss": 0.0034, + "step": 72560 + }, + { + "grad_norm": 0.03254346549510956, + "learning_rate": 1.9199411810982965e-05, + "loss": 0.0025, + "step": 72570 + }, + { + "grad_norm": 0.05909965559840202, + "learning_rate": 1.918638850261532e-05, + "loss": 0.0037, + "step": 72580 + }, + { + "grad_norm": 0.04263681545853615, + "learning_rate": 1.9173368563977885e-05, + "loss": 0.0038, + "step": 72590 + }, + { + "grad_norm": 0.03989231213927269, + "learning_rate": 1.9160351996494513e-05, + "loss": 0.0034, + "step": 72600 + }, + { + "grad_norm": 0.04351562634110451, + "learning_rate": 1.9147338801588677e-05, + "loss": 0.0037, + "step": 72610 + }, + { + "grad_norm": 0.03199835121631622, + "learning_rate": 1.913432898068351e-05, + "loss": 0.0029, + "step": 72620 + }, + { + "grad_norm": 0.043180279433727264, + "learning_rate": 1.912132253520169e-05, + "loss": 0.0033, + "step": 72630 + }, + { + "grad_norm": 0.03917333111166954, + "learning_rate": 1.910831946656564e-05, + "loss": 0.0028, + "step": 72640 + }, + { + "grad_norm": 0.032194897532463074, + "learning_rate": 1.909531977619731e-05, + "loss": 0.0033, + "step": 72650 + }, + { + "grad_norm": 0.05850451439619064, + "learning_rate": 1.908232346551834e-05, + "loss": 0.0033, + "step": 72660 + }, + { + "grad_norm": 0.0426018200814724, + "learning_rate": 1.9069330535949993e-05, + "loss": 0.0038, + "step": 72670 + }, + { + "grad_norm": 0.02700316347181797, + "learning_rate": 1.9056340988913167e-05, + "loss": 0.0025, + "step": 72680 + }, + { + "grad_norm": 0.03695647791028023, + "learning_rate": 1.9043354825828357e-05, + "loss": 0.0038, + "step": 72690 + }, + { + "grad_norm": 0.040645383298397064, + "learning_rate": 1.9030372048115724e-05, + "loss": 0.0028, + "step": 72700 + }, + { + "grad_norm": 0.03989734500646591, + "learning_rate": 1.9017392657195056e-05, + "loss": 0.0032, + "step": 72710 + }, + { + "grad_norm": 0.036866944283246994, + "learning_rate": 1.9004416654485724e-05, + "loss": 0.0027, + "step": 72720 + }, + { + "grad_norm": 0.04397895932197571, + "learning_rate": 1.8991444041406785e-05, + "loss": 0.0038, + "step": 72730 + }, + { + "grad_norm": 0.03438238054513931, + "learning_rate": 1.8978474819376912e-05, + "loss": 0.0039, + "step": 72740 + }, + { + "grad_norm": 0.032630763947963715, + "learning_rate": 1.8965508989814406e-05, + "loss": 0.0041, + "step": 72750 + }, + { + "grad_norm": 0.0417344756424427, + "learning_rate": 1.8952546554137158e-05, + "loss": 0.0057, + "step": 72760 + }, + { + "grad_norm": 0.030684463679790497, + "learning_rate": 1.8939587513762757e-05, + "loss": 0.0046, + "step": 72770 + }, + { + "grad_norm": 0.03202876076102257, + "learning_rate": 1.892663187010835e-05, + "loss": 0.0035, + "step": 72780 + }, + { + "grad_norm": 0.04778125882148743, + "learning_rate": 1.891367962459077e-05, + "loss": 0.0034, + "step": 72790 + }, + { + "grad_norm": 0.04069212079048157, + "learning_rate": 1.890073077862644e-05, + "loss": 0.0037, + "step": 72800 + }, + { + "grad_norm": 0.04275602847337723, + "learning_rate": 1.888778533363146e-05, + "loss": 0.0034, + "step": 72810 + }, + { + "grad_norm": 0.04135540500283241, + "learning_rate": 1.8874843291021478e-05, + "loss": 0.0042, + "step": 72820 + }, + { + "grad_norm": 0.02320028841495514, + "learning_rate": 1.8861904652211853e-05, + "loss": 0.0043, + "step": 72830 + }, + { + "grad_norm": 0.024409543722867966, + "learning_rate": 1.8848969418617507e-05, + "loss": 0.0027, + "step": 72840 + }, + { + "grad_norm": 0.026865852996706963, + "learning_rate": 1.8836037591653044e-05, + "loss": 0.0036, + "step": 72850 + }, + { + "grad_norm": 0.037271372973918915, + "learning_rate": 1.882310917273262e-05, + "loss": 0.0053, + "step": 72860 + }, + { + "grad_norm": 0.053708452731370926, + "learning_rate": 1.881018416327014e-05, + "loss": 0.0061, + "step": 72870 + }, + { + "grad_norm": 0.044439125806093216, + "learning_rate": 1.8797262564679014e-05, + "loss": 0.0035, + "step": 72880 + }, + { + "grad_norm": 0.024104351177811623, + "learning_rate": 1.8784344378372355e-05, + "loss": 0.0027, + "step": 72890 + }, + { + "grad_norm": 0.030880305916070938, + "learning_rate": 1.8771429605762843e-05, + "loss": 0.0018, + "step": 72900 + }, + { + "grad_norm": 0.0553271658718586, + "learning_rate": 1.8758518248262835e-05, + "loss": 0.0039, + "step": 72910 + }, + { + "grad_norm": 0.02675025351345539, + "learning_rate": 1.8745610307284305e-05, + "loss": 0.0059, + "step": 72920 + }, + { + "grad_norm": 0.038983795791864395, + "learning_rate": 1.8732705784238862e-05, + "loss": 0.0031, + "step": 72930 + }, + { + "grad_norm": 0.02531595714390278, + "learning_rate": 1.8719804680537682e-05, + "loss": 0.0024, + "step": 72940 + }, + { + "grad_norm": 0.026354636996984482, + "learning_rate": 1.870690699759164e-05, + "loss": 0.004, + "step": 72950 + }, + { + "grad_norm": 0.0346081368625164, + "learning_rate": 1.8694012736811213e-05, + "loss": 0.0026, + "step": 72960 + }, + { + "grad_norm": 0.06365296989679337, + "learning_rate": 1.8681121899606463e-05, + "loss": 0.0035, + "step": 72970 + }, + { + "grad_norm": 0.026112407445907593, + "learning_rate": 1.8668234487387144e-05, + "loss": 0.0021, + "step": 72980 + }, + { + "grad_norm": 0.051819875836372375, + "learning_rate": 1.8655350501562592e-05, + "loss": 0.0029, + "step": 72990 + }, + { + "grad_norm": 0.04157845675945282, + "learning_rate": 1.8642469943541796e-05, + "loss": 0.0036, + "step": 73000 + }, + { + "grad_norm": 0.03353267163038254, + "learning_rate": 1.8629592814733328e-05, + "loss": 0.0024, + "step": 73010 + }, + { + "grad_norm": 0.031832266598939896, + "learning_rate": 1.8616719116545438e-05, + "loss": 0.0034, + "step": 73020 + }, + { + "grad_norm": 0.04396570846438408, + "learning_rate": 1.8603848850385952e-05, + "loss": 0.0036, + "step": 73030 + }, + { + "grad_norm": 0.04141226038336754, + "learning_rate": 1.859098201766236e-05, + "loss": 0.0042, + "step": 73040 + }, + { + "grad_norm": 0.0398697555065155, + "learning_rate": 1.8578118619781726e-05, + "loss": 0.0024, + "step": 73050 + }, + { + "grad_norm": 0.03580847382545471, + "learning_rate": 1.856525865815083e-05, + "loss": 0.0049, + "step": 73060 + }, + { + "grad_norm": 0.027985302731394768, + "learning_rate": 1.8552402134175966e-05, + "loss": 0.0035, + "step": 73070 + }, + { + "grad_norm": 0.050076745450496674, + "learning_rate": 1.8539549049263143e-05, + "loss": 0.0038, + "step": 73080 + }, + { + "grad_norm": 0.03945969045162201, + "learning_rate": 1.8526699404817917e-05, + "loss": 0.003, + "step": 73090 + }, + { + "grad_norm": 0.022833779454231262, + "learning_rate": 1.8513853202245546e-05, + "loss": 0.0037, + "step": 73100 + }, + { + "grad_norm": 0.04397999122738838, + "learning_rate": 1.850101044295081e-05, + "loss": 0.0026, + "step": 73110 + }, + { + "grad_norm": 0.052576981484889984, + "learning_rate": 1.848817112833825e-05, + "loss": 0.0034, + "step": 73120 + }, + { + "grad_norm": 0.030318917706608772, + "learning_rate": 1.8475335259811905e-05, + "loss": 0.0049, + "step": 73130 + }, + { + "grad_norm": 0.022317705675959587, + "learning_rate": 1.8462502838775508e-05, + "loss": 0.0033, + "step": 73140 + }, + { + "grad_norm": 0.04037972539663315, + "learning_rate": 1.8449673866632378e-05, + "loss": 0.0048, + "step": 73150 + }, + { + "grad_norm": 0.031709831207990646, + "learning_rate": 1.8436848344785473e-05, + "loss": 0.004, + "step": 73160 + }, + { + "grad_norm": 0.0315798744559288, + "learning_rate": 1.8424026274637378e-05, + "loss": 0.0029, + "step": 73170 + }, + { + "grad_norm": 0.0381435751914978, + "learning_rate": 1.8411207657590312e-05, + "loss": 0.003, + "step": 73180 + }, + { + "grad_norm": 0.035774488002061844, + "learning_rate": 1.839839249504607e-05, + "loss": 0.0038, + "step": 73190 + }, + { + "grad_norm": 0.0480956956744194, + "learning_rate": 1.8385580788406108e-05, + "loss": 0.0031, + "step": 73200 + }, + { + "grad_norm": 0.04355227202177048, + "learning_rate": 1.837277253907152e-05, + "loss": 0.0041, + "step": 73210 + }, + { + "grad_norm": 0.032348353415727615, + "learning_rate": 1.8359967748442957e-05, + "loss": 0.0032, + "step": 73220 + }, + { + "grad_norm": 0.028807958588004112, + "learning_rate": 1.8347166417920774e-05, + "loss": 0.0037, + "step": 73230 + }, + { + "grad_norm": 0.028400933369994164, + "learning_rate": 1.833436854890484e-05, + "loss": 0.0025, + "step": 73240 + }, + { + "grad_norm": 0.02567799761891365, + "learning_rate": 1.8321574142794796e-05, + "loss": 0.0031, + "step": 73250 + }, + { + "grad_norm": 0.022214259952306747, + "learning_rate": 1.8308783200989748e-05, + "loss": 0.0033, + "step": 73260 + }, + { + "grad_norm": 0.027572354301810265, + "learning_rate": 1.829599572488855e-05, + "loss": 0.0036, + "step": 73270 + }, + { + "grad_norm": 0.029706576839089394, + "learning_rate": 1.828321171588957e-05, + "loss": 0.0035, + "step": 73280 + }, + { + "grad_norm": 0.027338091284036636, + "learning_rate": 1.827043117539089e-05, + "loss": 0.0027, + "step": 73290 + }, + { + "grad_norm": 0.029484177008271217, + "learning_rate": 1.8257654104790116e-05, + "loss": 0.0025, + "step": 73300 + }, + { + "grad_norm": 0.025545986369252205, + "learning_rate": 1.82448805054846e-05, + "loss": 0.0036, + "step": 73310 + }, + { + "grad_norm": 0.03981305658817291, + "learning_rate": 1.823211037887119e-05, + "loss": 0.0051, + "step": 73320 + }, + { + "grad_norm": 0.032954275608062744, + "learning_rate": 1.821934372634645e-05, + "loss": 0.0039, + "step": 73330 + }, + { + "grad_norm": 0.02599961683154106, + "learning_rate": 1.8206580549306472e-05, + "loss": 0.0034, + "step": 73340 + }, + { + "grad_norm": 0.03040711209177971, + "learning_rate": 1.819382084914707e-05, + "loss": 0.0032, + "step": 73350 + }, + { + "grad_norm": 0.030503040179610252, + "learning_rate": 1.8181064627263577e-05, + "loss": 0.0028, + "step": 73360 + }, + { + "grad_norm": 0.03337506949901581, + "learning_rate": 1.8168311885051022e-05, + "loss": 0.0033, + "step": 73370 + }, + { + "grad_norm": 0.03628924861550331, + "learning_rate": 1.815556262390402e-05, + "loss": 0.0051, + "step": 73380 + }, + { + "grad_norm": 0.03273021802306175, + "learning_rate": 1.8142816845216826e-05, + "loss": 0.0038, + "step": 73390 + }, + { + "grad_norm": 0.03610340133309364, + "learning_rate": 1.8130074550383276e-05, + "loss": 0.0041, + "step": 73400 + }, + { + "grad_norm": 0.02594112791121006, + "learning_rate": 1.8117335740796848e-05, + "loss": 0.0026, + "step": 73410 + }, + { + "grad_norm": 0.04574952274560928, + "learning_rate": 1.810460041785067e-05, + "loss": 0.0034, + "step": 73420 + }, + { + "grad_norm": 0.04647260159254074, + "learning_rate": 1.809186858293742e-05, + "loss": 0.0047, + "step": 73430 + }, + { + "grad_norm": 0.04177587851881981, + "learning_rate": 1.8079140237449448e-05, + "loss": 0.0042, + "step": 73440 + }, + { + "grad_norm": 0.06280867010354996, + "learning_rate": 1.8066415382778707e-05, + "loss": 0.0043, + "step": 73450 + }, + { + "grad_norm": 0.03464604541659355, + "learning_rate": 1.8053694020316786e-05, + "loss": 0.0032, + "step": 73460 + }, + { + "grad_norm": 0.026563959196209908, + "learning_rate": 1.8040976151454837e-05, + "loss": 0.0024, + "step": 73470 + }, + { + "grad_norm": 0.028158506378531456, + "learning_rate": 1.8028261777583704e-05, + "loss": 0.0033, + "step": 73480 + }, + { + "grad_norm": 0.05464541167020798, + "learning_rate": 1.8015550900093758e-05, + "loss": 0.0044, + "step": 73490 + }, + { + "grad_norm": 0.053658608347177505, + "learning_rate": 1.800284352037512e-05, + "loss": 0.0034, + "step": 73500 + }, + { + "grad_norm": 0.032077156007289886, + "learning_rate": 1.799013963981738e-05, + "loss": 0.003, + "step": 73510 + }, + { + "grad_norm": 0.04840577393770218, + "learning_rate": 1.797743925980987e-05, + "loss": 0.0032, + "step": 73520 + }, + { + "grad_norm": 0.0335395373404026, + "learning_rate": 1.7964742381741435e-05, + "loss": 0.0029, + "step": 73530 + }, + { + "grad_norm": 0.023740042001008987, + "learning_rate": 1.7952049007000627e-05, + "loss": 0.0035, + "step": 73540 + }, + { + "grad_norm": 0.02880612015724182, + "learning_rate": 1.7939359136975535e-05, + "loss": 0.0035, + "step": 73550 + }, + { + "grad_norm": 0.045020103454589844, + "learning_rate": 1.7926672773053932e-05, + "loss": 0.0026, + "step": 73560 + }, + { + "grad_norm": 0.03223269805312157, + "learning_rate": 1.791398991662317e-05, + "loss": 0.003, + "step": 73570 + }, + { + "grad_norm": 0.04728852957487106, + "learning_rate": 1.7901310569070244e-05, + "loss": 0.0056, + "step": 73580 + }, + { + "grad_norm": 0.041912782937288284, + "learning_rate": 1.7888634731781724e-05, + "loss": 0.0038, + "step": 73590 + }, + { + "grad_norm": 0.024302002042531967, + "learning_rate": 1.7875962406143844e-05, + "loss": 0.004, + "step": 73600 + }, + { + "grad_norm": 0.0471317283809185, + "learning_rate": 1.7863293593542403e-05, + "loss": 0.0052, + "step": 73610 + }, + { + "grad_norm": 0.03669777885079384, + "learning_rate": 1.7850628295362853e-05, + "loss": 0.004, + "step": 73620 + }, + { + "grad_norm": 0.0314909964799881, + "learning_rate": 1.7837966512990263e-05, + "loss": 0.004, + "step": 73630 + }, + { + "grad_norm": 0.03393557295203209, + "learning_rate": 1.782530824780931e-05, + "loss": 0.0038, + "step": 73640 + }, + { + "grad_norm": 0.030902806669473648, + "learning_rate": 1.7812653501204263e-05, + "loss": 0.0032, + "step": 73650 + }, + { + "grad_norm": 0.026112155988812447, + "learning_rate": 1.780000227455903e-05, + "loss": 0.0036, + "step": 73660 + }, + { + "grad_norm": 0.03273859620094299, + "learning_rate": 1.778735456925716e-05, + "loss": 0.0035, + "step": 73670 + }, + { + "grad_norm": 0.022100243717432022, + "learning_rate": 1.7774710386681737e-05, + "loss": 0.0032, + "step": 73680 + }, + { + "grad_norm": 0.035218942910432816, + "learning_rate": 1.7762069728215545e-05, + "loss": 0.0027, + "step": 73690 + }, + { + "grad_norm": 0.03919916972517967, + "learning_rate": 1.7749432595240932e-05, + "loss": 0.003, + "step": 73700 + }, + { + "grad_norm": 0.030081091448664665, + "learning_rate": 1.773679898913991e-05, + "loss": 0.0029, + "step": 73710 + }, + { + "grad_norm": 0.030866414308547974, + "learning_rate": 1.7724168911294015e-05, + "loss": 0.0051, + "step": 73720 + }, + { + "grad_norm": 0.025239231064915657, + "learning_rate": 1.7711542363084504e-05, + "loss": 0.0032, + "step": 73730 + }, + { + "grad_norm": 0.04382318630814552, + "learning_rate": 1.7698919345892158e-05, + "loss": 0.0042, + "step": 73740 + }, + { + "grad_norm": 0.06983204185962677, + "learning_rate": 1.768629986109743e-05, + "loss": 0.0054, + "step": 73750 + }, + { + "grad_norm": 0.06185420975089073, + "learning_rate": 1.7673683910080357e-05, + "loss": 0.0061, + "step": 73760 + }, + { + "grad_norm": 0.03679746389389038, + "learning_rate": 1.7661071494220638e-05, + "loss": 0.0027, + "step": 73770 + }, + { + "grad_norm": 0.03543498367071152, + "learning_rate": 1.7648462614897498e-05, + "loss": 0.0031, + "step": 73780 + }, + { + "grad_norm": 0.04715801402926445, + "learning_rate": 1.7635857273489863e-05, + "loss": 0.003, + "step": 73790 + }, + { + "grad_norm": 0.04901695251464844, + "learning_rate": 1.7623255471376198e-05, + "loss": 0.0042, + "step": 73800 + }, + { + "grad_norm": 0.026628829538822174, + "learning_rate": 1.7610657209934646e-05, + "loss": 0.0027, + "step": 73810 + }, + { + "grad_norm": 0.032798536121845245, + "learning_rate": 1.7598062490542915e-05, + "loss": 0.0031, + "step": 73820 + }, + { + "grad_norm": 0.022138720378279686, + "learning_rate": 1.7585471314578377e-05, + "loss": 0.0024, + "step": 73830 + }, + { + "grad_norm": 0.05382412672042847, + "learning_rate": 1.7572883683417945e-05, + "loss": 0.0055, + "step": 73840 + }, + { + "grad_norm": 0.02298537828028202, + "learning_rate": 1.7560299598438212e-05, + "loss": 0.0024, + "step": 73850 + }, + { + "grad_norm": 0.023761453106999397, + "learning_rate": 1.7547719061015334e-05, + "loss": 0.003, + "step": 73860 + }, + { + "grad_norm": 0.038259509950876236, + "learning_rate": 1.75351420725251e-05, + "loss": 0.0031, + "step": 73870 + }, + { + "grad_norm": 0.029590269550681114, + "learning_rate": 1.7522568634342923e-05, + "loss": 0.0035, + "step": 73880 + }, + { + "grad_norm": 0.03544600307941437, + "learning_rate": 1.7509998747843825e-05, + "loss": 0.003, + "step": 73890 + }, + { + "grad_norm": 0.03310895338654518, + "learning_rate": 1.7497432414402403e-05, + "loss": 0.0054, + "step": 73900 + }, + { + "grad_norm": 0.03155563771724701, + "learning_rate": 1.7484869635392903e-05, + "loss": 0.0023, + "step": 73910 + }, + { + "grad_norm": 0.03615928068757057, + "learning_rate": 1.747231041218919e-05, + "loss": 0.004, + "step": 73920 + }, + { + "grad_norm": 0.04595327749848366, + "learning_rate": 1.7459754746164692e-05, + "loss": 0.0036, + "step": 73930 + }, + { + "grad_norm": 0.03464966267347336, + "learning_rate": 1.744720263869249e-05, + "loss": 0.0048, + "step": 73940 + }, + { + "grad_norm": 0.05489117652177811, + "learning_rate": 1.7434654091145263e-05, + "loss": 0.0036, + "step": 73950 + }, + { + "grad_norm": 0.03370539844036102, + "learning_rate": 1.742210910489532e-05, + "loss": 0.0027, + "step": 73960 + }, + { + "grad_norm": 0.036957286298274994, + "learning_rate": 1.7409567681314532e-05, + "loss": 0.0031, + "step": 73970 + }, + { + "grad_norm": 0.033373989164829254, + "learning_rate": 1.7397029821774434e-05, + "loss": 0.0027, + "step": 73980 + }, + { + "grad_norm": 0.029540466144680977, + "learning_rate": 1.7384495527646126e-05, + "loss": 0.0025, + "step": 73990 + }, + { + "grad_norm": 0.031218023970723152, + "learning_rate": 1.737196480030035e-05, + "loss": 0.003, + "step": 74000 + }, + { + "grad_norm": 0.024044927209615707, + "learning_rate": 1.7359437641107447e-05, + "loss": 0.0026, + "step": 74010 + }, + { + "grad_norm": 0.03080177679657936, + "learning_rate": 1.7346914051437387e-05, + "loss": 0.0026, + "step": 74020 + }, + { + "grad_norm": 0.03278682380914688, + "learning_rate": 1.733439403265969e-05, + "loss": 0.0038, + "step": 74030 + }, + { + "grad_norm": 0.02945123426616192, + "learning_rate": 1.732187758614357e-05, + "loss": 0.0045, + "step": 74040 + }, + { + "grad_norm": 0.04080568253993988, + "learning_rate": 1.730936471325777e-05, + "loss": 0.0037, + "step": 74050 + }, + { + "grad_norm": 0.03303862363100052, + "learning_rate": 1.7296855415370718e-05, + "loss": 0.0035, + "step": 74060 + }, + { + "grad_norm": 0.0329853817820549, + "learning_rate": 1.728434969385035e-05, + "loss": 0.0056, + "step": 74070 + }, + { + "grad_norm": 0.023908013477921486, + "learning_rate": 1.7271847550064346e-05, + "loss": 0.0025, + "step": 74080 + }, + { + "grad_norm": 0.03820355236530304, + "learning_rate": 1.725934898537987e-05, + "loss": 0.0028, + "step": 74090 + }, + { + "grad_norm": 0.03911713883280754, + "learning_rate": 1.7246854001163783e-05, + "loss": 0.0033, + "step": 74100 + }, + { + "grad_norm": 0.0416576974093914, + "learning_rate": 1.7234362598782476e-05, + "loss": 0.0024, + "step": 74110 + }, + { + "grad_norm": 0.029164722189307213, + "learning_rate": 1.722187477960202e-05, + "loss": 0.0026, + "step": 74120 + }, + { + "grad_norm": 0.04236803948879242, + "learning_rate": 1.7209390544988057e-05, + "loss": 0.0037, + "step": 74130 + }, + { + "grad_norm": 0.042420826852321625, + "learning_rate": 1.7196909896305856e-05, + "loss": 0.0039, + "step": 74140 + }, + { + "grad_norm": 0.04943903535604477, + "learning_rate": 1.718443283492026e-05, + "loss": 0.005, + "step": 74150 + }, + { + "grad_norm": 0.036500364542007446, + "learning_rate": 1.7171959362195746e-05, + "loss": 0.0028, + "step": 74160 + }, + { + "grad_norm": 0.07741504907608032, + "learning_rate": 1.715948947949642e-05, + "loss": 0.0045, + "step": 74170 + }, + { + "grad_norm": 0.05881417170166969, + "learning_rate": 1.714702318818593e-05, + "loss": 0.0041, + "step": 74180 + }, + { + "grad_norm": 0.04291334003210068, + "learning_rate": 1.7134560489627592e-05, + "loss": 0.0042, + "step": 74190 + }, + { + "grad_norm": 0.04603981226682663, + "learning_rate": 1.712210138518431e-05, + "loss": 0.0032, + "step": 74200 + }, + { + "grad_norm": 0.024847373366355896, + "learning_rate": 1.71096458762186e-05, + "loss": 0.0038, + "step": 74210 + }, + { + "grad_norm": 0.03047962673008442, + "learning_rate": 1.7097193964092552e-05, + "loss": 0.0035, + "step": 74220 + }, + { + "grad_norm": 0.04581769183278084, + "learning_rate": 1.708474565016792e-05, + "loss": 0.003, + "step": 74230 + }, + { + "grad_norm": 0.02972797490656376, + "learning_rate": 1.7072300935806e-05, + "loss": 0.0045, + "step": 74240 + }, + { + "grad_norm": 0.033585693687200546, + "learning_rate": 1.705985982236776e-05, + "loss": 0.0034, + "step": 74250 + }, + { + "grad_norm": 0.020683957263827324, + "learning_rate": 1.7047422311213697e-05, + "loss": 0.0028, + "step": 74260 + }, + { + "grad_norm": 0.04207955673336983, + "learning_rate": 1.703498840370401e-05, + "loss": 0.0035, + "step": 74270 + }, + { + "grad_norm": 0.03412940353155136, + "learning_rate": 1.702255810119842e-05, + "loss": 0.0029, + "step": 74280 + }, + { + "grad_norm": 0.03198186680674553, + "learning_rate": 1.7010131405056305e-05, + "loss": 0.0044, + "step": 74290 + }, + { + "grad_norm": 0.025690237060189247, + "learning_rate": 1.6997708316636613e-05, + "loss": 0.0028, + "step": 74300 + }, + { + "grad_norm": 0.024466300383210182, + "learning_rate": 1.6985288837297932e-05, + "loss": 0.0028, + "step": 74310 + }, + { + "grad_norm": 0.02646709233522415, + "learning_rate": 1.6972872968398394e-05, + "loss": 0.0043, + "step": 74320 + }, + { + "grad_norm": 0.04551529884338379, + "learning_rate": 1.6960460711295845e-05, + "loss": 0.0045, + "step": 74330 + }, + { + "grad_norm": 0.045613616704940796, + "learning_rate": 1.6948052067347623e-05, + "loss": 0.0045, + "step": 74340 + }, + { + "grad_norm": 0.04119208827614784, + "learning_rate": 1.693564703791075e-05, + "loss": 0.0024, + "step": 74350 + }, + { + "grad_norm": 0.04367794841527939, + "learning_rate": 1.6923245624341793e-05, + "loss": 0.0039, + "step": 74360 + }, + { + "grad_norm": 0.03526189178228378, + "learning_rate": 1.6910847827996962e-05, + "loss": 0.0045, + "step": 74370 + }, + { + "grad_norm": 0.022408705204725266, + "learning_rate": 1.689845365023206e-05, + "loss": 0.0026, + "step": 74380 + }, + { + "grad_norm": 0.02685767039656639, + "learning_rate": 1.6886063092402505e-05, + "loss": 0.0026, + "step": 74390 + }, + { + "grad_norm": 0.0408162996172905, + "learning_rate": 1.6873676155863317e-05, + "loss": 0.0053, + "step": 74400 + }, + { + "grad_norm": 0.04509652405977249, + "learning_rate": 1.6861292841969085e-05, + "loss": 0.0028, + "step": 74410 + }, + { + "grad_norm": 0.035900913178920746, + "learning_rate": 1.684891315207406e-05, + "loss": 0.0036, + "step": 74420 + }, + { + "grad_norm": 0.032991617918014526, + "learning_rate": 1.6836537087532034e-05, + "loss": 0.003, + "step": 74430 + }, + { + "grad_norm": 0.03039911575615406, + "learning_rate": 1.6824164649696465e-05, + "loss": 0.004, + "step": 74440 + }, + { + "grad_norm": 0.04609197750687599, + "learning_rate": 1.681179583992034e-05, + "loss": 0.0037, + "step": 74450 + }, + { + "grad_norm": 0.03271529823541641, + "learning_rate": 1.679943065955636e-05, + "loss": 0.003, + "step": 74460 + }, + { + "grad_norm": 0.058312833309173584, + "learning_rate": 1.678706910995671e-05, + "loss": 0.0029, + "step": 74470 + }, + { + "grad_norm": 0.038918107748031616, + "learning_rate": 1.6774711192473262e-05, + "loss": 0.0033, + "step": 74480 + }, + { + "grad_norm": 0.034119535237550735, + "learning_rate": 1.676235690845743e-05, + "loss": 0.0038, + "step": 74490 + }, + { + "grad_norm": 0.040927089750766754, + "learning_rate": 1.6750006259260285e-05, + "loss": 0.0043, + "step": 74500 + }, + { + "grad_norm": 0.034184761345386505, + "learning_rate": 1.673765924623244e-05, + "loss": 0.0025, + "step": 74510 + }, + { + "grad_norm": 0.06518582254648209, + "learning_rate": 1.67253158707242e-05, + "loss": 0.0036, + "step": 74520 + }, + { + "grad_norm": 0.0269350353628397, + "learning_rate": 1.671297613408537e-05, + "loss": 0.0026, + "step": 74530 + }, + { + "grad_norm": 0.035347674041986465, + "learning_rate": 1.6700640037665444e-05, + "loss": 0.0035, + "step": 74540 + }, + { + "grad_norm": 0.033390842378139496, + "learning_rate": 1.668830758281344e-05, + "loss": 0.0025, + "step": 74550 + }, + { + "grad_norm": 0.020149478688836098, + "learning_rate": 1.6675978770878052e-05, + "loss": 0.0023, + "step": 74560 + }, + { + "grad_norm": 0.02942027524113655, + "learning_rate": 1.6663653603207506e-05, + "loss": 0.0022, + "step": 74570 + }, + { + "grad_norm": 0.02767311781644821, + "learning_rate": 1.6651332081149685e-05, + "loss": 0.0024, + "step": 74580 + }, + { + "grad_norm": 0.036115240305662155, + "learning_rate": 1.6639014206052046e-05, + "loss": 0.0039, + "step": 74590 + }, + { + "grad_norm": 0.04830991476774216, + "learning_rate": 1.6626699979261657e-05, + "loss": 0.0042, + "step": 74600 + }, + { + "grad_norm": 0.02622210420668125, + "learning_rate": 1.6614389402125197e-05, + "loss": 0.0029, + "step": 74610 + }, + { + "grad_norm": 0.05843222141265869, + "learning_rate": 1.66020824759889e-05, + "loss": 0.0048, + "step": 74620 + }, + { + "grad_norm": 0.059340640902519226, + "learning_rate": 1.6589779202198675e-05, + "loss": 0.0038, + "step": 74630 + }, + { + "grad_norm": 0.03736288845539093, + "learning_rate": 1.6577479582099924e-05, + "loss": 0.0024, + "step": 74640 + }, + { + "grad_norm": 0.03497045487165451, + "learning_rate": 1.6565183617037788e-05, + "loss": 0.0035, + "step": 74650 + }, + { + "grad_norm": 0.05253607779741287, + "learning_rate": 1.6552891308356894e-05, + "loss": 0.0063, + "step": 74660 + }, + { + "grad_norm": 0.050471801310777664, + "learning_rate": 1.6540602657401526e-05, + "loss": 0.0036, + "step": 74670 + }, + { + "grad_norm": 0.03185206651687622, + "learning_rate": 1.6528317665515537e-05, + "loss": 0.0045, + "step": 74680 + }, + { + "grad_norm": 0.03313040733337402, + "learning_rate": 1.6516036334042416e-05, + "loss": 0.0039, + "step": 74690 + }, + { + "grad_norm": 0.038481224328279495, + "learning_rate": 1.650375866432519e-05, + "loss": 0.0026, + "step": 74700 + }, + { + "grad_norm": 0.02299436368048191, + "learning_rate": 1.649148465770659e-05, + "loss": 0.0018, + "step": 74710 + }, + { + "grad_norm": 0.06012249365448952, + "learning_rate": 1.647921431552883e-05, + "loss": 0.0034, + "step": 74720 + }, + { + "grad_norm": 0.022266950458288193, + "learning_rate": 1.646694763913381e-05, + "loss": 0.0031, + "step": 74730 + }, + { + "grad_norm": 0.07113555073738098, + "learning_rate": 1.6454684629862966e-05, + "loss": 0.0043, + "step": 74740 + }, + { + "grad_norm": 0.03441106900572777, + "learning_rate": 1.6442425289057388e-05, + "loss": 0.0039, + "step": 74750 + }, + { + "grad_norm": 0.042311180382966995, + "learning_rate": 1.6430169618057718e-05, + "loss": 0.0034, + "step": 74760 + }, + { + "grad_norm": 0.038701243698596954, + "learning_rate": 1.641791761820422e-05, + "loss": 0.0038, + "step": 74770 + }, + { + "grad_norm": 0.030850490555167198, + "learning_rate": 1.640566929083676e-05, + "loss": 0.004, + "step": 74780 + }, + { + "grad_norm": 0.037824809551239014, + "learning_rate": 1.6393424637294823e-05, + "loss": 0.0031, + "step": 74790 + }, + { + "grad_norm": 0.025335537269711494, + "learning_rate": 1.638118365891742e-05, + "loss": 0.002, + "step": 74800 + }, + { + "grad_norm": 0.02185453660786152, + "learning_rate": 1.636894635704323e-05, + "loss": 0.0025, + "step": 74810 + }, + { + "grad_norm": 0.02995138429105282, + "learning_rate": 1.6356712733010517e-05, + "loss": 0.0026, + "step": 74820 + }, + { + "grad_norm": 0.05876392498612404, + "learning_rate": 1.6344482788157096e-05, + "loss": 0.003, + "step": 74830 + }, + { + "grad_norm": 0.048767101019620895, + "learning_rate": 1.6332256523820444e-05, + "loss": 0.0039, + "step": 74840 + }, + { + "grad_norm": 0.0412098690867424, + "learning_rate": 1.6320033941337597e-05, + "loss": 0.0035, + "step": 74850 + }, + { + "grad_norm": 0.03891553357243538, + "learning_rate": 1.6307815042045216e-05, + "loss": 0.0038, + "step": 74860 + }, + { + "grad_norm": 0.0466943196952343, + "learning_rate": 1.6295599827279513e-05, + "loss": 0.0028, + "step": 74870 + }, + { + "grad_norm": 0.03350067138671875, + "learning_rate": 1.6283388298376356e-05, + "loss": 0.0036, + "step": 74880 + }, + { + "grad_norm": 0.04885358735918999, + "learning_rate": 1.6271180456671125e-05, + "loss": 0.0023, + "step": 74890 + }, + { + "grad_norm": 0.032538946717977524, + "learning_rate": 1.625897630349893e-05, + "loss": 0.004, + "step": 74900 + }, + { + "grad_norm": 0.04003691300749779, + "learning_rate": 1.6246775840194335e-05, + "loss": 0.0042, + "step": 74910 + }, + { + "grad_norm": 0.03885914385318756, + "learning_rate": 1.6234579068091604e-05, + "loss": 0.003, + "step": 74920 + }, + { + "grad_norm": 0.0194961316883564, + "learning_rate": 1.6222385988524524e-05, + "loss": 0.0032, + "step": 74930 + }, + { + "grad_norm": 0.03940067067742348, + "learning_rate": 1.6210196602826544e-05, + "loss": 0.0032, + "step": 74940 + }, + { + "grad_norm": 0.04537266120314598, + "learning_rate": 1.6198010912330646e-05, + "loss": 0.0043, + "step": 74950 + }, + { + "grad_norm": 0.03280823305249214, + "learning_rate": 1.6185828918369456e-05, + "loss": 0.0035, + "step": 74960 + }, + { + "grad_norm": 0.03108474612236023, + "learning_rate": 1.617365062227517e-05, + "loss": 0.0033, + "step": 74970 + }, + { + "grad_norm": 0.04117630794644356, + "learning_rate": 1.616147602537962e-05, + "loss": 0.0027, + "step": 74980 + }, + { + "grad_norm": 0.027829991653561592, + "learning_rate": 1.614930512901415e-05, + "loss": 0.0027, + "step": 74990 + }, + { + "grad_norm": 0.024143442511558533, + "learning_rate": 1.6137137934509804e-05, + "loss": 0.0034, + "step": 75000 + }, + { + "grad_norm": 0.0365399532020092, + "learning_rate": 1.6124974443197123e-05, + "loss": 0.0035, + "step": 75010 + }, + { + "grad_norm": 0.03663690760731697, + "learning_rate": 1.6112814656406306e-05, + "loss": 0.0028, + "step": 75020 + }, + { + "grad_norm": 0.04202396422624588, + "learning_rate": 1.6100658575467125e-05, + "loss": 0.0029, + "step": 75030 + }, + { + "grad_norm": 0.030158469453454018, + "learning_rate": 1.6088506201708974e-05, + "loss": 0.0025, + "step": 75040 + }, + { + "grad_norm": 0.034708283841609955, + "learning_rate": 1.6076357536460785e-05, + "loss": 0.0021, + "step": 75050 + }, + { + "grad_norm": 0.034282900393009186, + "learning_rate": 1.606421258105113e-05, + "loss": 0.0029, + "step": 75060 + }, + { + "grad_norm": 0.02700982429087162, + "learning_rate": 1.6052071336808183e-05, + "loss": 0.0039, + "step": 75070 + }, + { + "grad_norm": 0.029942205175757408, + "learning_rate": 1.6039933805059658e-05, + "loss": 0.0038, + "step": 75080 + }, + { + "grad_norm": 0.03409591689705849, + "learning_rate": 1.6027799987132915e-05, + "loss": 0.0034, + "step": 75090 + }, + { + "grad_norm": 0.022239897400140762, + "learning_rate": 1.6015669884354888e-05, + "loss": 0.0026, + "step": 75100 + }, + { + "grad_norm": 0.03727109730243683, + "learning_rate": 1.6003543498052125e-05, + "loss": 0.0027, + "step": 75110 + }, + { + "grad_norm": 0.02916380949318409, + "learning_rate": 1.5991420829550715e-05, + "loss": 0.0033, + "step": 75120 + }, + { + "grad_norm": 0.04330079257488251, + "learning_rate": 1.5979301880176405e-05, + "loss": 0.0054, + "step": 75130 + }, + { + "grad_norm": 0.035470593720674515, + "learning_rate": 1.5967186651254485e-05, + "loss": 0.0031, + "step": 75140 + }, + { + "grad_norm": 0.04206075891852379, + "learning_rate": 1.5955075144109853e-05, + "loss": 0.0035, + "step": 75150 + }, + { + "grad_norm": 0.02654997818171978, + "learning_rate": 1.594296736006703e-05, + "loss": 0.0044, + "step": 75160 + }, + { + "grad_norm": 0.02731027454137802, + "learning_rate": 1.59308633004501e-05, + "loss": 0.0028, + "step": 75170 + }, + { + "grad_norm": 0.02382238768041134, + "learning_rate": 1.5918762966582724e-05, + "loss": 0.005, + "step": 75180 + }, + { + "grad_norm": 0.023617101833224297, + "learning_rate": 1.5906666359788203e-05, + "loss": 0.0028, + "step": 75190 + }, + { + "grad_norm": 0.029668932780623436, + "learning_rate": 1.589457348138938e-05, + "loss": 0.0034, + "step": 75200 + }, + { + "grad_norm": 0.0325956717133522, + "learning_rate": 1.588248433270872e-05, + "loss": 0.0025, + "step": 75210 + }, + { + "grad_norm": 0.025087319314479828, + "learning_rate": 1.587039891506828e-05, + "loss": 0.0028, + "step": 75220 + }, + { + "grad_norm": 0.033079810440540314, + "learning_rate": 1.5858317229789715e-05, + "loss": 0.003, + "step": 75230 + }, + { + "grad_norm": 0.051670778542757034, + "learning_rate": 1.584623927819423e-05, + "loss": 0.0073, + "step": 75240 + }, + { + "grad_norm": 0.030594224110245705, + "learning_rate": 1.583416506160269e-05, + "loss": 0.0034, + "step": 75250 + }, + { + "grad_norm": 0.05460627004504204, + "learning_rate": 1.5822094581335472e-05, + "loss": 0.0043, + "step": 75260 + }, + { + "grad_norm": 0.03204284980893135, + "learning_rate": 1.5810027838712604e-05, + "loss": 0.0024, + "step": 75270 + }, + { + "grad_norm": 0.02787627838551998, + "learning_rate": 1.5797964835053687e-05, + "loss": 0.0039, + "step": 75280 + }, + { + "grad_norm": 0.02667502872645855, + "learning_rate": 1.5785905571677933e-05, + "loss": 0.0025, + "step": 75290 + }, + { + "grad_norm": 0.02646179497241974, + "learning_rate": 1.5773850049904086e-05, + "loss": 0.0025, + "step": 75300 + }, + { + "grad_norm": 0.03925841674208641, + "learning_rate": 1.5761798271050543e-05, + "loss": 0.003, + "step": 75310 + }, + { + "grad_norm": 0.049953196197748184, + "learning_rate": 1.5749750236435277e-05, + "loss": 0.0035, + "step": 75320 + }, + { + "grad_norm": 0.035253286361694336, + "learning_rate": 1.5737705947375815e-05, + "loss": 0.0026, + "step": 75330 + }, + { + "grad_norm": 0.035856131464242935, + "learning_rate": 1.5725665405189322e-05, + "loss": 0.005, + "step": 75340 + }, + { + "grad_norm": 0.0358503982424736, + "learning_rate": 1.571362861119253e-05, + "loss": 0.0024, + "step": 75350 + }, + { + "grad_norm": 0.02120877057313919, + "learning_rate": 1.570159556670177e-05, + "loss": 0.0029, + "step": 75360 + }, + { + "grad_norm": 0.03410812467336655, + "learning_rate": 1.5689566273032946e-05, + "loss": 0.0034, + "step": 75370 + }, + { + "grad_norm": 0.027957310900092125, + "learning_rate": 1.5677540731501583e-05, + "loss": 0.0024, + "step": 75380 + }, + { + "grad_norm": 0.021603746339678764, + "learning_rate": 1.5665518943422746e-05, + "loss": 0.003, + "step": 75390 + }, + { + "grad_norm": 0.028718456625938416, + "learning_rate": 1.5653500910111134e-05, + "loss": 0.0041, + "step": 75400 + }, + { + "grad_norm": 0.045548900961875916, + "learning_rate": 1.5641486632881023e-05, + "loss": 0.0031, + "step": 75410 + }, + { + "grad_norm": 0.05863666534423828, + "learning_rate": 1.56294761130463e-05, + "loss": 0.0036, + "step": 75420 + }, + { + "grad_norm": 0.05601450800895691, + "learning_rate": 1.5617469351920373e-05, + "loss": 0.0034, + "step": 75430 + }, + { + "grad_norm": 0.02459845133125782, + "learning_rate": 1.5605466350816315e-05, + "loss": 0.0026, + "step": 75440 + }, + { + "grad_norm": 0.038829173892736435, + "learning_rate": 1.559346711104674e-05, + "loss": 0.0042, + "step": 75450 + }, + { + "grad_norm": 0.030662711709737778, + "learning_rate": 1.5581471633923883e-05, + "loss": 0.0045, + "step": 75460 + }, + { + "grad_norm": 0.03360135853290558, + "learning_rate": 1.5569479920759505e-05, + "loss": 0.0026, + "step": 75470 + }, + { + "grad_norm": 0.039968691766262054, + "learning_rate": 1.5557491972865072e-05, + "loss": 0.003, + "step": 75480 + }, + { + "grad_norm": 0.03263649344444275, + "learning_rate": 1.554550779155152e-05, + "loss": 0.0032, + "step": 75490 + }, + { + "grad_norm": 0.056865718215703964, + "learning_rate": 1.5533527378129453e-05, + "loss": 0.004, + "step": 75500 + }, + { + "grad_norm": 0.049982137978076935, + "learning_rate": 1.552155073390899e-05, + "loss": 0.0035, + "step": 75510 + }, + { + "grad_norm": 0.03041587397456169, + "learning_rate": 1.5509577860199903e-05, + "loss": 0.0034, + "step": 75520 + }, + { + "grad_norm": 0.046107519418001175, + "learning_rate": 1.5497608758311525e-05, + "loss": 0.0027, + "step": 75530 + }, + { + "grad_norm": 0.04568188264966011, + "learning_rate": 1.5485643429552797e-05, + "loss": 0.0026, + "step": 75540 + }, + { + "grad_norm": 0.06513576954603195, + "learning_rate": 1.5473681875232197e-05, + "loss": 0.0031, + "step": 75550 + }, + { + "grad_norm": 0.032437920570373535, + "learning_rate": 1.5461724096657837e-05, + "loss": 0.0045, + "step": 75560 + }, + { + "grad_norm": 0.03320440277457237, + "learning_rate": 1.5449770095137416e-05, + "loss": 0.0032, + "step": 75570 + }, + { + "grad_norm": 0.0422799289226532, + "learning_rate": 1.5437819871978178e-05, + "loss": 0.0044, + "step": 75580 + }, + { + "grad_norm": 0.032873112708330154, + "learning_rate": 1.5425873428486992e-05, + "loss": 0.0035, + "step": 75590 + }, + { + "grad_norm": 0.03610813617706299, + "learning_rate": 1.5413930765970312e-05, + "loss": 0.005, + "step": 75600 + }, + { + "grad_norm": 0.04425286501646042, + "learning_rate": 1.5401991885734175e-05, + "loss": 0.0025, + "step": 75610 + }, + { + "grad_norm": 0.035492636263370514, + "learning_rate": 1.5390056789084174e-05, + "loss": 0.0035, + "step": 75620 + }, + { + "grad_norm": 0.05217916890978813, + "learning_rate": 1.5378125477325545e-05, + "loss": 0.0037, + "step": 75630 + }, + { + "grad_norm": 0.034666553139686584, + "learning_rate": 1.5366197951763046e-05, + "loss": 0.0032, + "step": 75640 + }, + { + "grad_norm": 0.034051116555929184, + "learning_rate": 1.5354274213701075e-05, + "loss": 0.0027, + "step": 75650 + }, + { + "grad_norm": 0.03554100543260574, + "learning_rate": 1.5342354264443566e-05, + "loss": 0.003, + "step": 75660 + }, + { + "grad_norm": 0.023067397996783257, + "learning_rate": 1.533043810529411e-05, + "loss": 0.0026, + "step": 75670 + }, + { + "grad_norm": 0.03193509206175804, + "learning_rate": 1.5318525737555807e-05, + "loss": 0.0035, + "step": 75680 + }, + { + "grad_norm": 0.05592704191803932, + "learning_rate": 1.53066171625314e-05, + "loss": 0.0034, + "step": 75690 + }, + { + "grad_norm": 0.032079752534627914, + "learning_rate": 1.529471238152317e-05, + "loss": 0.0036, + "step": 75700 + }, + { + "grad_norm": 0.03216518834233284, + "learning_rate": 1.5282811395833023e-05, + "loss": 0.0034, + "step": 75710 + }, + { + "grad_norm": 0.03610731288790703, + "learning_rate": 1.5270914206762397e-05, + "loss": 0.0028, + "step": 75720 + }, + { + "grad_norm": 0.042141977697610855, + "learning_rate": 1.525902081561241e-05, + "loss": 0.0032, + "step": 75730 + }, + { + "grad_norm": 0.03208860754966736, + "learning_rate": 1.5247131223683664e-05, + "loss": 0.0026, + "step": 75740 + }, + { + "grad_norm": 0.025497019290924072, + "learning_rate": 1.5235245432276408e-05, + "loss": 0.0045, + "step": 75750 + }, + { + "grad_norm": 0.021731430664658546, + "learning_rate": 1.5223363442690424e-05, + "loss": 0.0039, + "step": 75760 + }, + { + "grad_norm": 0.028647003695368767, + "learning_rate": 1.5211485256225133e-05, + "loss": 0.0027, + "step": 75770 + }, + { + "grad_norm": 0.024054815992712975, + "learning_rate": 1.519961087417951e-05, + "loss": 0.0033, + "step": 75780 + }, + { + "grad_norm": 0.057288944721221924, + "learning_rate": 1.5187740297852137e-05, + "loss": 0.0027, + "step": 75790 + }, + { + "grad_norm": 0.028664108365774155, + "learning_rate": 1.517587352854113e-05, + "loss": 0.0027, + "step": 75800 + }, + { + "grad_norm": 0.04491119086742401, + "learning_rate": 1.516401056754423e-05, + "loss": 0.0034, + "step": 75810 + }, + { + "grad_norm": 0.028088748455047607, + "learning_rate": 1.5152151416158783e-05, + "loss": 0.0036, + "step": 75820 + }, + { + "grad_norm": 0.016699953004717827, + "learning_rate": 1.5140296075681642e-05, + "loss": 0.0024, + "step": 75830 + }, + { + "grad_norm": 0.055355314165353775, + "learning_rate": 1.5128444547409331e-05, + "loss": 0.003, + "step": 75840 + }, + { + "grad_norm": 0.05303456634283066, + "learning_rate": 1.5116596832637864e-05, + "loss": 0.004, + "step": 75850 + }, + { + "grad_norm": 0.027784114703536034, + "learning_rate": 1.5104752932662952e-05, + "loss": 0.0023, + "step": 75860 + }, + { + "grad_norm": 0.02030160278081894, + "learning_rate": 1.5092912848779778e-05, + "loss": 0.0021, + "step": 75870 + }, + { + "grad_norm": 0.02390694059431553, + "learning_rate": 1.5081076582283194e-05, + "loss": 0.0026, + "step": 75880 + }, + { + "grad_norm": 0.03318610042333603, + "learning_rate": 1.5069244134467553e-05, + "loss": 0.0028, + "step": 75890 + }, + { + "grad_norm": 0.032308679074048996, + "learning_rate": 1.5057415506626876e-05, + "loss": 0.0032, + "step": 75900 + }, + { + "grad_norm": 0.039752211421728134, + "learning_rate": 1.5045590700054668e-05, + "loss": 0.0027, + "step": 75910 + }, + { + "grad_norm": 0.029233217239379883, + "learning_rate": 1.5033769716044144e-05, + "loss": 0.0033, + "step": 75920 + }, + { + "grad_norm": 0.039022814482450485, + "learning_rate": 1.5021952555887975e-05, + "loss": 0.004, + "step": 75930 + }, + { + "grad_norm": 0.027447955682873726, + "learning_rate": 1.50101392208785e-05, + "loss": 0.0029, + "step": 75940 + }, + { + "grad_norm": 0.036099065095186234, + "learning_rate": 1.4998329712307579e-05, + "loss": 0.0034, + "step": 75950 + }, + { + "grad_norm": 0.04638180136680603, + "learning_rate": 1.4986524031466708e-05, + "loss": 0.0027, + "step": 75960 + }, + { + "grad_norm": 0.0240984708070755, + "learning_rate": 1.4974722179646916e-05, + "loss": 0.0024, + "step": 75970 + }, + { + "grad_norm": 0.03546197712421417, + "learning_rate": 1.4962924158138847e-05, + "loss": 0.0029, + "step": 75980 + }, + { + "grad_norm": 0.030972955748438835, + "learning_rate": 1.4951129968232714e-05, + "loss": 0.0027, + "step": 75990 + }, + { + "grad_norm": 0.03626757487654686, + "learning_rate": 1.4939339611218329e-05, + "loss": 0.0035, + "step": 76000 + }, + { + "grad_norm": 0.04144709184765816, + "learning_rate": 1.4927553088385032e-05, + "loss": 0.0028, + "step": 76010 + }, + { + "grad_norm": 0.030498947948217392, + "learning_rate": 1.4915770401021801e-05, + "loss": 0.0023, + "step": 76020 + }, + { + "grad_norm": 0.03336264565587044, + "learning_rate": 1.4903991550417185e-05, + "loss": 0.0035, + "step": 76030 + }, + { + "grad_norm": 0.03038519062101841, + "learning_rate": 1.4892216537859277e-05, + "loss": 0.0026, + "step": 76040 + }, + { + "grad_norm": 0.028279397636651993, + "learning_rate": 1.4880445364635775e-05, + "loss": 0.0024, + "step": 76050 + }, + { + "grad_norm": 0.031429026275873184, + "learning_rate": 1.4868678032033973e-05, + "loss": 0.004, + "step": 76060 + }, + { + "grad_norm": 0.033086951822042465, + "learning_rate": 1.4856914541340739e-05, + "loss": 0.0029, + "step": 76070 + }, + { + "grad_norm": 0.03264826536178589, + "learning_rate": 1.4845154893842472e-05, + "loss": 0.0037, + "step": 76080 + }, + { + "grad_norm": 0.03402995690703392, + "learning_rate": 1.4833399090825234e-05, + "loss": 0.0028, + "step": 76090 + }, + { + "grad_norm": 0.0462164580821991, + "learning_rate": 1.4821647133574562e-05, + "loss": 0.0029, + "step": 76100 + }, + { + "grad_norm": 0.027498949319124222, + "learning_rate": 1.4809899023375706e-05, + "loss": 0.0024, + "step": 76110 + }, + { + "grad_norm": 0.0391414538025856, + "learning_rate": 1.4798154761513366e-05, + "loss": 0.0043, + "step": 76120 + }, + { + "grad_norm": 0.03572608903050423, + "learning_rate": 1.4786414349271916e-05, + "loss": 0.004, + "step": 76130 + }, + { + "grad_norm": 0.023668844252824783, + "learning_rate": 1.4774677787935226e-05, + "loss": 0.0031, + "step": 76140 + }, + { + "grad_norm": 0.0318104550242424, + "learning_rate": 1.4762945078786838e-05, + "loss": 0.0026, + "step": 76150 + }, + { + "grad_norm": 0.024206645786762238, + "learning_rate": 1.4751216223109777e-05, + "loss": 0.0028, + "step": 76160 + }, + { + "grad_norm": 0.030341574922204018, + "learning_rate": 1.4739491222186714e-05, + "loss": 0.0032, + "step": 76170 + }, + { + "grad_norm": 0.05260156840085983, + "learning_rate": 1.4727770077299879e-05, + "loss": 0.0033, + "step": 76180 + }, + { + "grad_norm": 0.03703036531805992, + "learning_rate": 1.4716052789731094e-05, + "loss": 0.0028, + "step": 76190 + }, + { + "grad_norm": 0.03618577495217323, + "learning_rate": 1.470433936076171e-05, + "loss": 0.0043, + "step": 76200 + }, + { + "grad_norm": 0.04210599884390831, + "learning_rate": 1.4692629791672719e-05, + "loss": 0.0033, + "step": 76210 + }, + { + "grad_norm": 0.02367209456861019, + "learning_rate": 1.468092408374464e-05, + "loss": 0.0035, + "step": 76220 + }, + { + "grad_norm": 0.026222432032227516, + "learning_rate": 1.4669222238257602e-05, + "loss": 0.0028, + "step": 76230 + }, + { + "grad_norm": 0.039619330316782, + "learning_rate": 1.4657524256491301e-05, + "loss": 0.006, + "step": 76240 + }, + { + "grad_norm": 0.03407948091626167, + "learning_rate": 1.464583013972502e-05, + "loss": 0.0037, + "step": 76250 + }, + { + "grad_norm": 0.020795362070202827, + "learning_rate": 1.4634139889237591e-05, + "loss": 0.0023, + "step": 76260 + }, + { + "grad_norm": 0.022732747718691826, + "learning_rate": 1.4622453506307448e-05, + "loss": 0.0029, + "step": 76270 + }, + { + "grad_norm": 0.02568778768181801, + "learning_rate": 1.461077099221262e-05, + "loss": 0.0031, + "step": 76280 + }, + { + "grad_norm": 0.025568120181560516, + "learning_rate": 1.4599092348230652e-05, + "loss": 0.0021, + "step": 76290 + }, + { + "grad_norm": 0.025636257603764534, + "learning_rate": 1.4587417575638724e-05, + "loss": 0.0047, + "step": 76300 + }, + { + "grad_norm": 0.028218697756528854, + "learning_rate": 1.4575746675713563e-05, + "loss": 0.0038, + "step": 76310 + }, + { + "grad_norm": 0.027761301025748253, + "learning_rate": 1.4564079649731504e-05, + "loss": 0.0029, + "step": 76320 + }, + { + "grad_norm": 0.025239529088139534, + "learning_rate": 1.4552416498968402e-05, + "loss": 0.003, + "step": 76330 + }, + { + "grad_norm": 0.01856435090303421, + "learning_rate": 1.4540757224699747e-05, + "loss": 0.0025, + "step": 76340 + }, + { + "grad_norm": 0.019537849351763725, + "learning_rate": 1.452910182820056e-05, + "loss": 0.0025, + "step": 76350 + }, + { + "grad_norm": 0.035424135625362396, + "learning_rate": 1.4517450310745468e-05, + "loss": 0.003, + "step": 76360 + }, + { + "grad_norm": 0.04297180473804474, + "learning_rate": 1.450580267360866e-05, + "loss": 0.0028, + "step": 76370 + }, + { + "grad_norm": 0.025547979399561882, + "learning_rate": 1.4494158918063922e-05, + "loss": 0.0021, + "step": 76380 + }, + { + "grad_norm": 0.0271573755890131, + "learning_rate": 1.4482519045384568e-05, + "loss": 0.0033, + "step": 76390 + }, + { + "grad_norm": 0.021405735984444618, + "learning_rate": 1.4470883056843548e-05, + "loss": 0.0031, + "step": 76400 + }, + { + "grad_norm": 0.01973307691514492, + "learning_rate": 1.4459250953713322e-05, + "loss": 0.0032, + "step": 76410 + }, + { + "grad_norm": 0.031143130734562874, + "learning_rate": 1.444762273726598e-05, + "loss": 0.0036, + "step": 76420 + }, + { + "grad_norm": 0.02982197515666485, + "learning_rate": 1.443599840877316e-05, + "loss": 0.0041, + "step": 76430 + }, + { + "grad_norm": 0.04426368698477745, + "learning_rate": 1.4424377969506103e-05, + "loss": 0.004, + "step": 76440 + }, + { + "grad_norm": 0.026142306625843048, + "learning_rate": 1.4412761420735565e-05, + "loss": 0.0028, + "step": 76450 + }, + { + "grad_norm": 0.03387384116649628, + "learning_rate": 1.4401148763731953e-05, + "loss": 0.0022, + "step": 76460 + }, + { + "grad_norm": 0.04458362236618996, + "learning_rate": 1.4389539999765173e-05, + "loss": 0.003, + "step": 76470 + }, + { + "grad_norm": 0.02456367388367653, + "learning_rate": 1.4377935130104758e-05, + "loss": 0.0023, + "step": 76480 + }, + { + "grad_norm": 0.04610113427042961, + "learning_rate": 1.4366334156019801e-05, + "loss": 0.0038, + "step": 76490 + }, + { + "grad_norm": 0.051562804728746414, + "learning_rate": 1.435473707877898e-05, + "loss": 0.0026, + "step": 76500 + }, + { + "grad_norm": 0.051229387521743774, + "learning_rate": 1.4343143899650508e-05, + "loss": 0.0036, + "step": 76510 + }, + { + "grad_norm": 0.05006275326013565, + "learning_rate": 1.4331554619902204e-05, + "loss": 0.0041, + "step": 76520 + }, + { + "grad_norm": 0.04758930951356888, + "learning_rate": 1.4319969240801484e-05, + "loss": 0.0037, + "step": 76530 + }, + { + "grad_norm": 0.07354298233985901, + "learning_rate": 1.430838776361526e-05, + "loss": 0.0041, + "step": 76540 + }, + { + "grad_norm": 0.026282673701643944, + "learning_rate": 1.4296810189610093e-05, + "loss": 0.0024, + "step": 76550 + }, + { + "grad_norm": 0.03358647599816322, + "learning_rate": 1.4285236520052087e-05, + "loss": 0.003, + "step": 76560 + }, + { + "grad_norm": 0.03239958733320236, + "learning_rate": 1.4273666756206933e-05, + "loss": 0.0022, + "step": 76570 + }, + { + "grad_norm": 0.030520670115947723, + "learning_rate": 1.4262100899339848e-05, + "loss": 0.0042, + "step": 76580 + }, + { + "grad_norm": 0.025185631588101387, + "learning_rate": 1.4250538950715696e-05, + "loss": 0.0028, + "step": 76590 + }, + { + "grad_norm": 0.03250717371702194, + "learning_rate": 1.4238980911598842e-05, + "loss": 0.0022, + "step": 76600 + }, + { + "grad_norm": 0.03974173963069916, + "learning_rate": 1.4227426783253267e-05, + "loss": 0.0034, + "step": 76610 + }, + { + "grad_norm": 0.019845938310027122, + "learning_rate": 1.4215876566942521e-05, + "loss": 0.0034, + "step": 76620 + }, + { + "grad_norm": 0.046467553824186325, + "learning_rate": 1.4204330263929721e-05, + "loss": 0.0041, + "step": 76630 + }, + { + "grad_norm": 0.019317999482154846, + "learning_rate": 1.4192787875477537e-05, + "loss": 0.0036, + "step": 76640 + }, + { + "grad_norm": 0.01812666282057762, + "learning_rate": 1.4181249402848246e-05, + "loss": 0.0019, + "step": 76650 + }, + { + "grad_norm": 0.019143855199217796, + "learning_rate": 1.4169714847303656e-05, + "loss": 0.0024, + "step": 76660 + }, + { + "grad_norm": 0.0477159209549427, + "learning_rate": 1.4158184210105197e-05, + "loss": 0.0044, + "step": 76670 + }, + { + "grad_norm": 0.021597811952233315, + "learning_rate": 1.4146657492513787e-05, + "loss": 0.0037, + "step": 76680 + }, + { + "grad_norm": 0.02664043940603733, + "learning_rate": 1.4135134695790047e-05, + "loss": 0.0042, + "step": 76690 + }, + { + "grad_norm": 0.04251830652356148, + "learning_rate": 1.4123615821194037e-05, + "loss": 0.0027, + "step": 76700 + }, + { + "grad_norm": 0.030730420723557472, + "learning_rate": 1.4112100869985473e-05, + "loss": 0.0023, + "step": 76710 + }, + { + "grad_norm": 0.02096298336982727, + "learning_rate": 1.4100589843423589e-05, + "loss": 0.0039, + "step": 76720 + }, + { + "grad_norm": 0.017011400312185287, + "learning_rate": 1.408908274276723e-05, + "loss": 0.002, + "step": 76730 + }, + { + "grad_norm": 0.028383849188685417, + "learning_rate": 1.4077579569274786e-05, + "loss": 0.0034, + "step": 76740 + }, + { + "grad_norm": 0.05708357319235802, + "learning_rate": 1.4066080324204245e-05, + "loss": 0.0032, + "step": 76750 + }, + { + "grad_norm": 0.03291316330432892, + "learning_rate": 1.405458500881312e-05, + "loss": 0.0035, + "step": 76760 + }, + { + "grad_norm": 0.049031469970941544, + "learning_rate": 1.4043093624358538e-05, + "loss": 0.0057, + "step": 76770 + }, + { + "grad_norm": 0.02890239655971527, + "learning_rate": 1.4031606172097189e-05, + "loss": 0.0025, + "step": 76780 + }, + { + "grad_norm": 0.02852732315659523, + "learning_rate": 1.40201226532853e-05, + "loss": 0.0047, + "step": 76790 + }, + { + "grad_norm": 0.029257845133543015, + "learning_rate": 1.4008643069178701e-05, + "loss": 0.0027, + "step": 76800 + }, + { + "grad_norm": 0.024263691157102585, + "learning_rate": 1.399716742103278e-05, + "loss": 0.0023, + "step": 76810 + }, + { + "grad_norm": 0.04188662767410278, + "learning_rate": 1.398569571010252e-05, + "loss": 0.0035, + "step": 76820 + }, + { + "grad_norm": 0.024039126932621002, + "learning_rate": 1.3974227937642414e-05, + "loss": 0.0018, + "step": 76830 + }, + { + "grad_norm": 0.023386837914586067, + "learning_rate": 1.3962764104906595e-05, + "loss": 0.002, + "step": 76840 + }, + { + "grad_norm": 0.023371225222945213, + "learning_rate": 1.3951304213148696e-05, + "loss": 0.0027, + "step": 76850 + }, + { + "grad_norm": 0.05082729831337929, + "learning_rate": 1.3939848263621979e-05, + "loss": 0.0028, + "step": 76860 + }, + { + "grad_norm": 0.020122239366173744, + "learning_rate": 1.3928396257579219e-05, + "loss": 0.0021, + "step": 76870 + }, + { + "grad_norm": 0.023242173716425896, + "learning_rate": 1.3916948196272833e-05, + "loss": 0.0043, + "step": 76880 + }, + { + "grad_norm": 0.02314603514969349, + "learning_rate": 1.3905504080954734e-05, + "loss": 0.0019, + "step": 76890 + }, + { + "grad_norm": 0.024196606129407883, + "learning_rate": 1.3894063912876454e-05, + "loss": 0.002, + "step": 76900 + }, + { + "grad_norm": 0.01890004426240921, + "learning_rate": 1.3882627693289046e-05, + "loss": 0.0018, + "step": 76910 + }, + { + "grad_norm": 0.015280182473361492, + "learning_rate": 1.3871195423443183e-05, + "loss": 0.0025, + "step": 76920 + }, + { + "grad_norm": 0.03594040498137474, + "learning_rate": 1.3859767104589044e-05, + "loss": 0.0027, + "step": 76930 + }, + { + "grad_norm": 0.05137278884649277, + "learning_rate": 1.3848342737976467e-05, + "loss": 0.0026, + "step": 76940 + }, + { + "grad_norm": 0.04015439376235008, + "learning_rate": 1.3836922324854757e-05, + "loss": 0.0043, + "step": 76950 + }, + { + "grad_norm": 0.02792789787054062, + "learning_rate": 1.382550586647287e-05, + "loss": 0.0021, + "step": 76960 + }, + { + "grad_norm": 0.030771814286708832, + "learning_rate": 1.3814093364079257e-05, + "loss": 0.0031, + "step": 76970 + }, + { + "grad_norm": 0.025222577154636383, + "learning_rate": 1.3802684818921991e-05, + "loss": 0.003, + "step": 76980 + }, + { + "grad_norm": 0.023121051490306854, + "learning_rate": 1.3791280232248687e-05, + "loss": 0.0023, + "step": 76990 + }, + { + "grad_norm": 0.025282591581344604, + "learning_rate": 1.3779879605306555e-05, + "loss": 0.0029, + "step": 77000 + }, + { + "grad_norm": 0.02490958943963051, + "learning_rate": 1.376848293934232e-05, + "loss": 0.0025, + "step": 77010 + }, + { + "grad_norm": 0.0440312922000885, + "learning_rate": 1.3757090235602316e-05, + "loss": 0.0036, + "step": 77020 + }, + { + "grad_norm": 0.029347991570830345, + "learning_rate": 1.3745701495332447e-05, + "loss": 0.0029, + "step": 77030 + }, + { + "grad_norm": 0.0242171548306942, + "learning_rate": 1.3734316719778145e-05, + "loss": 0.0029, + "step": 77040 + }, + { + "grad_norm": 0.042544107884168625, + "learning_rate": 1.3722935910184454e-05, + "loss": 0.0024, + "step": 77050 + }, + { + "grad_norm": 0.05230553075671196, + "learning_rate": 1.3711559067795921e-05, + "loss": 0.0033, + "step": 77060 + }, + { + "grad_norm": 0.024219123646616936, + "learning_rate": 1.3700186193856756e-05, + "loss": 0.0024, + "step": 77070 + }, + { + "grad_norm": 0.019396359100937843, + "learning_rate": 1.368881728961064e-05, + "loss": 0.002, + "step": 77080 + }, + { + "grad_norm": 0.027120200917124748, + "learning_rate": 1.3677452356300885e-05, + "loss": 0.0027, + "step": 77090 + }, + { + "grad_norm": 0.03764430060982704, + "learning_rate": 1.3666091395170321e-05, + "loss": 0.0035, + "step": 77100 + }, + { + "grad_norm": 0.028729619458317757, + "learning_rate": 1.3654734407461384e-05, + "loss": 0.0033, + "step": 77110 + }, + { + "grad_norm": 0.03296968713402748, + "learning_rate": 1.3643381394416022e-05, + "loss": 0.002, + "step": 77120 + }, + { + "grad_norm": 0.04485545679926872, + "learning_rate": 1.3632032357275838e-05, + "loss": 0.0049, + "step": 77130 + }, + { + "grad_norm": 0.03366191312670708, + "learning_rate": 1.3620687297281898e-05, + "loss": 0.0029, + "step": 77140 + }, + { + "grad_norm": 0.030369717627763748, + "learning_rate": 1.3609346215674917e-05, + "loss": 0.0037, + "step": 77150 + }, + { + "grad_norm": 0.0318329893052578, + "learning_rate": 1.3598009113695109e-05, + "loss": 0.0036, + "step": 77160 + }, + { + "grad_norm": 0.02893698960542679, + "learning_rate": 1.358667599258231e-05, + "loss": 0.0028, + "step": 77170 + }, + { + "grad_norm": 0.041265737265348434, + "learning_rate": 1.357534685357586e-05, + "loss": 0.0034, + "step": 77180 + }, + { + "grad_norm": 0.029450220987200737, + "learning_rate": 1.3564021697914715e-05, + "loss": 0.002, + "step": 77190 + }, + { + "grad_norm": 0.022820744663476944, + "learning_rate": 1.3552700526837381e-05, + "loss": 0.0027, + "step": 77200 + }, + { + "grad_norm": 0.02532067522406578, + "learning_rate": 1.3541383341581925e-05, + "loss": 0.0037, + "step": 77210 + }, + { + "grad_norm": 0.027107035741209984, + "learning_rate": 1.3530070143385965e-05, + "loss": 0.0021, + "step": 77220 + }, + { + "grad_norm": 0.019736791029572487, + "learning_rate": 1.3518760933486702e-05, + "loss": 0.0032, + "step": 77230 + }, + { + "grad_norm": 0.028673646971583366, + "learning_rate": 1.3507455713120908e-05, + "loss": 0.0035, + "step": 77240 + }, + { + "grad_norm": 0.044582683593034744, + "learning_rate": 1.3496154483524876e-05, + "loss": 0.0027, + "step": 77250 + }, + { + "grad_norm": 0.03347836807370186, + "learning_rate": 1.3484857245934506e-05, + "loss": 0.0021, + "step": 77260 + }, + { + "grad_norm": 0.02748723514378071, + "learning_rate": 1.3473564001585248e-05, + "loss": 0.0033, + "step": 77270 + }, + { + "grad_norm": 0.03408389538526535, + "learning_rate": 1.3462274751712129e-05, + "loss": 0.0029, + "step": 77280 + }, + { + "grad_norm": 0.052737653255462646, + "learning_rate": 1.3450989497549698e-05, + "loss": 0.0041, + "step": 77290 + }, + { + "grad_norm": 0.03713119402527809, + "learning_rate": 1.3439708240332115e-05, + "loss": 0.0036, + "step": 77300 + }, + { + "grad_norm": 0.03261232376098633, + "learning_rate": 1.3428430981293043e-05, + "loss": 0.0036, + "step": 77310 + }, + { + "grad_norm": 0.022384297102689743, + "learning_rate": 1.3417157721665808e-05, + "loss": 0.0031, + "step": 77320 + }, + { + "grad_norm": 0.023279231041669846, + "learning_rate": 1.3405888462683186e-05, + "loss": 0.0025, + "step": 77330 + }, + { + "grad_norm": 0.03085857816040516, + "learning_rate": 1.3394623205577595e-05, + "loss": 0.0023, + "step": 77340 + }, + { + "grad_norm": 0.024974312633275986, + "learning_rate": 1.3383361951580963e-05, + "loss": 0.003, + "step": 77350 + }, + { + "grad_norm": 0.021008068695664406, + "learning_rate": 1.337210470192483e-05, + "loss": 0.0026, + "step": 77360 + }, + { + "grad_norm": 0.019865449517965317, + "learning_rate": 1.3360851457840245e-05, + "loss": 0.0039, + "step": 77370 + }, + { + "grad_norm": 0.06137353926897049, + "learning_rate": 1.3349602220557856e-05, + "loss": 0.0042, + "step": 77380 + }, + { + "grad_norm": 0.03804722800850868, + "learning_rate": 1.3338356991307865e-05, + "loss": 0.0024, + "step": 77390 + }, + { + "grad_norm": 0.029056500643491745, + "learning_rate": 1.3327115771320048e-05, + "loss": 0.0033, + "step": 77400 + }, + { + "grad_norm": 0.04743490368127823, + "learning_rate": 1.3315878561823697e-05, + "loss": 0.0032, + "step": 77410 + }, + { + "grad_norm": 0.03683203086256981, + "learning_rate": 1.330464536404773e-05, + "loss": 0.0023, + "step": 77420 + }, + { + "grad_norm": 0.0409255176782608, + "learning_rate": 1.3293416179220563e-05, + "loss": 0.0028, + "step": 77430 + }, + { + "grad_norm": 0.04523959010839462, + "learning_rate": 1.328219100857021e-05, + "loss": 0.0041, + "step": 77440 + }, + { + "grad_norm": 0.042620446532964706, + "learning_rate": 1.327096985332425e-05, + "loss": 0.0052, + "step": 77450 + }, + { + "grad_norm": 0.03544468805193901, + "learning_rate": 1.3259752714709816e-05, + "loss": 0.0029, + "step": 77460 + }, + { + "grad_norm": 0.01925675943493843, + "learning_rate": 1.3248539593953569e-05, + "loss": 0.0044, + "step": 77470 + }, + { + "grad_norm": 0.021373147144913673, + "learning_rate": 1.323733049228178e-05, + "loss": 0.0045, + "step": 77480 + }, + { + "grad_norm": 0.024774977937340736, + "learning_rate": 1.3226125410920276e-05, + "loss": 0.0023, + "step": 77490 + }, + { + "grad_norm": 0.03538978472352028, + "learning_rate": 1.3214924351094388e-05, + "loss": 0.0032, + "step": 77500 + }, + { + "grad_norm": 0.03838035836815834, + "learning_rate": 1.3203727314029069e-05, + "loss": 0.0029, + "step": 77510 + }, + { + "grad_norm": 0.028983430936932564, + "learning_rate": 1.3192534300948811e-05, + "loss": 0.0034, + "step": 77520 + }, + { + "grad_norm": 0.03802455961704254, + "learning_rate": 1.3181345313077675e-05, + "loss": 0.0025, + "step": 77530 + }, + { + "grad_norm": 0.03319511562585831, + "learning_rate": 1.3170160351639243e-05, + "loss": 0.0034, + "step": 77540 + }, + { + "grad_norm": 0.029506606981158257, + "learning_rate": 1.3158979417856721e-05, + "loss": 0.0033, + "step": 77550 + }, + { + "grad_norm": 0.028618626296520233, + "learning_rate": 1.31478025129528e-05, + "loss": 0.0045, + "step": 77560 + }, + { + "grad_norm": 0.025080552324652672, + "learning_rate": 1.3136629638149794e-05, + "loss": 0.0022, + "step": 77570 + }, + { + "grad_norm": 0.027941415086388588, + "learning_rate": 1.3125460794669547e-05, + "loss": 0.0028, + "step": 77580 + }, + { + "grad_norm": 0.04576142504811287, + "learning_rate": 1.3114295983733483e-05, + "loss": 0.0052, + "step": 77590 + }, + { + "grad_norm": 0.03288951516151428, + "learning_rate": 1.3103135206562534e-05, + "loss": 0.0035, + "step": 77600 + }, + { + "grad_norm": 0.02509632334113121, + "learning_rate": 1.3091978464377264e-05, + "loss": 0.0024, + "step": 77610 + }, + { + "grad_norm": 0.050991639494895935, + "learning_rate": 1.3080825758397724e-05, + "loss": 0.0038, + "step": 77620 + }, + { + "grad_norm": 0.033041875809431076, + "learning_rate": 1.3069677089843568e-05, + "loss": 0.0044, + "step": 77630 + }, + { + "grad_norm": 0.039542607963085175, + "learning_rate": 1.305853245993401e-05, + "loss": 0.0024, + "step": 77640 + }, + { + "grad_norm": 0.032152291387319565, + "learning_rate": 1.3047391869887809e-05, + "loss": 0.0038, + "step": 77650 + }, + { + "grad_norm": 0.034075021743774414, + "learning_rate": 1.3036255320923263e-05, + "loss": 0.0027, + "step": 77660 + }, + { + "grad_norm": 0.045779999345541, + "learning_rate": 1.3025122814258273e-05, + "loss": 0.0032, + "step": 77670 + }, + { + "grad_norm": 0.027435436844825745, + "learning_rate": 1.3013994351110249e-05, + "loss": 0.0032, + "step": 77680 + }, + { + "grad_norm": 0.01903853937983513, + "learning_rate": 1.3002869932696194e-05, + "loss": 0.0027, + "step": 77690 + }, + { + "grad_norm": 0.05813339352607727, + "learning_rate": 1.2991749560232664e-05, + "loss": 0.0025, + "step": 77700 + }, + { + "grad_norm": 0.03587209805846214, + "learning_rate": 1.2980633234935768e-05, + "loss": 0.0023, + "step": 77710 + }, + { + "grad_norm": 0.04571743309497833, + "learning_rate": 1.2969520958021153e-05, + "loss": 0.0036, + "step": 77720 + }, + { + "grad_norm": 0.060133472084999084, + "learning_rate": 1.295841273070405e-05, + "loss": 0.0035, + "step": 77730 + }, + { + "grad_norm": 0.03802245110273361, + "learning_rate": 1.2947308554199255e-05, + "loss": 0.0032, + "step": 77740 + }, + { + "grad_norm": 0.045221079140901566, + "learning_rate": 1.2936208429721075e-05, + "loss": 0.0029, + "step": 77750 + }, + { + "grad_norm": 0.025354845449328423, + "learning_rate": 1.2925112358483416e-05, + "loss": 0.0028, + "step": 77760 + }, + { + "grad_norm": 0.03462231904268265, + "learning_rate": 1.2914020341699728e-05, + "loss": 0.003, + "step": 77770 + }, + { + "grad_norm": 0.025376606732606888, + "learning_rate": 1.2902932380583032e-05, + "loss": 0.0025, + "step": 77780 + }, + { + "grad_norm": 0.03656987100839615, + "learning_rate": 1.2891848476345863e-05, + "loss": 0.003, + "step": 77790 + }, + { + "grad_norm": 0.03491520881652832, + "learning_rate": 1.2880768630200368e-05, + "loss": 0.0034, + "step": 77800 + }, + { + "grad_norm": 0.03484214097261429, + "learning_rate": 1.286969284335819e-05, + "loss": 0.0048, + "step": 77810 + }, + { + "grad_norm": 0.031239407137036324, + "learning_rate": 1.2858621117030578e-05, + "loss": 0.0041, + "step": 77820 + }, + { + "grad_norm": 0.015030311420559883, + "learning_rate": 1.2847553452428324e-05, + "loss": 0.0032, + "step": 77830 + }, + { + "grad_norm": 0.052963726222515106, + "learning_rate": 1.283648985076178e-05, + "loss": 0.0036, + "step": 77840 + }, + { + "grad_norm": 0.03642522543668747, + "learning_rate": 1.2825430313240817e-05, + "loss": 0.0028, + "step": 77850 + }, + { + "grad_norm": 0.02435249835252762, + "learning_rate": 1.281437484107491e-05, + "loss": 0.0046, + "step": 77860 + }, + { + "grad_norm": 0.02086489647626877, + "learning_rate": 1.2803323435473058e-05, + "loss": 0.0029, + "step": 77870 + }, + { + "grad_norm": 0.030282745137810707, + "learning_rate": 1.2792276097643841e-05, + "loss": 0.0032, + "step": 77880 + }, + { + "grad_norm": 0.027414539828896523, + "learning_rate": 1.2781232828795342e-05, + "loss": 0.0046, + "step": 77890 + }, + { + "grad_norm": 0.05150890722870827, + "learning_rate": 1.2770193630135292e-05, + "loss": 0.0036, + "step": 77900 + }, + { + "grad_norm": 0.03160295635461807, + "learning_rate": 1.2759158502870877e-05, + "loss": 0.0035, + "step": 77910 + }, + { + "grad_norm": 0.020880332216620445, + "learning_rate": 1.2748127448208913e-05, + "loss": 0.0024, + "step": 77920 + }, + { + "grad_norm": 0.025822719559073448, + "learning_rate": 1.2737100467355706e-05, + "loss": 0.0022, + "step": 77930 + }, + { + "grad_norm": 0.03148232027888298, + "learning_rate": 1.2726077561517169e-05, + "loss": 0.0026, + "step": 77940 + }, + { + "grad_norm": 0.027779849246144295, + "learning_rate": 1.2715058731898755e-05, + "loss": 0.003, + "step": 77950 + }, + { + "grad_norm": 0.01871710643172264, + "learning_rate": 1.2704043979705471e-05, + "loss": 0.0033, + "step": 77960 + }, + { + "grad_norm": 0.026358485221862793, + "learning_rate": 1.269303330614185e-05, + "loss": 0.0023, + "step": 77970 + }, + { + "grad_norm": 0.025826100260019302, + "learning_rate": 1.2682026712412015e-05, + "loss": 0.0023, + "step": 77980 + }, + { + "grad_norm": 0.024604329839348793, + "learning_rate": 1.2671024199719644e-05, + "loss": 0.0027, + "step": 77990 + }, + { + "grad_norm": 0.06432486325502396, + "learning_rate": 1.2660025769267936e-05, + "loss": 0.0029, + "step": 78000 + }, + { + "grad_norm": 0.03963392227888107, + "learning_rate": 1.2649031422259667e-05, + "loss": 0.0035, + "step": 78010 + }, + { + "grad_norm": 0.024221712723374367, + "learning_rate": 1.2638041159897163e-05, + "loss": 0.0019, + "step": 78020 + }, + { + "grad_norm": 0.018103141337633133, + "learning_rate": 1.262705498338232e-05, + "loss": 0.0025, + "step": 78030 + }, + { + "grad_norm": 0.04264954477548599, + "learning_rate": 1.2616072893916542e-05, + "loss": 0.0038, + "step": 78040 + }, + { + "grad_norm": 0.025251930579543114, + "learning_rate": 1.2605094892700836e-05, + "loss": 0.0024, + "step": 78050 + }, + { + "grad_norm": 0.03143123909831047, + "learning_rate": 1.259412098093572e-05, + "loss": 0.0034, + "step": 78060 + }, + { + "grad_norm": 0.059617795050144196, + "learning_rate": 1.258315115982131e-05, + "loss": 0.0033, + "step": 78070 + }, + { + "grad_norm": 0.035752519965171814, + "learning_rate": 1.2572185430557203e-05, + "loss": 0.0027, + "step": 78080 + }, + { + "grad_norm": 0.04379528760910034, + "learning_rate": 1.2561223794342659e-05, + "loss": 0.0027, + "step": 78090 + }, + { + "grad_norm": 0.033022571355104446, + "learning_rate": 1.255026625237637e-05, + "loss": 0.0022, + "step": 78100 + }, + { + "grad_norm": 0.022327493876218796, + "learning_rate": 1.2539312805856678e-05, + "loss": 0.0018, + "step": 78110 + }, + { + "grad_norm": 0.030945228412747383, + "learning_rate": 1.25283634559814e-05, + "loss": 0.0038, + "step": 78120 + }, + { + "grad_norm": 0.023315321654081345, + "learning_rate": 1.2517418203947972e-05, + "loss": 0.0021, + "step": 78130 + }, + { + "grad_norm": 0.021473582834005356, + "learning_rate": 1.2506477050953308e-05, + "loss": 0.0026, + "step": 78140 + }, + { + "grad_norm": 0.019500097259879112, + "learning_rate": 1.2495539998193972e-05, + "loss": 0.0028, + "step": 78150 + }, + { + "grad_norm": 0.023788796737790108, + "learning_rate": 1.2484607046865975e-05, + "loss": 0.002, + "step": 78160 + }, + { + "grad_norm": 0.023153575137257576, + "learning_rate": 1.2473678198164968e-05, + "loss": 0.0039, + "step": 78170 + }, + { + "grad_norm": 0.028747403994202614, + "learning_rate": 1.2462753453286075e-05, + "loss": 0.0032, + "step": 78180 + }, + { + "grad_norm": 0.02235906943678856, + "learning_rate": 1.2451832813424031e-05, + "loss": 0.0026, + "step": 78190 + }, + { + "grad_norm": 0.0480617880821228, + "learning_rate": 1.2440916279773095e-05, + "loss": 0.0044, + "step": 78200 + }, + { + "grad_norm": 0.033683110028505325, + "learning_rate": 1.2430003853527101e-05, + "loss": 0.0025, + "step": 78210 + }, + { + "grad_norm": 0.021532902494072914, + "learning_rate": 1.2419095535879383e-05, + "loss": 0.0026, + "step": 78220 + }, + { + "grad_norm": 0.04439634084701538, + "learning_rate": 1.2408191328022878e-05, + "loss": 0.0056, + "step": 78230 + }, + { + "grad_norm": 0.040004026144742966, + "learning_rate": 1.2397291231150066e-05, + "loss": 0.0035, + "step": 78240 + }, + { + "grad_norm": 0.023364124819636345, + "learning_rate": 1.2386395246452937e-05, + "loss": 0.0021, + "step": 78250 + }, + { + "grad_norm": 0.04117736965417862, + "learning_rate": 1.2375503375123083e-05, + "loss": 0.004, + "step": 78260 + }, + { + "grad_norm": 0.030380580574274063, + "learning_rate": 1.2364615618351583e-05, + "loss": 0.003, + "step": 78270 + }, + { + "grad_norm": 0.02712133340537548, + "learning_rate": 1.2353731977329169e-05, + "loss": 0.0023, + "step": 78280 + }, + { + "grad_norm": 0.04753910005092621, + "learning_rate": 1.2342852453246007e-05, + "loss": 0.0026, + "step": 78290 + }, + { + "grad_norm": 0.023429008200764656, + "learning_rate": 1.2331977047291898e-05, + "loss": 0.0024, + "step": 78300 + }, + { + "grad_norm": 0.02583722397685051, + "learning_rate": 1.2321105760656137e-05, + "loss": 0.003, + "step": 78310 + }, + { + "grad_norm": 0.03029809519648552, + "learning_rate": 1.2310238594527613e-05, + "loss": 0.0037, + "step": 78320 + }, + { + "grad_norm": 0.024095268920063972, + "learning_rate": 1.2299375550094693e-05, + "loss": 0.0024, + "step": 78330 + }, + { + "grad_norm": 0.051666975021362305, + "learning_rate": 1.2288516628545421e-05, + "loss": 0.0029, + "step": 78340 + }, + { + "grad_norm": 0.041162289679050446, + "learning_rate": 1.2277661831067255e-05, + "loss": 0.0031, + "step": 78350 + }, + { + "grad_norm": 0.021803319454193115, + "learning_rate": 1.2266811158847286e-05, + "loss": 0.0022, + "step": 78360 + }, + { + "grad_norm": 0.044129591435194016, + "learning_rate": 1.2255964613072107e-05, + "loss": 0.0033, + "step": 78370 + }, + { + "grad_norm": 0.03151383623480797, + "learning_rate": 1.2245122194927905e-05, + "loss": 0.0026, + "step": 78380 + }, + { + "grad_norm": 0.027531389147043228, + "learning_rate": 1.2234283905600364e-05, + "loss": 0.0025, + "step": 78390 + }, + { + "grad_norm": 0.021055016666650772, + "learning_rate": 1.222344974627475e-05, + "loss": 0.0028, + "step": 78400 + }, + { + "grad_norm": 0.022264961153268814, + "learning_rate": 1.2212619718135875e-05, + "loss": 0.0029, + "step": 78410 + }, + { + "grad_norm": 0.04686887189745903, + "learning_rate": 1.2201793822368118e-05, + "loss": 0.0055, + "step": 78420 + }, + { + "grad_norm": 0.028820553794503212, + "learning_rate": 1.2190972060155337e-05, + "loss": 0.0027, + "step": 78430 + }, + { + "grad_norm": 0.039023879915475845, + "learning_rate": 1.2180154432681013e-05, + "loss": 0.0037, + "step": 78440 + }, + { + "grad_norm": 0.030085047706961632, + "learning_rate": 1.2169340941128148e-05, + "loss": 0.0026, + "step": 78450 + }, + { + "grad_norm": 0.021820003166794777, + "learning_rate": 1.2158531586679267e-05, + "loss": 0.0025, + "step": 78460 + }, + { + "grad_norm": 0.02652684412896633, + "learning_rate": 1.2147726370516477e-05, + "loss": 0.0029, + "step": 78470 + }, + { + "grad_norm": 0.024292629212141037, + "learning_rate": 1.2136925293821422e-05, + "loss": 0.0022, + "step": 78480 + }, + { + "grad_norm": 0.026193810626864433, + "learning_rate": 1.2126128357775306e-05, + "loss": 0.003, + "step": 78490 + }, + { + "grad_norm": 0.0174171794205904, + "learning_rate": 1.2115335563558838e-05, + "loss": 0.0019, + "step": 78500 + }, + { + "grad_norm": 0.028210770338773727, + "learning_rate": 1.2104546912352327e-05, + "loss": 0.0025, + "step": 78510 + }, + { + "grad_norm": 0.014090693555772305, + "learning_rate": 1.2093762405335557e-05, + "loss": 0.0015, + "step": 78520 + }, + { + "grad_norm": 0.04334772378206253, + "learning_rate": 1.2082982043687974e-05, + "loss": 0.0031, + "step": 78530 + }, + { + "grad_norm": 0.038780998438596725, + "learning_rate": 1.207220582858845e-05, + "loss": 0.0032, + "step": 78540 + }, + { + "grad_norm": 0.035311948508024216, + "learning_rate": 1.206143376121549e-05, + "loss": 0.0033, + "step": 78550 + }, + { + "grad_norm": 0.0331871323287487, + "learning_rate": 1.2050665842747078e-05, + "loss": 0.0028, + "step": 78560 + }, + { + "grad_norm": 0.05049580708146095, + "learning_rate": 1.2039902074360804e-05, + "loss": 0.0035, + "step": 78570 + }, + { + "grad_norm": 0.03358661010861397, + "learning_rate": 1.2029142457233755e-05, + "loss": 0.0017, + "step": 78580 + }, + { + "grad_norm": 0.037137825042009354, + "learning_rate": 1.2018386992542601e-05, + "loss": 0.0027, + "step": 78590 + }, + { + "grad_norm": 0.02537696249783039, + "learning_rate": 1.2007635681463542e-05, + "loss": 0.0026, + "step": 78600 + }, + { + "grad_norm": 0.02517252042889595, + "learning_rate": 1.1996888525172334e-05, + "loss": 0.0027, + "step": 78610 + }, + { + "grad_norm": 0.02905990742146969, + "learning_rate": 1.198614552484425e-05, + "loss": 0.0028, + "step": 78620 + }, + { + "grad_norm": 0.027274239808321, + "learning_rate": 1.197540668165415e-05, + "loss": 0.0023, + "step": 78630 + }, + { + "grad_norm": 0.03164755553007126, + "learning_rate": 1.1964671996776395e-05, + "loss": 0.0043, + "step": 78640 + }, + { + "grad_norm": 0.0304128248244524, + "learning_rate": 1.1953941471384922e-05, + "loss": 0.0024, + "step": 78650 + }, + { + "grad_norm": 0.02904103882610798, + "learning_rate": 1.1943215106653205e-05, + "loss": 0.0035, + "step": 78660 + }, + { + "grad_norm": 0.02359585277736187, + "learning_rate": 1.193249290375429e-05, + "loss": 0.0025, + "step": 78670 + }, + { + "grad_norm": 0.03779551014304161, + "learning_rate": 1.1921774863860696e-05, + "loss": 0.0034, + "step": 78680 + }, + { + "grad_norm": 0.01930069737136364, + "learning_rate": 1.1911060988144556e-05, + "loss": 0.0035, + "step": 78690 + }, + { + "grad_norm": 0.014911691658198833, + "learning_rate": 1.1900351277777533e-05, + "loss": 0.0021, + "step": 78700 + }, + { + "grad_norm": 0.049591150134801865, + "learning_rate": 1.1889645733930798e-05, + "loss": 0.0026, + "step": 78710 + }, + { + "grad_norm": 0.0365753173828125, + "learning_rate": 1.1878944357775107e-05, + "loss": 0.003, + "step": 78720 + }, + { + "grad_norm": 0.04835441708564758, + "learning_rate": 1.1868247150480749e-05, + "loss": 0.0033, + "step": 78730 + }, + { + "grad_norm": 0.024205049499869347, + "learning_rate": 1.1857554113217567e-05, + "loss": 0.0026, + "step": 78740 + }, + { + "grad_norm": 0.036519668996334076, + "learning_rate": 1.1846865247154903e-05, + "loss": 0.0027, + "step": 78750 + }, + { + "grad_norm": 0.023282082751393318, + "learning_rate": 1.1836180553461711e-05, + "loss": 0.0023, + "step": 78760 + }, + { + "grad_norm": 0.037818051874637604, + "learning_rate": 1.1825500033306419e-05, + "loss": 0.0023, + "step": 78770 + }, + { + "grad_norm": 0.02067267894744873, + "learning_rate": 1.1814823687857052e-05, + "loss": 0.0027, + "step": 78780 + }, + { + "grad_norm": 0.020418817177414894, + "learning_rate": 1.1804151518281153e-05, + "loss": 0.0036, + "step": 78790 + }, + { + "grad_norm": 0.016059761866927147, + "learning_rate": 1.179348352574583e-05, + "loss": 0.0032, + "step": 78800 + }, + { + "grad_norm": 0.01679978147149086, + "learning_rate": 1.1782819711417697e-05, + "loss": 0.0029, + "step": 78810 + }, + { + "grad_norm": 0.04720383137464523, + "learning_rate": 1.1772160076462951e-05, + "loss": 0.0031, + "step": 78820 + }, + { + "grad_norm": 0.03374601528048515, + "learning_rate": 1.1761504622047293e-05, + "loss": 0.0043, + "step": 78830 + }, + { + "grad_norm": 0.03355899825692177, + "learning_rate": 1.1750853349335999e-05, + "loss": 0.0033, + "step": 78840 + }, + { + "grad_norm": 0.023466086015105247, + "learning_rate": 1.1740206259493874e-05, + "loss": 0.0022, + "step": 78850 + }, + { + "grad_norm": 0.031615808606147766, + "learning_rate": 1.1729563353685286e-05, + "loss": 0.0028, + "step": 78860 + }, + { + "grad_norm": 0.03688586503267288, + "learning_rate": 1.1718924633074091e-05, + "loss": 0.0039, + "step": 78870 + }, + { + "grad_norm": 0.023775862529873848, + "learning_rate": 1.1708290098823765e-05, + "loss": 0.0023, + "step": 78880 + }, + { + "grad_norm": 0.017013441771268845, + "learning_rate": 1.1697659752097245e-05, + "loss": 0.0038, + "step": 78890 + }, + { + "grad_norm": 0.026181448251008987, + "learning_rate": 1.1687033594057068e-05, + "loss": 0.0027, + "step": 78900 + }, + { + "grad_norm": 0.025283239781856537, + "learning_rate": 1.1676411625865297e-05, + "loss": 0.0022, + "step": 78910 + }, + { + "grad_norm": 0.03676673397421837, + "learning_rate": 1.1665793848683542e-05, + "loss": 0.003, + "step": 78920 + }, + { + "grad_norm": 0.043408431112766266, + "learning_rate": 1.1655180263672927e-05, + "loss": 0.0047, + "step": 78930 + }, + { + "grad_norm": 0.044279590249061584, + "learning_rate": 1.1644570871994142e-05, + "loss": 0.0026, + "step": 78940 + }, + { + "grad_norm": 0.023601870983839035, + "learning_rate": 1.163396567480744e-05, + "loss": 0.0034, + "step": 78950 + }, + { + "grad_norm": 0.044711627066135406, + "learning_rate": 1.1623364673272552e-05, + "loss": 0.0038, + "step": 78960 + }, + { + "grad_norm": 0.022728893905878067, + "learning_rate": 1.1612767868548807e-05, + "loss": 0.0025, + "step": 78970 + }, + { + "grad_norm": 0.022331478074193, + "learning_rate": 1.1602175261795057e-05, + "loss": 0.0023, + "step": 78980 + }, + { + "grad_norm": 0.02836306020617485, + "learning_rate": 1.1591586854169696e-05, + "loss": 0.0025, + "step": 78990 + }, + { + "grad_norm": 0.022662319242954254, + "learning_rate": 1.158100264683064e-05, + "loss": 0.0025, + "step": 79000 + }, + { + "grad_norm": 0.019364044070243835, + "learning_rate": 1.1570422640935386e-05, + "loss": 0.0037, + "step": 79010 + }, + { + "grad_norm": 0.012238416820764542, + "learning_rate": 1.155984683764092e-05, + "loss": 0.0016, + "step": 79020 + }, + { + "grad_norm": 0.024196088314056396, + "learning_rate": 1.1549275238103812e-05, + "loss": 0.0022, + "step": 79030 + }, + { + "grad_norm": 0.04734749719500542, + "learning_rate": 1.1538707843480152e-05, + "loss": 0.0031, + "step": 79040 + }, + { + "grad_norm": 0.017042076215147972, + "learning_rate": 1.1528144654925593e-05, + "loss": 0.0021, + "step": 79050 + }, + { + "grad_norm": 0.027063759043812752, + "learning_rate": 1.1517585673595277e-05, + "loss": 0.0033, + "step": 79060 + }, + { + "grad_norm": 0.023528950288891792, + "learning_rate": 1.150703090064395e-05, + "loss": 0.0034, + "step": 79070 + }, + { + "grad_norm": 0.034031763672828674, + "learning_rate": 1.1496480337225835e-05, + "loss": 0.0031, + "step": 79080 + }, + { + "grad_norm": 0.022243795916438103, + "learning_rate": 1.1485933984494751e-05, + "loss": 0.0026, + "step": 79090 + }, + { + "grad_norm": 0.029662614688277245, + "learning_rate": 1.1475391843604e-05, + "loss": 0.0037, + "step": 79100 + }, + { + "grad_norm": 0.04602574557065964, + "learning_rate": 1.1464853915706503e-05, + "loss": 0.0029, + "step": 79110 + }, + { + "grad_norm": 0.025074627250432968, + "learning_rate": 1.1454320201954626e-05, + "loss": 0.0025, + "step": 79120 + }, + { + "grad_norm": 0.01606319472193718, + "learning_rate": 1.1443790703500356e-05, + "loss": 0.004, + "step": 79130 + }, + { + "grad_norm": 0.050257358700037, + "learning_rate": 1.1433265421495154e-05, + "loss": 0.0019, + "step": 79140 + }, + { + "grad_norm": 0.03846339136362076, + "learning_rate": 1.1422744357090065e-05, + "loss": 0.0032, + "step": 79150 + }, + { + "grad_norm": 0.03935323283076286, + "learning_rate": 1.1412227511435647e-05, + "loss": 0.0031, + "step": 79160 + }, + { + "grad_norm": 0.03853175789117813, + "learning_rate": 1.1401714885682025e-05, + "loss": 0.0032, + "step": 79170 + }, + { + "grad_norm": 0.013902852311730385, + "learning_rate": 1.1391206480978823e-05, + "loss": 0.0016, + "step": 79180 + }, + { + "grad_norm": 0.014167986810207367, + "learning_rate": 1.1380702298475227e-05, + "loss": 0.0041, + "step": 79190 + }, + { + "grad_norm": 0.03229773789644241, + "learning_rate": 1.1370202339319985e-05, + "loss": 0.0027, + "step": 79200 + }, + { + "grad_norm": 0.018527435138821602, + "learning_rate": 1.1359706604661319e-05, + "loss": 0.0031, + "step": 79210 + }, + { + "grad_norm": 0.018151480704545975, + "learning_rate": 1.1349215095647042e-05, + "loss": 0.0023, + "step": 79220 + }, + { + "grad_norm": 0.03261515125632286, + "learning_rate": 1.1338727813424493e-05, + "loss": 0.0031, + "step": 79230 + }, + { + "grad_norm": 0.014180047437548637, + "learning_rate": 1.132824475914056e-05, + "loss": 0.0019, + "step": 79240 + }, + { + "grad_norm": 0.04527116194367409, + "learning_rate": 1.131776593394162e-05, + "loss": 0.0029, + "step": 79250 + }, + { + "grad_norm": 0.03185687214136124, + "learning_rate": 1.1307291338973657e-05, + "loss": 0.0022, + "step": 79260 + }, + { + "grad_norm": 0.020903494209051132, + "learning_rate": 1.1296820975382121e-05, + "loss": 0.0023, + "step": 79270 + }, + { + "grad_norm": 0.028383424505591393, + "learning_rate": 1.1286354844312074e-05, + "loss": 0.0025, + "step": 79280 + }, + { + "grad_norm": 0.026854312047362328, + "learning_rate": 1.1275892946908023e-05, + "loss": 0.0043, + "step": 79290 + }, + { + "grad_norm": 0.02263464778661728, + "learning_rate": 1.126543528431413e-05, + "loss": 0.0021, + "step": 79300 + }, + { + "grad_norm": 0.02291787788271904, + "learning_rate": 1.125498185767398e-05, + "loss": 0.0028, + "step": 79310 + }, + { + "grad_norm": 0.03887293487787247, + "learning_rate": 1.1244532668130781e-05, + "loss": 0.0028, + "step": 79320 + }, + { + "grad_norm": 0.050402507185935974, + "learning_rate": 1.1234087716827203e-05, + "loss": 0.0036, + "step": 79330 + }, + { + "grad_norm": 0.01895589381456375, + "learning_rate": 1.1223647004905524e-05, + "loss": 0.002, + "step": 79340 + }, + { + "grad_norm": 0.01842346414923668, + "learning_rate": 1.1213210533507485e-05, + "loss": 0.002, + "step": 79350 + }, + { + "grad_norm": 0.02780151180922985, + "learning_rate": 1.1202778303774447e-05, + "loss": 0.0049, + "step": 79360 + }, + { + "grad_norm": 0.028554178774356842, + "learning_rate": 1.1192350316847234e-05, + "loss": 0.0022, + "step": 79370 + }, + { + "grad_norm": 0.04956311360001564, + "learning_rate": 1.1181926573866258e-05, + "loss": 0.0037, + "step": 79380 + }, + { + "grad_norm": 0.05864700675010681, + "learning_rate": 1.1171507075971416e-05, + "loss": 0.006, + "step": 79390 + }, + { + "grad_norm": 0.05153708904981613, + "learning_rate": 1.1161091824302177e-05, + "loss": 0.0033, + "step": 79400 + }, + { + "grad_norm": 0.044268760830163956, + "learning_rate": 1.115068081999755e-05, + "loss": 0.0023, + "step": 79410 + }, + { + "grad_norm": 0.02036578208208084, + "learning_rate": 1.1140274064196071e-05, + "loss": 0.0022, + "step": 79420 + }, + { + "grad_norm": 0.024974925443530083, + "learning_rate": 1.112987155803578e-05, + "loss": 0.0028, + "step": 79430 + }, + { + "grad_norm": 0.034852564334869385, + "learning_rate": 1.1119473302654298e-05, + "loss": 0.0036, + "step": 79440 + }, + { + "grad_norm": 0.03334026411175728, + "learning_rate": 1.1109079299188769e-05, + "loss": 0.0028, + "step": 79450 + }, + { + "grad_norm": 0.024974139407277107, + "learning_rate": 1.1098689548775847e-05, + "loss": 0.0023, + "step": 79460 + }, + { + "grad_norm": 0.02731071598827839, + "learning_rate": 1.1088304052551762e-05, + "loss": 0.0036, + "step": 79470 + }, + { + "grad_norm": 0.03221871703863144, + "learning_rate": 1.1077922811652215e-05, + "loss": 0.0026, + "step": 79480 + }, + { + "grad_norm": 0.031185586005449295, + "learning_rate": 1.1067545827212534e-05, + "loss": 0.0032, + "step": 79490 + }, + { + "grad_norm": 0.031005429103970528, + "learning_rate": 1.1057173100367496e-05, + "loss": 0.0022, + "step": 79500 + }, + { + "grad_norm": 0.021092813462018967, + "learning_rate": 1.1046804632251474e-05, + "loss": 0.0031, + "step": 79510 + }, + { + "grad_norm": 0.030523257330060005, + "learning_rate": 1.1036440423998318e-05, + "loss": 0.0033, + "step": 79520 + }, + { + "grad_norm": 0.031618840992450714, + "learning_rate": 1.1026080476741469e-05, + "loss": 0.0026, + "step": 79530 + }, + { + "grad_norm": 0.02795429714024067, + "learning_rate": 1.1015724791613836e-05, + "loss": 0.0036, + "step": 79540 + }, + { + "grad_norm": 0.021546555683016777, + "learning_rate": 1.1005373369747951e-05, + "loss": 0.0029, + "step": 79550 + }, + { + "grad_norm": 0.04693891108036041, + "learning_rate": 1.09950262122758e-05, + "loss": 0.0039, + "step": 79560 + }, + { + "grad_norm": 0.03660713508725166, + "learning_rate": 1.0984683320328948e-05, + "loss": 0.0033, + "step": 79570 + }, + { + "grad_norm": 0.03244699165225029, + "learning_rate": 1.0974344695038458e-05, + "loss": 0.0021, + "step": 79580 + }, + { + "grad_norm": 0.028558464720845222, + "learning_rate": 1.0964010337534968e-05, + "loss": 0.0028, + "step": 79590 + }, + { + "grad_norm": 0.034182414412498474, + "learning_rate": 1.0953680248948611e-05, + "loss": 0.0029, + "step": 79600 + }, + { + "grad_norm": 0.046257324516773224, + "learning_rate": 1.0943354430409069e-05, + "loss": 0.003, + "step": 79610 + }, + { + "grad_norm": 0.06798155605792999, + "learning_rate": 1.0933032883045574e-05, + "loss": 0.0034, + "step": 79620 + }, + { + "grad_norm": 0.04368075355887413, + "learning_rate": 1.0922715607986872e-05, + "loss": 0.0047, + "step": 79630 + }, + { + "grad_norm": 0.029078586027026176, + "learning_rate": 1.0912402606361234e-05, + "loss": 0.0024, + "step": 79640 + }, + { + "grad_norm": 0.04219958186149597, + "learning_rate": 1.090209387929647e-05, + "loss": 0.0029, + "step": 79650 + }, + { + "grad_norm": 0.027789033949375153, + "learning_rate": 1.0891789427919957e-05, + "loss": 0.0027, + "step": 79660 + }, + { + "grad_norm": 0.023208357393741608, + "learning_rate": 1.088148925335854e-05, + "loss": 0.0037, + "step": 79670 + }, + { + "grad_norm": 0.02191266417503357, + "learning_rate": 1.087119335673864e-05, + "loss": 0.0025, + "step": 79680 + }, + { + "grad_norm": 0.03398097679018974, + "learning_rate": 1.0860901739186208e-05, + "loss": 0.0057, + "step": 79690 + }, + { + "grad_norm": 0.036921147257089615, + "learning_rate": 1.085061440182673e-05, + "loss": 0.0058, + "step": 79700 + }, + { + "grad_norm": 0.031957145780324936, + "learning_rate": 1.0840331345785193e-05, + "loss": 0.0021, + "step": 79710 + }, + { + "grad_norm": 0.01774328202009201, + "learning_rate": 1.083005257218615e-05, + "loss": 0.0019, + "step": 79720 + }, + { + "grad_norm": 0.03102797642350197, + "learning_rate": 1.0819778082153642e-05, + "loss": 0.0035, + "step": 79730 + }, + { + "grad_norm": 0.038984522223472595, + "learning_rate": 1.0809507876811325e-05, + "loss": 0.0024, + "step": 79740 + }, + { + "grad_norm": 0.02461632899940014, + "learning_rate": 1.0799241957282292e-05, + "loss": 0.0025, + "step": 79750 + }, + { + "grad_norm": 0.016806291416287422, + "learning_rate": 1.078898032468923e-05, + "loss": 0.0029, + "step": 79760 + }, + { + "grad_norm": 0.026250308379530907, + "learning_rate": 1.0778722980154315e-05, + "loss": 0.0025, + "step": 79770 + }, + { + "grad_norm": 0.042814597487449646, + "learning_rate": 1.0768469924799302e-05, + "loss": 0.0034, + "step": 79780 + }, + { + "grad_norm": 0.037342369556427, + "learning_rate": 1.0758221159745419e-05, + "loss": 0.0024, + "step": 79790 + }, + { + "grad_norm": 0.025035027414560318, + "learning_rate": 1.074797668611347e-05, + "loss": 0.0022, + "step": 79800 + }, + { + "grad_norm": 0.019282063469290733, + "learning_rate": 1.0737736505023777e-05, + "loss": 0.002, + "step": 79810 + }, + { + "grad_norm": 0.03152550384402275, + "learning_rate": 1.0727500617596203e-05, + "loss": 0.0033, + "step": 79820 + }, + { + "grad_norm": 0.022890176624059677, + "learning_rate": 1.0717269024950104e-05, + "loss": 0.0026, + "step": 79830 + }, + { + "grad_norm": 0.018162567168474197, + "learning_rate": 1.070704172820442e-05, + "loss": 0.0034, + "step": 79840 + }, + { + "grad_norm": 0.024027744308114052, + "learning_rate": 1.0696818728477559e-05, + "loss": 0.0041, + "step": 79850 + }, + { + "grad_norm": 0.02340344339609146, + "learning_rate": 1.0686600026887511e-05, + "loss": 0.0033, + "step": 79860 + }, + { + "grad_norm": 0.027440931648015976, + "learning_rate": 1.0676385624551777e-05, + "loss": 0.0029, + "step": 79870 + }, + { + "grad_norm": 0.022211074829101562, + "learning_rate": 1.0666175522587402e-05, + "loss": 0.0025, + "step": 79880 + }, + { + "grad_norm": 0.023045003414154053, + "learning_rate": 1.0655969722110925e-05, + "loss": 0.0023, + "step": 79890 + }, + { + "grad_norm": 0.026227278634905815, + "learning_rate": 1.0645768224238439e-05, + "loss": 0.0023, + "step": 79900 + }, + { + "grad_norm": 0.02014509215950966, + "learning_rate": 1.063557103008559e-05, + "loss": 0.002, + "step": 79910 + }, + { + "grad_norm": 0.03528915345668793, + "learning_rate": 1.0625378140767494e-05, + "loss": 0.0028, + "step": 79920 + }, + { + "grad_norm": 0.03209390118718147, + "learning_rate": 1.0615189557398846e-05, + "loss": 0.0023, + "step": 79930 + }, + { + "grad_norm": 0.024579424411058426, + "learning_rate": 1.060500528109386e-05, + "loss": 0.0032, + "step": 79940 + }, + { + "grad_norm": 0.028109155595302582, + "learning_rate": 1.0594825312966272e-05, + "loss": 0.002, + "step": 79950 + }, + { + "grad_norm": 0.030730217695236206, + "learning_rate": 1.0584649654129336e-05, + "loss": 0.0038, + "step": 79960 + }, + { + "grad_norm": 0.022935109212994576, + "learning_rate": 1.0574478305695863e-05, + "loss": 0.0018, + "step": 79970 + }, + { + "grad_norm": 0.027472039684653282, + "learning_rate": 1.0564311268778154e-05, + "loss": 0.0026, + "step": 79980 + }, + { + "grad_norm": 0.038345806300640106, + "learning_rate": 1.0554148544488073e-05, + "loss": 0.0032, + "step": 79990 + }, + { + "grad_norm": 0.018976913765072823, + "learning_rate": 1.0543990133937004e-05, + "loss": 0.0024, + "step": 80000 + }, + { + "grad_norm": 0.042910102754831314, + "learning_rate": 1.0533836038235868e-05, + "loss": 0.002, + "step": 80010 + }, + { + "grad_norm": 0.022115051746368408, + "learning_rate": 1.052368625849507e-05, + "loss": 0.0024, + "step": 80020 + }, + { + "grad_norm": 0.020326191559433937, + "learning_rate": 1.05135407958246e-05, + "loss": 0.0032, + "step": 80030 + }, + { + "grad_norm": 0.023710565641522408, + "learning_rate": 1.0503399651333934e-05, + "loss": 0.0029, + "step": 80040 + }, + { + "grad_norm": 0.032790154218673706, + "learning_rate": 1.04932628261321e-05, + "loss": 0.0027, + "step": 80050 + }, + { + "grad_norm": 0.018290434032678604, + "learning_rate": 1.0483130321327644e-05, + "loss": 0.0021, + "step": 80060 + }, + { + "grad_norm": 0.02674073353409767, + "learning_rate": 1.0473002138028654e-05, + "loss": 0.0032, + "step": 80070 + }, + { + "grad_norm": 0.026616981253027916, + "learning_rate": 1.0462878277342713e-05, + "loss": 0.0024, + "step": 80080 + }, + { + "grad_norm": 0.037627339363098145, + "learning_rate": 1.0452758740376972e-05, + "loss": 0.0026, + "step": 80090 + }, + { + "grad_norm": 0.02563195861876011, + "learning_rate": 1.0442643528238067e-05, + "loss": 0.0017, + "step": 80100 + }, + { + "grad_norm": 0.0170728899538517, + "learning_rate": 1.0432532642032188e-05, + "loss": 0.0022, + "step": 80110 + }, + { + "grad_norm": 0.03629729151725769, + "learning_rate": 1.042242608286505e-05, + "loss": 0.003, + "step": 80120 + }, + { + "grad_norm": 0.03667852282524109, + "learning_rate": 1.041232385184191e-05, + "loss": 0.0034, + "step": 80130 + }, + { + "grad_norm": 0.026425829157233238, + "learning_rate": 1.0402225950067495e-05, + "loss": 0.0025, + "step": 80140 + }, + { + "grad_norm": 0.029789576306939125, + "learning_rate": 1.0392132378646119e-05, + "loss": 0.0025, + "step": 80150 + }, + { + "grad_norm": 0.022389691323041916, + "learning_rate": 1.0382043138681607e-05, + "loss": 0.0031, + "step": 80160 + }, + { + "grad_norm": 0.03579476475715637, + "learning_rate": 1.0371958231277284e-05, + "loss": 0.0057, + "step": 80170 + }, + { + "grad_norm": 0.03183580935001373, + "learning_rate": 1.0361877657536024e-05, + "loss": 0.0023, + "step": 80180 + }, + { + "grad_norm": 0.07668663561344147, + "learning_rate": 1.035180141856023e-05, + "loss": 0.0032, + "step": 80190 + }, + { + "grad_norm": 0.034731343388557434, + "learning_rate": 1.0341729515451836e-05, + "loss": 0.0024, + "step": 80200 + }, + { + "grad_norm": 0.03200950473546982, + "learning_rate": 1.0331661949312265e-05, + "loss": 0.0024, + "step": 80210 + }, + { + "grad_norm": 0.04314696043729782, + "learning_rate": 1.0321598721242514e-05, + "loss": 0.003, + "step": 80220 + }, + { + "grad_norm": 0.031977131962776184, + "learning_rate": 1.0311539832343064e-05, + "loss": 0.0029, + "step": 80230 + }, + { + "grad_norm": 0.02422538585960865, + "learning_rate": 1.0301485283713942e-05, + "loss": 0.0027, + "step": 80240 + }, + { + "grad_norm": 0.02738594450056553, + "learning_rate": 1.0291435076454703e-05, + "loss": 0.0027, + "step": 80250 + }, + { + "grad_norm": 0.02359665557742119, + "learning_rate": 1.028138921166444e-05, + "loss": 0.003, + "step": 80260 + }, + { + "grad_norm": 0.031972430646419525, + "learning_rate": 1.0271347690441719e-05, + "loss": 0.0021, + "step": 80270 + }, + { + "grad_norm": 0.02692101150751114, + "learning_rate": 1.0261310513884698e-05, + "loss": 0.0021, + "step": 80280 + }, + { + "grad_norm": 0.0248489361256361, + "learning_rate": 1.0251277683090998e-05, + "loss": 0.0023, + "step": 80290 + }, + { + "grad_norm": 0.017724255099892616, + "learning_rate": 1.0241249199157826e-05, + "loss": 0.0017, + "step": 80300 + }, + { + "grad_norm": 0.0330035500228405, + "learning_rate": 1.0231225063181837e-05, + "loss": 0.0028, + "step": 80310 + }, + { + "grad_norm": 0.038958821445703506, + "learning_rate": 1.0221205276259316e-05, + "loss": 0.003, + "step": 80320 + }, + { + "grad_norm": 0.024553241208195686, + "learning_rate": 1.0211189839485958e-05, + "loss": 0.0031, + "step": 80330 + }, + { + "grad_norm": 0.028557173907756805, + "learning_rate": 1.0201178753957075e-05, + "loss": 0.0026, + "step": 80340 + }, + { + "grad_norm": 0.04147474467754364, + "learning_rate": 1.0191172020767432e-05, + "loss": 0.004, + "step": 80350 + }, + { + "grad_norm": 0.028584755957126617, + "learning_rate": 1.0181169641011362e-05, + "loss": 0.0037, + "step": 80360 + }, + { + "grad_norm": 0.05332944542169571, + "learning_rate": 1.0171171615782721e-05, + "loss": 0.0038, + "step": 80370 + }, + { + "grad_norm": 0.031340353190898895, + "learning_rate": 1.0161177946174866e-05, + "loss": 0.003, + "step": 80380 + }, + { + "grad_norm": 0.029738005250692368, + "learning_rate": 1.0151188633280706e-05, + "loss": 0.0025, + "step": 80390 + }, + { + "grad_norm": 0.03338000178337097, + "learning_rate": 1.0141203678192635e-05, + "loss": 0.0023, + "step": 80400 + }, + { + "grad_norm": 0.02251678705215454, + "learning_rate": 1.0131223082002612e-05, + "loss": 0.003, + "step": 80410 + }, + { + "grad_norm": 0.02671162411570549, + "learning_rate": 1.012124684580208e-05, + "loss": 0.0025, + "step": 80420 + }, + { + "grad_norm": 0.05161992460489273, + "learning_rate": 1.0111274970682039e-05, + "loss": 0.0041, + "step": 80430 + }, + { + "grad_norm": 0.05161828175187111, + "learning_rate": 1.010130745773299e-05, + "loss": 0.0028, + "step": 80440 + }, + { + "grad_norm": 0.02899422124028206, + "learning_rate": 1.0091344308044986e-05, + "loss": 0.0036, + "step": 80450 + }, + { + "grad_norm": 0.025471411645412445, + "learning_rate": 1.0081385522707554e-05, + "loss": 0.0029, + "step": 80460 + }, + { + "grad_norm": 0.038293104618787766, + "learning_rate": 1.0071431102809803e-05, + "loss": 0.0045, + "step": 80470 + }, + { + "grad_norm": 0.028452545404434204, + "learning_rate": 1.0061481049440297e-05, + "loss": 0.0032, + "step": 80480 + }, + { + "grad_norm": 0.03708403557538986, + "learning_rate": 1.0051535363687187e-05, + "loss": 0.0028, + "step": 80490 + }, + { + "grad_norm": 0.02377425879240036, + "learning_rate": 1.0041594046638087e-05, + "loss": 0.003, + "step": 80500 + }, + { + "grad_norm": 0.028457239270210266, + "learning_rate": 1.0031657099380204e-05, + "loss": 0.0028, + "step": 80510 + }, + { + "grad_norm": 0.01697743497788906, + "learning_rate": 1.0021724523000204e-05, + "loss": 0.0018, + "step": 80520 + }, + { + "grad_norm": 0.022666985169053078, + "learning_rate": 1.001179631858431e-05, + "loss": 0.0022, + "step": 80530 + }, + { + "grad_norm": 0.022934727370738983, + "learning_rate": 1.0001872487218245e-05, + "loss": 0.0022, + "step": 80540 + }, + { + "grad_norm": 0.018942156806588173, + "learning_rate": 9.991953029987272e-06, + "loss": 0.0031, + "step": 80550 + }, + { + "grad_norm": 0.05494339019060135, + "learning_rate": 9.982037947976147e-06, + "loss": 0.0029, + "step": 80560 + }, + { + "grad_norm": 0.039824508130550385, + "learning_rate": 9.972127242269208e-06, + "loss": 0.0024, + "step": 80570 + }, + { + "grad_norm": 0.02350301295518875, + "learning_rate": 9.962220913950238e-06, + "loss": 0.0028, + "step": 80580 + }, + { + "grad_norm": 0.028152642771601677, + "learning_rate": 9.952318964102591e-06, + "loss": 0.0029, + "step": 80590 + }, + { + "grad_norm": 0.05089893564581871, + "learning_rate": 9.942421393809148e-06, + "loss": 0.0023, + "step": 80600 + }, + { + "grad_norm": 0.016327250748872757, + "learning_rate": 9.932528204152264e-06, + "loss": 0.0027, + "step": 80610 + }, + { + "grad_norm": 0.04271906614303589, + "learning_rate": 9.922639396213856e-06, + "loss": 0.0054, + "step": 80620 + }, + { + "grad_norm": 0.03866638243198395, + "learning_rate": 9.912754971075344e-06, + "loss": 0.0023, + "step": 80630 + }, + { + "grad_norm": 0.02293103002011776, + "learning_rate": 9.902874929817696e-06, + "loss": 0.0039, + "step": 80640 + }, + { + "grad_norm": 0.031370989978313446, + "learning_rate": 9.892999273521342e-06, + "loss": 0.0031, + "step": 80650 + }, + { + "grad_norm": 0.025583351030945778, + "learning_rate": 9.883128003266302e-06, + "loss": 0.0021, + "step": 80660 + }, + { + "grad_norm": 0.023873327299952507, + "learning_rate": 9.87326112013206e-06, + "loss": 0.0018, + "step": 80670 + }, + { + "grad_norm": 0.017158158123493195, + "learning_rate": 9.863398625197662e-06, + "loss": 0.0023, + "step": 80680 + }, + { + "grad_norm": 0.023349691182374954, + "learning_rate": 9.853540519541615e-06, + "loss": 0.002, + "step": 80690 + }, + { + "grad_norm": 0.030966129153966904, + "learning_rate": 9.843686804242053e-06, + "loss": 0.0023, + "step": 80700 + }, + { + "grad_norm": 0.02167864888906479, + "learning_rate": 9.833837480376506e-06, + "loss": 0.0028, + "step": 80710 + }, + { + "grad_norm": 0.04040049761533737, + "learning_rate": 9.82399254902212e-06, + "loss": 0.0033, + "step": 80720 + }, + { + "grad_norm": 0.03207913041114807, + "learning_rate": 9.814152011255494e-06, + "loss": 0.0035, + "step": 80730 + }, + { + "grad_norm": 0.023055357858538628, + "learning_rate": 9.804315868152797e-06, + "loss": 0.0021, + "step": 80740 + }, + { + "grad_norm": 0.04949353635311127, + "learning_rate": 9.79448412078966e-06, + "loss": 0.003, + "step": 80750 + }, + { + "grad_norm": 0.03850327804684639, + "learning_rate": 9.784656770241312e-06, + "loss": 0.0038, + "step": 80760 + }, + { + "grad_norm": 0.03870305418968201, + "learning_rate": 9.77483381758243e-06, + "loss": 0.0047, + "step": 80770 + }, + { + "grad_norm": 0.03902798518538475, + "learning_rate": 9.76501526388725e-06, + "loss": 0.0031, + "step": 80780 + }, + { + "grad_norm": 0.051094766706228256, + "learning_rate": 9.7552011102295e-06, + "loss": 0.003, + "step": 80790 + }, + { + "grad_norm": 0.055011969059705734, + "learning_rate": 9.745391357682449e-06, + "loss": 0.0031, + "step": 80800 + }, + { + "grad_norm": 0.034526385366916656, + "learning_rate": 9.735586007318875e-06, + "loss": 0.0026, + "step": 80810 + }, + { + "grad_norm": 0.034612078219652176, + "learning_rate": 9.725785060211096e-06, + "loss": 0.0023, + "step": 80820 + }, + { + "grad_norm": 0.05544797703623772, + "learning_rate": 9.715988517430896e-06, + "loss": 0.0021, + "step": 80830 + }, + { + "grad_norm": 0.01828235760331154, + "learning_rate": 9.706196380049625e-06, + "loss": 0.0024, + "step": 80840 + }, + { + "grad_norm": 0.0388031005859375, + "learning_rate": 9.69640864913815e-06, + "loss": 0.0034, + "step": 80850 + }, + { + "grad_norm": 0.0485280342400074, + "learning_rate": 9.686625325766818e-06, + "loss": 0.0037, + "step": 80860 + }, + { + "grad_norm": 0.03039964847266674, + "learning_rate": 9.67684641100554e-06, + "loss": 0.0024, + "step": 80870 + }, + { + "grad_norm": 0.03637823089957237, + "learning_rate": 9.667071905923692e-06, + "loss": 0.0026, + "step": 80880 + }, + { + "grad_norm": 0.03952370956540108, + "learning_rate": 9.657301811590242e-06, + "loss": 0.0025, + "step": 80890 + }, + { + "grad_norm": 0.03223634138703346, + "learning_rate": 9.647536129073603e-06, + "loss": 0.0024, + "step": 80900 + }, + { + "grad_norm": 0.026474231854081154, + "learning_rate": 9.637774859441751e-06, + "loss": 0.0021, + "step": 80910 + }, + { + "grad_norm": 0.04020601883530617, + "learning_rate": 9.628018003762152e-06, + "loss": 0.0019, + "step": 80920 + }, + { + "grad_norm": 0.016708483919501305, + "learning_rate": 9.618265563101813e-06, + "loss": 0.0027, + "step": 80930 + }, + { + "grad_norm": 0.03663915768265724, + "learning_rate": 9.608517538527211e-06, + "loss": 0.0033, + "step": 80940 + }, + { + "grad_norm": 0.01651374064385891, + "learning_rate": 9.598773931104431e-06, + "loss": 0.0026, + "step": 80950 + }, + { + "grad_norm": 0.020957769826054573, + "learning_rate": 9.589034741898983e-06, + "loss": 0.0036, + "step": 80960 + }, + { + "grad_norm": 0.02479555644094944, + "learning_rate": 9.579299971975947e-06, + "loss": 0.0037, + "step": 80970 + }, + { + "grad_norm": 0.03153470531105995, + "learning_rate": 9.569569622399882e-06, + "loss": 0.002, + "step": 80980 + }, + { + "grad_norm": 0.026647556573152542, + "learning_rate": 9.559843694234916e-06, + "loss": 0.0021, + "step": 80990 + }, + { + "grad_norm": 0.05601706728339195, + "learning_rate": 9.550122188544625e-06, + "loss": 0.0029, + "step": 81000 + }, + { + "grad_norm": 0.035684287548065186, + "learning_rate": 9.540405106392152e-06, + "loss": 0.0021, + "step": 81010 + }, + { + "grad_norm": 0.02384156920015812, + "learning_rate": 9.53069244884015e-06, + "loss": 0.002, + "step": 81020 + }, + { + "grad_norm": 0.021115822717547417, + "learning_rate": 9.52098421695079e-06, + "loss": 0.0041, + "step": 81030 + }, + { + "grad_norm": 0.019080059602856636, + "learning_rate": 9.511280411785728e-06, + "loss": 0.0033, + "step": 81040 + }, + { + "grad_norm": 0.04402351379394531, + "learning_rate": 9.50158103440616e-06, + "loss": 0.0028, + "step": 81050 + }, + { + "grad_norm": 0.016092153266072273, + "learning_rate": 9.491886085872815e-06, + "loss": 0.0036, + "step": 81060 + }, + { + "grad_norm": 0.019594602286815643, + "learning_rate": 9.482195567245895e-06, + "loss": 0.0026, + "step": 81070 + }, + { + "grad_norm": 0.018574601039290428, + "learning_rate": 9.472509479585151e-06, + "loss": 0.0024, + "step": 81080 + }, + { + "grad_norm": 0.036770112812519073, + "learning_rate": 9.462827823949833e-06, + "loss": 0.0031, + "step": 81090 + }, + { + "grad_norm": 0.02627689018845558, + "learning_rate": 9.453150601398735e-06, + "loss": 0.0022, + "step": 81100 + }, + { + "grad_norm": 0.02169734053313732, + "learning_rate": 9.44347781299011e-06, + "loss": 0.0019, + "step": 81110 + }, + { + "grad_norm": 0.03299323841929436, + "learning_rate": 9.43380945978179e-06, + "loss": 0.0022, + "step": 81120 + }, + { + "grad_norm": 0.02606634423136711, + "learning_rate": 9.424145542831047e-06, + "loss": 0.0025, + "step": 81130 + }, + { + "grad_norm": 0.022142695263028145, + "learning_rate": 9.414486063194778e-06, + "loss": 0.0027, + "step": 81140 + }, + { + "grad_norm": 0.02801416628062725, + "learning_rate": 9.404831021929273e-06, + "loss": 0.0023, + "step": 81150 + }, + { + "grad_norm": 0.02498718537390232, + "learning_rate": 9.395180420090427e-06, + "loss": 0.003, + "step": 81160 + }, + { + "grad_norm": 0.03263469785451889, + "learning_rate": 9.385534258733591e-06, + "loss": 0.003, + "step": 81170 + }, + { + "grad_norm": 0.01747267134487629, + "learning_rate": 9.375892538913677e-06, + "loss": 0.0026, + "step": 81180 + }, + { + "grad_norm": 0.02404853329062462, + "learning_rate": 9.36625526168507e-06, + "loss": 0.0027, + "step": 81190 + }, + { + "grad_norm": 0.028788918629288673, + "learning_rate": 9.356622428101686e-06, + "loss": 0.0023, + "step": 81200 + }, + { + "grad_norm": 0.027038544416427612, + "learning_rate": 9.346994039216972e-06, + "loss": 0.0021, + "step": 81210 + }, + { + "grad_norm": 0.03635205328464508, + "learning_rate": 9.337370096083886e-06, + "loss": 0.0031, + "step": 81220 + }, + { + "grad_norm": 0.020248107612133026, + "learning_rate": 9.327750599754854e-06, + "loss": 0.0019, + "step": 81230 + }, + { + "grad_norm": 0.02212091162800789, + "learning_rate": 9.318135551281875e-06, + "loss": 0.0026, + "step": 81240 + }, + { + "grad_norm": 0.028832940384745598, + "learning_rate": 9.308524951716424e-06, + "loss": 0.0021, + "step": 81250 + }, + { + "grad_norm": 0.046251121908426285, + "learning_rate": 9.298918802109497e-06, + "loss": 0.003, + "step": 81260 + }, + { + "grad_norm": 0.0326334685087204, + "learning_rate": 9.289317103511625e-06, + "loss": 0.0028, + "step": 81270 + }, + { + "grad_norm": 0.024582333862781525, + "learning_rate": 9.279719856972835e-06, + "loss": 0.0024, + "step": 81280 + }, + { + "grad_norm": 0.018083222210407257, + "learning_rate": 9.270127063542645e-06, + "loss": 0.0026, + "step": 81290 + }, + { + "grad_norm": 0.020336752757430077, + "learning_rate": 9.260538724270124e-06, + "loss": 0.0036, + "step": 81300 + }, + { + "grad_norm": 0.03103073313832283, + "learning_rate": 9.250954840203852e-06, + "loss": 0.0023, + "step": 81310 + }, + { + "grad_norm": 0.028395863249897957, + "learning_rate": 9.241375412391878e-06, + "loss": 0.0046, + "step": 81320 + }, + { + "grad_norm": 0.020528370514512062, + "learning_rate": 9.23180044188181e-06, + "loss": 0.0022, + "step": 81330 + }, + { + "grad_norm": 0.02332645282149315, + "learning_rate": 9.222229929720755e-06, + "loss": 0.0024, + "step": 81340 + }, + { + "grad_norm": 0.033841051161289215, + "learning_rate": 9.212663876955336e-06, + "loss": 0.0027, + "step": 81350 + }, + { + "grad_norm": 0.042123593389987946, + "learning_rate": 9.20310228463166e-06, + "loss": 0.0033, + "step": 81360 + }, + { + "grad_norm": 0.050554823130369186, + "learning_rate": 9.19354515379539e-06, + "loss": 0.0047, + "step": 81370 + }, + { + "grad_norm": 0.027707507833838463, + "learning_rate": 9.183992485491666e-06, + "loss": 0.0034, + "step": 81380 + }, + { + "grad_norm": 0.024759404361248016, + "learning_rate": 9.174444280765154e-06, + "loss": 0.0023, + "step": 81390 + }, + { + "grad_norm": 0.02205667272210121, + "learning_rate": 9.164900540660032e-06, + "loss": 0.0028, + "step": 81400 + }, + { + "grad_norm": 0.01923377811908722, + "learning_rate": 9.155361266220008e-06, + "loss": 0.0032, + "step": 81410 + }, + { + "grad_norm": 0.030393952503800392, + "learning_rate": 9.145826458488254e-06, + "loss": 0.0032, + "step": 81420 + }, + { + "grad_norm": 0.022486716508865356, + "learning_rate": 9.136296118507509e-06, + "loss": 0.0027, + "step": 81430 + }, + { + "grad_norm": 0.022069741040468216, + "learning_rate": 9.126770247319966e-06, + "loss": 0.004, + "step": 81440 + }, + { + "grad_norm": 0.04126167669892311, + "learning_rate": 9.11724884596738e-06, + "loss": 0.0025, + "step": 81450 + }, + { + "grad_norm": 0.04137694835662842, + "learning_rate": 9.107731915490996e-06, + "loss": 0.0028, + "step": 81460 + }, + { + "grad_norm": 0.02439381554722786, + "learning_rate": 9.098219456931579e-06, + "loss": 0.0038, + "step": 81470 + }, + { + "grad_norm": 0.031156668439507484, + "learning_rate": 9.088711471329376e-06, + "loss": 0.0022, + "step": 81480 + }, + { + "grad_norm": 0.05170459672808647, + "learning_rate": 9.079207959724184e-06, + "loss": 0.0037, + "step": 81490 + }, + { + "grad_norm": 0.02617848478257656, + "learning_rate": 9.069708923155279e-06, + "loss": 0.0041, + "step": 81500 + }, + { + "grad_norm": 0.014716234058141708, + "learning_rate": 9.060214362661468e-06, + "loss": 0.0023, + "step": 81510 + }, + { + "grad_norm": 0.03098759427666664, + "learning_rate": 9.050724279281064e-06, + "loss": 0.0031, + "step": 81520 + }, + { + "grad_norm": 0.04847056418657303, + "learning_rate": 9.041238674051894e-06, + "loss": 0.0024, + "step": 81530 + }, + { + "grad_norm": 0.0337550975382328, + "learning_rate": 9.031757548011277e-06, + "loss": 0.0039, + "step": 81540 + }, + { + "grad_norm": 0.03049827553331852, + "learning_rate": 9.022280902196051e-06, + "loss": 0.0021, + "step": 81550 + }, + { + "grad_norm": 0.01948394998908043, + "learning_rate": 9.012808737642592e-06, + "loss": 0.0026, + "step": 81560 + }, + { + "grad_norm": 0.05167170986533165, + "learning_rate": 9.003341055386739e-06, + "loss": 0.0037, + "step": 81570 + }, + { + "grad_norm": 0.047032590955495834, + "learning_rate": 8.993877856463862e-06, + "loss": 0.003, + "step": 81580 + }, + { + "grad_norm": 0.04211348667740822, + "learning_rate": 8.984419141908856e-06, + "loss": 0.0028, + "step": 81590 + }, + { + "grad_norm": 0.02567889913916588, + "learning_rate": 8.974964912756117e-06, + "loss": 0.0039, + "step": 81600 + }, + { + "grad_norm": 0.05159831419587135, + "learning_rate": 8.965515170039523e-06, + "loss": 0.0035, + "step": 81610 + }, + { + "grad_norm": 0.03378491848707199, + "learning_rate": 8.956069914792514e-06, + "loss": 0.004, + "step": 81620 + }, + { + "grad_norm": 0.020121105015277863, + "learning_rate": 8.946629148047969e-06, + "loss": 0.0052, + "step": 81630 + }, + { + "grad_norm": 0.02779882773756981, + "learning_rate": 8.93719287083834e-06, + "loss": 0.0019, + "step": 81640 + }, + { + "grad_norm": 0.030048353597521782, + "learning_rate": 8.927761084195569e-06, + "loss": 0.0027, + "step": 81650 + }, + { + "grad_norm": 0.03366197273135185, + "learning_rate": 8.918333789151096e-06, + "loss": 0.003, + "step": 81660 + }, + { + "grad_norm": 0.038842517882585526, + "learning_rate": 8.90891098673587e-06, + "loss": 0.0028, + "step": 81670 + }, + { + "grad_norm": 0.03516688942909241, + "learning_rate": 8.89949267798037e-06, + "loss": 0.0047, + "step": 81680 + }, + { + "grad_norm": 0.027616441249847412, + "learning_rate": 8.890078863914542e-06, + "loss": 0.0044, + "step": 81690 + }, + { + "grad_norm": 0.01849259063601494, + "learning_rate": 8.880669545567894e-06, + "loss": 0.0022, + "step": 81700 + }, + { + "grad_norm": 0.034293144941329956, + "learning_rate": 8.871264723969381e-06, + "loss": 0.003, + "step": 81710 + }, + { + "grad_norm": 0.04748133569955826, + "learning_rate": 8.86186440014754e-06, + "loss": 0.003, + "step": 81720 + }, + { + "grad_norm": 0.06703232228755951, + "learning_rate": 8.852468575130351e-06, + "loss": 0.0025, + "step": 81730 + }, + { + "grad_norm": 0.02857309952378273, + "learning_rate": 8.843077249945347e-06, + "loss": 0.0024, + "step": 81740 + }, + { + "grad_norm": 0.03619103878736496, + "learning_rate": 8.833690425619523e-06, + "loss": 0.0028, + "step": 81750 + }, + { + "grad_norm": 0.019714126363396645, + "learning_rate": 8.82430810317942e-06, + "loss": 0.0017, + "step": 81760 + }, + { + "grad_norm": 0.04279857128858566, + "learning_rate": 8.814930283651074e-06, + "loss": 0.0029, + "step": 81770 + }, + { + "grad_norm": 0.028239456936717033, + "learning_rate": 8.805556968060048e-06, + "loss": 0.0019, + "step": 81780 + }, + { + "grad_norm": 0.04264038801193237, + "learning_rate": 8.796188157431362e-06, + "loss": 0.0024, + "step": 81790 + }, + { + "grad_norm": 0.040092386305332184, + "learning_rate": 8.786823852789595e-06, + "loss": 0.0025, + "step": 81800 + }, + { + "grad_norm": 0.01979237236082554, + "learning_rate": 8.777464055158813e-06, + "loss": 0.0023, + "step": 81810 + }, + { + "grad_norm": 0.026949793100357056, + "learning_rate": 8.768108765562578e-06, + "loss": 0.0022, + "step": 81820 + }, + { + "grad_norm": 0.01627792790532112, + "learning_rate": 8.758757985023975e-06, + "loss": 0.0021, + "step": 81830 + }, + { + "grad_norm": 0.018158886581659317, + "learning_rate": 8.749411714565598e-06, + "loss": 0.0029, + "step": 81840 + }, + { + "grad_norm": 0.029587499797344208, + "learning_rate": 8.74006995520954e-06, + "loss": 0.0027, + "step": 81850 + }, + { + "grad_norm": 0.04141073301434517, + "learning_rate": 8.730732707977397e-06, + "loss": 0.0024, + "step": 81860 + }, + { + "grad_norm": 0.03166617825627327, + "learning_rate": 8.721399973890281e-06, + "loss": 0.0034, + "step": 81870 + }, + { + "grad_norm": 0.022598732262849808, + "learning_rate": 8.712071753968792e-06, + "loss": 0.0016, + "step": 81880 + }, + { + "grad_norm": 0.024950921535491943, + "learning_rate": 8.702748049233072e-06, + "loss": 0.0022, + "step": 81890 + }, + { + "grad_norm": 0.028141994029283524, + "learning_rate": 8.693428860702718e-06, + "loss": 0.0027, + "step": 81900 + }, + { + "grad_norm": 0.027346864342689514, + "learning_rate": 8.684114189396898e-06, + "loss": 0.0032, + "step": 81910 + }, + { + "grad_norm": 0.016513340175151825, + "learning_rate": 8.674804036334222e-06, + "loss": 0.002, + "step": 81920 + }, + { + "grad_norm": 0.02400882914662361, + "learning_rate": 8.665498402532857e-06, + "loss": 0.004, + "step": 81930 + }, + { + "grad_norm": 0.020891236141324043, + "learning_rate": 8.656197289010432e-06, + "loss": 0.0024, + "step": 81940 + }, + { + "grad_norm": 0.021646274253726006, + "learning_rate": 8.646900696784122e-06, + "loss": 0.0025, + "step": 81950 + }, + { + "grad_norm": 0.030271384865045547, + "learning_rate": 8.637608626870553e-06, + "loss": 0.0032, + "step": 81960 + }, + { + "grad_norm": 0.02113053761422634, + "learning_rate": 8.628321080285944e-06, + "loss": 0.0023, + "step": 81970 + }, + { + "grad_norm": 0.08796950429677963, + "learning_rate": 8.619038058045925e-06, + "loss": 0.0034, + "step": 81980 + }, + { + "grad_norm": 0.03647368773818016, + "learning_rate": 8.609759561165704e-06, + "loss": 0.0043, + "step": 81990 + }, + { + "grad_norm": 0.03748100623488426, + "learning_rate": 8.600485590659934e-06, + "loss": 0.0028, + "step": 82000 + }, + { + "grad_norm": 0.029187001287937164, + "learning_rate": 8.591216147542813e-06, + "loss": 0.0032, + "step": 82010 + }, + { + "grad_norm": 0.04006044194102287, + "learning_rate": 8.581951232828045e-06, + "loss": 0.0031, + "step": 82020 + }, + { + "grad_norm": 0.03631780296564102, + "learning_rate": 8.572690847528825e-06, + "loss": 0.0026, + "step": 82030 + }, + { + "grad_norm": 0.027819326147437096, + "learning_rate": 8.563434992657838e-06, + "loss": 0.0031, + "step": 82040 + }, + { + "grad_norm": 0.023358291015028954, + "learning_rate": 8.554183669227307e-06, + "loss": 0.0029, + "step": 82050 + }, + { + "grad_norm": 0.025091908872127533, + "learning_rate": 8.544936878248937e-06, + "loss": 0.0027, + "step": 82060 + }, + { + "grad_norm": 0.027650026604533195, + "learning_rate": 8.535694620733942e-06, + "loss": 0.0021, + "step": 82070 + }, + { + "grad_norm": 0.033246755599975586, + "learning_rate": 8.526456897693053e-06, + "loss": 0.0039, + "step": 82080 + }, + { + "grad_norm": 0.019385043531656265, + "learning_rate": 8.517223710136456e-06, + "loss": 0.0031, + "step": 82090 + }, + { + "grad_norm": 0.02546611614525318, + "learning_rate": 8.507995059073931e-06, + "loss": 0.0034, + "step": 82100 + }, + { + "grad_norm": 0.03363512083888054, + "learning_rate": 8.498770945514673e-06, + "loss": 0.0033, + "step": 82110 + }, + { + "grad_norm": 0.030147317796945572, + "learning_rate": 8.489551370467436e-06, + "loss": 0.0026, + "step": 82120 + }, + { + "grad_norm": 0.03313877061009407, + "learning_rate": 8.480336334940442e-06, + "loss": 0.0039, + "step": 82130 + }, + { + "grad_norm": 0.021137002855539322, + "learning_rate": 8.471125839941458e-06, + "loss": 0.0021, + "step": 82140 + }, + { + "grad_norm": 0.01661922223865986, + "learning_rate": 8.461919886477682e-06, + "loss": 0.0026, + "step": 82150 + }, + { + "grad_norm": 0.03428469970822334, + "learning_rate": 8.452718475555927e-06, + "loss": 0.0032, + "step": 82160 + }, + { + "grad_norm": 0.031144101172685623, + "learning_rate": 8.443521608182398e-06, + "loss": 0.0019, + "step": 82170 + }, + { + "grad_norm": 0.04510721191763878, + "learning_rate": 8.43432928536288e-06, + "loss": 0.0028, + "step": 82180 + }, + { + "grad_norm": 0.0232565738260746, + "learning_rate": 8.425141508102607e-06, + "loss": 0.0023, + "step": 82190 + }, + { + "grad_norm": 0.05704693868756294, + "learning_rate": 8.415958277406365e-06, + "loss": 0.0032, + "step": 82200 + }, + { + "grad_norm": 0.07128886878490448, + "learning_rate": 8.406779594278392e-06, + "loss": 0.0034, + "step": 82210 + }, + { + "grad_norm": 0.03614988923072815, + "learning_rate": 8.397605459722468e-06, + "loss": 0.0029, + "step": 82220 + }, + { + "grad_norm": 0.020812882110476494, + "learning_rate": 8.388435874741863e-06, + "loss": 0.0021, + "step": 82230 + }, + { + "grad_norm": 0.027562212198972702, + "learning_rate": 8.37927084033936e-06, + "loss": 0.0031, + "step": 82240 + }, + { + "grad_norm": 0.04060203582048416, + "learning_rate": 8.370110357517208e-06, + "loss": 0.002, + "step": 82250 + }, + { + "grad_norm": 0.02076026052236557, + "learning_rate": 8.360954427277201e-06, + "loss": 0.002, + "step": 82260 + }, + { + "grad_norm": 0.02414841763675213, + "learning_rate": 8.351803050620627e-06, + "loss": 0.0018, + "step": 82270 + }, + { + "grad_norm": 0.04010481759905815, + "learning_rate": 8.34265622854824e-06, + "loss": 0.0026, + "step": 82280 + }, + { + "grad_norm": 0.03172920644283295, + "learning_rate": 8.333513962060335e-06, + "loss": 0.0026, + "step": 82290 + }, + { + "grad_norm": 0.019759109243750572, + "learning_rate": 8.324376252156701e-06, + "loss": 0.0026, + "step": 82300 + }, + { + "grad_norm": 0.016966668888926506, + "learning_rate": 8.315243099836633e-06, + "loss": 0.0019, + "step": 82310 + }, + { + "grad_norm": 0.028227422386407852, + "learning_rate": 8.3061145060989e-06, + "loss": 0.002, + "step": 82320 + }, + { + "grad_norm": 0.021579179912805557, + "learning_rate": 8.296990471941806e-06, + "loss": 0.0027, + "step": 82330 + }, + { + "grad_norm": 0.022198038175702095, + "learning_rate": 8.287870998363112e-06, + "loss": 0.0028, + "step": 82340 + }, + { + "grad_norm": 0.023718059062957764, + "learning_rate": 8.278756086360156e-06, + "loss": 0.0023, + "step": 82350 + }, + { + "grad_norm": 0.026803933084011078, + "learning_rate": 8.269645736929693e-06, + "loss": 0.0021, + "step": 82360 + }, + { + "grad_norm": 0.01451724860817194, + "learning_rate": 8.260539951068048e-06, + "loss": 0.0039, + "step": 82370 + }, + { + "grad_norm": 0.019479572772979736, + "learning_rate": 8.251438729770983e-06, + "loss": 0.0032, + "step": 82380 + }, + { + "grad_norm": 0.017917810007929802, + "learning_rate": 8.242342074033826e-06, + "loss": 0.0025, + "step": 82390 + }, + { + "grad_norm": 0.017339976504445076, + "learning_rate": 8.233249984851344e-06, + "loss": 0.0027, + "step": 82400 + }, + { + "grad_norm": 0.023641321808099747, + "learning_rate": 8.224162463217849e-06, + "loss": 0.0019, + "step": 82410 + }, + { + "grad_norm": 0.03936535865068436, + "learning_rate": 8.215079510127139e-06, + "loss": 0.0037, + "step": 82420 + }, + { + "grad_norm": 0.027607332915067673, + "learning_rate": 8.206001126572521e-06, + "loss": 0.0025, + "step": 82430 + }, + { + "grad_norm": 0.022410031408071518, + "learning_rate": 8.196927313546776e-06, + "loss": 0.0037, + "step": 82440 + }, + { + "grad_norm": 0.05103748291730881, + "learning_rate": 8.18785807204222e-06, + "loss": 0.0039, + "step": 82450 + }, + { + "grad_norm": 0.07473686337471008, + "learning_rate": 8.178793403050627e-06, + "loss": 0.0032, + "step": 82460 + }, + { + "grad_norm": 0.032518502324819565, + "learning_rate": 8.16973330756331e-06, + "loss": 0.0028, + "step": 82470 + }, + { + "grad_norm": 0.034772902727127075, + "learning_rate": 8.16067778657107e-06, + "loss": 0.003, + "step": 82480 + }, + { + "grad_norm": 0.014154093340039253, + "learning_rate": 8.151626841064214e-06, + "loss": 0.0023, + "step": 82490 + }, + { + "grad_norm": 0.02313447743654251, + "learning_rate": 8.142580472032518e-06, + "loss": 0.0027, + "step": 82500 + }, + { + "grad_norm": 0.026345636695623398, + "learning_rate": 8.133538680465285e-06, + "loss": 0.0019, + "step": 82510 + }, + { + "grad_norm": 0.02049177698791027, + "learning_rate": 8.124501467351336e-06, + "loss": 0.0031, + "step": 82520 + }, + { + "grad_norm": 0.024612024426460266, + "learning_rate": 8.11546883367893e-06, + "loss": 0.0025, + "step": 82530 + }, + { + "grad_norm": 0.04248461872339249, + "learning_rate": 8.106440780435882e-06, + "loss": 0.0031, + "step": 82540 + }, + { + "grad_norm": 0.03534957766532898, + "learning_rate": 8.097417308609485e-06, + "loss": 0.003, + "step": 82550 + }, + { + "grad_norm": 0.03826414793729782, + "learning_rate": 8.088398419186544e-06, + "loss": 0.0035, + "step": 82560 + }, + { + "grad_norm": 0.035939883440732956, + "learning_rate": 8.079384113153321e-06, + "loss": 0.0021, + "step": 82570 + }, + { + "grad_norm": 0.02939572185277939, + "learning_rate": 8.070374391495644e-06, + "loss": 0.0021, + "step": 82580 + }, + { + "grad_norm": 0.024608829990029335, + "learning_rate": 8.061369255198764e-06, + "loss": 0.0022, + "step": 82590 + }, + { + "grad_norm": 0.04120219871401787, + "learning_rate": 8.052368705247493e-06, + "loss": 0.0025, + "step": 82600 + }, + { + "grad_norm": 0.04554067924618721, + "learning_rate": 8.043372742626114e-06, + "loss": 0.0034, + "step": 82610 + }, + { + "grad_norm": 0.08040326833724976, + "learning_rate": 8.034381368318422e-06, + "loss": 0.003, + "step": 82620 + }, + { + "grad_norm": 0.05334455519914627, + "learning_rate": 8.025394583307677e-06, + "loss": 0.0036, + "step": 82630 + }, + { + "grad_norm": 0.021657517179846764, + "learning_rate": 8.016412388576683e-06, + "loss": 0.0022, + "step": 82640 + }, + { + "grad_norm": 0.014540556818246841, + "learning_rate": 8.007434785107699e-06, + "loss": 0.0025, + "step": 82650 + }, + { + "grad_norm": 0.04378550499677658, + "learning_rate": 7.998461773882509e-06, + "loss": 0.0026, + "step": 82660 + }, + { + "grad_norm": 0.03385549038648605, + "learning_rate": 7.989493355882393e-06, + "loss": 0.0043, + "step": 82670 + }, + { + "grad_norm": 0.02899950183928013, + "learning_rate": 7.980529532088137e-06, + "loss": 0.0028, + "step": 82680 + }, + { + "grad_norm": 0.023276664316654205, + "learning_rate": 7.97157030347998e-06, + "loss": 0.0019, + "step": 82690 + }, + { + "grad_norm": 0.04380866512656212, + "learning_rate": 7.96261567103772e-06, + "loss": 0.004, + "step": 82700 + }, + { + "grad_norm": 0.018479855731129646, + "learning_rate": 7.953665635740593e-06, + "loss": 0.0022, + "step": 82710 + }, + { + "grad_norm": 0.0323653407394886, + "learning_rate": 7.94472019856738e-06, + "loss": 0.0023, + "step": 82720 + }, + { + "grad_norm": 0.051934026181697845, + "learning_rate": 7.935779360496337e-06, + "loss": 0.0027, + "step": 82730 + }, + { + "grad_norm": 0.024414392188191414, + "learning_rate": 7.92684312250523e-06, + "loss": 0.0024, + "step": 82740 + }, + { + "grad_norm": 0.025166403502225876, + "learning_rate": 7.917911485571289e-06, + "loss": 0.0022, + "step": 82750 + }, + { + "grad_norm": 0.04804809018969536, + "learning_rate": 7.90898445067128e-06, + "loss": 0.0037, + "step": 82760 + }, + { + "grad_norm": 0.022642433643341064, + "learning_rate": 7.900062018781462e-06, + "loss": 0.0026, + "step": 82770 + }, + { + "grad_norm": 0.04893022030591965, + "learning_rate": 7.891144190877553e-06, + "loss": 0.0036, + "step": 82780 + }, + { + "grad_norm": 0.02720397710800171, + "learning_rate": 7.882230967934806e-06, + "loss": 0.0018, + "step": 82790 + }, + { + "grad_norm": 0.03274289518594742, + "learning_rate": 7.873322350927959e-06, + "loss": 0.0035, + "step": 82800 + }, + { + "grad_norm": 0.03397572785615921, + "learning_rate": 7.864418340831253e-06, + "loss": 0.0024, + "step": 82810 + }, + { + "grad_norm": 0.046387892216444016, + "learning_rate": 7.855518938618395e-06, + "loss": 0.0025, + "step": 82820 + }, + { + "grad_norm": 0.03199056163430214, + "learning_rate": 7.846624145262633e-06, + "loss": 0.0033, + "step": 82830 + }, + { + "grad_norm": 0.043992236256599426, + "learning_rate": 7.837733961736672e-06, + "loss": 0.003, + "step": 82840 + }, + { + "grad_norm": 0.052478089928627014, + "learning_rate": 7.828848389012727e-06, + "loss": 0.0026, + "step": 82850 + }, + { + "grad_norm": 0.021755002439022064, + "learning_rate": 7.819967428062524e-06, + "loss": 0.0021, + "step": 82860 + }, + { + "grad_norm": 0.02339375950396061, + "learning_rate": 7.811091079857274e-06, + "loss": 0.0024, + "step": 82870 + }, + { + "grad_norm": 0.020830797031521797, + "learning_rate": 7.802219345367667e-06, + "loss": 0.0015, + "step": 82880 + }, + { + "grad_norm": 0.029795313253998756, + "learning_rate": 7.793352225563916e-06, + "loss": 0.0024, + "step": 82890 + }, + { + "grad_norm": 0.037602294236421585, + "learning_rate": 7.7844897214157e-06, + "loss": 0.0028, + "step": 82900 + }, + { + "grad_norm": 0.020289821550250053, + "learning_rate": 7.775631833892228e-06, + "loss": 0.0032, + "step": 82910 + }, + { + "grad_norm": 0.03959547355771065, + "learning_rate": 7.76677856396215e-06, + "loss": 0.0034, + "step": 82920 + }, + { + "grad_norm": 0.022010862827301025, + "learning_rate": 7.757929912593697e-06, + "loss": 0.002, + "step": 82930 + }, + { + "grad_norm": 0.024415794759988785, + "learning_rate": 7.749085880754509e-06, + "loss": 0.0035, + "step": 82940 + }, + { + "grad_norm": 0.02519305795431137, + "learning_rate": 7.740246469411777e-06, + "loss": 0.0035, + "step": 82950 + }, + { + "grad_norm": 0.01784083992242813, + "learning_rate": 7.731411679532142e-06, + "loss": 0.0022, + "step": 82960 + }, + { + "grad_norm": 0.03890090063214302, + "learning_rate": 7.722581512081779e-06, + "loss": 0.0028, + "step": 82970 + }, + { + "grad_norm": 0.022050127387046814, + "learning_rate": 7.713755968026331e-06, + "loss": 0.0027, + "step": 82980 + }, + { + "grad_norm": 0.02640504017472267, + "learning_rate": 7.704935048330969e-06, + "loss": 0.004, + "step": 82990 + }, + { + "grad_norm": 0.016372188925743103, + "learning_rate": 7.696118753960313e-06, + "loss": 0.0025, + "step": 83000 + }, + { + "grad_norm": 0.01761300303041935, + "learning_rate": 7.687307085878503e-06, + "loss": 0.002, + "step": 83010 + }, + { + "grad_norm": 0.01917893812060356, + "learning_rate": 7.678500045049186e-06, + "loss": 0.0026, + "step": 83020 + }, + { + "grad_norm": 0.036183036863803864, + "learning_rate": 7.669697632435463e-06, + "loss": 0.0024, + "step": 83030 + }, + { + "grad_norm": 0.022085703909397125, + "learning_rate": 7.660899848999963e-06, + "loss": 0.0041, + "step": 83040 + }, + { + "grad_norm": 0.03626912459731102, + "learning_rate": 7.652106695704803e-06, + "loss": 0.0023, + "step": 83050 + }, + { + "grad_norm": 0.026116903871297836, + "learning_rate": 7.643318173511598e-06, + "loss": 0.0023, + "step": 83060 + }, + { + "grad_norm": 0.022499658167362213, + "learning_rate": 7.634534283381423e-06, + "loss": 0.0032, + "step": 83070 + }, + { + "grad_norm": 0.04046119749546051, + "learning_rate": 7.625755026274889e-06, + "loss": 0.0034, + "step": 83080 + }, + { + "grad_norm": 0.034966666251420975, + "learning_rate": 7.616980403152069e-06, + "loss": 0.0035, + "step": 83090 + }, + { + "grad_norm": 0.022517137229442596, + "learning_rate": 7.608210414972561e-06, + "loss": 0.0017, + "step": 83100 + }, + { + "grad_norm": 0.029552364721894264, + "learning_rate": 7.599445062695404e-06, + "loss": 0.0036, + "step": 83110 + }, + { + "grad_norm": 0.010414681397378445, + "learning_rate": 7.590684347279214e-06, + "loss": 0.0019, + "step": 83120 + }, + { + "grad_norm": 0.032827142626047134, + "learning_rate": 7.581928269682004e-06, + "loss": 0.002, + "step": 83130 + }, + { + "grad_norm": 0.04406466335058212, + "learning_rate": 7.573176830861361e-06, + "loss": 0.0027, + "step": 83140 + }, + { + "grad_norm": 0.02452380396425724, + "learning_rate": 7.5644300317743e-06, + "loss": 0.0021, + "step": 83150 + }, + { + "grad_norm": 0.020823897793889046, + "learning_rate": 7.555687873377376e-06, + "loss": 0.0031, + "step": 83160 + }, + { + "grad_norm": 0.0282368715852499, + "learning_rate": 7.546950356626592e-06, + "loss": 0.0023, + "step": 83170 + }, + { + "grad_norm": 0.01917113922536373, + "learning_rate": 7.538217482477516e-06, + "loss": 0.0023, + "step": 83180 + }, + { + "grad_norm": 0.05072232335805893, + "learning_rate": 7.529489251885119e-06, + "loss": 0.0026, + "step": 83190 + }, + { + "grad_norm": 0.021458802744746208, + "learning_rate": 7.520765665803941e-06, + "loss": 0.002, + "step": 83200 + }, + { + "grad_norm": 0.027664102613925934, + "learning_rate": 7.512046725187949e-06, + "loss": 0.0022, + "step": 83210 + }, + { + "grad_norm": 0.05009935051202774, + "learning_rate": 7.503332430990645e-06, + "loss": 0.0032, + "step": 83220 + }, + { + "grad_norm": 0.021697290241718292, + "learning_rate": 7.494622784165017e-06, + "loss": 0.0028, + "step": 83230 + }, + { + "grad_norm": 0.023300115019083023, + "learning_rate": 7.485917785663543e-06, + "loss": 0.0019, + "step": 83240 + }, + { + "grad_norm": 0.02481786534190178, + "learning_rate": 7.477217436438172e-06, + "loss": 0.002, + "step": 83250 + }, + { + "grad_norm": 0.04270413890480995, + "learning_rate": 7.46852173744037e-06, + "loss": 0.003, + "step": 83260 + }, + { + "grad_norm": 0.02793031372129917, + "learning_rate": 7.4598306896211004e-06, + "loss": 0.0025, + "step": 83270 + }, + { + "grad_norm": 0.023222824558615685, + "learning_rate": 7.451144293930773e-06, + "loss": 0.0025, + "step": 83280 + }, + { + "grad_norm": 0.038739945739507675, + "learning_rate": 7.4424625513193455e-06, + "loss": 0.0019, + "step": 83290 + }, + { + "grad_norm": 0.014430842362344265, + "learning_rate": 7.433785462736209e-06, + "loss": 0.0019, + "step": 83300 + }, + { + "grad_norm": 0.024087386205792427, + "learning_rate": 7.425113029130315e-06, + "loss": 0.0028, + "step": 83310 + }, + { + "grad_norm": 0.022953402251005173, + "learning_rate": 7.416445251450044e-06, + "loss": 0.003, + "step": 83320 + }, + { + "grad_norm": 0.03435945510864258, + "learning_rate": 7.4077821306433035e-06, + "loss": 0.0018, + "step": 83330 + }, + { + "grad_norm": 0.03123430348932743, + "learning_rate": 7.399123667657465e-06, + "loss": 0.0028, + "step": 83340 + }, + { + "grad_norm": 0.03012656606733799, + "learning_rate": 7.390469863439426e-06, + "loss": 0.003, + "step": 83350 + }, + { + "grad_norm": 0.017234468832612038, + "learning_rate": 7.381820718935512e-06, + "loss": 0.0031, + "step": 83360 + }, + { + "grad_norm": 0.01963895373046398, + "learning_rate": 7.373176235091633e-06, + "loss": 0.0024, + "step": 83370 + }, + { + "grad_norm": 0.022571971639990807, + "learning_rate": 7.364536412853101e-06, + "loss": 0.0029, + "step": 83380 + }, + { + "grad_norm": 0.019044889137148857, + "learning_rate": 7.355901253164776e-06, + "loss": 0.0026, + "step": 83390 + }, + { + "grad_norm": 0.034457456320524216, + "learning_rate": 7.34727075697097e-06, + "loss": 0.0021, + "step": 83400 + }, + { + "grad_norm": 0.046740252524614334, + "learning_rate": 7.3386449252155095e-06, + "loss": 0.0023, + "step": 83410 + }, + { + "grad_norm": 0.024765996262431145, + "learning_rate": 7.330023758841692e-06, + "loss": 0.0027, + "step": 83420 + }, + { + "grad_norm": 0.029380396008491516, + "learning_rate": 7.321407258792323e-06, + "loss": 0.0025, + "step": 83430 + }, + { + "grad_norm": 0.03147556260228157, + "learning_rate": 7.312795426009694e-06, + "loss": 0.0019, + "step": 83440 + }, + { + "grad_norm": 0.022119194269180298, + "learning_rate": 7.304188261435585e-06, + "loss": 0.0033, + "step": 83450 + }, + { + "grad_norm": 0.028530320152640343, + "learning_rate": 7.295585766011242e-06, + "loss": 0.0031, + "step": 83460 + }, + { + "grad_norm": 0.05576150119304657, + "learning_rate": 7.286987940677436e-06, + "loss": 0.0025, + "step": 83470 + }, + { + "grad_norm": 0.02156572788953781, + "learning_rate": 7.278394786374426e-06, + "loss": 0.0032, + "step": 83480 + }, + { + "grad_norm": 0.048814453184604645, + "learning_rate": 7.269806304041915e-06, + "loss": 0.002, + "step": 83490 + }, + { + "grad_norm": 0.013971278443932533, + "learning_rate": 7.261222494619147e-06, + "loss": 0.0026, + "step": 83500 + }, + { + "grad_norm": 0.016864502802491188, + "learning_rate": 7.252643359044825e-06, + "loss": 0.0022, + "step": 83510 + }, + { + "grad_norm": 0.01874404400587082, + "learning_rate": 7.244068898257172e-06, + "loss": 0.0021, + "step": 83520 + }, + { + "grad_norm": 0.025094272568821907, + "learning_rate": 7.2354991131938495e-06, + "loss": 0.0038, + "step": 83530 + }, + { + "grad_norm": 0.029357627034187317, + "learning_rate": 7.2269340047920574e-06, + "loss": 0.0025, + "step": 83540 + }, + { + "grad_norm": 0.019349727779626846, + "learning_rate": 7.218373573988435e-06, + "loss": 0.0026, + "step": 83550 + }, + { + "grad_norm": 0.017759859561920166, + "learning_rate": 7.209817821719178e-06, + "loss": 0.002, + "step": 83560 + }, + { + "grad_norm": 0.01816580258309841, + "learning_rate": 7.201266748919894e-06, + "loss": 0.0024, + "step": 83570 + }, + { + "grad_norm": 0.015560295432806015, + "learning_rate": 7.19272035652575e-06, + "loss": 0.0023, + "step": 83580 + }, + { + "grad_norm": 0.03610396757721901, + "learning_rate": 7.184178645471334e-06, + "loss": 0.0024, + "step": 83590 + }, + { + "grad_norm": 0.021062687039375305, + "learning_rate": 7.175641616690775e-06, + "loss": 0.0027, + "step": 83600 + }, + { + "grad_norm": 0.02132757566869259, + "learning_rate": 7.167109271117661e-06, + "loss": 0.0025, + "step": 83610 + }, + { + "grad_norm": 0.037728626281023026, + "learning_rate": 7.158581609685072e-06, + "loss": 0.0023, + "step": 83620 + }, + { + "grad_norm": 0.023711536079645157, + "learning_rate": 7.150058633325591e-06, + "loss": 0.0021, + "step": 83630 + }, + { + "grad_norm": 0.027664534747600555, + "learning_rate": 7.141540342971287e-06, + "loss": 0.0026, + "step": 83640 + }, + { + "grad_norm": 0.02753005176782608, + "learning_rate": 7.133026739553677e-06, + "loss": 0.0044, + "step": 83650 + }, + { + "grad_norm": 0.032187703996896744, + "learning_rate": 7.124517824003829e-06, + "loss": 0.0021, + "step": 83660 + }, + { + "grad_norm": 0.017413873225450516, + "learning_rate": 7.116013597252236e-06, + "loss": 0.0026, + "step": 83670 + }, + { + "grad_norm": 0.02399514988064766, + "learning_rate": 7.107514060228921e-06, + "loss": 0.0028, + "step": 83680 + }, + { + "grad_norm": 0.019625157117843628, + "learning_rate": 7.099019213863378e-06, + "loss": 0.0017, + "step": 83690 + }, + { + "grad_norm": 0.012493987567722797, + "learning_rate": 7.090529059084611e-06, + "loss": 0.0017, + "step": 83700 + }, + { + "grad_norm": 0.018513675779104233, + "learning_rate": 7.082043596821058e-06, + "loss": 0.0023, + "step": 83710 + }, + { + "grad_norm": 0.02819635346531868, + "learning_rate": 7.07356282800069e-06, + "loss": 0.0041, + "step": 83720 + }, + { + "grad_norm": 0.018826542422175407, + "learning_rate": 7.0650867535509645e-06, + "loss": 0.002, + "step": 83730 + }, + { + "grad_norm": 0.04007473215460777, + "learning_rate": 7.0566153743987895e-06, + "loss": 0.0026, + "step": 83740 + }, + { + "grad_norm": 0.033683598041534424, + "learning_rate": 7.04814869147059e-06, + "loss": 0.0023, + "step": 83750 + }, + { + "grad_norm": 0.03163636475801468, + "learning_rate": 7.0396867056922725e-06, + "loss": 0.003, + "step": 83760 + }, + { + "grad_norm": 0.03367290273308754, + "learning_rate": 7.031229417989244e-06, + "loss": 0.0035, + "step": 83770 + }, + { + "grad_norm": 0.048563890159130096, + "learning_rate": 7.022776829286348e-06, + "loss": 0.0042, + "step": 83780 + }, + { + "grad_norm": 0.030283890664577484, + "learning_rate": 7.014328940507975e-06, + "loss": 0.0036, + "step": 83790 + }, + { + "grad_norm": 0.03010130487382412, + "learning_rate": 7.005885752577945e-06, + "loss": 0.0017, + "step": 83800 + }, + { + "grad_norm": 0.021267008036375046, + "learning_rate": 6.997447266419615e-06, + "loss": 0.0028, + "step": 83810 + }, + { + "grad_norm": 0.026481831446290016, + "learning_rate": 6.989013482955797e-06, + "loss": 0.0035, + "step": 83820 + }, + { + "grad_norm": 0.012854348868131638, + "learning_rate": 6.980584403108808e-06, + "loss": 0.0022, + "step": 83830 + }, + { + "grad_norm": 0.028986284509301186, + "learning_rate": 6.972160027800417e-06, + "loss": 0.0023, + "step": 83840 + }, + { + "grad_norm": 0.028796402737498283, + "learning_rate": 6.963740357951931e-06, + "loss": 0.0024, + "step": 83850 + }, + { + "grad_norm": 0.011950695887207985, + "learning_rate": 6.95532539448408e-06, + "loss": 0.0025, + "step": 83860 + }, + { + "grad_norm": 0.015000404790043831, + "learning_rate": 6.946915138317128e-06, + "loss": 0.0023, + "step": 83870 + }, + { + "grad_norm": 0.03894288092851639, + "learning_rate": 6.9385095903708065e-06, + "loss": 0.0025, + "step": 83880 + }, + { + "grad_norm": 0.025067124515771866, + "learning_rate": 6.930108751564346e-06, + "loss": 0.0026, + "step": 83890 + }, + { + "grad_norm": 0.04112465679645538, + "learning_rate": 6.921712622816428e-06, + "loss": 0.0033, + "step": 83900 + }, + { + "grad_norm": 0.02227422408759594, + "learning_rate": 6.913321205045259e-06, + "loss": 0.0034, + "step": 83910 + }, + { + "grad_norm": 0.017500603571534157, + "learning_rate": 6.904934499168491e-06, + "loss": 0.0026, + "step": 83920 + }, + { + "grad_norm": 0.020704694092273712, + "learning_rate": 6.896552506103299e-06, + "loss": 0.0032, + "step": 83930 + }, + { + "grad_norm": 0.05184745043516159, + "learning_rate": 6.888175226766313e-06, + "loss": 0.0031, + "step": 83940 + }, + { + "grad_norm": 0.04960601031780243, + "learning_rate": 6.879802662073686e-06, + "loss": 0.0034, + "step": 83950 + }, + { + "grad_norm": 0.025916719809174538, + "learning_rate": 6.871434812940991e-06, + "loss": 0.0023, + "step": 83960 + }, + { + "grad_norm": 0.023236660286784172, + "learning_rate": 6.863071680283345e-06, + "loss": 0.0034, + "step": 83970 + }, + { + "grad_norm": 0.02496541477739811, + "learning_rate": 6.854713265015339e-06, + "loss": 0.0032, + "step": 83980 + }, + { + "grad_norm": 0.04395604133605957, + "learning_rate": 6.8463595680510025e-06, + "loss": 0.0047, + "step": 83990 + }, + { + "grad_norm": 0.027223272249102592, + "learning_rate": 6.838010590303911e-06, + "loss": 0.0033, + "step": 84000 + }, + { + "grad_norm": 0.020793719217181206, + "learning_rate": 6.829666332687085e-06, + "loss": 0.0019, + "step": 84010 + }, + { + "grad_norm": 0.02324715256690979, + "learning_rate": 6.821326796113059e-06, + "loss": 0.0022, + "step": 84020 + }, + { + "grad_norm": 0.013846402987837791, + "learning_rate": 6.812991981493799e-06, + "loss": 0.0023, + "step": 84030 + }, + { + "grad_norm": 0.02282799780368805, + "learning_rate": 6.804661889740821e-06, + "loss": 0.0025, + "step": 84040 + }, + { + "grad_norm": 0.03179256245493889, + "learning_rate": 6.7963365217650625e-06, + "loss": 0.0019, + "step": 84050 + }, + { + "grad_norm": 0.02370908297598362, + "learning_rate": 6.788015878476983e-06, + "loss": 0.0024, + "step": 84060 + }, + { + "grad_norm": 0.024660911411046982, + "learning_rate": 6.779699960786523e-06, + "loss": 0.0027, + "step": 84070 + }, + { + "grad_norm": 0.017360582947731018, + "learning_rate": 6.771388769603104e-06, + "loss": 0.0023, + "step": 84080 + }, + { + "grad_norm": 0.012654281221330166, + "learning_rate": 6.763082305835605e-06, + "loss": 0.0021, + "step": 84090 + }, + { + "grad_norm": 0.042303021997213364, + "learning_rate": 6.754780570392433e-06, + "loss": 0.0044, + "step": 84100 + }, + { + "grad_norm": 0.030276048928499222, + "learning_rate": 6.7464835641814215e-06, + "loss": 0.0024, + "step": 84110 + }, + { + "grad_norm": 0.021082157269120216, + "learning_rate": 6.7381912881099515e-06, + "loss": 0.0022, + "step": 84120 + }, + { + "grad_norm": 0.02007366716861725, + "learning_rate": 6.72990374308482e-06, + "loss": 0.0018, + "step": 84130 + }, + { + "grad_norm": 0.036068495362997055, + "learning_rate": 6.721620930012379e-06, + "loss": 0.0031, + "step": 84140 + }, + { + "grad_norm": 0.036790840327739716, + "learning_rate": 6.713342849798393e-06, + "loss": 0.0038, + "step": 84150 + }, + { + "grad_norm": 0.02394905500113964, + "learning_rate": 6.705069503348166e-06, + "loss": 0.0039, + "step": 84160 + }, + { + "grad_norm": 0.031166864559054375, + "learning_rate": 6.696800891566435e-06, + "loss": 0.0024, + "step": 84170 + }, + { + "grad_norm": 0.0325641892850399, + "learning_rate": 6.688537015357449e-06, + "loss": 0.0021, + "step": 84180 + }, + { + "grad_norm": 0.03728778287768364, + "learning_rate": 6.680277875624941e-06, + "loss": 0.0025, + "step": 84190 + }, + { + "grad_norm": 0.016214778646826744, + "learning_rate": 6.672023473272121e-06, + "loss": 0.002, + "step": 84200 + }, + { + "grad_norm": 0.041080355644226074, + "learning_rate": 6.663773809201667e-06, + "loss": 0.0041, + "step": 84210 + }, + { + "grad_norm": 0.02131643332540989, + "learning_rate": 6.655528884315748e-06, + "loss": 0.0022, + "step": 84220 + }, + { + "grad_norm": 0.018629800528287888, + "learning_rate": 6.647288699516036e-06, + "loss": 0.0025, + "step": 84230 + }, + { + "grad_norm": 0.04786355793476105, + "learning_rate": 6.639053255703642e-06, + "loss": 0.0032, + "step": 84240 + }, + { + "grad_norm": 0.035176292061805725, + "learning_rate": 6.6308225537791925e-06, + "loss": 0.0029, + "step": 84250 + }, + { + "grad_norm": 0.027005884796380997, + "learning_rate": 6.622596594642777e-06, + "loss": 0.0019, + "step": 84260 + }, + { + "grad_norm": 0.02560100518167019, + "learning_rate": 6.614375379193999e-06, + "loss": 0.0048, + "step": 84270 + }, + { + "grad_norm": 0.025101352483034134, + "learning_rate": 6.606158908331889e-06, + "loss": 0.0018, + "step": 84280 + }, + { + "grad_norm": 0.018685195595026016, + "learning_rate": 6.597947182955005e-06, + "loss": 0.0022, + "step": 84290 + }, + { + "grad_norm": 0.013739553280174732, + "learning_rate": 6.589740203961359e-06, + "loss": 0.0019, + "step": 84300 + }, + { + "grad_norm": 0.037126824259757996, + "learning_rate": 6.581537972248464e-06, + "loss": 0.003, + "step": 84310 + }, + { + "grad_norm": 0.023019762709736824, + "learning_rate": 6.573340488713275e-06, + "loss": 0.0045, + "step": 84320 + }, + { + "grad_norm": 0.05183713510632515, + "learning_rate": 6.565147754252304e-06, + "loss": 0.0021, + "step": 84330 + }, + { + "grad_norm": 0.020296545699238777, + "learning_rate": 6.556959769761462e-06, + "loss": 0.0023, + "step": 84340 + }, + { + "grad_norm": 0.044116511940956116, + "learning_rate": 6.548776536136192e-06, + "loss": 0.0021, + "step": 84350 + }, + { + "grad_norm": 0.013858085498213768, + "learning_rate": 6.54059805427138e-06, + "loss": 0.002, + "step": 84360 + }, + { + "grad_norm": 0.037320319563150406, + "learning_rate": 6.532424325061437e-06, + "loss": 0.0034, + "step": 84370 + }, + { + "grad_norm": 0.019319433718919754, + "learning_rate": 6.5242553494001935e-06, + "loss": 0.0014, + "step": 84380 + }, + { + "grad_norm": 0.042075514793395996, + "learning_rate": 6.516091128181046e-06, + "loss": 0.0025, + "step": 84390 + }, + { + "grad_norm": 0.019165486097335815, + "learning_rate": 6.507931662296785e-06, + "loss": 0.0032, + "step": 84400 + }, + { + "grad_norm": 0.02045411616563797, + "learning_rate": 6.4997769526397354e-06, + "loss": 0.0019, + "step": 84410 + }, + { + "grad_norm": 0.03335752338171005, + "learning_rate": 6.491627000101663e-06, + "loss": 0.0036, + "step": 84420 + }, + { + "grad_norm": 0.02806747704744339, + "learning_rate": 6.4834818055738545e-06, + "loss": 0.002, + "step": 84430 + }, + { + "grad_norm": 0.07718081027269363, + "learning_rate": 6.4753413699470465e-06, + "loss": 0.003, + "step": 84440 + }, + { + "grad_norm": 0.022519288584589958, + "learning_rate": 6.467205694111478e-06, + "loss": 0.0018, + "step": 84450 + }, + { + "grad_norm": 0.02175779640674591, + "learning_rate": 6.459074778956836e-06, + "loss": 0.0037, + "step": 84460 + }, + { + "grad_norm": 0.03173902630805969, + "learning_rate": 6.450948625372311e-06, + "loss": 0.0027, + "step": 84470 + }, + { + "grad_norm": 0.020661931484937668, + "learning_rate": 6.442827234246585e-06, + "loss": 0.0024, + "step": 84480 + }, + { + "grad_norm": 0.01702185347676277, + "learning_rate": 6.43471060646777e-06, + "loss": 0.0027, + "step": 84490 + }, + { + "grad_norm": 0.0705217570066452, + "learning_rate": 6.426598742923512e-06, + "loss": 0.0026, + "step": 84500 + }, + { + "grad_norm": 0.021436888724565506, + "learning_rate": 6.418491644500885e-06, + "loss": 0.0027, + "step": 84510 + }, + { + "grad_norm": 0.03361902013421059, + "learning_rate": 6.410389312086512e-06, + "loss": 0.002, + "step": 84520 + }, + { + "grad_norm": 0.0181741826236248, + "learning_rate": 6.4022917465664135e-06, + "loss": 0.0026, + "step": 84530 + }, + { + "grad_norm": 0.02107108384370804, + "learning_rate": 6.3941989488261575e-06, + "loss": 0.0019, + "step": 84540 + }, + { + "grad_norm": 0.020730961114168167, + "learning_rate": 6.386110919750726e-06, + "loss": 0.0019, + "step": 84550 + }, + { + "grad_norm": 0.025433901697397232, + "learning_rate": 6.378027660224651e-06, + "loss": 0.003, + "step": 84560 + }, + { + "grad_norm": 0.021402042359113693, + "learning_rate": 6.369949171131856e-06, + "loss": 0.0022, + "step": 84570 + }, + { + "grad_norm": 0.039286062121391296, + "learning_rate": 6.361875453355848e-06, + "loss": 0.0028, + "step": 84580 + }, + { + "grad_norm": 0.029126744717359543, + "learning_rate": 6.353806507779525e-06, + "loss": 0.0028, + "step": 84590 + }, + { + "grad_norm": 0.022102883085608482, + "learning_rate": 6.3457423352853044e-06, + "loss": 0.002, + "step": 84600 + }, + { + "grad_norm": 0.032419901341199875, + "learning_rate": 6.337682936755062e-06, + "loss": 0.0023, + "step": 84610 + }, + { + "grad_norm": 0.0471460297703743, + "learning_rate": 6.329628313070179e-06, + "loss": 0.0027, + "step": 84620 + }, + { + "grad_norm": 0.012593060731887817, + "learning_rate": 6.321578465111477e-06, + "loss": 0.002, + "step": 84630 + }, + { + "grad_norm": 0.03985122963786125, + "learning_rate": 6.31353339375928e-06, + "loss": 0.0031, + "step": 84640 + }, + { + "grad_norm": 0.015898725017905235, + "learning_rate": 6.305493099893395e-06, + "loss": 0.0021, + "step": 84650 + }, + { + "grad_norm": 0.016730478033423424, + "learning_rate": 6.297457584393096e-06, + "loss": 0.0036, + "step": 84660 + }, + { + "grad_norm": 0.01552483532577753, + "learning_rate": 6.289426848137126e-06, + "loss": 0.0016, + "step": 84670 + }, + { + "grad_norm": 0.025446565821766853, + "learning_rate": 6.281400892003713e-06, + "loss": 0.0026, + "step": 84680 + }, + { + "grad_norm": 0.033638160675764084, + "learning_rate": 6.273379716870581e-06, + "loss": 0.0033, + "step": 84690 + }, + { + "grad_norm": 0.02175094000995159, + "learning_rate": 6.265363323614887e-06, + "loss": 0.0021, + "step": 84700 + }, + { + "grad_norm": 0.03433497995138168, + "learning_rate": 6.257351713113308e-06, + "loss": 0.0035, + "step": 84710 + }, + { + "grad_norm": 0.0286326315253973, + "learning_rate": 6.249344886241981e-06, + "loss": 0.0023, + "step": 84720 + }, + { + "grad_norm": 0.06710222363471985, + "learning_rate": 6.2413428438765295e-06, + "loss": 0.0037, + "step": 84730 + }, + { + "grad_norm": 0.01879465952515602, + "learning_rate": 6.233345586892026e-06, + "loss": 0.0023, + "step": 84740 + }, + { + "grad_norm": 0.023523040115833282, + "learning_rate": 6.225353116163052e-06, + "loss": 0.0017, + "step": 84750 + }, + { + "grad_norm": 0.01349357608705759, + "learning_rate": 6.217365432563627e-06, + "loss": 0.0019, + "step": 84760 + }, + { + "grad_norm": 0.03255324810743332, + "learning_rate": 6.209382536967318e-06, + "loss": 0.0024, + "step": 84770 + }, + { + "grad_norm": 0.019813749939203262, + "learning_rate": 6.201404430247082e-06, + "loss": 0.0024, + "step": 84780 + }, + { + "grad_norm": 0.024034611880779266, + "learning_rate": 6.1934311132754165e-06, + "loss": 0.002, + "step": 84790 + }, + { + "grad_norm": 0.01759006641805172, + "learning_rate": 6.185462586924257e-06, + "loss": 0.0018, + "step": 84800 + }, + { + "grad_norm": 0.018646465614438057, + "learning_rate": 6.177498852065039e-06, + "loss": 0.0033, + "step": 84810 + }, + { + "grad_norm": 0.03190124034881592, + "learning_rate": 6.169539909568655e-06, + "loss": 0.0028, + "step": 84820 + }, + { + "grad_norm": 0.03415597602725029, + "learning_rate": 6.161585760305488e-06, + "loss": 0.0033, + "step": 84830 + }, + { + "grad_norm": 0.01603192649781704, + "learning_rate": 6.153636405145391e-06, + "loss": 0.0039, + "step": 84840 + }, + { + "grad_norm": 0.039835214614868164, + "learning_rate": 6.145691844957702e-06, + "loss": 0.0028, + "step": 84850 + }, + { + "grad_norm": 0.0291384719312191, + "learning_rate": 6.1377520806112145e-06, + "loss": 0.003, + "step": 84860 + }, + { + "grad_norm": 0.06117408350110054, + "learning_rate": 6.129817112974218e-06, + "loss": 0.0047, + "step": 84870 + }, + { + "grad_norm": 0.024122297763824463, + "learning_rate": 6.121886942914457e-06, + "loss": 0.0022, + "step": 84880 + }, + { + "grad_norm": 0.020159944891929626, + "learning_rate": 6.11396157129917e-06, + "loss": 0.0018, + "step": 84890 + }, + { + "grad_norm": 0.015250411815941334, + "learning_rate": 6.106040998995066e-06, + "loss": 0.004, + "step": 84900 + }, + { + "grad_norm": 0.022350221872329712, + "learning_rate": 6.098125226868329e-06, + "loss": 0.0026, + "step": 84910 + }, + { + "grad_norm": 0.01686020940542221, + "learning_rate": 6.090214255784604e-06, + "loss": 0.0018, + "step": 84920 + }, + { + "grad_norm": 0.046313412487506866, + "learning_rate": 6.0823080866090346e-06, + "loss": 0.0028, + "step": 84930 + }, + { + "grad_norm": 0.02925475500524044, + "learning_rate": 6.074406720206233e-06, + "loss": 0.0023, + "step": 84940 + }, + { + "grad_norm": 0.025395989418029785, + "learning_rate": 6.066510157440259e-06, + "loss": 0.0026, + "step": 84950 + }, + { + "grad_norm": 0.02597680874168873, + "learning_rate": 6.058618399174676e-06, + "loss": 0.0033, + "step": 84960 + }, + { + "grad_norm": 0.02995014376938343, + "learning_rate": 6.050731446272528e-06, + "loss": 0.002, + "step": 84970 + }, + { + "grad_norm": 0.015932181850075722, + "learning_rate": 6.042849299596321e-06, + "loss": 0.0023, + "step": 84980 + }, + { + "grad_norm": 0.021861985325813293, + "learning_rate": 6.034971960008013e-06, + "loss": 0.0015, + "step": 84990 + }, + { + "grad_norm": 0.02593631111085415, + "learning_rate": 6.027099428369082e-06, + "loss": 0.0015, + "step": 85000 + }, + { + "grad_norm": 0.028593823313713074, + "learning_rate": 6.019231705540435e-06, + "loss": 0.0028, + "step": 85010 + }, + { + "grad_norm": 0.016897061839699745, + "learning_rate": 6.011368792382488e-06, + "loss": 0.0021, + "step": 85020 + }, + { + "grad_norm": 0.04392851144075394, + "learning_rate": 6.00351068975511e-06, + "loss": 0.0041, + "step": 85030 + }, + { + "grad_norm": 0.030052553862333298, + "learning_rate": 5.9956573985176674e-06, + "loss": 0.0022, + "step": 85040 + }, + { + "grad_norm": 0.019654709845781326, + "learning_rate": 5.987808919528959e-06, + "loss": 0.0017, + "step": 85050 + }, + { + "grad_norm": 0.03456300124526024, + "learning_rate": 5.979965253647307e-06, + "loss": 0.0032, + "step": 85060 + }, + { + "grad_norm": 0.03093462809920311, + "learning_rate": 5.9721264017304655e-06, + "loss": 0.0035, + "step": 85070 + }, + { + "grad_norm": 0.019240794703364372, + "learning_rate": 5.964292364635682e-06, + "loss": 0.0015, + "step": 85080 + }, + { + "grad_norm": 0.015650980174541473, + "learning_rate": 5.956463143219682e-06, + "loss": 0.002, + "step": 85090 + }, + { + "grad_norm": 0.02262802980840206, + "learning_rate": 5.948638738338663e-06, + "loss": 0.0048, + "step": 85100 + }, + { + "grad_norm": 0.02017766237258911, + "learning_rate": 5.940819150848271e-06, + "loss": 0.003, + "step": 85110 + }, + { + "grad_norm": 0.022372061386704445, + "learning_rate": 5.933004381603663e-06, + "loss": 0.0021, + "step": 85120 + }, + { + "grad_norm": 0.04172637313604355, + "learning_rate": 5.92519443145943e-06, + "loss": 0.0024, + "step": 85130 + }, + { + "grad_norm": 0.019902024418115616, + "learning_rate": 5.917389301269671e-06, + "loss": 0.0019, + "step": 85140 + }, + { + "grad_norm": 0.024222007021307945, + "learning_rate": 5.9095889918879435e-06, + "loss": 0.0043, + "step": 85150 + }, + { + "grad_norm": 0.023187305778265, + "learning_rate": 5.901793504167286e-06, + "loss": 0.0021, + "step": 85160 + }, + { + "grad_norm": 0.024646082893013954, + "learning_rate": 5.894002838960172e-06, + "loss": 0.0027, + "step": 85170 + }, + { + "grad_norm": 0.033944226801395416, + "learning_rate": 5.886216997118604e-06, + "loss": 0.0036, + "step": 85180 + }, + { + "grad_norm": 0.028159338980913162, + "learning_rate": 5.878435979494023e-06, + "loss": 0.0037, + "step": 85190 + }, + { + "grad_norm": 0.03336549550294876, + "learning_rate": 5.870659786937344e-06, + "loss": 0.0023, + "step": 85200 + }, + { + "grad_norm": 0.018708594143390656, + "learning_rate": 5.862888420298962e-06, + "loss": 0.0036, + "step": 85210 + }, + { + "grad_norm": 0.01918764039874077, + "learning_rate": 5.855121880428743e-06, + "loss": 0.0039, + "step": 85220 + }, + { + "grad_norm": 0.025406794622540474, + "learning_rate": 5.84736016817603e-06, + "loss": 0.003, + "step": 85230 + }, + { + "grad_norm": 0.036687109619379044, + "learning_rate": 5.839603284389622e-06, + "loss": 0.0036, + "step": 85240 + }, + { + "grad_norm": 0.02234085462987423, + "learning_rate": 5.831851229917812e-06, + "loss": 0.0024, + "step": 85250 + }, + { + "grad_norm": 0.022125519812107086, + "learning_rate": 5.824104005608338e-06, + "loss": 0.0026, + "step": 85260 + }, + { + "grad_norm": 0.019301731139421463, + "learning_rate": 5.81636161230843e-06, + "loss": 0.0016, + "step": 85270 + }, + { + "grad_norm": 0.02219560742378235, + "learning_rate": 5.808624050864791e-06, + "loss": 0.0017, + "step": 85280 + }, + { + "grad_norm": 0.021066902205348015, + "learning_rate": 5.800891322123592e-06, + "loss": 0.0029, + "step": 85290 + }, + { + "grad_norm": 0.018713854253292084, + "learning_rate": 5.793163426930454e-06, + "loss": 0.0021, + "step": 85300 + }, + { + "grad_norm": 0.01990865357220173, + "learning_rate": 5.785440366130512e-06, + "loss": 0.004, + "step": 85310 + }, + { + "grad_norm": 0.0354609414935112, + "learning_rate": 5.777722140568326e-06, + "loss": 0.0031, + "step": 85320 + }, + { + "grad_norm": 0.05353866145014763, + "learning_rate": 5.7700087510879685e-06, + "loss": 0.0035, + "step": 85330 + }, + { + "grad_norm": 0.02692946046590805, + "learning_rate": 5.762300198532933e-06, + "loss": 0.0026, + "step": 85340 + }, + { + "grad_norm": 0.025475768372416496, + "learning_rate": 5.7545964837462606e-06, + "loss": 0.003, + "step": 85350 + }, + { + "grad_norm": 0.020342933014035225, + "learning_rate": 5.746897607570384e-06, + "loss": 0.0029, + "step": 85360 + }, + { + "grad_norm": 0.04585200175642967, + "learning_rate": 5.7392035708472626e-06, + "loss": 0.0036, + "step": 85370 + }, + { + "grad_norm": 0.031248196959495544, + "learning_rate": 5.731514374418278e-06, + "loss": 0.0019, + "step": 85380 + }, + { + "grad_norm": 0.037627775222063065, + "learning_rate": 5.7238300191243325e-06, + "loss": 0.0028, + "step": 85390 + }, + { + "grad_norm": 0.017152683809399605, + "learning_rate": 5.716150505805762e-06, + "loss": 0.0028, + "step": 85400 + }, + { + "grad_norm": 0.013922113925218582, + "learning_rate": 5.708475835302407e-06, + "loss": 0.0016, + "step": 85410 + }, + { + "grad_norm": 0.04288482293486595, + "learning_rate": 5.70080600845353e-06, + "loss": 0.0029, + "step": 85420 + }, + { + "grad_norm": 0.03817925602197647, + "learning_rate": 5.693141026097909e-06, + "loss": 0.003, + "step": 85430 + }, + { + "grad_norm": 0.02474098652601242, + "learning_rate": 5.6854808890737745e-06, + "loss": 0.0019, + "step": 85440 + }, + { + "grad_norm": 0.02238229103386402, + "learning_rate": 5.6778255982188225e-06, + "loss": 0.0023, + "step": 85450 + }, + { + "grad_norm": 0.01950259506702423, + "learning_rate": 5.670175154370217e-06, + "loss": 0.0027, + "step": 85460 + }, + { + "grad_norm": 0.04008430615067482, + "learning_rate": 5.662529558364616e-06, + "loss": 0.0021, + "step": 85470 + }, + { + "grad_norm": 0.022142700850963593, + "learning_rate": 5.654888811038128e-06, + "loss": 0.0037, + "step": 85480 + }, + { + "grad_norm": 0.024344557896256447, + "learning_rate": 5.647252913226314e-06, + "loss": 0.0033, + "step": 85490 + }, + { + "grad_norm": 0.05506742745637894, + "learning_rate": 5.639621865764255e-06, + "loss": 0.0028, + "step": 85500 + }, + { + "grad_norm": 0.041385307908058167, + "learning_rate": 5.63199566948644e-06, + "loss": 0.0025, + "step": 85510 + }, + { + "grad_norm": 0.03996507450938225, + "learning_rate": 5.6243743252268835e-06, + "loss": 0.0038, + "step": 85520 + }, + { + "grad_norm": 0.02192959003150463, + "learning_rate": 5.616757833819009e-06, + "loss": 0.0027, + "step": 85530 + }, + { + "grad_norm": 0.02673696167767048, + "learning_rate": 5.609146196095794e-06, + "loss": 0.0027, + "step": 85540 + }, + { + "grad_norm": 0.027554910629987717, + "learning_rate": 5.6015394128896005e-06, + "loss": 0.0032, + "step": 85550 + }, + { + "grad_norm": 0.027124758809804916, + "learning_rate": 5.593937485032308e-06, + "loss": 0.0027, + "step": 85560 + }, + { + "grad_norm": 0.023071302101016045, + "learning_rate": 5.586340413355246e-06, + "loss": 0.0018, + "step": 85570 + }, + { + "grad_norm": 0.020824698731303215, + "learning_rate": 5.578748198689227e-06, + "loss": 0.0019, + "step": 85580 + }, + { + "grad_norm": 0.06268259137868881, + "learning_rate": 5.571160841864504e-06, + "loss": 0.004, + "step": 85590 + }, + { + "grad_norm": 0.0262258630245924, + "learning_rate": 5.563578343710846e-06, + "loss": 0.0016, + "step": 85600 + }, + { + "grad_norm": 0.03793958202004433, + "learning_rate": 5.556000705057446e-06, + "loss": 0.0043, + "step": 85610 + }, + { + "grad_norm": 0.019198564812541008, + "learning_rate": 5.548427926732996e-06, + "loss": 0.0032, + "step": 85620 + }, + { + "grad_norm": 0.02660067193210125, + "learning_rate": 5.540860009565629e-06, + "loss": 0.0041, + "step": 85630 + }, + { + "grad_norm": 0.03888363763689995, + "learning_rate": 5.533296954382966e-06, + "loss": 0.0027, + "step": 85640 + }, + { + "grad_norm": 0.027735155075788498, + "learning_rate": 5.525738762012095e-06, + "loss": 0.002, + "step": 85650 + }, + { + "grad_norm": 0.02066049911081791, + "learning_rate": 5.518185433279577e-06, + "loss": 0.0031, + "step": 85660 + }, + { + "grad_norm": 0.019639909267425537, + "learning_rate": 5.510636969011407e-06, + "loss": 0.0032, + "step": 85670 + }, + { + "grad_norm": 0.02220710925757885, + "learning_rate": 5.503093370033096e-06, + "loss": 0.0022, + "step": 85680 + }, + { + "grad_norm": 0.012291813269257545, + "learning_rate": 5.495554637169603e-06, + "loss": 0.0019, + "step": 85690 + }, + { + "grad_norm": 0.02102874219417572, + "learning_rate": 5.488020771245328e-06, + "loss": 0.0026, + "step": 85700 + }, + { + "grad_norm": 0.031896308064460754, + "learning_rate": 5.480491773084195e-06, + "loss": 0.0026, + "step": 85710 + }, + { + "grad_norm": 0.02550339698791504, + "learning_rate": 5.472967643509525e-06, + "loss": 0.0027, + "step": 85720 + }, + { + "grad_norm": 0.05192302539944649, + "learning_rate": 5.46544838334419e-06, + "loss": 0.0024, + "step": 85730 + }, + { + "grad_norm": 0.024441208690404892, + "learning_rate": 5.4579339934104525e-06, + "loss": 0.0021, + "step": 85740 + }, + { + "grad_norm": 0.019611885771155357, + "learning_rate": 5.450424474530091e-06, + "loss": 0.0016, + "step": 85750 + }, + { + "grad_norm": 0.015512139536440372, + "learning_rate": 5.442919827524323e-06, + "loss": 0.0025, + "step": 85760 + }, + { + "grad_norm": 0.025923142209649086, + "learning_rate": 5.435420053213863e-06, + "loss": 0.0026, + "step": 85770 + }, + { + "grad_norm": 0.020726067945361137, + "learning_rate": 5.427925152418845e-06, + "loss": 0.0029, + "step": 85780 + }, + { + "grad_norm": 0.034748174250125885, + "learning_rate": 5.420435125958934e-06, + "loss": 0.0017, + "step": 85790 + }, + { + "grad_norm": 0.04135693237185478, + "learning_rate": 5.412949974653209e-06, + "loss": 0.0019, + "step": 85800 + }, + { + "grad_norm": 0.018606597557663918, + "learning_rate": 5.405469699320248e-06, + "loss": 0.0019, + "step": 85810 + }, + { + "grad_norm": 0.012356139719486237, + "learning_rate": 5.397994300778064e-06, + "loss": 0.0028, + "step": 85820 + }, + { + "grad_norm": 0.032168202102184296, + "learning_rate": 5.39052377984417e-06, + "loss": 0.0023, + "step": 85830 + }, + { + "grad_norm": 0.04064881056547165, + "learning_rate": 5.383058137335523e-06, + "loss": 0.0021, + "step": 85840 + }, + { + "grad_norm": 0.028775803744792938, + "learning_rate": 5.375597374068553e-06, + "loss": 0.0038, + "step": 85850 + }, + { + "grad_norm": 0.05731421709060669, + "learning_rate": 5.368141490859163e-06, + "loss": 0.0031, + "step": 85860 + }, + { + "grad_norm": 0.027598386630415916, + "learning_rate": 5.360690488522729e-06, + "loss": 0.0022, + "step": 85870 + }, + { + "grad_norm": 0.031215351074934006, + "learning_rate": 5.353244367874055e-06, + "loss": 0.0028, + "step": 85880 + }, + { + "grad_norm": 0.016072073951363564, + "learning_rate": 5.345803129727456e-06, + "loss": 0.0036, + "step": 85890 + }, + { + "grad_norm": 0.01792260818183422, + "learning_rate": 5.338366774896697e-06, + "loss": 0.002, + "step": 85900 + }, + { + "grad_norm": 0.023115796968340874, + "learning_rate": 5.330935304194995e-06, + "loss": 0.0017, + "step": 85910 + }, + { + "grad_norm": 0.023821448907256126, + "learning_rate": 5.323508718435044e-06, + "loss": 0.0024, + "step": 85920 + }, + { + "grad_norm": 0.018760837614536285, + "learning_rate": 5.316087018429011e-06, + "loss": 0.0015, + "step": 85930 + }, + { + "grad_norm": 0.028455717489123344, + "learning_rate": 5.30867020498853e-06, + "loss": 0.0052, + "step": 85940 + }, + { + "grad_norm": 0.01823159120976925, + "learning_rate": 5.30125827892467e-06, + "loss": 0.0021, + "step": 85950 + }, + { + "grad_norm": 0.03817647695541382, + "learning_rate": 5.293851241048015e-06, + "loss": 0.003, + "step": 85960 + }, + { + "grad_norm": 0.022266879677772522, + "learning_rate": 5.286449092168555e-06, + "loss": 0.0019, + "step": 85970 + }, + { + "grad_norm": 0.0258405152708292, + "learning_rate": 5.2790518330958105e-06, + "loss": 0.002, + "step": 85980 + }, + { + "grad_norm": 0.02277095429599285, + "learning_rate": 5.271659464638717e-06, + "loss": 0.0038, + "step": 85990 + }, + { + "grad_norm": 0.017369519919157028, + "learning_rate": 5.264271987605701e-06, + "loss": 0.0023, + "step": 86000 + }, + { + "grad_norm": 0.014348772354424, + "learning_rate": 5.256889402804632e-06, + "loss": 0.0019, + "step": 86010 + }, + { + "grad_norm": 0.0194056648761034, + "learning_rate": 5.249511711042881e-06, + "loss": 0.0022, + "step": 86020 + }, + { + "grad_norm": 0.03512425720691681, + "learning_rate": 5.242138913127237e-06, + "loss": 0.003, + "step": 86030 + }, + { + "grad_norm": 0.029637714847922325, + "learning_rate": 5.234771009863987e-06, + "loss": 0.0024, + "step": 86040 + }, + { + "grad_norm": 0.012851214036345482, + "learning_rate": 5.227408002058881e-06, + "loss": 0.0031, + "step": 86050 + }, + { + "grad_norm": 0.012962778098881245, + "learning_rate": 5.220049890517126e-06, + "loss": 0.0046, + "step": 86060 + }, + { + "grad_norm": 0.04353831708431244, + "learning_rate": 5.212696676043377e-06, + "loss": 0.0028, + "step": 86070 + }, + { + "grad_norm": 0.021752934902906418, + "learning_rate": 5.205348359441798e-06, + "loss": 0.0036, + "step": 86080 + }, + { + "grad_norm": 0.03958326205611229, + "learning_rate": 5.198004941515966e-06, + "loss": 0.003, + "step": 86090 + }, + { + "grad_norm": 0.04053587466478348, + "learning_rate": 5.190666423068952e-06, + "loss": 0.0029, + "step": 86100 + }, + { + "grad_norm": 0.037230391055345535, + "learning_rate": 5.183332804903284e-06, + "loss": 0.0024, + "step": 86110 + }, + { + "grad_norm": 0.03245459869503975, + "learning_rate": 5.176004087820979e-06, + "loss": 0.0027, + "step": 86120 + }, + { + "grad_norm": 0.033725760877132416, + "learning_rate": 5.168680272623455e-06, + "loss": 0.0023, + "step": 86130 + }, + { + "grad_norm": 0.017010675743222237, + "learning_rate": 5.161361360111661e-06, + "loss": 0.0017, + "step": 86140 + }, + { + "grad_norm": 0.029382269829511642, + "learning_rate": 5.154047351085984e-06, + "loss": 0.0017, + "step": 86150 + }, + { + "grad_norm": 0.01918482594192028, + "learning_rate": 5.1467382463462385e-06, + "loss": 0.0016, + "step": 86160 + }, + { + "grad_norm": 0.020128915086388588, + "learning_rate": 5.139434046691788e-06, + "loss": 0.0032, + "step": 86170 + }, + { + "grad_norm": 0.026452189311385155, + "learning_rate": 5.13213475292137e-06, + "loss": 0.0041, + "step": 86180 + }, + { + "grad_norm": 0.02388560026884079, + "learning_rate": 5.12484036583325e-06, + "loss": 0.0017, + "step": 86190 + }, + { + "grad_norm": 0.025624999776482582, + "learning_rate": 5.117550886225109e-06, + "loss": 0.0038, + "step": 86200 + }, + { + "grad_norm": 0.021024340763688087, + "learning_rate": 5.11026631489413e-06, + "loss": 0.0021, + "step": 86210 + }, + { + "grad_norm": 0.02281218394637108, + "learning_rate": 5.102986652636926e-06, + "loss": 0.0017, + "step": 86220 + }, + { + "grad_norm": 0.02589898556470871, + "learning_rate": 5.095711900249606e-06, + "loss": 0.0024, + "step": 86230 + }, + { + "grad_norm": 0.030390972271561623, + "learning_rate": 5.088442058527715e-06, + "loss": 0.002, + "step": 86240 + }, + { + "grad_norm": 0.021803507581353188, + "learning_rate": 5.08117712826629e-06, + "loss": 0.0014, + "step": 86250 + }, + { + "grad_norm": 0.023354804143309593, + "learning_rate": 5.073917110259785e-06, + "loss": 0.0022, + "step": 86260 + }, + { + "grad_norm": 0.02217382751405239, + "learning_rate": 5.066662005302175e-06, + "loss": 0.0019, + "step": 86270 + }, + { + "grad_norm": 0.01829829066991806, + "learning_rate": 5.059411814186838e-06, + "loss": 0.0022, + "step": 86280 + }, + { + "grad_norm": 0.03649444878101349, + "learning_rate": 5.05216653770666e-06, + "loss": 0.0031, + "step": 86290 + }, + { + "grad_norm": 0.04601070657372475, + "learning_rate": 5.044926176653969e-06, + "loss": 0.0034, + "step": 86300 + }, + { + "grad_norm": 0.01992502436041832, + "learning_rate": 5.037690731820571e-06, + "loss": 0.0028, + "step": 86310 + }, + { + "grad_norm": 0.02329990454018116, + "learning_rate": 5.030460203997711e-06, + "loss": 0.0024, + "step": 86320 + }, + { + "grad_norm": 0.07596168667078018, + "learning_rate": 5.0232345939761174e-06, + "loss": 0.0041, + "step": 86330 + }, + { + "grad_norm": 0.02646823599934578, + "learning_rate": 5.016013902545957e-06, + "loss": 0.0026, + "step": 86340 + }, + { + "grad_norm": 0.020469214767217636, + "learning_rate": 5.008798130496889e-06, + "loss": 0.0022, + "step": 86350 + }, + { + "grad_norm": 0.025072218850255013, + "learning_rate": 5.001587278618008e-06, + "loss": 0.0038, + "step": 86360 + }, + { + "grad_norm": 0.021182503551244736, + "learning_rate": 4.994381347697891e-06, + "loss": 0.0018, + "step": 86370 + }, + { + "grad_norm": 0.020891455933451653, + "learning_rate": 4.987180338524572e-06, + "loss": 0.0027, + "step": 86380 + }, + { + "grad_norm": 0.043065350502729416, + "learning_rate": 4.9799842518855275e-06, + "loss": 0.0043, + "step": 86390 + }, + { + "grad_norm": 0.0316641591489315, + "learning_rate": 4.9727930885677285e-06, + "loss": 0.0026, + "step": 86400 + }, + { + "grad_norm": 0.03206326439976692, + "learning_rate": 4.965606849357573e-06, + "loss": 0.0025, + "step": 86410 + }, + { + "grad_norm": 0.030216867104172707, + "learning_rate": 4.9584255350409445e-06, + "loss": 0.0019, + "step": 86420 + }, + { + "grad_norm": 0.012569064274430275, + "learning_rate": 4.951249146403175e-06, + "loss": 0.0017, + "step": 86430 + }, + { + "grad_norm": 0.0328763872385025, + "learning_rate": 4.9440776842290815e-06, + "loss": 0.0042, + "step": 86440 + }, + { + "grad_norm": 0.04027343913912773, + "learning_rate": 4.936911149302903e-06, + "loss": 0.0032, + "step": 86450 + }, + { + "grad_norm": 0.03665668144822121, + "learning_rate": 4.929749542408379e-06, + "loss": 0.0023, + "step": 86460 + }, + { + "grad_norm": 0.010318494401872158, + "learning_rate": 4.922592864328673e-06, + "loss": 0.0033, + "step": 86470 + }, + { + "grad_norm": 0.014565221965312958, + "learning_rate": 4.915441115846436e-06, + "loss": 0.003, + "step": 86480 + }, + { + "grad_norm": 0.015418292954564095, + "learning_rate": 4.908294297743782e-06, + "loss": 0.0017, + "step": 86490 + }, + { + "grad_norm": 0.01579851284623146, + "learning_rate": 4.901152410802273e-06, + "loss": 0.0021, + "step": 86500 + }, + { + "grad_norm": 0.022620653733611107, + "learning_rate": 4.894015455802919e-06, + "loss": 0.0019, + "step": 86510 + }, + { + "grad_norm": 0.05457516387104988, + "learning_rate": 4.8868834335262346e-06, + "loss": 0.0033, + "step": 86520 + }, + { + "grad_norm": 0.01995362713932991, + "learning_rate": 4.87975634475214e-06, + "loss": 0.0021, + "step": 86530 + }, + { + "grad_norm": 0.01780533418059349, + "learning_rate": 4.8726341902600614e-06, + "loss": 0.0027, + "step": 86540 + }, + { + "grad_norm": 0.031496401876211166, + "learning_rate": 4.865516970828843e-06, + "loss": 0.0032, + "step": 86550 + }, + { + "grad_norm": 0.04517406225204468, + "learning_rate": 4.85840468723685e-06, + "loss": 0.0021, + "step": 86560 + }, + { + "grad_norm": 0.012449288740754128, + "learning_rate": 4.85129734026184e-06, + "loss": 0.0029, + "step": 86570 + }, + { + "grad_norm": 0.020477434620261192, + "learning_rate": 4.844194930681073e-06, + "loss": 0.002, + "step": 86580 + }, + { + "grad_norm": 0.021814029663801193, + "learning_rate": 4.837097459271267e-06, + "loss": 0.002, + "step": 86590 + }, + { + "grad_norm": 0.0360632985830307, + "learning_rate": 4.830004926808573e-06, + "loss": 0.0027, + "step": 86600 + }, + { + "grad_norm": 0.016741309314966202, + "learning_rate": 4.822917334068627e-06, + "loss": 0.0026, + "step": 86610 + }, + { + "grad_norm": 0.016259370371699333, + "learning_rate": 4.81583468182652e-06, + "loss": 0.002, + "step": 86620 + }, + { + "grad_norm": 0.0206777174025774, + "learning_rate": 4.808756970856809e-06, + "loss": 0.0021, + "step": 86630 + }, + { + "grad_norm": 0.026450587436556816, + "learning_rate": 4.801684201933476e-06, + "loss": 0.0029, + "step": 86640 + }, + { + "grad_norm": 0.02239147573709488, + "learning_rate": 4.794616375830019e-06, + "loss": 0.0035, + "step": 86650 + }, + { + "grad_norm": 0.02597622200846672, + "learning_rate": 4.787553493319341e-06, + "loss": 0.0015, + "step": 86660 + }, + { + "grad_norm": 0.01378588192164898, + "learning_rate": 4.780495555173836e-06, + "loss": 0.0024, + "step": 86670 + }, + { + "grad_norm": 0.01986154541373253, + "learning_rate": 4.7734425621653475e-06, + "loss": 0.0024, + "step": 86680 + }, + { + "grad_norm": 0.01720321550965309, + "learning_rate": 4.766394515065198e-06, + "loss": 0.0025, + "step": 86690 + }, + { + "grad_norm": 0.036444492638111115, + "learning_rate": 4.75935141464412e-06, + "loss": 0.0027, + "step": 86700 + }, + { + "grad_norm": 0.02002429962158203, + "learning_rate": 4.75231326167237e-06, + "loss": 0.0019, + "step": 86710 + }, + { + "grad_norm": 0.015295428223907948, + "learning_rate": 4.745280056919599e-06, + "loss": 0.0028, + "step": 86720 + }, + { + "grad_norm": 0.021018031984567642, + "learning_rate": 4.738251801154975e-06, + "loss": 0.0031, + "step": 86730 + }, + { + "grad_norm": 0.014500941149890423, + "learning_rate": 4.731228495147061e-06, + "loss": 0.0016, + "step": 86740 + }, + { + "grad_norm": 0.01654554344713688, + "learning_rate": 4.7242101396639595e-06, + "loss": 0.0019, + "step": 86750 + }, + { + "grad_norm": 0.02106471173465252, + "learning_rate": 4.717196735473151e-06, + "loss": 0.0032, + "step": 86760 + }, + { + "grad_norm": 0.024537218734622, + "learning_rate": 4.710188283341643e-06, + "loss": 0.0021, + "step": 86770 + }, + { + "grad_norm": 0.01924620196223259, + "learning_rate": 4.703184784035835e-06, + "loss": 0.0025, + "step": 86780 + }, + { + "grad_norm": 0.02954699657857418, + "learning_rate": 4.696186238321642e-06, + "loss": 0.0036, + "step": 86790 + }, + { + "grad_norm": 0.02343522571027279, + "learning_rate": 4.689192646964408e-06, + "loss": 0.0024, + "step": 86800 + }, + { + "grad_norm": 0.0363476537168026, + "learning_rate": 4.682204010728952e-06, + "loss": 0.0028, + "step": 86810 + }, + { + "grad_norm": 0.022776752710342407, + "learning_rate": 4.67522033037952e-06, + "loss": 0.0026, + "step": 86820 + }, + { + "grad_norm": 0.01946224272251129, + "learning_rate": 4.668241606679846e-06, + "loss": 0.0044, + "step": 86830 + }, + { + "grad_norm": 0.018268639221787453, + "learning_rate": 4.66126784039313e-06, + "loss": 0.0023, + "step": 86840 + }, + { + "grad_norm": 0.017870286479592323, + "learning_rate": 4.6542990322819836e-06, + "loss": 0.0023, + "step": 86850 + }, + { + "grad_norm": 0.02024606615304947, + "learning_rate": 4.64733518310852e-06, + "loss": 0.0026, + "step": 86860 + }, + { + "grad_norm": 0.03392805531620979, + "learning_rate": 4.640376293634291e-06, + "loss": 0.0027, + "step": 86870 + }, + { + "grad_norm": 0.02676679939031601, + "learning_rate": 4.633422364620327e-06, + "loss": 0.0024, + "step": 86880 + }, + { + "grad_norm": 0.023335527628660202, + "learning_rate": 4.6264733968270745e-06, + "loss": 0.0024, + "step": 86890 + }, + { + "grad_norm": 0.020333394408226013, + "learning_rate": 4.619529391014482e-06, + "loss": 0.0022, + "step": 86900 + }, + { + "grad_norm": 0.018923882395029068, + "learning_rate": 4.61259034794192e-06, + "loss": 0.0028, + "step": 86910 + }, + { + "grad_norm": 0.029802750796079636, + "learning_rate": 4.605656268368247e-06, + "loss": 0.0018, + "step": 86920 + }, + { + "grad_norm": 0.019854508340358734, + "learning_rate": 4.598727153051735e-06, + "loss": 0.0022, + "step": 86930 + }, + { + "grad_norm": 0.02742123417556286, + "learning_rate": 4.591803002750178e-06, + "loss": 0.0026, + "step": 86940 + }, + { + "grad_norm": 0.012478168122470379, + "learning_rate": 4.58488381822077e-06, + "loss": 0.0017, + "step": 86950 + }, + { + "grad_norm": 0.014862379059195518, + "learning_rate": 4.5779696002201946e-06, + "loss": 0.002, + "step": 86960 + }, + { + "grad_norm": 0.016395973041653633, + "learning_rate": 4.571060349504563e-06, + "loss": 0.0015, + "step": 86970 + }, + { + "grad_norm": 0.04109250009059906, + "learning_rate": 4.564156066829478e-06, + "loss": 0.0042, + "step": 86980 + }, + { + "grad_norm": 0.04247394576668739, + "learning_rate": 4.557256752949957e-06, + "loss": 0.0031, + "step": 86990 + }, + { + "grad_norm": 0.02164273150265217, + "learning_rate": 4.550362408620534e-06, + "loss": 0.0019, + "step": 87000 + }, + { + "grad_norm": 0.014381041750311852, + "learning_rate": 4.543473034595136e-06, + "loss": 0.0021, + "step": 87010 + }, + { + "grad_norm": 0.026124922558665276, + "learning_rate": 4.536588631627192e-06, + "loss": 0.0024, + "step": 87020 + }, + { + "grad_norm": 0.038974788039922714, + "learning_rate": 4.5297092004695565e-06, + "loss": 0.0024, + "step": 87030 + }, + { + "grad_norm": 0.01724843867123127, + "learning_rate": 4.522834741874555e-06, + "loss": 0.0015, + "step": 87040 + }, + { + "grad_norm": 0.030567103996872902, + "learning_rate": 4.515965256593974e-06, + "loss": 0.0028, + "step": 87050 + }, + { + "grad_norm": 0.019466256722807884, + "learning_rate": 4.509100745379058e-06, + "loss": 0.0031, + "step": 87060 + }, + { + "grad_norm": 0.027500618249177933, + "learning_rate": 4.502241208980479e-06, + "loss": 0.0025, + "step": 87070 + }, + { + "grad_norm": 0.013525105081498623, + "learning_rate": 4.495386648148398e-06, + "loss": 0.0021, + "step": 87080 + }, + { + "grad_norm": 0.01575389690697193, + "learning_rate": 4.488537063632431e-06, + "loss": 0.0029, + "step": 87090 + }, + { + "grad_norm": 0.02178751677274704, + "learning_rate": 4.481692456181608e-06, + "loss": 0.0024, + "step": 87100 + }, + { + "grad_norm": 0.02349991165101528, + "learning_rate": 4.474852826544479e-06, + "loss": 0.0027, + "step": 87110 + }, + { + "grad_norm": 0.021872583776712418, + "learning_rate": 4.468018175468974e-06, + "loss": 0.0025, + "step": 87120 + }, + { + "grad_norm": 0.031435783952474594, + "learning_rate": 4.461188503702568e-06, + "loss": 0.0018, + "step": 87130 + }, + { + "grad_norm": 0.0353640653192997, + "learning_rate": 4.454363811992107e-06, + "loss": 0.0035, + "step": 87140 + }, + { + "grad_norm": 0.01481295470148325, + "learning_rate": 4.447544101083956e-06, + "loss": 0.0026, + "step": 87150 + }, + { + "grad_norm": 0.020719660446047783, + "learning_rate": 4.440729371723884e-06, + "loss": 0.0019, + "step": 87160 + }, + { + "grad_norm": 0.018616747111082077, + "learning_rate": 4.433919624657162e-06, + "loss": 0.0025, + "step": 87170 + }, + { + "grad_norm": 0.01743638515472412, + "learning_rate": 4.427114860628462e-06, + "loss": 0.0031, + "step": 87180 + }, + { + "grad_norm": 0.01803252287209034, + "learning_rate": 4.420315080381982e-06, + "loss": 0.0024, + "step": 87190 + }, + { + "grad_norm": 0.03776074945926666, + "learning_rate": 4.4135202846613045e-06, + "loss": 0.003, + "step": 87200 + }, + { + "grad_norm": 0.02501750737428665, + "learning_rate": 4.406730474209525e-06, + "loss": 0.0019, + "step": 87210 + }, + { + "grad_norm": 0.009360759519040585, + "learning_rate": 4.399945649769138e-06, + "loss": 0.0018, + "step": 87220 + }, + { + "grad_norm": 0.023861432448029518, + "learning_rate": 4.393165812082151e-06, + "loss": 0.0042, + "step": 87230 + }, + { + "grad_norm": 0.03856905549764633, + "learning_rate": 4.386390961889969e-06, + "loss": 0.0034, + "step": 87240 + }, + { + "grad_norm": 0.05445878207683563, + "learning_rate": 4.3796210999334895e-06, + "loss": 0.002, + "step": 87250 + }, + { + "grad_norm": 0.014670414850115776, + "learning_rate": 4.372856226953065e-06, + "loss": 0.0023, + "step": 87260 + }, + { + "grad_norm": 0.017506083473563194, + "learning_rate": 4.366096343688486e-06, + "loss": 0.0026, + "step": 87270 + }, + { + "grad_norm": 0.02273603342473507, + "learning_rate": 4.359341450878995e-06, + "loss": 0.002, + "step": 87280 + }, + { + "grad_norm": 0.014533787034451962, + "learning_rate": 4.352591549263302e-06, + "loss": 0.0022, + "step": 87290 + }, + { + "grad_norm": 0.0331556461751461, + "learning_rate": 4.345846639579582e-06, + "loss": 0.0024, + "step": 87300 + }, + { + "grad_norm": 0.03761688992381096, + "learning_rate": 4.339106722565417e-06, + "loss": 0.0022, + "step": 87310 + }, + { + "grad_norm": 0.016800744459033012, + "learning_rate": 4.332371798957896e-06, + "loss": 0.0023, + "step": 87320 + }, + { + "grad_norm": 0.02410878986120224, + "learning_rate": 4.3256418694935295e-06, + "loss": 0.0023, + "step": 87330 + }, + { + "grad_norm": 0.030265243723988533, + "learning_rate": 4.3189169349083125e-06, + "loss": 0.0021, + "step": 87340 + }, + { + "grad_norm": 0.01646212860941887, + "learning_rate": 4.312196995937645e-06, + "loss": 0.0017, + "step": 87350 + }, + { + "grad_norm": 0.05055468901991844, + "learning_rate": 4.305482053316434e-06, + "loss": 0.0034, + "step": 87360 + }, + { + "grad_norm": 0.042414382100105286, + "learning_rate": 4.2987721077789925e-06, + "loss": 0.0029, + "step": 87370 + }, + { + "grad_norm": 0.017037153244018555, + "learning_rate": 4.2920671600591325e-06, + "loss": 0.0036, + "step": 87380 + }, + { + "grad_norm": 0.0141897639259696, + "learning_rate": 4.285367210890084e-06, + "loss": 0.0015, + "step": 87390 + }, + { + "grad_norm": 0.018433546647429466, + "learning_rate": 4.278672261004551e-06, + "loss": 0.002, + "step": 87400 + }, + { + "grad_norm": 0.02422357350587845, + "learning_rate": 4.2719823111346704e-06, + "loss": 0.0021, + "step": 87410 + }, + { + "grad_norm": 0.02011076919734478, + "learning_rate": 4.265297362012061e-06, + "loss": 0.0016, + "step": 87420 + }, + { + "grad_norm": 0.018056277185678482, + "learning_rate": 4.258617414367766e-06, + "loss": 0.0029, + "step": 87430 + }, + { + "grad_norm": 0.02197236754000187, + "learning_rate": 4.251942468932296e-06, + "loss": 0.0017, + "step": 87440 + }, + { + "grad_norm": 0.03665059432387352, + "learning_rate": 4.2452725264356175e-06, + "loss": 0.0029, + "step": 87450 + }, + { + "grad_norm": 0.017181213945150375, + "learning_rate": 4.2386075876071575e-06, + "loss": 0.0035, + "step": 87460 + }, + { + "grad_norm": 0.019723471254110336, + "learning_rate": 4.231947653175755e-06, + "loss": 0.0022, + "step": 87470 + }, + { + "grad_norm": 0.030522234737873077, + "learning_rate": 4.225292723869761e-06, + "loss": 0.0025, + "step": 87480 + }, + { + "grad_norm": 0.02345437742769718, + "learning_rate": 4.218642800416916e-06, + "loss": 0.0033, + "step": 87490 + }, + { + "grad_norm": 0.01584300771355629, + "learning_rate": 4.211997883544472e-06, + "loss": 0.0029, + "step": 87500 + }, + { + "grad_norm": 0.04318483546376228, + "learning_rate": 4.205357973979096e-06, + "loss": 0.0031, + "step": 87510 + }, + { + "grad_norm": 0.03499557450413704, + "learning_rate": 4.198723072446925e-06, + "loss": 0.0022, + "step": 87520 + }, + { + "grad_norm": 0.024265754967927933, + "learning_rate": 4.192093179673534e-06, + "loss": 0.0024, + "step": 87530 + }, + { + "grad_norm": 0.018564511090517044, + "learning_rate": 4.185468296383959e-06, + "loss": 0.0016, + "step": 87540 + }, + { + "grad_norm": 0.014450423419475555, + "learning_rate": 4.1788484233027005e-06, + "loss": 0.0033, + "step": 87550 + }, + { + "grad_norm": 0.030131110921502113, + "learning_rate": 4.172233561153677e-06, + "loss": 0.0014, + "step": 87560 + }, + { + "grad_norm": 0.02850409969687462, + "learning_rate": 4.165623710660288e-06, + "loss": 0.0021, + "step": 87570 + }, + { + "grad_norm": 0.01738647371530533, + "learning_rate": 4.1590188725453835e-06, + "loss": 0.0023, + "step": 87580 + }, + { + "grad_norm": 0.024632807821035385, + "learning_rate": 4.152419047531259e-06, + "loss": 0.002, + "step": 87590 + }, + { + "grad_norm": 0.01564294844865799, + "learning_rate": 4.145824236339651e-06, + "loss": 0.002, + "step": 87600 + }, + { + "grad_norm": 0.024428654462099075, + "learning_rate": 4.1392344396917695e-06, + "loss": 0.0018, + "step": 87610 + }, + { + "grad_norm": 0.0208274032920599, + "learning_rate": 4.132649658308252e-06, + "loss": 0.0028, + "step": 87620 + }, + { + "grad_norm": 0.02363039366900921, + "learning_rate": 4.126069892909213e-06, + "loss": 0.0026, + "step": 87630 + }, + { + "grad_norm": 0.0157467033714056, + "learning_rate": 4.119495144214191e-06, + "loss": 0.002, + "step": 87640 + }, + { + "grad_norm": 0.01892230473458767, + "learning_rate": 4.112925412942214e-06, + "loss": 0.0013, + "step": 87650 + }, + { + "grad_norm": 0.01741097867488861, + "learning_rate": 4.106360699811712e-06, + "loss": 0.0032, + "step": 87660 + }, + { + "grad_norm": 0.019235456362366676, + "learning_rate": 4.099801005540616e-06, + "loss": 0.0015, + "step": 87670 + }, + { + "grad_norm": 0.01798313297331333, + "learning_rate": 4.093246330846256e-06, + "loss": 0.0023, + "step": 87680 + }, + { + "grad_norm": 0.03927240148186684, + "learning_rate": 4.086696676445467e-06, + "loss": 0.0024, + "step": 87690 + }, + { + "grad_norm": 0.022628502920269966, + "learning_rate": 4.0801520430544914e-06, + "loss": 0.0023, + "step": 87700 + }, + { + "grad_norm": 0.020005639642477036, + "learning_rate": 4.073612431389062e-06, + "loss": 0.0025, + "step": 87710 + }, + { + "grad_norm": 0.01438981294631958, + "learning_rate": 4.067077842164319e-06, + "loss": 0.0028, + "step": 87720 + }, + { + "grad_norm": 0.03245234861969948, + "learning_rate": 4.0605482760948945e-06, + "loss": 0.003, + "step": 87730 + }, + { + "grad_norm": 0.026266373693943024, + "learning_rate": 4.054023733894829e-06, + "loss": 0.0017, + "step": 87740 + }, + { + "grad_norm": 0.03597298264503479, + "learning_rate": 4.047504216277653e-06, + "loss": 0.0031, + "step": 87750 + }, + { + "grad_norm": 0.014541486278176308, + "learning_rate": 4.040989723956323e-06, + "loss": 0.0033, + "step": 87760 + }, + { + "grad_norm": 0.021335743367671967, + "learning_rate": 4.03448025764327e-06, + "loss": 0.002, + "step": 87770 + }, + { + "grad_norm": 0.042642991989851, + "learning_rate": 4.027975818050339e-06, + "loss": 0.0025, + "step": 87780 + }, + { + "grad_norm": 0.021129082888364792, + "learning_rate": 4.021476405888858e-06, + "loss": 0.0019, + "step": 87790 + }, + { + "grad_norm": 0.02059243805706501, + "learning_rate": 4.01498202186959e-06, + "loss": 0.0018, + "step": 87800 + }, + { + "grad_norm": 0.031241411343216896, + "learning_rate": 4.008492666702751e-06, + "loss": 0.0031, + "step": 87810 + }, + { + "grad_norm": 0.0194686371833086, + "learning_rate": 4.002008341098001e-06, + "loss": 0.0019, + "step": 87820 + }, + { + "grad_norm": 0.0173343438655138, + "learning_rate": 3.995529045764462e-06, + "loss": 0.002, + "step": 87830 + }, + { + "grad_norm": 0.017085306346416473, + "learning_rate": 3.989054781410712e-06, + "loss": 0.0018, + "step": 87840 + }, + { + "grad_norm": 0.014752395451068878, + "learning_rate": 3.982585548744744e-06, + "loss": 0.0017, + "step": 87850 + }, + { + "grad_norm": 0.04303120821714401, + "learning_rate": 3.9761213484740435e-06, + "loss": 0.0021, + "step": 87860 + }, + { + "grad_norm": 0.02174793928861618, + "learning_rate": 3.9696621813055055e-06, + "loss": 0.0025, + "step": 87870 + }, + { + "grad_norm": 0.04260198771953583, + "learning_rate": 3.9632080479455044e-06, + "loss": 0.0035, + "step": 87880 + }, + { + "grad_norm": 0.032366156578063965, + "learning_rate": 3.956758949099859e-06, + "loss": 0.0024, + "step": 87890 + }, + { + "grad_norm": 0.029437720775604248, + "learning_rate": 3.9503148854738385e-06, + "loss": 0.0021, + "step": 87900 + }, + { + "grad_norm": 0.02308795042335987, + "learning_rate": 3.943875857772139e-06, + "loss": 0.0018, + "step": 87910 + }, + { + "grad_norm": 0.02173582650721073, + "learning_rate": 3.9374418666989375e-06, + "loss": 0.0028, + "step": 87920 + }, + { + "grad_norm": 0.02074347250163555, + "learning_rate": 3.931012912957838e-06, + "loss": 0.0033, + "step": 87930 + }, + { + "grad_norm": 0.03200666233897209, + "learning_rate": 3.9245889972519045e-06, + "loss": 0.0029, + "step": 87940 + }, + { + "grad_norm": 0.025299686938524246, + "learning_rate": 3.9181701202836265e-06, + "loss": 0.0022, + "step": 87950 + }, + { + "grad_norm": 0.02550964616239071, + "learning_rate": 3.911756282755003e-06, + "loss": 0.0018, + "step": 87960 + }, + { + "grad_norm": 0.018276993185281754, + "learning_rate": 3.905347485367411e-06, + "loss": 0.0018, + "step": 87970 + }, + { + "grad_norm": 0.009894641116261482, + "learning_rate": 3.898943728821725e-06, + "loss": 0.0026, + "step": 87980 + }, + { + "grad_norm": 0.01358176488429308, + "learning_rate": 3.892545013818227e-06, + "loss": 0.0015, + "step": 87990 + }, + { + "grad_norm": 0.014904114417731762, + "learning_rate": 3.886151341056687e-06, + "loss": 0.0031, + "step": 88000 + }, + { + "grad_norm": 0.01703674905002117, + "learning_rate": 3.8797627112363066e-06, + "loss": 0.0025, + "step": 88010 + }, + { + "grad_norm": 0.05778350681066513, + "learning_rate": 3.873379125055743e-06, + "loss": 0.0027, + "step": 88020 + }, + { + "grad_norm": 0.025951601564884186, + "learning_rate": 3.867000583213082e-06, + "loss": 0.0018, + "step": 88030 + }, + { + "grad_norm": 0.031807877123355865, + "learning_rate": 3.860627086405871e-06, + "loss": 0.0018, + "step": 88040 + }, + { + "grad_norm": 0.015254502184689045, + "learning_rate": 3.854258635331126e-06, + "loss": 0.0018, + "step": 88050 + }, + { + "grad_norm": 0.04084784537553787, + "learning_rate": 3.8478952306852665e-06, + "loss": 0.0031, + "step": 88060 + }, + { + "grad_norm": 0.0424080528318882, + "learning_rate": 3.841536873164198e-06, + "loss": 0.0027, + "step": 88070 + }, + { + "grad_norm": 0.02011268585920334, + "learning_rate": 3.8351835634632626e-06, + "loss": 0.0024, + "step": 88080 + }, + { + "grad_norm": 0.02404635027050972, + "learning_rate": 3.828835302277245e-06, + "loss": 0.0019, + "step": 88090 + }, + { + "grad_norm": 0.04053336754441261, + "learning_rate": 3.822492090300378e-06, + "loss": 0.0031, + "step": 88100 + }, + { + "grad_norm": 0.046264782547950745, + "learning_rate": 3.8161539282263566e-06, + "loss": 0.0034, + "step": 88110 + }, + { + "grad_norm": 0.026745939627289772, + "learning_rate": 3.8098208167482995e-06, + "loss": 0.002, + "step": 88120 + }, + { + "grad_norm": 0.022232741117477417, + "learning_rate": 3.803492756558796e-06, + "loss": 0.0025, + "step": 88130 + }, + { + "grad_norm": 0.020189760252833366, + "learning_rate": 3.797169748349855e-06, + "loss": 0.0019, + "step": 88140 + }, + { + "grad_norm": 0.03525779768824577, + "learning_rate": 3.790851792812977e-06, + "loss": 0.0034, + "step": 88150 + }, + { + "grad_norm": 0.031148679554462433, + "learning_rate": 3.784538890639072e-06, + "loss": 0.0021, + "step": 88160 + }, + { + "grad_norm": 0.013488464057445526, + "learning_rate": 3.7782310425185153e-06, + "loss": 0.0017, + "step": 88170 + }, + { + "grad_norm": 0.027454929426312447, + "learning_rate": 3.77192824914111e-06, + "loss": 0.003, + "step": 88180 + }, + { + "grad_norm": 0.01212100125849247, + "learning_rate": 3.7656305111961333e-06, + "loss": 0.0023, + "step": 88190 + }, + { + "grad_norm": 0.04038159176707268, + "learning_rate": 3.7593378293722782e-06, + "loss": 0.0024, + "step": 88200 + }, + { + "grad_norm": 0.02739763632416725, + "learning_rate": 3.7530502043577333e-06, + "loss": 0.0024, + "step": 88210 + }, + { + "grad_norm": 0.01191036682575941, + "learning_rate": 3.746767636840076e-06, + "loss": 0.0022, + "step": 88220 + }, + { + "grad_norm": 0.02230302058160305, + "learning_rate": 3.74049012750638e-06, + "loss": 0.0019, + "step": 88230 + }, + { + "grad_norm": 0.021128054708242416, + "learning_rate": 3.734217677043128e-06, + "loss": 0.0022, + "step": 88240 + }, + { + "grad_norm": 0.036100514233112335, + "learning_rate": 3.727950286136267e-06, + "loss": 0.0026, + "step": 88250 + }, + { + "grad_norm": 0.018402183428406715, + "learning_rate": 3.7216879554711925e-06, + "loss": 0.0021, + "step": 88260 + }, + { + "grad_norm": 0.012924741953611374, + "learning_rate": 3.715430685732757e-06, + "loss": 0.002, + "step": 88270 + }, + { + "grad_norm": 0.01320404838770628, + "learning_rate": 3.7091784776052243e-06, + "loss": 0.002, + "step": 88280 + }, + { + "grad_norm": 0.019448108971118927, + "learning_rate": 3.7029313317723356e-06, + "loss": 0.0025, + "step": 88290 + }, + { + "grad_norm": 0.02254832722246647, + "learning_rate": 3.6966892489172787e-06, + "loss": 0.0021, + "step": 88300 + }, + { + "grad_norm": 0.02122090384364128, + "learning_rate": 3.6904522297226575e-06, + "loss": 0.0022, + "step": 88310 + }, + { + "grad_norm": 0.03238964080810547, + "learning_rate": 3.6842202748705647e-06, + "loss": 0.0032, + "step": 88320 + }, + { + "grad_norm": 0.01372329331934452, + "learning_rate": 3.677993385042494e-06, + "loss": 0.0017, + "step": 88330 + }, + { + "grad_norm": 0.013778390362858772, + "learning_rate": 3.67177156091944e-06, + "loss": 0.0025, + "step": 88340 + }, + { + "grad_norm": 0.039868805557489395, + "learning_rate": 3.6655548031817866e-06, + "loss": 0.0025, + "step": 88350 + }, + { + "grad_norm": 0.020107554271817207, + "learning_rate": 3.6593431125094e-06, + "loss": 0.004, + "step": 88360 + }, + { + "grad_norm": 0.023823807016015053, + "learning_rate": 3.6531364895815766e-06, + "loss": 0.0023, + "step": 88370 + }, + { + "grad_norm": 0.0156708974391222, + "learning_rate": 3.6469349350770677e-06, + "loss": 0.0031, + "step": 88380 + }, + { + "grad_norm": 0.01949547417461872, + "learning_rate": 3.6407384496740525e-06, + "loss": 0.0021, + "step": 88390 + }, + { + "grad_norm": 0.015958912670612335, + "learning_rate": 3.6345470340501954e-06, + "loss": 0.0025, + "step": 88400 + }, + { + "grad_norm": 0.0146560650318861, + "learning_rate": 3.6283606888825537e-06, + "loss": 0.0014, + "step": 88410 + }, + { + "grad_norm": 0.05149611458182335, + "learning_rate": 3.622179414847676e-06, + "loss": 0.0049, + "step": 88420 + }, + { + "grad_norm": 0.03380616754293442, + "learning_rate": 3.616003212621527e-06, + "loss": 0.0035, + "step": 88430 + }, + { + "grad_norm": 0.02915959432721138, + "learning_rate": 3.609832082879533e-06, + "loss": 0.0031, + "step": 88440 + }, + { + "grad_norm": 0.02401912771165371, + "learning_rate": 3.6036660262965485e-06, + "loss": 0.0022, + "step": 88450 + }, + { + "grad_norm": 0.008375341072678566, + "learning_rate": 3.597505043546895e-06, + "loss": 0.0019, + "step": 88460 + }, + { + "grad_norm": 0.02519475482404232, + "learning_rate": 3.5913491353043227e-06, + "loss": 0.0031, + "step": 88470 + }, + { + "grad_norm": 0.02335311286151409, + "learning_rate": 3.5851983022420375e-06, + "loss": 0.0016, + "step": 88480 + }, + { + "grad_norm": 0.010278931818902493, + "learning_rate": 3.5790525450326797e-06, + "loss": 0.0016, + "step": 88490 + }, + { + "grad_norm": 0.015697745606303215, + "learning_rate": 3.572911864348344e-06, + "loss": 0.0024, + "step": 88500 + }, + { + "grad_norm": 0.020684076473116875, + "learning_rate": 3.5667762608605717e-06, + "loss": 0.0026, + "step": 88510 + }, + { + "grad_norm": 0.02649247646331787, + "learning_rate": 3.5606457352403255e-06, + "loss": 0.0031, + "step": 88520 + }, + { + "grad_norm": 0.02360227331519127, + "learning_rate": 3.554520288158042e-06, + "loss": 0.002, + "step": 88530 + }, + { + "grad_norm": 0.013988886959850788, + "learning_rate": 3.5483999202835903e-06, + "loss": 0.0021, + "step": 88540 + }, + { + "grad_norm": 0.04002892225980759, + "learning_rate": 3.5422846322862913e-06, + "loss": 0.0035, + "step": 88550 + }, + { + "grad_norm": 0.04697982966899872, + "learning_rate": 3.5361744248348925e-06, + "loss": 0.0035, + "step": 88560 + }, + { + "grad_norm": 0.03201250359416008, + "learning_rate": 3.5300692985976047e-06, + "loss": 0.0022, + "step": 88570 + }, + { + "grad_norm": 0.022059204056859016, + "learning_rate": 3.52396925424206e-06, + "loss": 0.0018, + "step": 88580 + }, + { + "grad_norm": 0.010658175684511662, + "learning_rate": 3.5178742924353747e-06, + "loss": 0.0023, + "step": 88590 + }, + { + "grad_norm": 0.021940108388662338, + "learning_rate": 3.511784413844066e-06, + "loss": 0.0023, + "step": 88600 + }, + { + "grad_norm": 0.014391728676855564, + "learning_rate": 3.5056996191341287e-06, + "loss": 0.0024, + "step": 88610 + }, + { + "grad_norm": 0.024145672097802162, + "learning_rate": 3.4996199089709692e-06, + "loss": 0.0025, + "step": 88620 + }, + { + "grad_norm": 0.018772214651107788, + "learning_rate": 3.4935452840194725e-06, + "loss": 0.0026, + "step": 88630 + }, + { + "grad_norm": 0.03080497495830059, + "learning_rate": 3.48747574494393e-06, + "loss": 0.0033, + "step": 88640 + }, + { + "grad_norm": 0.014854345470666885, + "learning_rate": 3.4814112924081154e-06, + "loss": 0.0034, + "step": 88650 + }, + { + "grad_norm": 0.023397905752062798, + "learning_rate": 3.475351927075221e-06, + "loss": 0.0027, + "step": 88660 + }, + { + "grad_norm": 0.021229341626167297, + "learning_rate": 3.469297649607894e-06, + "loss": 0.0042, + "step": 88670 + }, + { + "grad_norm": 0.020655134692788124, + "learning_rate": 3.463248460668217e-06, + "loss": 0.0032, + "step": 88680 + }, + { + "grad_norm": 0.0266330037266016, + "learning_rate": 3.457204360917726e-06, + "loss": 0.0016, + "step": 88690 + }, + { + "grad_norm": 0.041640754789114, + "learning_rate": 3.4511653510173827e-06, + "loss": 0.0036, + "step": 88700 + }, + { + "grad_norm": 0.024528909474611282, + "learning_rate": 3.4451314316276074e-06, + "loss": 0.0035, + "step": 88710 + }, + { + "grad_norm": 0.011243273504078388, + "learning_rate": 3.4391026034082674e-06, + "loss": 0.002, + "step": 88720 + }, + { + "grad_norm": 0.013509622775018215, + "learning_rate": 3.433078867018674e-06, + "loss": 0.0018, + "step": 88730 + }, + { + "grad_norm": 0.01492351945489645, + "learning_rate": 3.427060223117551e-06, + "loss": 0.0018, + "step": 88740 + }, + { + "grad_norm": 0.03147720545530319, + "learning_rate": 3.4210466723631042e-06, + "loss": 0.0019, + "step": 88750 + }, + { + "grad_norm": 0.03482186049222946, + "learning_rate": 3.415038215412969e-06, + "loss": 0.0018, + "step": 88760 + }, + { + "grad_norm": 0.02012445777654648, + "learning_rate": 3.4090348529242088e-06, + "loss": 0.0025, + "step": 88770 + }, + { + "grad_norm": 0.02887643873691559, + "learning_rate": 3.4030365855533487e-06, + "loss": 0.0021, + "step": 88780 + }, + { + "grad_norm": 0.012786442413926125, + "learning_rate": 3.397043413956347e-06, + "loss": 0.0019, + "step": 88790 + }, + { + "grad_norm": 0.015183630399405956, + "learning_rate": 3.3910553387886237e-06, + "loss": 0.0036, + "step": 88800 + }, + { + "grad_norm": 0.026867609471082687, + "learning_rate": 3.3850723607049996e-06, + "loss": 0.0022, + "step": 88810 + }, + { + "grad_norm": 0.05361831188201904, + "learning_rate": 3.3790944803597902e-06, + "loss": 0.0038, + "step": 88820 + }, + { + "grad_norm": 0.046556197106838226, + "learning_rate": 3.373121698406706e-06, + "loss": 0.0027, + "step": 88830 + }, + { + "grad_norm": 0.018899323418736458, + "learning_rate": 3.3671540154989302e-06, + "loss": 0.0031, + "step": 88840 + }, + { + "grad_norm": 0.03648029640316963, + "learning_rate": 3.361191432289079e-06, + "loss": 0.0027, + "step": 88850 + }, + { + "grad_norm": 0.03375962749123573, + "learning_rate": 3.3552339494292207e-06, + "loss": 0.0027, + "step": 88860 + }, + { + "grad_norm": 0.04887528717517853, + "learning_rate": 3.349281567570839e-06, + "loss": 0.0034, + "step": 88870 + }, + { + "grad_norm": 0.01185966469347477, + "learning_rate": 3.3433342873648965e-06, + "loss": 0.002, + "step": 88880 + }, + { + "grad_norm": 0.017514543607831, + "learning_rate": 3.337392109461762e-06, + "loss": 0.0028, + "step": 88890 + }, + { + "grad_norm": 0.025622056797146797, + "learning_rate": 3.331455034511266e-06, + "loss": 0.0027, + "step": 88900 + }, + { + "grad_norm": 0.023290980607271194, + "learning_rate": 3.325523063162689e-06, + "loss": 0.0033, + "step": 88910 + }, + { + "grad_norm": 0.02605622261762619, + "learning_rate": 3.3195961960647393e-06, + "loss": 0.0026, + "step": 88920 + }, + { + "grad_norm": 0.018824655562639236, + "learning_rate": 3.31367443386556e-06, + "loss": 0.0017, + "step": 88930 + }, + { + "grad_norm": 0.013488385826349258, + "learning_rate": 3.3077577772127556e-06, + "loss": 0.0018, + "step": 88940 + }, + { + "grad_norm": 0.013521252200007439, + "learning_rate": 3.301846226753358e-06, + "loss": 0.0023, + "step": 88950 + }, + { + "grad_norm": 0.056773312389850616, + "learning_rate": 3.2959397831338444e-06, + "loss": 0.0027, + "step": 88960 + }, + { + "grad_norm": 0.03198476880788803, + "learning_rate": 3.2900384470001432e-06, + "loss": 0.0017, + "step": 88970 + }, + { + "grad_norm": 0.019720347598195076, + "learning_rate": 3.28414221899761e-06, + "loss": 0.0023, + "step": 88980 + }, + { + "grad_norm": 0.01197430957108736, + "learning_rate": 3.278251099771046e-06, + "loss": 0.002, + "step": 88990 + }, + { + "grad_norm": 0.031081993132829666, + "learning_rate": 3.2723650899646906e-06, + "loss": 0.0021, + "step": 89000 + }, + { + "grad_norm": 0.016230778768658638, + "learning_rate": 3.2664841902222466e-06, + "loss": 0.0026, + "step": 89010 + }, + { + "grad_norm": 0.017242934554815292, + "learning_rate": 3.2606084011868208e-06, + "loss": 0.0022, + "step": 89020 + }, + { + "grad_norm": 0.021337108686566353, + "learning_rate": 3.254737723500989e-06, + "loss": 0.0025, + "step": 89030 + }, + { + "grad_norm": 0.02867289073765278, + "learning_rate": 3.2488721578067595e-06, + "loss": 0.0023, + "step": 89040 + }, + { + "grad_norm": 0.010287029668688774, + "learning_rate": 3.243011704745591e-06, + "loss": 0.0036, + "step": 89050 + }, + { + "grad_norm": 0.011643098667263985, + "learning_rate": 3.237156364958355e-06, + "loss": 0.0029, + "step": 89060 + }, + { + "grad_norm": 0.01553061231970787, + "learning_rate": 3.231306139085394e-06, + "loss": 0.0022, + "step": 89070 + }, + { + "grad_norm": 0.04046645015478134, + "learning_rate": 3.2254610277664797e-06, + "loss": 0.0026, + "step": 89080 + }, + { + "grad_norm": 0.015628071501851082, + "learning_rate": 3.2196210316408183e-06, + "loss": 0.0017, + "step": 89090 + }, + { + "grad_norm": 0.02555498108267784, + "learning_rate": 3.2137861513470644e-06, + "loss": 0.0017, + "step": 89100 + }, + { + "grad_norm": 0.03592487424612045, + "learning_rate": 3.2079563875233252e-06, + "loss": 0.0024, + "step": 89110 + }, + { + "grad_norm": 0.019016340374946594, + "learning_rate": 3.2021317408071184e-06, + "loss": 0.0023, + "step": 89120 + }, + { + "grad_norm": 0.03004489466547966, + "learning_rate": 3.196312211835434e-06, + "loss": 0.0047, + "step": 89130 + }, + { + "grad_norm": 0.023570965975522995, + "learning_rate": 3.1904978012446686e-06, + "loss": 0.0033, + "step": 89140 + }, + { + "grad_norm": 0.026729291304945946, + "learning_rate": 3.184688509670691e-06, + "loss": 0.002, + "step": 89150 + }, + { + "grad_norm": 0.011295083910226822, + "learning_rate": 3.178884337748783e-06, + "loss": 0.0023, + "step": 89160 + }, + { + "grad_norm": 0.020635798573493958, + "learning_rate": 3.173085286113697e-06, + "loss": 0.0023, + "step": 89170 + }, + { + "grad_norm": 0.022597748786211014, + "learning_rate": 3.1672913553995986e-06, + "loss": 0.0021, + "step": 89180 + }, + { + "grad_norm": 0.027531679719686508, + "learning_rate": 3.1615025462401138e-06, + "loss": 0.0024, + "step": 89190 + }, + { + "grad_norm": 0.02217499166727066, + "learning_rate": 3.1557188592682808e-06, + "loss": 0.0025, + "step": 89200 + }, + { + "grad_norm": 0.026957225054502487, + "learning_rate": 3.1499402951166045e-06, + "loss": 0.0018, + "step": 89210 + }, + { + "grad_norm": 0.012615077197551727, + "learning_rate": 3.144166854417019e-06, + "loss": 0.0037, + "step": 89220 + }, + { + "grad_norm": 0.06982284784317017, + "learning_rate": 3.1383985378009073e-06, + "loss": 0.0028, + "step": 89230 + }, + { + "grad_norm": 0.04156695306301117, + "learning_rate": 3.1326353458990654e-06, + "loss": 0.0032, + "step": 89240 + }, + { + "grad_norm": 0.03752901405096054, + "learning_rate": 3.126877279341761e-06, + "loss": 0.0039, + "step": 89250 + }, + { + "grad_norm": 0.02267388626933098, + "learning_rate": 3.1211243387586964e-06, + "loss": 0.0022, + "step": 89260 + }, + { + "grad_norm": 0.04262275993824005, + "learning_rate": 3.115376524778979e-06, + "loss": 0.003, + "step": 89270 + }, + { + "grad_norm": 0.03640120103955269, + "learning_rate": 3.109633838031195e-06, + "loss": 0.0034, + "step": 89280 + }, + { + "grad_norm": 0.01989077217876911, + "learning_rate": 3.103896279143359e-06, + "loss": 0.0019, + "step": 89290 + }, + { + "grad_norm": 0.017416896298527718, + "learning_rate": 3.098163848742924e-06, + "loss": 0.0014, + "step": 89300 + }, + { + "grad_norm": 0.026388388127088547, + "learning_rate": 3.0924365474567675e-06, + "loss": 0.002, + "step": 89310 + }, + { + "grad_norm": 0.02839740552008152, + "learning_rate": 3.086714375911237e-06, + "loss": 0.0018, + "step": 89320 + }, + { + "grad_norm": 0.015699971467256546, + "learning_rate": 3.0809973347320775e-06, + "loss": 0.0016, + "step": 89330 + }, + { + "grad_norm": 0.02457539737224579, + "learning_rate": 3.075285424544516e-06, + "loss": 0.0023, + "step": 89340 + }, + { + "grad_norm": 0.010486040264368057, + "learning_rate": 3.069578645973176e-06, + "loss": 0.0024, + "step": 89350 + }, + { + "grad_norm": 0.023888355121016502, + "learning_rate": 3.0638769996421735e-06, + "loss": 0.0016, + "step": 89360 + }, + { + "grad_norm": 0.019940732046961784, + "learning_rate": 3.058180486175005e-06, + "loss": 0.0026, + "step": 89370 + }, + { + "grad_norm": 0.012861749157309532, + "learning_rate": 3.052489106194645e-06, + "loss": 0.004, + "step": 89380 + }, + { + "grad_norm": 0.04430745169520378, + "learning_rate": 3.0468028603234946e-06, + "loss": 0.0025, + "step": 89390 + }, + { + "grad_norm": 0.015150471590459347, + "learning_rate": 3.04112174918339e-06, + "loss": 0.0019, + "step": 89400 + }, + { + "grad_norm": 0.02261846512556076, + "learning_rate": 3.0354457733955954e-06, + "loss": 0.0024, + "step": 89410 + }, + { + "grad_norm": 0.017678560689091682, + "learning_rate": 3.029774933580859e-06, + "loss": 0.0017, + "step": 89420 + }, + { + "grad_norm": 0.023941954597830772, + "learning_rate": 3.0241092303593065e-06, + "loss": 0.0024, + "step": 89430 + }, + { + "grad_norm": 0.027008764445781708, + "learning_rate": 3.018448664350554e-06, + "loss": 0.0023, + "step": 89440 + }, + { + "grad_norm": 0.028138548135757446, + "learning_rate": 3.0127932361736055e-06, + "loss": 0.0024, + "step": 89450 + }, + { + "grad_norm": 0.036296647042036057, + "learning_rate": 3.00714294644695e-06, + "loss": 0.0022, + "step": 89460 + }, + { + "grad_norm": 0.021403588354587555, + "learning_rate": 3.0014977957884828e-06, + "loss": 0.0016, + "step": 89470 + }, + { + "grad_norm": 0.015027711167931557, + "learning_rate": 2.99585778481557e-06, + "loss": 0.003, + "step": 89480 + }, + { + "grad_norm": 0.021525947377085686, + "learning_rate": 2.990222914144969e-06, + "loss": 0.0024, + "step": 89490 + }, + { + "grad_norm": 0.015267075970768929, + "learning_rate": 2.984593184392914e-06, + "loss": 0.0018, + "step": 89500 + }, + { + "grad_norm": 0.020447958260774612, + "learning_rate": 2.978968596175069e-06, + "loss": 0.0025, + "step": 89510 + }, + { + "grad_norm": 0.018126066774129868, + "learning_rate": 2.9733491501065137e-06, + "loss": 0.0016, + "step": 89520 + }, + { + "grad_norm": 0.03207344934344292, + "learning_rate": 2.9677348468018005e-06, + "loss": 0.0028, + "step": 89530 + }, + { + "grad_norm": 0.019372353330254555, + "learning_rate": 2.9621256868748726e-06, + "loss": 0.0027, + "step": 89540 + }, + { + "grad_norm": 0.047735415399074554, + "learning_rate": 2.956521670939183e-06, + "loss": 0.0026, + "step": 89550 + }, + { + "grad_norm": 0.03025713935494423, + "learning_rate": 2.950922799607542e-06, + "loss": 0.0026, + "step": 89560 + }, + { + "grad_norm": 0.04032577946782112, + "learning_rate": 2.9453290734922535e-06, + "loss": 0.0022, + "step": 89570 + }, + { + "grad_norm": 0.01379216369241476, + "learning_rate": 2.939740493205029e-06, + "loss": 0.0022, + "step": 89580 + }, + { + "grad_norm": 0.04294392466545105, + "learning_rate": 2.9341570593570345e-06, + "loss": 0.0035, + "step": 89590 + }, + { + "grad_norm": 0.01506323367357254, + "learning_rate": 2.928578772558843e-06, + "loss": 0.0019, + "step": 89600 + }, + { + "grad_norm": 0.0235229954123497, + "learning_rate": 2.923005633420528e-06, + "loss": 0.002, + "step": 89610 + }, + { + "grad_norm": 0.04501693695783615, + "learning_rate": 2.9174376425515237e-06, + "loss": 0.0035, + "step": 89620 + }, + { + "grad_norm": 0.01755804568529129, + "learning_rate": 2.91187480056076e-06, + "loss": 0.0038, + "step": 89630 + }, + { + "grad_norm": 0.020056329667568207, + "learning_rate": 2.9063171080565664e-06, + "loss": 0.0027, + "step": 89640 + }, + { + "grad_norm": 0.02195962704718113, + "learning_rate": 2.900764565646741e-06, + "loss": 0.0015, + "step": 89650 + }, + { + "grad_norm": 0.013276654295623302, + "learning_rate": 2.895217173938475e-06, + "loss": 0.0027, + "step": 89660 + }, + { + "grad_norm": 0.03105398267507553, + "learning_rate": 2.8896749335384445e-06, + "loss": 0.0021, + "step": 89670 + }, + { + "grad_norm": 0.012128877453505993, + "learning_rate": 2.884137845052731e-06, + "loss": 0.002, + "step": 89680 + }, + { + "grad_norm": 0.0187408234924078, + "learning_rate": 2.8786059090868734e-06, + "loss": 0.0022, + "step": 89690 + }, + { + "grad_norm": 0.01559762004762888, + "learning_rate": 2.87307912624582e-06, + "loss": 0.0024, + "step": 89700 + }, + { + "grad_norm": 0.03561851382255554, + "learning_rate": 2.867557497133977e-06, + "loss": 0.0029, + "step": 89710 + }, + { + "grad_norm": 0.014765194617211819, + "learning_rate": 2.862041022355194e-06, + "loss": 0.0017, + "step": 89720 + }, + { + "grad_norm": 0.03571180999279022, + "learning_rate": 2.8565297025127226e-06, + "loss": 0.0025, + "step": 89730 + }, + { + "grad_norm": 0.021472642198204994, + "learning_rate": 2.8510235382092853e-06, + "loss": 0.0022, + "step": 89740 + }, + { + "grad_norm": 0.018259497359395027, + "learning_rate": 2.845522530047029e-06, + "loss": 0.0028, + "step": 89750 + }, + { + "grad_norm": 0.014758275821805, + "learning_rate": 2.8400266786275387e-06, + "loss": 0.0025, + "step": 89760 + }, + { + "grad_norm": 0.010043053887784481, + "learning_rate": 2.834535984551817e-06, + "loss": 0.0017, + "step": 89770 + }, + { + "grad_norm": 0.022679448127746582, + "learning_rate": 2.8290504484203397e-06, + "loss": 0.0017, + "step": 89780 + }, + { + "grad_norm": 0.015556752681732178, + "learning_rate": 2.8235700708329705e-06, + "loss": 0.0024, + "step": 89790 + }, + { + "grad_norm": 0.022330205887556076, + "learning_rate": 2.818094852389058e-06, + "loss": 0.0019, + "step": 89800 + }, + { + "grad_norm": 0.01313540805131197, + "learning_rate": 2.8126247936873516e-06, + "loss": 0.0028, + "step": 89810 + }, + { + "grad_norm": 0.024319281801581383, + "learning_rate": 2.8071598953260614e-06, + "loss": 0.0016, + "step": 89820 + }, + { + "grad_norm": 0.012206087820231915, + "learning_rate": 2.8017001579028035e-06, + "loss": 0.0024, + "step": 89830 + }, + { + "grad_norm": 0.017504287883639336, + "learning_rate": 2.7962455820146617e-06, + "loss": 0.0024, + "step": 89840 + }, + { + "grad_norm": 0.015037348493933678, + "learning_rate": 2.7907961682581253e-06, + "loss": 0.0014, + "step": 89850 + }, + { + "grad_norm": 0.017042124643921852, + "learning_rate": 2.7853519172291453e-06, + "loss": 0.0019, + "step": 89860 + }, + { + "grad_norm": 0.01706836000084877, + "learning_rate": 2.7799128295230904e-06, + "loss": 0.0024, + "step": 89870 + }, + { + "grad_norm": 0.022379426285624504, + "learning_rate": 2.774478905734784e-06, + "loss": 0.0024, + "step": 89880 + }, + { + "grad_norm": 0.023805709555745125, + "learning_rate": 2.7690501464584516e-06, + "loss": 0.0019, + "step": 89890 + }, + { + "grad_norm": 0.015287086367607117, + "learning_rate": 2.763626552287796e-06, + "loss": 0.0018, + "step": 89900 + }, + { + "grad_norm": 0.015177331864833832, + "learning_rate": 2.7582081238159083e-06, + "loss": 0.0018, + "step": 89910 + }, + { + "grad_norm": 0.014831740409135818, + "learning_rate": 2.7527948616353548e-06, + "loss": 0.0031, + "step": 89920 + }, + { + "grad_norm": 0.015406320802867413, + "learning_rate": 2.7473867663381226e-06, + "loss": 0.0017, + "step": 89930 + }, + { + "grad_norm": 0.014951076358556747, + "learning_rate": 2.7419838385156383e-06, + "loss": 0.0018, + "step": 89940 + }, + { + "grad_norm": 0.013708679005503654, + "learning_rate": 2.7365860787587407e-06, + "loss": 0.0024, + "step": 89950 + }, + { + "grad_norm": 0.014701035805046558, + "learning_rate": 2.7311934876577295e-06, + "loss": 0.002, + "step": 89960 + }, + { + "grad_norm": 0.02166191302239895, + "learning_rate": 2.725806065802339e-06, + "loss": 0.0022, + "step": 89970 + }, + { + "grad_norm": 0.009663552977144718, + "learning_rate": 2.720423813781714e-06, + "loss": 0.0019, + "step": 89980 + }, + { + "grad_norm": 0.019851012155413628, + "learning_rate": 2.715046732184462e-06, + "loss": 0.0019, + "step": 89990 + }, + { + "grad_norm": 0.024573707953095436, + "learning_rate": 2.7096748215986013e-06, + "loss": 0.0028, + "step": 90000 + }, + { + "grad_norm": 0.014993350021541119, + "learning_rate": 2.704308082611612e-06, + "loss": 0.0016, + "step": 90010 + }, + { + "grad_norm": 0.024771559983491898, + "learning_rate": 2.698946515810374e-06, + "loss": 0.0015, + "step": 90020 + }, + { + "grad_norm": 0.016732681542634964, + "learning_rate": 2.6935901217812367e-06, + "loss": 0.0032, + "step": 90030 + }, + { + "grad_norm": 0.019977448508143425, + "learning_rate": 2.688238901109952e-06, + "loss": 0.0017, + "step": 90040 + }, + { + "grad_norm": 0.01778051257133484, + "learning_rate": 2.68289285438173e-06, + "loss": 0.004, + "step": 90050 + }, + { + "grad_norm": 0.013286244124174118, + "learning_rate": 2.6775519821812033e-06, + "loss": 0.0031, + "step": 90060 + }, + { + "grad_norm": 0.011970015242695808, + "learning_rate": 2.672216285092449e-06, + "loss": 0.0013, + "step": 90070 + }, + { + "grad_norm": 0.023663874715566635, + "learning_rate": 2.666885763698962e-06, + "loss": 0.0014, + "step": 90080 + }, + { + "grad_norm": 0.03757324442267418, + "learning_rate": 2.661560418583692e-06, + "loss": 0.0023, + "step": 90090 + }, + { + "grad_norm": 0.01356517430394888, + "learning_rate": 2.656240250328995e-06, + "loss": 0.0016, + "step": 90100 + }, + { + "grad_norm": 0.016168558970093727, + "learning_rate": 2.650925259516679e-06, + "loss": 0.0013, + "step": 90110 + }, + { + "grad_norm": 0.0215110145509243, + "learning_rate": 2.645615446727995e-06, + "loss": 0.0024, + "step": 90120 + }, + { + "grad_norm": 0.018189437687397003, + "learning_rate": 2.6403108125436115e-06, + "loss": 0.0017, + "step": 90130 + }, + { + "grad_norm": 0.07015414535999298, + "learning_rate": 2.6350113575436264e-06, + "loss": 0.0031, + "step": 90140 + }, + { + "grad_norm": 0.01023081038147211, + "learning_rate": 2.629717082307598e-06, + "loss": 0.0017, + "step": 90150 + }, + { + "grad_norm": 0.013279285281896591, + "learning_rate": 2.6244279874144793e-06, + "loss": 0.0016, + "step": 90160 + }, + { + "grad_norm": 0.022618988528847694, + "learning_rate": 2.6191440734426862e-06, + "loss": 0.0027, + "step": 90170 + }, + { + "grad_norm": 0.02021166868507862, + "learning_rate": 2.613865340970062e-06, + "loss": 0.002, + "step": 90180 + }, + { + "grad_norm": 0.016131877899169922, + "learning_rate": 2.608591790573889e-06, + "loss": 0.0025, + "step": 90190 + }, + { + "grad_norm": 0.010189102962613106, + "learning_rate": 2.603323422830861e-06, + "loss": 0.0037, + "step": 90200 + }, + { + "grad_norm": 0.018096257001161575, + "learning_rate": 2.5980602383171183e-06, + "loss": 0.0026, + "step": 90210 + }, + { + "grad_norm": 0.03451094776391983, + "learning_rate": 2.5928022376082496e-06, + "loss": 0.0028, + "step": 90220 + }, + { + "grad_norm": 0.017724117264151573, + "learning_rate": 2.5875494212792505e-06, + "loss": 0.0014, + "step": 90230 + }, + { + "grad_norm": 0.01283317431807518, + "learning_rate": 2.5823017899045565e-06, + "loss": 0.0032, + "step": 90240 + }, + { + "grad_norm": 0.008469223976135254, + "learning_rate": 2.577059344058047e-06, + "loss": 0.0016, + "step": 90250 + }, + { + "grad_norm": 0.010164371691644192, + "learning_rate": 2.5718220843130415e-06, + "loss": 0.0024, + "step": 90260 + }, + { + "grad_norm": 0.0351487472653389, + "learning_rate": 2.5665900112422535e-06, + "loss": 0.002, + "step": 90270 + }, + { + "grad_norm": 0.020210085436701775, + "learning_rate": 2.561363125417876e-06, + "loss": 0.0026, + "step": 90280 + }, + { + "grad_norm": 0.0569133497774601, + "learning_rate": 2.556141427411496e-06, + "loss": 0.0028, + "step": 90290 + }, + { + "grad_norm": 0.01041068509221077, + "learning_rate": 2.550924917794156e-06, + "loss": 0.0031, + "step": 90300 + }, + { + "grad_norm": 0.0476142019033432, + "learning_rate": 2.545713597136329e-06, + "loss": 0.0024, + "step": 90310 + }, + { + "grad_norm": 0.009860595688223839, + "learning_rate": 2.5405074660079298e-06, + "loss": 0.0017, + "step": 90320 + }, + { + "grad_norm": 0.014486596919596195, + "learning_rate": 2.5353065249782647e-06, + "loss": 0.0029, + "step": 90330 + }, + { + "grad_norm": 0.04650630056858063, + "learning_rate": 2.530110774616129e-06, + "loss": 0.0018, + "step": 90340 + }, + { + "grad_norm": 0.014852553606033325, + "learning_rate": 2.524920215489701e-06, + "loss": 0.0018, + "step": 90350 + }, + { + "grad_norm": 0.023346055299043655, + "learning_rate": 2.519734848166627e-06, + "loss": 0.0019, + "step": 90360 + }, + { + "grad_norm": 0.020488198846578598, + "learning_rate": 2.514554673213948e-06, + "loss": 0.0021, + "step": 90370 + }, + { + "grad_norm": 0.03124960884451866, + "learning_rate": 2.5093796911981936e-06, + "loss": 0.002, + "step": 90380 + }, + { + "grad_norm": 0.024665694683790207, + "learning_rate": 2.5042099026852672e-06, + "loss": 0.003, + "step": 90390 + }, + { + "grad_norm": 0.01654595509171486, + "learning_rate": 2.499045308240544e-06, + "loss": 0.0027, + "step": 90400 + }, + { + "grad_norm": 0.01633097231388092, + "learning_rate": 2.4938859084288003e-06, + "loss": 0.0024, + "step": 90410 + }, + { + "grad_norm": 0.03019111603498459, + "learning_rate": 2.4887317038142734e-06, + "loss": 0.0016, + "step": 90420 + }, + { + "grad_norm": 0.014905605465173721, + "learning_rate": 2.4835826949606124e-06, + "loss": 0.0028, + "step": 90430 + }, + { + "grad_norm": 0.010662300512194633, + "learning_rate": 2.4784388824309167e-06, + "loss": 0.0013, + "step": 90440 + }, + { + "grad_norm": 0.029651599004864693, + "learning_rate": 2.473300266787687e-06, + "loss": 0.0024, + "step": 90450 + }, + { + "grad_norm": 0.017726071178913116, + "learning_rate": 2.46816684859289e-06, + "loss": 0.0028, + "step": 90460 + }, + { + "grad_norm": 0.01900009997189045, + "learning_rate": 2.4630386284079043e-06, + "loss": 0.0017, + "step": 90470 + }, + { + "grad_norm": 0.023812832310795784, + "learning_rate": 2.4579156067935427e-06, + "loss": 0.0019, + "step": 90480 + }, + { + "grad_norm": 0.03779950365424156, + "learning_rate": 2.452797784310046e-06, + "loss": 0.0025, + "step": 90490 + }, + { + "grad_norm": 0.04471295699477196, + "learning_rate": 2.447685161517105e-06, + "loss": 0.0025, + "step": 90500 + }, + { + "grad_norm": 0.03445639833807945, + "learning_rate": 2.4425777389738224e-06, + "loss": 0.0039, + "step": 90510 + }, + { + "grad_norm": 0.025157280266284943, + "learning_rate": 2.43747551723873e-06, + "loss": 0.0024, + "step": 90520 + }, + { + "grad_norm": 0.019906992092728615, + "learning_rate": 2.4323784968698136e-06, + "loss": 0.0023, + "step": 90530 + }, + { + "grad_norm": 0.02632165513932705, + "learning_rate": 2.4272866784244565e-06, + "loss": 0.0024, + "step": 90540 + }, + { + "grad_norm": 0.03035512939095497, + "learning_rate": 2.422200062459518e-06, + "loss": 0.0031, + "step": 90550 + }, + { + "grad_norm": 0.016455985605716705, + "learning_rate": 2.41711864953123e-06, + "loss": 0.0035, + "step": 90560 + }, + { + "grad_norm": 0.03638874739408493, + "learning_rate": 2.412042440195322e-06, + "loss": 0.002, + "step": 90570 + }, + { + "grad_norm": 0.013890103437006474, + "learning_rate": 2.4069714350068993e-06, + "loss": 0.0019, + "step": 90580 + }, + { + "grad_norm": 0.014236791990697384, + "learning_rate": 2.4019056345205294e-06, + "loss": 0.0026, + "step": 90590 + }, + { + "grad_norm": 0.043265506625175476, + "learning_rate": 2.396845039290191e-06, + "loss": 0.0021, + "step": 90600 + }, + { + "grad_norm": 0.014517923817038536, + "learning_rate": 2.3917896498693194e-06, + "loss": 0.0019, + "step": 90610 + }, + { + "grad_norm": 0.026595473289489746, + "learning_rate": 2.3867394668107335e-06, + "loss": 0.0019, + "step": 90620 + }, + { + "grad_norm": 0.016129644587635994, + "learning_rate": 2.3816944906667527e-06, + "loss": 0.0033, + "step": 90630 + }, + { + "grad_norm": 0.018311863765120506, + "learning_rate": 2.3766547219890634e-06, + "loss": 0.0017, + "step": 90640 + }, + { + "grad_norm": 0.04152851924300194, + "learning_rate": 2.371620161328819e-06, + "loss": 0.0023, + "step": 90650 + }, + { + "grad_norm": 0.020945396274328232, + "learning_rate": 2.366590809236574e-06, + "loss": 0.0017, + "step": 90660 + }, + { + "grad_norm": 0.0429592989385128, + "learning_rate": 2.3615666662623494e-06, + "loss": 0.0027, + "step": 90670 + }, + { + "grad_norm": 0.019307328388094902, + "learning_rate": 2.3565477329555664e-06, + "loss": 0.0025, + "step": 90680 + }, + { + "grad_norm": 0.03569059446454048, + "learning_rate": 2.3515340098650975e-06, + "loss": 0.0016, + "step": 90690 + }, + { + "grad_norm": 0.0422389842569828, + "learning_rate": 2.3465254975392258e-06, + "loss": 0.0024, + "step": 90700 + }, + { + "grad_norm": 0.04335566610097885, + "learning_rate": 2.3415221965256805e-06, + "loss": 0.0026, + "step": 90710 + }, + { + "grad_norm": 0.013869221322238445, + "learning_rate": 2.3365241073716237e-06, + "loss": 0.0019, + "step": 90720 + }, + { + "grad_norm": 0.02334478311240673, + "learning_rate": 2.3315312306236183e-06, + "loss": 0.0046, + "step": 90730 + }, + { + "grad_norm": 0.03693876415491104, + "learning_rate": 2.326543566827699e-06, + "loss": 0.0026, + "step": 90740 + }, + { + "grad_norm": 0.03735907003283501, + "learning_rate": 2.3215611165292862e-06, + "loss": 0.0037, + "step": 90750 + }, + { + "grad_norm": 0.02568061836063862, + "learning_rate": 2.3165838802732765e-06, + "loss": 0.0026, + "step": 90760 + }, + { + "grad_norm": 0.02347010374069214, + "learning_rate": 2.311611858603957e-06, + "loss": 0.0025, + "step": 90770 + }, + { + "grad_norm": 0.017098670825362206, + "learning_rate": 2.3066450520650763e-06, + "loss": 0.0013, + "step": 90780 + }, + { + "grad_norm": 0.009404325857758522, + "learning_rate": 2.3016834611997773e-06, + "loss": 0.0024, + "step": 90790 + }, + { + "grad_norm": 0.048544783145189285, + "learning_rate": 2.296727086550665e-06, + "loss": 0.003, + "step": 90800 + }, + { + "grad_norm": 0.03089512698352337, + "learning_rate": 2.2917759286597507e-06, + "loss": 0.0022, + "step": 90810 + }, + { + "grad_norm": 0.01652979850769043, + "learning_rate": 2.2868299880685006e-06, + "loss": 0.0025, + "step": 90820 + }, + { + "grad_norm": 0.016029948368668556, + "learning_rate": 2.2818892653177827e-06, + "loss": 0.0025, + "step": 90830 + }, + { + "grad_norm": 0.05674741789698601, + "learning_rate": 2.2769537609479196e-06, + "loss": 0.0032, + "step": 90840 + }, + { + "grad_norm": 0.04209890961647034, + "learning_rate": 2.2720234754986357e-06, + "loss": 0.0026, + "step": 90850 + }, + { + "grad_norm": 0.017132757231593132, + "learning_rate": 2.2670984095091108e-06, + "loss": 0.0018, + "step": 90860 + }, + { + "grad_norm": 0.014810566790401936, + "learning_rate": 2.262178563517936e-06, + "loss": 0.0018, + "step": 90870 + }, + { + "grad_norm": 0.014275037683546543, + "learning_rate": 2.257263938063137e-06, + "loss": 0.0026, + "step": 90880 + }, + { + "grad_norm": 0.014178628101944923, + "learning_rate": 2.2523545336821715e-06, + "loss": 0.0037, + "step": 90890 + }, + { + "grad_norm": 0.039015091955661774, + "learning_rate": 2.247450350911939e-06, + "loss": 0.0017, + "step": 90900 + }, + { + "grad_norm": 0.025811558589339256, + "learning_rate": 2.2425513902887328e-06, + "loss": 0.0028, + "step": 90910 + }, + { + "grad_norm": 0.017607081681489944, + "learning_rate": 2.2376576523483015e-06, + "loss": 0.0021, + "step": 90920 + }, + { + "grad_norm": 0.04697834327816963, + "learning_rate": 2.232769137625829e-06, + "loss": 0.0032, + "step": 90930 + }, + { + "grad_norm": 0.058639634400606155, + "learning_rate": 2.227885846655903e-06, + "loss": 0.0036, + "step": 90940 + }, + { + "grad_norm": 0.010727199725806713, + "learning_rate": 2.2230077799725523e-06, + "loss": 0.0023, + "step": 90950 + }, + { + "grad_norm": 0.020874876528978348, + "learning_rate": 2.2181349381092396e-06, + "loss": 0.0034, + "step": 90960 + }, + { + "grad_norm": 0.011134439148008823, + "learning_rate": 2.2132673215988552e-06, + "loss": 0.0016, + "step": 90970 + }, + { + "grad_norm": 0.019890425726771355, + "learning_rate": 2.2084049309737065e-06, + "loss": 0.0022, + "step": 90980 + }, + { + "grad_norm": 0.01919279247522354, + "learning_rate": 2.2035477667655513e-06, + "loss": 0.0015, + "step": 90990 + }, + { + "grad_norm": 0.008509882725775242, + "learning_rate": 2.1986958295055315e-06, + "loss": 0.0028, + "step": 91000 + }, + { + "grad_norm": 0.027538100257515907, + "learning_rate": 2.193849119724284e-06, + "loss": 0.0018, + "step": 91010 + }, + { + "grad_norm": 0.020119087770581245, + "learning_rate": 2.1890076379518175e-06, + "loss": 0.0028, + "step": 91020 + }, + { + "grad_norm": 0.02176668308675289, + "learning_rate": 2.1841713847175925e-06, + "loss": 0.0038, + "step": 91030 + }, + { + "grad_norm": 0.023405374959111214, + "learning_rate": 2.1793403605504968e-06, + "loss": 0.0023, + "step": 91040 + }, + { + "grad_norm": 0.040697112679481506, + "learning_rate": 2.1745145659788413e-06, + "loss": 0.0022, + "step": 91050 + }, + { + "grad_norm": 0.010019616223871708, + "learning_rate": 2.1696940015303645e-06, + "loss": 0.0025, + "step": 91060 + }, + { + "grad_norm": 0.017984559759497643, + "learning_rate": 2.1648786677322397e-06, + "loss": 0.0014, + "step": 91070 + }, + { + "grad_norm": 0.01559508964419365, + "learning_rate": 2.1600685651110673e-06, + "loss": 0.0015, + "step": 91080 + }, + { + "grad_norm": 0.02493547648191452, + "learning_rate": 2.1552636941928717e-06, + "loss": 0.0019, + "step": 91090 + }, + { + "grad_norm": 0.028546659275889397, + "learning_rate": 2.1504640555031043e-06, + "loss": 0.0017, + "step": 91100 + }, + { + "grad_norm": 0.021041732281446457, + "learning_rate": 2.1456696495666506e-06, + "loss": 0.0017, + "step": 91110 + }, + { + "grad_norm": 0.010566523298621178, + "learning_rate": 2.1408804769078084e-06, + "loss": 0.0028, + "step": 91120 + }, + { + "grad_norm": 0.023470014333724976, + "learning_rate": 2.136096538050325e-06, + "loss": 0.0029, + "step": 91130 + }, + { + "grad_norm": 0.020141076296567917, + "learning_rate": 2.13131783351736e-06, + "loss": 0.0025, + "step": 91140 + }, + { + "grad_norm": 0.030595120042562485, + "learning_rate": 2.126544363831512e-06, + "loss": 0.0026, + "step": 91150 + }, + { + "grad_norm": 0.02744648978114128, + "learning_rate": 2.121776129514791e-06, + "loss": 0.0024, + "step": 91160 + }, + { + "grad_norm": 0.04336639866232872, + "learning_rate": 2.1170131310886523e-06, + "loss": 0.0031, + "step": 91170 + }, + { + "grad_norm": 0.037448201328516006, + "learning_rate": 2.112255369073968e-06, + "loss": 0.0029, + "step": 91180 + }, + { + "grad_norm": 0.039915625005960464, + "learning_rate": 2.1075028439910328e-06, + "loss": 0.0032, + "step": 91190 + }, + { + "grad_norm": 0.026769742369651794, + "learning_rate": 2.1027555563595813e-06, + "loss": 0.0021, + "step": 91200 + }, + { + "grad_norm": 0.012523406185209751, + "learning_rate": 2.0980135066987706e-06, + "loss": 0.0019, + "step": 91210 + }, + { + "grad_norm": 0.013206972740590572, + "learning_rate": 2.093276695527191e-06, + "loss": 0.0027, + "step": 91220 + }, + { + "grad_norm": 0.050351690500974655, + "learning_rate": 2.0885451233628396e-06, + "loss": 0.0032, + "step": 91230 + }, + { + "grad_norm": 0.025184446945786476, + "learning_rate": 2.0838187907231633e-06, + "loss": 0.0019, + "step": 91240 + }, + { + "grad_norm": 0.027351438999176025, + "learning_rate": 2.0790976981250153e-06, + "loss": 0.0044, + "step": 91250 + }, + { + "grad_norm": 0.021577689796686172, + "learning_rate": 2.0743818460847e-06, + "loss": 0.0022, + "step": 91260 + }, + { + "grad_norm": 0.02408759854733944, + "learning_rate": 2.069671235117926e-06, + "loss": 0.0029, + "step": 91270 + }, + { + "grad_norm": 0.016854578629136086, + "learning_rate": 2.0649658657398484e-06, + "loss": 0.0025, + "step": 91280 + }, + { + "grad_norm": 0.03914337977766991, + "learning_rate": 2.060265738465034e-06, + "loss": 0.0022, + "step": 91290 + }, + { + "grad_norm": 0.02205785922706127, + "learning_rate": 2.055570853807487e-06, + "loss": 0.0029, + "step": 91300 + }, + { + "grad_norm": 0.01856323331594467, + "learning_rate": 2.0508812122806196e-06, + "loss": 0.0026, + "step": 91310 + }, + { + "grad_norm": 0.012511871755123138, + "learning_rate": 2.046196814397294e-06, + "loss": 0.0016, + "step": 91320 + }, + { + "grad_norm": 0.022493204101920128, + "learning_rate": 2.0415176606697896e-06, + "loss": 0.0022, + "step": 91330 + }, + { + "grad_norm": 0.02814723365008831, + "learning_rate": 2.0368437516098126e-06, + "loss": 0.0031, + "step": 91340 + }, + { + "grad_norm": 0.013267644681036472, + "learning_rate": 2.032175087728483e-06, + "loss": 0.0021, + "step": 91350 + }, + { + "grad_norm": 0.013703178614377975, + "learning_rate": 2.027511669536375e-06, + "loss": 0.0019, + "step": 91360 + }, + { + "grad_norm": 0.015719277784228325, + "learning_rate": 2.0228534975434643e-06, + "loss": 0.0028, + "step": 91370 + }, + { + "grad_norm": 0.020269395783543587, + "learning_rate": 2.0182005722591548e-06, + "loss": 0.002, + "step": 91380 + }, + { + "grad_norm": 0.014376056380569935, + "learning_rate": 2.0135528941922944e-06, + "loss": 0.0032, + "step": 91390 + }, + { + "grad_norm": 0.024119645357131958, + "learning_rate": 2.008910463851149e-06, + "loss": 0.0026, + "step": 91400 + }, + { + "grad_norm": 0.009806450456380844, + "learning_rate": 2.0042732817433963e-06, + "loss": 0.0012, + "step": 91410 + }, + { + "grad_norm": 0.03561551496386528, + "learning_rate": 1.999641348376158e-06, + "loss": 0.003, + "step": 91420 + }, + { + "grad_norm": 0.017238428816199303, + "learning_rate": 1.9950146642559787e-06, + "loss": 0.0021, + "step": 91430 + }, + { + "grad_norm": 0.01200926024466753, + "learning_rate": 1.990393229888815e-06, + "loss": 0.0019, + "step": 91440 + }, + { + "grad_norm": 0.03648338466882706, + "learning_rate": 1.9857770457800684e-06, + "loss": 0.0029, + "step": 91450 + }, + { + "grad_norm": 0.018906405195593834, + "learning_rate": 1.9811661124345517e-06, + "loss": 0.0023, + "step": 91460 + }, + { + "grad_norm": 0.04380624741315842, + "learning_rate": 1.976560430356522e-06, + "loss": 0.0021, + "step": 91470 + }, + { + "grad_norm": 0.02090422436594963, + "learning_rate": 1.9719600000496383e-06, + "loss": 0.0018, + "step": 91480 + }, + { + "grad_norm": 0.02427992783486843, + "learning_rate": 1.967364822017004e-06, + "loss": 0.0029, + "step": 91490 + }, + { + "grad_norm": 0.012610163539648056, + "learning_rate": 1.962774896761127e-06, + "loss": 0.0016, + "step": 91500 + }, + { + "grad_norm": 0.01970115676522255, + "learning_rate": 1.958190224783968e-06, + "loss": 0.002, + "step": 91510 + }, + { + "grad_norm": 0.013640332967042923, + "learning_rate": 1.9536108065868975e-06, + "loss": 0.0025, + "step": 91520 + }, + { + "grad_norm": 0.032737940549850464, + "learning_rate": 1.949036642670715e-06, + "loss": 0.0051, + "step": 91530 + }, + { + "grad_norm": 0.03575201332569122, + "learning_rate": 1.9444677335356377e-06, + "loss": 0.0028, + "step": 91540 + }, + { + "grad_norm": 0.01651838794350624, + "learning_rate": 1.939904079681326e-06, + "loss": 0.0023, + "step": 91550 + }, + { + "grad_norm": 0.041277702897787094, + "learning_rate": 1.935345681606837e-06, + "loss": 0.0042, + "step": 91560 + }, + { + "grad_norm": 0.021158304065465927, + "learning_rate": 1.930792539810694e-06, + "loss": 0.0023, + "step": 91570 + }, + { + "grad_norm": 0.0267284344881773, + "learning_rate": 1.9262446547907877e-06, + "loss": 0.0031, + "step": 91580 + }, + { + "grad_norm": 0.019085662439465523, + "learning_rate": 1.921702027044503e-06, + "loss": 0.002, + "step": 91590 + }, + { + "grad_norm": 0.023946257308125496, + "learning_rate": 1.917164657068593e-06, + "loss": 0.0035, + "step": 91600 + }, + { + "grad_norm": 0.06287392228841782, + "learning_rate": 1.912632545359272e-06, + "loss": 0.0028, + "step": 91610 + }, + { + "grad_norm": 0.035319846123456955, + "learning_rate": 1.9081056924121488e-06, + "loss": 0.003, + "step": 91620 + }, + { + "grad_norm": 0.02702626772224903, + "learning_rate": 1.9035840987222887e-06, + "loss": 0.0021, + "step": 91630 + }, + { + "grad_norm": 0.013753378763794899, + "learning_rate": 1.8990677647841515e-06, + "loss": 0.0015, + "step": 91640 + }, + { + "grad_norm": 0.010275150649249554, + "learning_rate": 1.8945566910916534e-06, + "loss": 0.0027, + "step": 91650 + }, + { + "grad_norm": 0.011190405115485191, + "learning_rate": 1.8900508781381054e-06, + "loss": 0.0035, + "step": 91660 + }, + { + "grad_norm": 0.02783922478556633, + "learning_rate": 1.8855503264162633e-06, + "loss": 0.0025, + "step": 91670 + }, + { + "grad_norm": 0.016543840989470482, + "learning_rate": 1.8810550364182998e-06, + "loss": 0.0022, + "step": 91680 + }, + { + "grad_norm": 0.023816198110580444, + "learning_rate": 1.8765650086358112e-06, + "loss": 0.0019, + "step": 91690 + }, + { + "grad_norm": 0.024671219289302826, + "learning_rate": 1.8720802435598151e-06, + "loss": 0.0027, + "step": 91700 + }, + { + "grad_norm": 0.01989654079079628, + "learning_rate": 1.8676007416807694e-06, + "loss": 0.0026, + "step": 91710 + }, + { + "grad_norm": 0.026067662984132767, + "learning_rate": 1.8631265034885437e-06, + "loss": 0.0022, + "step": 91720 + }, + { + "grad_norm": 0.03591921925544739, + "learning_rate": 1.858657529472424e-06, + "loss": 0.0022, + "step": 91730 + }, + { + "grad_norm": 0.0193356703966856, + "learning_rate": 1.8541938201211473e-06, + "loss": 0.0024, + "step": 91740 + }, + { + "grad_norm": 0.020353728905320168, + "learning_rate": 1.8497353759228398e-06, + "loss": 0.0029, + "step": 91750 + }, + { + "grad_norm": 0.06436920166015625, + "learning_rate": 1.8452821973650835e-06, + "loss": 0.004, + "step": 91760 + }, + { + "grad_norm": 0.03793146088719368, + "learning_rate": 1.840834284934856e-06, + "loss": 0.002, + "step": 91770 + }, + { + "grad_norm": 0.028243744745850563, + "learning_rate": 1.8363916391185954e-06, + "loss": 0.0015, + "step": 91780 + }, + { + "grad_norm": 0.030394582077860832, + "learning_rate": 1.8319542604021245e-06, + "loss": 0.002, + "step": 91790 + }, + { + "grad_norm": 0.03629840910434723, + "learning_rate": 1.8275221492707273e-06, + "loss": 0.002, + "step": 91800 + }, + { + "grad_norm": 0.0168822780251503, + "learning_rate": 1.8230953062090717e-06, + "loss": 0.0024, + "step": 91810 + }, + { + "grad_norm": 0.010624134913086891, + "learning_rate": 1.8186737317012926e-06, + "loss": 0.004, + "step": 91820 + }, + { + "grad_norm": 0.031231572851538658, + "learning_rate": 1.8142574262308977e-06, + "loss": 0.0034, + "step": 91830 + }, + { + "grad_norm": 0.012700033374130726, + "learning_rate": 1.8098463902808727e-06, + "loss": 0.0021, + "step": 91840 + }, + { + "grad_norm": 0.011557129211723804, + "learning_rate": 1.805440624333593e-06, + "loss": 0.0021, + "step": 91850 + }, + { + "grad_norm": 0.01846330426633358, + "learning_rate": 1.8010401288708734e-06, + "loss": 0.0019, + "step": 91860 + }, + { + "grad_norm": 0.01858992502093315, + "learning_rate": 1.7966449043739343e-06, + "loss": 0.0024, + "step": 91870 + }, + { + "grad_norm": 0.03592892736196518, + "learning_rate": 1.79225495132343e-06, + "loss": 0.0019, + "step": 91880 + }, + { + "grad_norm": 0.021946720778942108, + "learning_rate": 1.7878702701994487e-06, + "loss": 0.0015, + "step": 91890 + }, + { + "grad_norm": 0.056974511593580246, + "learning_rate": 1.783490861481496e-06, + "loss": 0.0033, + "step": 91900 + }, + { + "grad_norm": 0.011900139041244984, + "learning_rate": 1.7791167256484832e-06, + "loss": 0.0015, + "step": 91910 + }, + { + "grad_norm": 0.03491552919149399, + "learning_rate": 1.774747863178766e-06, + "loss": 0.0031, + "step": 91920 + }, + { + "grad_norm": 0.03133043274283409, + "learning_rate": 1.7703842745501186e-06, + "loss": 0.0018, + "step": 91930 + }, + { + "grad_norm": 0.03810003399848938, + "learning_rate": 1.7660259602397366e-06, + "loss": 0.0018, + "step": 91940 + }, + { + "grad_norm": 0.03782160207629204, + "learning_rate": 1.7616729207242388e-06, + "loss": 0.0033, + "step": 91950 + }, + { + "grad_norm": 0.03977847471833229, + "learning_rate": 1.7573251564796556e-06, + "loss": 0.0029, + "step": 91960 + }, + { + "grad_norm": 0.03692584112286568, + "learning_rate": 1.7529826679814676e-06, + "loss": 0.002, + "step": 91970 + }, + { + "grad_norm": 0.01334875263273716, + "learning_rate": 1.7486454557045562e-06, + "loss": 0.0013, + "step": 91980 + }, + { + "grad_norm": 0.022967176511883736, + "learning_rate": 1.7443135201232364e-06, + "loss": 0.0024, + "step": 91990 + }, + { + "grad_norm": 0.01076300349086523, + "learning_rate": 1.7399868617112347e-06, + "loss": 0.0026, + "step": 92000 + }, + { + "grad_norm": 0.04196975752711296, + "learning_rate": 1.7356654809417171e-06, + "loss": 0.0016, + "step": 92010 + }, + { + "grad_norm": 0.011105312034487724, + "learning_rate": 1.7313493782872448e-06, + "loss": 0.0021, + "step": 92020 + }, + { + "grad_norm": 0.019912315532565117, + "learning_rate": 1.7270385542198453e-06, + "loss": 0.002, + "step": 92030 + }, + { + "grad_norm": 0.015698114410042763, + "learning_rate": 1.7227330092109307e-06, + "loss": 0.0043, + "step": 92040 + }, + { + "grad_norm": 0.0524146631360054, + "learning_rate": 1.718432743731352e-06, + "loss": 0.0026, + "step": 92050 + }, + { + "grad_norm": 0.031671248376369476, + "learning_rate": 1.7141377582513718e-06, + "loss": 0.0028, + "step": 92060 + }, + { + "grad_norm": 0.024930132552981377, + "learning_rate": 1.709848053240698e-06, + "loss": 0.0031, + "step": 92070 + }, + { + "grad_norm": 0.031352486461400986, + "learning_rate": 1.7055636291684328e-06, + "loss": 0.0052, + "step": 92080 + }, + { + "grad_norm": 0.018761195242404938, + "learning_rate": 1.7012844865031185e-06, + "loss": 0.0018, + "step": 92090 + }, + { + "grad_norm": 0.012346884235739708, + "learning_rate": 1.6970106257127138e-06, + "loss": 0.0018, + "step": 92100 + }, + { + "grad_norm": 0.0172087661921978, + "learning_rate": 1.6927420472646117e-06, + "loss": 0.002, + "step": 92110 + }, + { + "grad_norm": 0.0744134932756424, + "learning_rate": 1.6884787516256106e-06, + "loss": 0.0027, + "step": 92120 + }, + { + "grad_norm": 0.019127344712615013, + "learning_rate": 1.6842207392619325e-06, + "loss": 0.0016, + "step": 92130 + }, + { + "grad_norm": 0.009180591441690922, + "learning_rate": 1.679968010639238e-06, + "loss": 0.0026, + "step": 92140 + }, + { + "grad_norm": 0.04495637118816376, + "learning_rate": 1.6757205662225829e-06, + "loss": 0.0032, + "step": 92150 + }, + { + "grad_norm": 0.031226826831698418, + "learning_rate": 1.6714784064764844e-06, + "loss": 0.0024, + "step": 92160 + }, + { + "grad_norm": 0.009703872725367546, + "learning_rate": 1.6672415318648383e-06, + "loss": 0.0012, + "step": 92170 + }, + { + "grad_norm": 0.016858471557497978, + "learning_rate": 1.6630099428510016e-06, + "loss": 0.0019, + "step": 92180 + }, + { + "grad_norm": 0.00729716382920742, + "learning_rate": 1.6587836398977153e-06, + "loss": 0.002, + "step": 92190 + }, + { + "grad_norm": 0.018772853538393974, + "learning_rate": 1.6545626234671762e-06, + "loss": 0.0027, + "step": 92200 + }, + { + "grad_norm": 0.011594832874834538, + "learning_rate": 1.650346894020971e-06, + "loss": 0.0018, + "step": 92210 + }, + { + "grad_norm": 0.037880588322877884, + "learning_rate": 1.6461364520201472e-06, + "loss": 0.0025, + "step": 92220 + }, + { + "grad_norm": 0.057214654982089996, + "learning_rate": 1.6419312979251366e-06, + "loss": 0.0037, + "step": 92230 + }, + { + "grad_norm": 0.017813751474022865, + "learning_rate": 1.6377314321958214e-06, + "loss": 0.0014, + "step": 92240 + }, + { + "grad_norm": 0.015548999421298504, + "learning_rate": 1.6335368552914787e-06, + "loss": 0.002, + "step": 92250 + }, + { + "grad_norm": 0.015242846682667732, + "learning_rate": 1.6293475676708358e-06, + "loss": 0.0015, + "step": 92260 + }, + { + "grad_norm": 0.012288833037018776, + "learning_rate": 1.6251635697920097e-06, + "loss": 0.0038, + "step": 92270 + }, + { + "grad_norm": 0.017128419131040573, + "learning_rate": 1.6209848621125624e-06, + "loss": 0.003, + "step": 92280 + }, + { + "grad_norm": 0.021324310451745987, + "learning_rate": 1.6168114450894778e-06, + "loss": 0.0046, + "step": 92290 + }, + { + "grad_norm": 0.018668726086616516, + "learning_rate": 1.612643319179158e-06, + "loss": 0.0017, + "step": 92300 + }, + { + "grad_norm": 0.014730815775692463, + "learning_rate": 1.6084804848374046e-06, + "loss": 0.0025, + "step": 92310 + }, + { + "grad_norm": 0.015808021649718285, + "learning_rate": 1.6043229425194817e-06, + "loss": 0.0042, + "step": 92320 + }, + { + "grad_norm": 0.017095278948545456, + "learning_rate": 1.6001706926800308e-06, + "loss": 0.0026, + "step": 92330 + }, + { + "grad_norm": 0.03963102027773857, + "learning_rate": 1.5960237357731444e-06, + "loss": 0.0047, + "step": 92340 + }, + { + "grad_norm": 0.01749611645936966, + "learning_rate": 1.5918820722523264e-06, + "loss": 0.0018, + "step": 92350 + }, + { + "grad_norm": 0.012060071341693401, + "learning_rate": 1.587745702570509e-06, + "loss": 0.0025, + "step": 92360 + }, + { + "grad_norm": 0.04182910546660423, + "learning_rate": 1.5836146271800356e-06, + "loss": 0.0024, + "step": 92370 + }, + { + "grad_norm": 0.01356507558375597, + "learning_rate": 1.5794888465326728e-06, + "loss": 0.0021, + "step": 92380 + }, + { + "grad_norm": 0.02605275996029377, + "learning_rate": 1.5753683610796155e-06, + "loss": 0.0017, + "step": 92390 + }, + { + "grad_norm": 0.011454381048679352, + "learning_rate": 1.5712531712714528e-06, + "loss": 0.0019, + "step": 92400 + }, + { + "grad_norm": 0.02030136249959469, + "learning_rate": 1.5671432775582474e-06, + "loss": 0.0025, + "step": 92410 + }, + { + "grad_norm": 0.029687389731407166, + "learning_rate": 1.563038680389428e-06, + "loss": 0.0026, + "step": 92420 + }, + { + "grad_norm": 0.01683778688311577, + "learning_rate": 1.5589393802138808e-06, + "loss": 0.0022, + "step": 92430 + }, + { + "grad_norm": 0.022371487691998482, + "learning_rate": 1.554845377479891e-06, + "loss": 0.0017, + "step": 92440 + }, + { + "grad_norm": 0.019283682107925415, + "learning_rate": 1.550756672635173e-06, + "loss": 0.0019, + "step": 92450 + }, + { + "grad_norm": 0.00892780814319849, + "learning_rate": 1.5466732661268635e-06, + "loss": 0.0022, + "step": 92460 + }, + { + "grad_norm": 0.019277194514870644, + "learning_rate": 1.5425951584015163e-06, + "loss": 0.0038, + "step": 92470 + }, + { + "grad_norm": 0.035522542893886566, + "learning_rate": 1.5385223499051026e-06, + "loss": 0.0025, + "step": 92480 + }, + { + "grad_norm": 0.011474743485450745, + "learning_rate": 1.5344548410830328e-06, + "loss": 0.0019, + "step": 92490 + }, + { + "grad_norm": 0.051673050969839096, + "learning_rate": 1.5303926323801121e-06, + "loss": 0.0024, + "step": 92500 + }, + { + "grad_norm": 0.016718270257115364, + "learning_rate": 1.5263357242405795e-06, + "loss": 0.0014, + "step": 92510 + }, + { + "grad_norm": 0.028964348137378693, + "learning_rate": 1.5222841171080915e-06, + "loss": 0.002, + "step": 92520 + }, + { + "grad_norm": 0.024957535788416862, + "learning_rate": 1.5182378114257268e-06, + "loss": 0.0024, + "step": 92530 + }, + { + "grad_norm": 0.02174822799861431, + "learning_rate": 1.5141968076359869e-06, + "loss": 0.0023, + "step": 92540 + }, + { + "grad_norm": 0.014645094983279705, + "learning_rate": 1.5101611061807908e-06, + "loss": 0.0024, + "step": 92550 + }, + { + "grad_norm": 0.01686832681298256, + "learning_rate": 1.506130707501463e-06, + "loss": 0.0015, + "step": 92560 + }, + { + "grad_norm": 0.038122549653053284, + "learning_rate": 1.5021056120387734e-06, + "loss": 0.0022, + "step": 92570 + }, + { + "grad_norm": 0.01175700407475233, + "learning_rate": 1.4980858202329084e-06, + "loss": 0.002, + "step": 92580 + }, + { + "grad_norm": 0.02896813675761223, + "learning_rate": 1.4940713325234445e-06, + "loss": 0.0025, + "step": 92590 + }, + { + "grad_norm": 0.03550036624073982, + "learning_rate": 1.4900621493494137e-06, + "loss": 0.0023, + "step": 92600 + }, + { + "grad_norm": 0.03249985724687576, + "learning_rate": 1.4860582711492544e-06, + "loss": 0.0033, + "step": 92610 + }, + { + "grad_norm": 0.014694894663989544, + "learning_rate": 1.4820596983608325e-06, + "loss": 0.0019, + "step": 92620 + }, + { + "grad_norm": 0.016384799033403397, + "learning_rate": 1.4780664314214044e-06, + "loss": 0.0033, + "step": 92630 + }, + { + "grad_norm": 0.021816985681653023, + "learning_rate": 1.474078470767687e-06, + "loss": 0.0043, + "step": 92640 + }, + { + "grad_norm": 0.030487315729260445, + "learning_rate": 1.470095816835787e-06, + "loss": 0.0024, + "step": 92650 + }, + { + "grad_norm": 0.02387566864490509, + "learning_rate": 1.4661184700612395e-06, + "loss": 0.0019, + "step": 92660 + }, + { + "grad_norm": 0.010383524931967258, + "learning_rate": 1.4621464308790133e-06, + "loss": 0.0014, + "step": 92670 + }, + { + "grad_norm": 0.03936708718538284, + "learning_rate": 1.4581796997234775e-06, + "loss": 0.003, + "step": 92680 + }, + { + "grad_norm": 0.030798329040408134, + "learning_rate": 1.4542182770284297e-06, + "loss": 0.002, + "step": 92690 + }, + { + "grad_norm": 0.03397350013256073, + "learning_rate": 1.450262163227084e-06, + "loss": 0.0037, + "step": 92700 + }, + { + "grad_norm": 0.020420467481017113, + "learning_rate": 1.4463113587520671e-06, + "loss": 0.0031, + "step": 92710 + }, + { + "grad_norm": 0.023963352665305138, + "learning_rate": 1.4423658640354443e-06, + "loss": 0.0026, + "step": 92720 + }, + { + "grad_norm": 0.021930400282144547, + "learning_rate": 1.438425679508687e-06, + "loss": 0.0019, + "step": 92730 + }, + { + "grad_norm": 0.034245964139699936, + "learning_rate": 1.4344908056026895e-06, + "loss": 0.0029, + "step": 92740 + }, + { + "grad_norm": 0.02390623837709427, + "learning_rate": 1.4305612427477521e-06, + "loss": 0.0016, + "step": 92750 + }, + { + "grad_norm": 0.029477329924702644, + "learning_rate": 1.42663699137362e-06, + "loss": 0.0018, + "step": 92760 + }, + { + "grad_norm": 0.012957790866494179, + "learning_rate": 1.4227180519094386e-06, + "loss": 0.0018, + "step": 92770 + }, + { + "grad_norm": 0.01838185265660286, + "learning_rate": 1.4188044247837706e-06, + "loss": 0.0026, + "step": 92780 + }, + { + "grad_norm": 0.015036797150969505, + "learning_rate": 1.4148961104246128e-06, + "loss": 0.0028, + "step": 92790 + }, + { + "grad_norm": 0.023767635226249695, + "learning_rate": 1.4109931092593731e-06, + "loss": 0.0019, + "step": 92800 + }, + { + "grad_norm": 0.008663459680974483, + "learning_rate": 1.4070954217148658e-06, + "loss": 0.0037, + "step": 92810 + }, + { + "grad_norm": 0.01372005045413971, + "learning_rate": 1.4032030482173497e-06, + "loss": 0.0023, + "step": 92820 + }, + { + "grad_norm": 0.030037324875593185, + "learning_rate": 1.3993159891924901e-06, + "loss": 0.0021, + "step": 92830 + }, + { + "grad_norm": 0.017896799370646477, + "learning_rate": 1.3954342450653524e-06, + "loss": 0.0017, + "step": 92840 + }, + { + "grad_norm": 0.015665298327803612, + "learning_rate": 1.3915578162604582e-06, + "loss": 0.002, + "step": 92850 + }, + { + "grad_norm": 0.03096741810441017, + "learning_rate": 1.3876867032017126e-06, + "loss": 0.0026, + "step": 92860 + }, + { + "grad_norm": 0.011899460107088089, + "learning_rate": 1.383820906312472e-06, + "loss": 0.0034, + "step": 92870 + }, + { + "grad_norm": 0.027522992342710495, + "learning_rate": 1.3799604260154752e-06, + "loss": 0.0026, + "step": 92880 + }, + { + "grad_norm": 0.03857860341668129, + "learning_rate": 1.376105262732913e-06, + "loss": 0.0023, + "step": 92890 + }, + { + "grad_norm": 0.010908368043601513, + "learning_rate": 1.37225541688637e-06, + "loss": 0.0017, + "step": 92900 + }, + { + "grad_norm": 0.012550517916679382, + "learning_rate": 1.3684108888968594e-06, + "loss": 0.0033, + "step": 92910 + }, + { + "grad_norm": 0.0370565690100193, + "learning_rate": 1.364571679184823e-06, + "loss": 0.0023, + "step": 92920 + }, + { + "grad_norm": 0.015820564702153206, + "learning_rate": 1.3607377881701077e-06, + "loss": 0.002, + "step": 92930 + }, + { + "grad_norm": 0.015324960462749004, + "learning_rate": 1.3569092162719787e-06, + "loss": 0.0027, + "step": 92940 + }, + { + "grad_norm": 0.04513721913099289, + "learning_rate": 1.353085963909123e-06, + "loss": 0.0032, + "step": 92950 + }, + { + "grad_norm": 0.02506205625832081, + "learning_rate": 1.3492680314996454e-06, + "loss": 0.0021, + "step": 92960 + }, + { + "grad_norm": 0.014403065666556358, + "learning_rate": 1.345455419461078e-06, + "loss": 0.0029, + "step": 92970 + }, + { + "grad_norm": 0.031055361032485962, + "learning_rate": 1.3416481282103431e-06, + "loss": 0.0024, + "step": 92980 + }, + { + "grad_norm": 0.014350826852023602, + "learning_rate": 1.337846158163819e-06, + "loss": 0.0018, + "step": 92990 + }, + { + "grad_norm": 0.008586553856730461, + "learning_rate": 1.334049509737273e-06, + "loss": 0.0018, + "step": 93000 + }, + { + "grad_norm": 0.05802973359823227, + "learning_rate": 1.3302581833459116e-06, + "loss": 0.0013, + "step": 93010 + }, + { + "grad_norm": 0.021165847778320312, + "learning_rate": 1.3264721794043366e-06, + "loss": 0.0028, + "step": 93020 + }, + { + "grad_norm": 0.013030299916863441, + "learning_rate": 1.3226914983265836e-06, + "loss": 0.0013, + "step": 93030 + }, + { + "grad_norm": 0.024248603731393814, + "learning_rate": 1.3189161405260997e-06, + "loss": 0.0017, + "step": 93040 + }, + { + "grad_norm": 0.013537861406803131, + "learning_rate": 1.3151461064157656e-06, + "loss": 0.0018, + "step": 93050 + }, + { + "grad_norm": 0.0353720523416996, + "learning_rate": 1.3113813964078459e-06, + "loss": 0.0024, + "step": 93060 + }, + { + "grad_norm": 0.022167149931192398, + "learning_rate": 1.3076220109140614e-06, + "loss": 0.0021, + "step": 93070 + }, + { + "grad_norm": 0.02076791785657406, + "learning_rate": 1.3038679503455275e-06, + "loss": 0.0027, + "step": 93080 + }, + { + "grad_norm": 0.010149764828383923, + "learning_rate": 1.3001192151127773e-06, + "loss": 0.002, + "step": 93090 + }, + { + "grad_norm": 0.02007897011935711, + "learning_rate": 1.2963758056257657e-06, + "loss": 0.0035, + "step": 93100 + }, + { + "grad_norm": 0.02602553926408291, + "learning_rate": 1.2926377222938768e-06, + "loss": 0.0028, + "step": 93110 + }, + { + "grad_norm": 0.007534467615187168, + "learning_rate": 1.2889049655259e-06, + "loss": 0.0016, + "step": 93120 + }, + { + "grad_norm": 0.03598771244287491, + "learning_rate": 1.2851775357300312e-06, + "loss": 0.0021, + "step": 93130 + }, + { + "grad_norm": 0.01570749655365944, + "learning_rate": 1.281455433313916e-06, + "loss": 0.0017, + "step": 93140 + }, + { + "grad_norm": 0.02506762184202671, + "learning_rate": 1.2777386586845796e-06, + "loss": 0.0019, + "step": 93150 + }, + { + "grad_norm": 0.014325710013508797, + "learning_rate": 1.2740272122484964e-06, + "loss": 0.0025, + "step": 93160 + }, + { + "grad_norm": 0.03503662720322609, + "learning_rate": 1.2703210944115308e-06, + "loss": 0.0024, + "step": 93170 + }, + { + "grad_norm": 0.011500329710543156, + "learning_rate": 1.2666203055789916e-06, + "loss": 0.0021, + "step": 93180 + }, + { + "grad_norm": 0.008963054046034813, + "learning_rate": 1.2629248461555832e-06, + "loss": 0.0015, + "step": 93190 + }, + { + "grad_norm": 0.02137506753206253, + "learning_rate": 1.2592347165454431e-06, + "loss": 0.0034, + "step": 93200 + }, + { + "grad_norm": 0.016774242743849754, + "learning_rate": 1.2555499171521102e-06, + "loss": 0.002, + "step": 93210 + }, + { + "grad_norm": 0.011139837093651295, + "learning_rate": 1.251870448378556e-06, + "loss": 0.0028, + "step": 93220 + }, + { + "grad_norm": 0.037979647517204285, + "learning_rate": 1.2481963106271478e-06, + "loss": 0.003, + "step": 93230 + }, + { + "grad_norm": 0.016454188153147697, + "learning_rate": 1.2445275042997084e-06, + "loss": 0.0022, + "step": 93240 + }, + { + "grad_norm": 0.018174368888139725, + "learning_rate": 1.2408640297974283e-06, + "loss": 0.0019, + "step": 93250 + }, + { + "grad_norm": 0.0563533753156662, + "learning_rate": 1.2372058875209535e-06, + "loss": 0.0038, + "step": 93260 + }, + { + "grad_norm": 0.013146210461854935, + "learning_rate": 1.2335530778703252e-06, + "loss": 0.003, + "step": 93270 + }, + { + "grad_norm": 0.012114324606955051, + "learning_rate": 1.2299056012450182e-06, + "loss": 0.0028, + "step": 93280 + }, + { + "grad_norm": 0.03382180631160736, + "learning_rate": 1.2262634580439026e-06, + "loss": 0.002, + "step": 93290 + }, + { + "grad_norm": 0.017663758248090744, + "learning_rate": 1.2226266486652926e-06, + "loss": 0.0018, + "step": 93300 + }, + { + "grad_norm": 0.018202438950538635, + "learning_rate": 1.2189951735068927e-06, + "loss": 0.0014, + "step": 93310 + }, + { + "grad_norm": 0.017574485391378403, + "learning_rate": 1.2153690329658407e-06, + "loss": 0.0017, + "step": 93320 + }, + { + "grad_norm": 0.027495993301272392, + "learning_rate": 1.2117482274386859e-06, + "loss": 0.0024, + "step": 93330 + }, + { + "grad_norm": 0.011524206958711147, + "learning_rate": 1.2081327573213953e-06, + "loss": 0.0025, + "step": 93340 + }, + { + "grad_norm": 0.018623940646648407, + "learning_rate": 1.2045226230093465e-06, + "loss": 0.0019, + "step": 93350 + }, + { + "grad_norm": 0.009855702519416809, + "learning_rate": 1.2009178248973296e-06, + "loss": 0.0048, + "step": 93360 + }, + { + "grad_norm": 0.013616781681776047, + "learning_rate": 1.1973183633795849e-06, + "loss": 0.002, + "step": 93370 + }, + { + "grad_norm": 0.04401370882987976, + "learning_rate": 1.193724238849725e-06, + "loss": 0.0021, + "step": 93380 + }, + { + "grad_norm": 0.011754178442060947, + "learning_rate": 1.190135451700808e-06, + "loss": 0.0016, + "step": 93390 + }, + { + "grad_norm": 0.019668420776724815, + "learning_rate": 1.186552002325292e-06, + "loss": 0.0023, + "step": 93400 + }, + { + "grad_norm": 0.01843269169330597, + "learning_rate": 1.1829738911150578e-06, + "loss": 0.0019, + "step": 93410 + }, + { + "grad_norm": 0.03293688967823982, + "learning_rate": 1.1794011184613985e-06, + "loss": 0.0018, + "step": 93420 + }, + { + "grad_norm": 0.045906912535429, + "learning_rate": 1.17583368475504e-06, + "loss": 0.0028, + "step": 93430 + }, + { + "grad_norm": 0.025571653619408607, + "learning_rate": 1.1722715903860982e-06, + "loss": 0.0024, + "step": 93440 + }, + { + "grad_norm": 0.016594715416431427, + "learning_rate": 1.168714835744128e-06, + "loss": 0.0021, + "step": 93450 + }, + { + "grad_norm": 0.008765090256929398, + "learning_rate": 1.165163421218085e-06, + "loss": 0.0013, + "step": 93460 + }, + { + "grad_norm": 0.012495254166424274, + "learning_rate": 1.1616173471963477e-06, + "loss": 0.0015, + "step": 93470 + }, + { + "grad_norm": 0.01936822384595871, + "learning_rate": 1.1580766140667109e-06, + "loss": 0.0024, + "step": 93480 + }, + { + "grad_norm": 0.021876312792301178, + "learning_rate": 1.1545412222163764e-06, + "loss": 0.0035, + "step": 93490 + }, + { + "grad_norm": 0.013899683021008968, + "learning_rate": 1.1510111720319795e-06, + "loss": 0.0024, + "step": 93500 + }, + { + "grad_norm": 0.027011599391698837, + "learning_rate": 1.1474864638995663e-06, + "loss": 0.0024, + "step": 93510 + }, + { + "grad_norm": 0.0312516875565052, + "learning_rate": 1.1439670982045735e-06, + "loss": 0.0019, + "step": 93520 + }, + { + "grad_norm": 0.02215360291302204, + "learning_rate": 1.1404530753318877e-06, + "loss": 0.0021, + "step": 93530 + }, + { + "grad_norm": 0.019448550418019295, + "learning_rate": 1.136944395665801e-06, + "loss": 0.0027, + "step": 93540 + }, + { + "grad_norm": 0.012704642489552498, + "learning_rate": 1.1334410595900014e-06, + "loss": 0.0024, + "step": 93550 + }, + { + "grad_norm": 0.01633080095052719, + "learning_rate": 1.129943067487621e-06, + "loss": 0.0023, + "step": 93560 + }, + { + "grad_norm": 0.02086005173623562, + "learning_rate": 1.126450419741193e-06, + "loss": 0.0016, + "step": 93570 + }, + { + "grad_norm": 0.03692728653550148, + "learning_rate": 1.1229631167326671e-06, + "loss": 0.0031, + "step": 93580 + }, + { + "grad_norm": 0.02169022150337696, + "learning_rate": 1.1194811588434052e-06, + "loss": 0.0018, + "step": 93590 + }, + { + "grad_norm": 0.02528269775211811, + "learning_rate": 1.1160045464542023e-06, + "loss": 0.0018, + "step": 93600 + }, + { + "grad_norm": 0.016835741698741913, + "learning_rate": 1.1125332799452325e-06, + "loss": 0.0024, + "step": 93610 + }, + { + "grad_norm": 0.017662297934293747, + "learning_rate": 1.109067359696131e-06, + "loss": 0.0025, + "step": 93620 + }, + { + "grad_norm": 0.02315017580986023, + "learning_rate": 1.1056067860859166e-06, + "loss": 0.0026, + "step": 93630 + }, + { + "grad_norm": 0.030675336718559265, + "learning_rate": 1.102151559493031e-06, + "loss": 0.0027, + "step": 93640 + }, + { + "grad_norm": 0.02242569997906685, + "learning_rate": 1.0987016802953332e-06, + "loss": 0.0025, + "step": 93650 + }, + { + "grad_norm": 0.017204491421580315, + "learning_rate": 1.0952571488701048e-06, + "loss": 0.0017, + "step": 93660 + }, + { + "grad_norm": 0.034227050840854645, + "learning_rate": 1.0918179655940163e-06, + "loss": 0.0021, + "step": 93670 + }, + { + "grad_norm": 0.038229357451200485, + "learning_rate": 1.088384130843184e-06, + "loss": 0.0016, + "step": 93680 + }, + { + "grad_norm": 0.012117317877709866, + "learning_rate": 1.0849556449931296e-06, + "loss": 0.0014, + "step": 93690 + }, + { + "grad_norm": 0.00869752373546362, + "learning_rate": 1.0815325084187866e-06, + "loss": 0.0034, + "step": 93700 + }, + { + "grad_norm": 0.04042673856019974, + "learning_rate": 1.0781147214944943e-06, + "loss": 0.0042, + "step": 93710 + }, + { + "grad_norm": 0.017755400389432907, + "learning_rate": 1.0747022845940313e-06, + "loss": 0.0024, + "step": 93720 + }, + { + "grad_norm": 0.0184558667242527, + "learning_rate": 1.0712951980905605e-06, + "loss": 0.0027, + "step": 93730 + }, + { + "grad_norm": 0.03650465980172157, + "learning_rate": 1.0678934623566839e-06, + "loss": 0.0019, + "step": 93740 + }, + { + "grad_norm": 0.02977975271642208, + "learning_rate": 1.0644970777644091e-06, + "loss": 0.0024, + "step": 93750 + }, + { + "grad_norm": 0.0331244021654129, + "learning_rate": 1.0611060446851673e-06, + "loss": 0.0025, + "step": 93760 + }, + { + "grad_norm": 0.006774549838155508, + "learning_rate": 1.0577203634897836e-06, + "loss": 0.0023, + "step": 93770 + }, + { + "grad_norm": 0.018700145184993744, + "learning_rate": 1.0543400345485178e-06, + "loss": 0.0024, + "step": 93780 + }, + { + "grad_norm": 0.031171442940831184, + "learning_rate": 1.0509650582310404e-06, + "loss": 0.0017, + "step": 93790 + }, + { + "grad_norm": 0.03017251566052437, + "learning_rate": 1.0475954349064287e-06, + "loss": 0.0018, + "step": 93800 + }, + { + "grad_norm": 0.029032038524746895, + "learning_rate": 1.0442311649431825e-06, + "loss": 0.0027, + "step": 93810 + }, + { + "grad_norm": 0.025146890431642532, + "learning_rate": 1.0408722487092071e-06, + "loss": 0.0019, + "step": 93820 + }, + { + "grad_norm": 0.015846965834498405, + "learning_rate": 1.0375186865718424e-06, + "loss": 0.0019, + "step": 93830 + }, + { + "grad_norm": 0.01701968163251877, + "learning_rate": 1.0341704788978113e-06, + "loss": 0.0032, + "step": 93840 + }, + { + "grad_norm": 0.018369583413004875, + "learning_rate": 1.0308276260532879e-06, + "loss": 0.0042, + "step": 93850 + }, + { + "grad_norm": 0.014869870617985725, + "learning_rate": 1.0274901284038241e-06, + "loss": 0.0019, + "step": 93860 + }, + { + "grad_norm": 0.038569219410419464, + "learning_rate": 1.0241579863144114e-06, + "loss": 0.0026, + "step": 93870 + }, + { + "grad_norm": 0.020240481942892075, + "learning_rate": 1.0208312001494413e-06, + "loss": 0.0024, + "step": 93880 + }, + { + "grad_norm": 0.014039483852684498, + "learning_rate": 1.0175097702727453e-06, + "loss": 0.0022, + "step": 93890 + }, + { + "grad_norm": 0.010270940139889717, + "learning_rate": 1.0141936970475275e-06, + "loss": 0.002, + "step": 93900 + }, + { + "grad_norm": 0.029774710536003113, + "learning_rate": 1.0108829808364417e-06, + "loss": 0.0039, + "step": 93910 + }, + { + "grad_norm": 0.022095786407589912, + "learning_rate": 1.0075776220015376e-06, + "loss": 0.0014, + "step": 93920 + }, + { + "grad_norm": 0.03819805383682251, + "learning_rate": 1.0042776209042816e-06, + "loss": 0.0023, + "step": 93930 + }, + { + "grad_norm": 0.006034504156559706, + "learning_rate": 1.000982977905568e-06, + "loss": 0.0028, + "step": 93940 + }, + { + "grad_norm": 0.008796082809567451, + "learning_rate": 9.976936933656867e-07, + "loss": 0.0024, + "step": 93950 + }, + { + "grad_norm": 0.042713869363069534, + "learning_rate": 9.944097676443442e-07, + "loss": 0.0031, + "step": 93960 + }, + { + "grad_norm": 0.019979000091552734, + "learning_rate": 9.911312011006757e-07, + "loss": 0.0019, + "step": 93970 + }, + { + "grad_norm": 0.046782415360212326, + "learning_rate": 9.878579940932109e-07, + "loss": 0.002, + "step": 93980 + }, + { + "grad_norm": 0.01494690217077732, + "learning_rate": 9.84590146979908e-07, + "loss": 0.0025, + "step": 93990 + }, + { + "grad_norm": 0.014580224640667439, + "learning_rate": 9.813276601181365e-07, + "loss": 0.002, + "step": 94000 + }, + { + "grad_norm": 0.016640786081552505, + "learning_rate": 9.780705338646724e-07, + "loss": 0.0037, + "step": 94010 + }, + { + "grad_norm": 0.016302529722452164, + "learning_rate": 9.748187685757083e-07, + "loss": 0.0022, + "step": 94020 + }, + { + "grad_norm": 0.013053222559392452, + "learning_rate": 9.715723646068543e-07, + "loss": 0.002, + "step": 94030 + }, + { + "grad_norm": 0.01309970673173666, + "learning_rate": 9.68331322313143e-07, + "loss": 0.0017, + "step": 94040 + }, + { + "grad_norm": 0.012158606201410294, + "learning_rate": 9.650956420489909e-07, + "loss": 0.0021, + "step": 94050 + }, + { + "grad_norm": 0.0187035221606493, + "learning_rate": 9.618653241682595e-07, + "loss": 0.0038, + "step": 94060 + }, + { + "grad_norm": 0.03558240830898285, + "learning_rate": 9.586403690242053e-07, + "loss": 0.0024, + "step": 94070 + }, + { + "grad_norm": 0.016136635094881058, + "learning_rate": 9.554207769695122e-07, + "loss": 0.0034, + "step": 94080 + }, + { + "grad_norm": 0.016794756054878235, + "learning_rate": 9.522065483562659e-07, + "loss": 0.0024, + "step": 94090 + }, + { + "grad_norm": 0.017723578959703445, + "learning_rate": 9.489976835359681e-07, + "loss": 0.0021, + "step": 94100 + }, + { + "grad_norm": 0.009863480925559998, + "learning_rate": 9.457941828595329e-07, + "loss": 0.0023, + "step": 94110 + }, + { + "grad_norm": 0.026173891499638557, + "learning_rate": 9.425960466772965e-07, + "loss": 0.0026, + "step": 94120 + }, + { + "grad_norm": 0.04580235108733177, + "learning_rate": 9.394032753390014e-07, + "loss": 0.0028, + "step": 94130 + }, + { + "grad_norm": 0.024452699348330498, + "learning_rate": 9.362158691938017e-07, + "loss": 0.0037, + "step": 94140 + }, + { + "grad_norm": 0.011497577652335167, + "learning_rate": 9.330338285902685e-07, + "loss": 0.0014, + "step": 94150 + }, + { + "grad_norm": 0.03351437672972679, + "learning_rate": 9.298571538763845e-07, + "loss": 0.0021, + "step": 94160 + }, + { + "grad_norm": 0.018861370161175728, + "learning_rate": 9.266858453995441e-07, + "loss": 0.0017, + "step": 94170 + }, + { + "grad_norm": 0.023968247696757317, + "learning_rate": 9.235199035065645e-07, + "loss": 0.0023, + "step": 94180 + }, + { + "grad_norm": 0.021739007905125618, + "learning_rate": 9.203593285436574e-07, + "loss": 0.0015, + "step": 94190 + }, + { + "grad_norm": 0.03661530464887619, + "learning_rate": 9.172041208564686e-07, + "loss": 0.0021, + "step": 94200 + }, + { + "grad_norm": 0.037464991211891174, + "learning_rate": 9.140542807900332e-07, + "loss": 0.003, + "step": 94210 + }, + { + "grad_norm": 0.014576021581888199, + "learning_rate": 9.109098086888313e-07, + "loss": 0.0024, + "step": 94220 + }, + { + "grad_norm": 0.011434022337198257, + "learning_rate": 9.077707048967265e-07, + "loss": 0.0021, + "step": 94230 + }, + { + "grad_norm": 0.017312634736299515, + "learning_rate": 9.046369697570057e-07, + "loss": 0.0022, + "step": 94240 + }, + { + "grad_norm": 0.013763113878667355, + "learning_rate": 9.015086036123721e-07, + "loss": 0.0017, + "step": 94250 + }, + { + "grad_norm": 0.01181859988719225, + "learning_rate": 8.983856068049413e-07, + "loss": 0.0018, + "step": 94260 + }, + { + "grad_norm": 0.03218343108892441, + "learning_rate": 8.952679796762398e-07, + "loss": 0.0017, + "step": 94270 + }, + { + "grad_norm": 0.029635094106197357, + "learning_rate": 8.921557225671951e-07, + "loss": 0.0029, + "step": 94280 + }, + { + "grad_norm": 0.009410032071173191, + "learning_rate": 8.890488358181792e-07, + "loss": 0.0017, + "step": 94290 + }, + { + "grad_norm": 0.043787285685539246, + "learning_rate": 8.859473197689372e-07, + "loss": 0.0026, + "step": 94300 + }, + { + "grad_norm": 0.01653148978948593, + "learning_rate": 8.828511747586588e-07, + "loss": 0.0016, + "step": 94310 + }, + { + "grad_norm": 0.04967065528035164, + "learning_rate": 8.797604011259287e-07, + "loss": 0.0029, + "step": 94320 + }, + { + "grad_norm": 0.021821284666657448, + "learning_rate": 8.766749992087487e-07, + "loss": 0.0022, + "step": 94330 + }, + { + "grad_norm": 0.025604302063584328, + "learning_rate": 8.735949693445378e-07, + "loss": 0.0019, + "step": 94340 + }, + { + "grad_norm": 0.038418009877204895, + "learning_rate": 8.705203118701211e-07, + "loss": 0.0027, + "step": 94350 + }, + { + "grad_norm": 0.018686192110180855, + "learning_rate": 8.674510271217295e-07, + "loss": 0.0017, + "step": 94360 + }, + { + "grad_norm": 0.017951568588614464, + "learning_rate": 8.643871154350336e-07, + "loss": 0.0024, + "step": 94370 + }, + { + "grad_norm": 0.0332985520362854, + "learning_rate": 8.613285771450818e-07, + "loss": 0.0019, + "step": 94380 + }, + { + "grad_norm": 0.008176818490028381, + "learning_rate": 8.582754125863623e-07, + "loss": 0.0033, + "step": 94390 + }, + { + "grad_norm": 0.023154808208346367, + "learning_rate": 8.55227622092758e-07, + "loss": 0.0023, + "step": 94400 + }, + { + "grad_norm": 0.0146810756996274, + "learning_rate": 8.521852059975743e-07, + "loss": 0.0023, + "step": 94410 + }, + { + "grad_norm": 0.01325855404138565, + "learning_rate": 8.491481646335231e-07, + "loss": 0.0021, + "step": 94420 + }, + { + "grad_norm": 0.023499570786952972, + "learning_rate": 8.46116498332733e-07, + "loss": 0.0019, + "step": 94430 + }, + { + "grad_norm": 0.01618862897157669, + "learning_rate": 8.430902074267333e-07, + "loss": 0.003, + "step": 94440 + }, + { + "grad_norm": 0.01923183910548687, + "learning_rate": 8.400692922464925e-07, + "loss": 0.0022, + "step": 94450 + }, + { + "grad_norm": 0.028167152777314186, + "learning_rate": 8.370537531223577e-07, + "loss": 0.0021, + "step": 94460 + }, + { + "grad_norm": 0.01927466131746769, + "learning_rate": 8.340435903841092e-07, + "loss": 0.0018, + "step": 94470 + }, + { + "grad_norm": 0.036533571779727936, + "learning_rate": 8.310388043609341e-07, + "loss": 0.0021, + "step": 94480 + }, + { + "grad_norm": 0.044406939297914505, + "learning_rate": 8.280393953814303e-07, + "loss": 0.0023, + "step": 94490 + }, + { + "grad_norm": 0.012584816664457321, + "learning_rate": 8.250453637736077e-07, + "loss": 0.0017, + "step": 94500 + }, + { + "grad_norm": 0.02059088461101055, + "learning_rate": 8.22056709864899e-07, + "loss": 0.0022, + "step": 94510 + }, + { + "grad_norm": 0.024831579998135567, + "learning_rate": 8.190734339821204e-07, + "loss": 0.0017, + "step": 94520 + }, + { + "grad_norm": 0.04944482445716858, + "learning_rate": 8.160955364515333e-07, + "loss": 0.0021, + "step": 94530 + }, + { + "grad_norm": 0.009943296201527119, + "learning_rate": 8.131230175987936e-07, + "loss": 0.0021, + "step": 94540 + }, + { + "grad_norm": 0.021572617813944817, + "learning_rate": 8.101558777489693e-07, + "loss": 0.0022, + "step": 94550 + }, + { + "grad_norm": 0.016948126256465912, + "learning_rate": 8.071941172265396e-07, + "loss": 0.0049, + "step": 94560 + }, + { + "grad_norm": 0.015232768841087818, + "learning_rate": 8.042377363553955e-07, + "loss": 0.003, + "step": 94570 + }, + { + "grad_norm": 0.01065575610846281, + "learning_rate": 8.01286735458856e-07, + "loss": 0.0022, + "step": 94580 + }, + { + "grad_norm": 0.02215678058564663, + "learning_rate": 7.983411148596298e-07, + "loss": 0.0024, + "step": 94590 + }, + { + "grad_norm": 0.028242267668247223, + "learning_rate": 7.954008748798425e-07, + "loss": 0.0031, + "step": 94600 + }, + { + "grad_norm": 0.015424640849232674, + "learning_rate": 7.924660158410369e-07, + "loss": 0.0026, + "step": 94610 + }, + { + "grad_norm": 0.013104891404509544, + "learning_rate": 7.895365380641728e-07, + "loss": 0.0021, + "step": 94620 + }, + { + "grad_norm": 0.023528195917606354, + "learning_rate": 7.866124418695941e-07, + "loss": 0.0015, + "step": 94630 + }, + { + "grad_norm": 0.008655432611703873, + "learning_rate": 7.83693727577095e-07, + "loss": 0.002, + "step": 94640 + }, + { + "grad_norm": 0.014933916740119457, + "learning_rate": 7.807803955058535e-07, + "loss": 0.0024, + "step": 94650 + }, + { + "grad_norm": 0.010405493900179863, + "learning_rate": 7.778724459744702e-07, + "loss": 0.002, + "step": 94660 + }, + { + "grad_norm": 0.021231073886156082, + "learning_rate": 7.749698793009464e-07, + "loss": 0.0014, + "step": 94670 + }, + { + "grad_norm": 0.0234710443764925, + "learning_rate": 7.720726958027169e-07, + "loss": 0.0027, + "step": 94680 + }, + { + "grad_norm": 0.033140912652015686, + "learning_rate": 7.691808957965951e-07, + "loss": 0.0038, + "step": 94690 + }, + { + "grad_norm": 0.044556692242622375, + "learning_rate": 7.662944795988336e-07, + "loss": 0.0031, + "step": 94700 + }, + { + "grad_norm": 0.017048846930265427, + "learning_rate": 7.634134475250854e-07, + "loss": 0.0016, + "step": 94710 + }, + { + "grad_norm": 0.02188914269208908, + "learning_rate": 7.605377998904262e-07, + "loss": 0.0026, + "step": 94720 + }, + { + "grad_norm": 0.01812036894261837, + "learning_rate": 7.5766753700931e-07, + "loss": 0.0024, + "step": 94730 + }, + { + "grad_norm": 0.026020418852567673, + "learning_rate": 7.54802659195647e-07, + "loss": 0.0016, + "step": 94740 + }, + { + "grad_norm": 0.02789951115846634, + "learning_rate": 7.519431667627253e-07, + "loss": 0.0016, + "step": 94750 + }, + { + "grad_norm": 0.06280826777219772, + "learning_rate": 7.490890600232503e-07, + "loss": 0.0052, + "step": 94760 + }, + { + "grad_norm": 0.04471515864133835, + "learning_rate": 7.462403392893502e-07, + "loss": 0.0024, + "step": 94770 + }, + { + "grad_norm": 0.009212574921548367, + "learning_rate": 7.433970048725536e-07, + "loss": 0.0024, + "step": 94780 + }, + { + "grad_norm": 0.03552260249853134, + "learning_rate": 7.405590570838062e-07, + "loss": 0.0034, + "step": 94790 + }, + { + "grad_norm": 0.0076012094505131245, + "learning_rate": 7.377264962334596e-07, + "loss": 0.0015, + "step": 94800 + }, + { + "grad_norm": 0.02598176896572113, + "learning_rate": 7.348993226312773e-07, + "loss": 0.0016, + "step": 94810 + }, + { + "grad_norm": 0.014420031569898129, + "learning_rate": 7.32077536586434e-07, + "loss": 0.0033, + "step": 94820 + }, + { + "grad_norm": 0.010919381864368916, + "learning_rate": 7.292611384075221e-07, + "loss": 0.0014, + "step": 94830 + }, + { + "grad_norm": 0.01426728069782257, + "learning_rate": 7.264501284025338e-07, + "loss": 0.0019, + "step": 94840 + }, + { + "grad_norm": 0.01279844157397747, + "learning_rate": 7.236445068788789e-07, + "loss": 0.0017, + "step": 94850 + }, + { + "grad_norm": 0.011420377530157566, + "learning_rate": 7.20844274143373e-07, + "loss": 0.0043, + "step": 94860 + }, + { + "grad_norm": 0.027465244755148888, + "learning_rate": 7.180494305022489e-07, + "loss": 0.0019, + "step": 94870 + }, + { + "grad_norm": 0.02076101489365101, + "learning_rate": 7.152599762611456e-07, + "loss": 0.0026, + "step": 94880 + }, + { + "grad_norm": 0.029590345919132233, + "learning_rate": 7.124759117251079e-07, + "loss": 0.0019, + "step": 94890 + }, + { + "grad_norm": 0.010441062971949577, + "learning_rate": 7.096972371986033e-07, + "loss": 0.0028, + "step": 94900 + }, + { + "grad_norm": 0.013474841602146626, + "learning_rate": 7.069239529855054e-07, + "loss": 0.0028, + "step": 94910 + }, + { + "grad_norm": 0.024679260328412056, + "learning_rate": 7.041560593890939e-07, + "loss": 0.0027, + "step": 94920 + }, + { + "grad_norm": 0.010760647244751453, + "learning_rate": 7.013935567120599e-07, + "loss": 0.0021, + "step": 94930 + }, + { + "grad_norm": 0.01691567897796631, + "learning_rate": 6.986364452565009e-07, + "loss": 0.0012, + "step": 94940 + }, + { + "grad_norm": 0.04206069931387901, + "learning_rate": 6.95884725323942e-07, + "loss": 0.0019, + "step": 94950 + }, + { + "grad_norm": 0.032159242779016495, + "learning_rate": 6.931383972152982e-07, + "loss": 0.0018, + "step": 94960 + }, + { + "grad_norm": 0.018758732825517654, + "learning_rate": 6.903974612309127e-07, + "loss": 0.0032, + "step": 94970 + }, + { + "grad_norm": 0.042373619973659515, + "learning_rate": 6.876619176705179e-07, + "loss": 0.0035, + "step": 94980 + }, + { + "grad_norm": 0.012484540231525898, + "learning_rate": 6.8493176683328e-07, + "loss": 0.0017, + "step": 94990 + }, + { + "grad_norm": 0.02366303652524948, + "learning_rate": 6.822070090177657e-07, + "loss": 0.0017, + "step": 95000 + }, + { + "grad_norm": 0.0390155166387558, + "learning_rate": 6.794876445219367e-07, + "loss": 0.0023, + "step": 95010 + }, + { + "grad_norm": 0.017281346023082733, + "learning_rate": 6.767736736431885e-07, + "loss": 0.0026, + "step": 95020 + }, + { + "grad_norm": 0.013375820592045784, + "learning_rate": 6.740650966783113e-07, + "loss": 0.0018, + "step": 95030 + }, + { + "grad_norm": 0.020469682291150093, + "learning_rate": 6.713619139235239e-07, + "loss": 0.0022, + "step": 95040 + }, + { + "grad_norm": 0.037846025079488754, + "learning_rate": 6.686641256744286e-07, + "loss": 0.0028, + "step": 95050 + }, + { + "grad_norm": 0.01362440176308155, + "learning_rate": 6.659717322260562e-07, + "loss": 0.0017, + "step": 95060 + }, + { + "grad_norm": 0.013609779067337513, + "learning_rate": 6.632847338728377e-07, + "loss": 0.0024, + "step": 95070 + }, + { + "grad_norm": 0.02261618711054325, + "learning_rate": 6.60603130908627e-07, + "loss": 0.0026, + "step": 95080 + }, + { + "grad_norm": 0.009986752644181252, + "learning_rate": 6.579269236266783e-07, + "loss": 0.0017, + "step": 95090 + }, + { + "grad_norm": 0.0351376086473465, + "learning_rate": 6.552561123196632e-07, + "loss": 0.003, + "step": 95100 + }, + { + "grad_norm": 0.02387223206460476, + "learning_rate": 6.525906972796425e-07, + "loss": 0.0016, + "step": 95110 + }, + { + "grad_norm": 0.030934186652302742, + "learning_rate": 6.499306787981108e-07, + "loss": 0.0016, + "step": 95120 + }, + { + "grad_norm": 0.01747436448931694, + "learning_rate": 6.472760571659631e-07, + "loss": 0.0034, + "step": 95130 + }, + { + "grad_norm": 0.03593292087316513, + "learning_rate": 6.446268326735061e-07, + "loss": 0.0021, + "step": 95140 + }, + { + "grad_norm": 0.03524952754378319, + "learning_rate": 6.41983005610458e-07, + "loss": 0.0022, + "step": 95150 + }, + { + "grad_norm": 0.02875296212732792, + "learning_rate": 6.393445762659378e-07, + "loss": 0.0016, + "step": 95160 + }, + { + "grad_norm": 0.013484036549925804, + "learning_rate": 6.36711544928481e-07, + "loss": 0.0032, + "step": 95170 + }, + { + "grad_norm": 0.04033312201499939, + "learning_rate": 6.340839118860298e-07, + "loss": 0.0025, + "step": 95180 + }, + { + "grad_norm": 0.021890763193368912, + "learning_rate": 6.314616774259485e-07, + "loss": 0.0027, + "step": 95190 + }, + { + "grad_norm": 0.031016111373901367, + "learning_rate": 6.288448418349858e-07, + "loss": 0.0017, + "step": 95200 + }, + { + "grad_norm": 0.01773473434150219, + "learning_rate": 6.262334053993235e-07, + "loss": 0.0017, + "step": 95210 + }, + { + "grad_norm": 0.01647769846022129, + "learning_rate": 6.236273684045501e-07, + "loss": 0.0022, + "step": 95220 + }, + { + "grad_norm": 0.013046934269368649, + "learning_rate": 6.210267311356488e-07, + "loss": 0.0021, + "step": 95230 + }, + { + "grad_norm": 0.015610083937644958, + "learning_rate": 6.184314938770197e-07, + "loss": 0.0017, + "step": 95240 + }, + { + "grad_norm": 0.01099046878516674, + "learning_rate": 6.158416569124858e-07, + "loss": 0.0018, + "step": 95250 + }, + { + "grad_norm": 0.026878543198108673, + "learning_rate": 6.132572205252541e-07, + "loss": 0.0032, + "step": 95260 + }, + { + "grad_norm": 0.011422734707593918, + "learning_rate": 6.106781849979648e-07, + "loss": 0.0017, + "step": 95270 + }, + { + "grad_norm": 0.01502870861440897, + "learning_rate": 6.081045506126482e-07, + "loss": 0.0031, + "step": 95280 + }, + { + "grad_norm": 0.014643647707998753, + "learning_rate": 6.055363176507678e-07, + "loss": 0.0025, + "step": 95290 + }, + { + "grad_norm": 0.024700621142983437, + "learning_rate": 6.029734863931602e-07, + "loss": 0.004, + "step": 95300 + }, + { + "grad_norm": 0.011173649691045284, + "learning_rate": 6.004160571201179e-07, + "loss": 0.0044, + "step": 95310 + }, + { + "grad_norm": 0.01386498473584652, + "learning_rate": 5.978640301112947e-07, + "loss": 0.0017, + "step": 95320 + }, + { + "grad_norm": 0.016715483739972115, + "learning_rate": 5.9531740564579e-07, + "loss": 0.0021, + "step": 95330 + }, + { + "grad_norm": 0.02257847972214222, + "learning_rate": 5.927761840020918e-07, + "loss": 0.0018, + "step": 95340 + }, + { + "grad_norm": 0.01106646005064249, + "learning_rate": 5.902403654581113e-07, + "loss": 0.0034, + "step": 95350 + }, + { + "grad_norm": 0.0087981466203928, + "learning_rate": 5.877099502911542e-07, + "loss": 0.0019, + "step": 95360 + }, + { + "grad_norm": 0.01209649071097374, + "learning_rate": 5.851849387779496e-07, + "loss": 0.0022, + "step": 95370 + }, + { + "grad_norm": 0.03867724910378456, + "learning_rate": 5.826653311946262e-07, + "loss": 0.0025, + "step": 95380 + }, + { + "grad_norm": 0.024580402299761772, + "learning_rate": 5.801511278167249e-07, + "loss": 0.0028, + "step": 95390 + }, + { + "grad_norm": 0.02949349768459797, + "learning_rate": 5.776423289191868e-07, + "loss": 0.0018, + "step": 95400 + }, + { + "grad_norm": 0.013010243885219097, + "learning_rate": 5.751389347763869e-07, + "loss": 0.0031, + "step": 95410 + }, + { + "grad_norm": 0.015209930948913097, + "learning_rate": 5.726409456620841e-07, + "loss": 0.0016, + "step": 95420 + }, + { + "grad_norm": 0.03387782350182533, + "learning_rate": 5.701483618494541e-07, + "loss": 0.0021, + "step": 95430 + }, + { + "grad_norm": 0.01625644788146019, + "learning_rate": 5.67661183611079e-07, + "loss": 0.0025, + "step": 95440 + }, + { + "grad_norm": 0.03204552084207535, + "learning_rate": 5.651794112189579e-07, + "loss": 0.002, + "step": 95450 + }, + { + "grad_norm": 0.03946413844823837, + "learning_rate": 5.627030449444903e-07, + "loss": 0.0027, + "step": 95460 + }, + { + "grad_norm": 0.010413197800517082, + "learning_rate": 5.60232085058493e-07, + "loss": 0.0025, + "step": 95470 + }, + { + "grad_norm": 0.014637158252298832, + "learning_rate": 5.577665318311776e-07, + "loss": 0.0018, + "step": 95480 + }, + { + "grad_norm": 0.04705701023340225, + "learning_rate": 5.553063855321838e-07, + "loss": 0.0024, + "step": 95490 + }, + { + "grad_norm": 0.04741205647587776, + "learning_rate": 5.528516464305467e-07, + "loss": 0.0029, + "step": 95500 + }, + { + "grad_norm": 0.011249350383877754, + "learning_rate": 5.504023147947068e-07, + "loss": 0.0025, + "step": 95510 + }, + { + "grad_norm": 0.024250924587249756, + "learning_rate": 5.479583908925279e-07, + "loss": 0.0022, + "step": 95520 + }, + { + "grad_norm": 0.01927255652844906, + "learning_rate": 5.455198749912626e-07, + "loss": 0.0018, + "step": 95530 + }, + { + "grad_norm": 0.013138987123966217, + "learning_rate": 5.430867673575979e-07, + "loss": 0.0025, + "step": 95540 + }, + { + "grad_norm": 0.01051765214651823, + "learning_rate": 5.406590682575985e-07, + "loss": 0.002, + "step": 95550 + }, + { + "grad_norm": 0.024360155686736107, + "learning_rate": 5.382367779567687e-07, + "loss": 0.0037, + "step": 95560 + }, + { + "grad_norm": 0.008610817603766918, + "learning_rate": 5.358198967200023e-07, + "loss": 0.0018, + "step": 95570 + }, + { + "grad_norm": 0.028258666396141052, + "learning_rate": 5.334084248115989e-07, + "loss": 0.0026, + "step": 95580 + }, + { + "grad_norm": 0.02154066227376461, + "learning_rate": 5.310023624952754e-07, + "loss": 0.0018, + "step": 95590 + }, + { + "grad_norm": 0.014467069879174232, + "learning_rate": 5.286017100341656e-07, + "loss": 0.0023, + "step": 95600 + }, + { + "grad_norm": 0.031069744378328323, + "learning_rate": 5.262064676907874e-07, + "loss": 0.0027, + "step": 95610 + }, + { + "grad_norm": 0.010385644622147083, + "learning_rate": 5.238166357270924e-07, + "loss": 0.0015, + "step": 95620 + }, + { + "grad_norm": 0.017660243436694145, + "learning_rate": 5.214322144044215e-07, + "loss": 0.0024, + "step": 95630 + }, + { + "grad_norm": 0.03231072053313255, + "learning_rate": 5.190532039835327e-07, + "loss": 0.0022, + "step": 95640 + }, + { + "grad_norm": 0.01361115463078022, + "learning_rate": 5.166796047245903e-07, + "loss": 0.0016, + "step": 95650 + }, + { + "grad_norm": 0.016036277636885643, + "learning_rate": 5.143114168871699e-07, + "loss": 0.0032, + "step": 95660 + }, + { + "grad_norm": 0.013586190529167652, + "learning_rate": 5.119486407302532e-07, + "loss": 0.0018, + "step": 95670 + }, + { + "grad_norm": 0.011613818816840649, + "learning_rate": 5.095912765122335e-07, + "loss": 0.0016, + "step": 95680 + }, + { + "grad_norm": 0.026933645829558372, + "learning_rate": 5.07239324490899e-07, + "loss": 0.0032, + "step": 95690 + }, + { + "grad_norm": 0.03312406316399574, + "learning_rate": 5.048927849234608e-07, + "loss": 0.0025, + "step": 95700 + }, + { + "grad_norm": 0.010160047560930252, + "learning_rate": 5.025516580665358e-07, + "loss": 0.0021, + "step": 95710 + }, + { + "grad_norm": 0.018829265609383583, + "learning_rate": 5.002159441761412e-07, + "loss": 0.0021, + "step": 95720 + }, + { + "grad_norm": 0.03277336806058884, + "learning_rate": 4.978856435077062e-07, + "loss": 0.0021, + "step": 95730 + }, + { + "grad_norm": 0.011694798246026039, + "learning_rate": 4.955607563160769e-07, + "loss": 0.0033, + "step": 95740 + }, + { + "grad_norm": 0.018407629802823067, + "learning_rate": 4.932412828554944e-07, + "loss": 0.0022, + "step": 95750 + }, + { + "grad_norm": 0.022742407396435738, + "learning_rate": 4.909272233796114e-07, + "loss": 0.0022, + "step": 95760 + }, + { + "grad_norm": 0.016147365793585777, + "learning_rate": 4.88618578141492e-07, + "loss": 0.0034, + "step": 95770 + }, + { + "grad_norm": 0.018057959154248238, + "learning_rate": 4.863153473936011e-07, + "loss": 0.0019, + "step": 95780 + }, + { + "grad_norm": 0.014941149391233921, + "learning_rate": 4.840175313878259e-07, + "loss": 0.0035, + "step": 95790 + }, + { + "grad_norm": 0.04448818042874336, + "learning_rate": 4.817251303754433e-07, + "loss": 0.0022, + "step": 95800 + }, + { + "grad_norm": 0.01375417411327362, + "learning_rate": 4.794381446071583e-07, + "loss": 0.0029, + "step": 95810 + }, + { + "grad_norm": 0.013218801468610764, + "learning_rate": 4.771565743330597e-07, + "loss": 0.0017, + "step": 95820 + }, + { + "grad_norm": 0.013160083442926407, + "learning_rate": 4.7488041980266463e-07, + "loss": 0.0017, + "step": 95830 + }, + { + "grad_norm": 0.018418125808238983, + "learning_rate": 4.726096812648795e-07, + "loss": 0.0019, + "step": 95840 + }, + { + "grad_norm": 0.04568907991051674, + "learning_rate": 4.703443589680445e-07, + "loss": 0.0025, + "step": 95850 + }, + { + "grad_norm": 0.03150274604558945, + "learning_rate": 4.6808445315987826e-07, + "loss": 0.0016, + "step": 95860 + }, + { + "grad_norm": 0.026663199067115784, + "learning_rate": 4.658299640875274e-07, + "loss": 0.0025, + "step": 95870 + }, + { + "grad_norm": 0.02592870593070984, + "learning_rate": 4.635808919975393e-07, + "loss": 0.0027, + "step": 95880 + }, + { + "grad_norm": 0.01930629275739193, + "learning_rate": 4.6133723713586706e-07, + "loss": 0.0018, + "step": 95890 + }, + { + "grad_norm": 0.018025020137429237, + "learning_rate": 4.590989997478756e-07, + "loss": 0.0022, + "step": 95900 + }, + { + "grad_norm": 0.018534645438194275, + "learning_rate": 4.5686618007833027e-07, + "loss": 0.0022, + "step": 95910 + }, + { + "grad_norm": 0.012462575919926167, + "learning_rate": 4.54638778371419e-07, + "loss": 0.0019, + "step": 95920 + }, + { + "grad_norm": 0.028566036373376846, + "learning_rate": 4.524167948707192e-07, + "loss": 0.0024, + "step": 95930 + }, + { + "grad_norm": 0.02945270575582981, + "learning_rate": 4.5020022981921986e-07, + "loss": 0.0027, + "step": 95940 + }, + { + "grad_norm": 0.020162196829915047, + "learning_rate": 4.47989083459327e-07, + "loss": 0.0024, + "step": 95950 + }, + { + "grad_norm": 0.03143192082643509, + "learning_rate": 4.457833560328528e-07, + "loss": 0.0019, + "step": 95960 + }, + { + "grad_norm": 0.03625509887933731, + "learning_rate": 4.4358304778100434e-07, + "loss": 0.0025, + "step": 95970 + }, + { + "grad_norm": 0.023744063451886177, + "learning_rate": 4.413881589444058e-07, + "loss": 0.0017, + "step": 95980 + }, + { + "grad_norm": 0.00788840651512146, + "learning_rate": 4.391986897630873e-07, + "loss": 0.0017, + "step": 95990 + }, + { + "grad_norm": 0.016618529334664345, + "learning_rate": 4.370146404764908e-07, + "loss": 0.0026, + "step": 96000 + }, + { + "grad_norm": 0.01953333429992199, + "learning_rate": 4.3483601132345285e-07, + "loss": 0.0019, + "step": 96010 + }, + { + "grad_norm": 0.037918440997600555, + "learning_rate": 4.32662802542233e-07, + "loss": 0.0034, + "step": 96020 + }, + { + "grad_norm": 0.022436928004026413, + "learning_rate": 4.3049501437047444e-07, + "loss": 0.0015, + "step": 96030 + }, + { + "grad_norm": 0.02372855320572853, + "learning_rate": 4.283326470452653e-07, + "loss": 0.0023, + "step": 96040 + }, + { + "grad_norm": 0.023316117003560066, + "learning_rate": 4.2617570080306647e-07, + "loss": 0.0023, + "step": 96050 + }, + { + "grad_norm": 0.029850095510482788, + "learning_rate": 4.2402417587975585e-07, + "loss": 0.0021, + "step": 96060 + }, + { + "grad_norm": 0.009055301547050476, + "learning_rate": 4.218780725106286e-07, + "loss": 0.0019, + "step": 96070 + }, + { + "grad_norm": 0.016052449122071266, + "learning_rate": 4.197373909303748e-07, + "loss": 0.0026, + "step": 96080 + }, + { + "grad_norm": 0.014251726679503918, + "learning_rate": 4.17602131373096e-07, + "loss": 0.0021, + "step": 96090 + }, + { + "grad_norm": 0.029472455382347107, + "learning_rate": 4.154722940723055e-07, + "loss": 0.0033, + "step": 96100 + }, + { + "grad_norm": 0.02378895878791809, + "learning_rate": 4.1334787926091136e-07, + "loss": 0.0028, + "step": 96110 + }, + { + "grad_norm": 0.018518617376685143, + "learning_rate": 4.1122888717124444e-07, + "loss": 0.0021, + "step": 96120 + }, + { + "grad_norm": 0.028449757024645805, + "learning_rate": 4.091153180350249e-07, + "loss": 0.0044, + "step": 96130 + }, + { + "grad_norm": 0.01871752180159092, + "learning_rate": 4.0700717208340125e-07, + "loss": 0.0019, + "step": 96140 + }, + { + "grad_norm": 0.011674679815769196, + "learning_rate": 4.049044495469112e-07, + "loss": 0.0014, + "step": 96150 + }, + { + "grad_norm": 0.02832586131989956, + "learning_rate": 4.0280715065549866e-07, + "loss": 0.0021, + "step": 96160 + }, + { + "grad_norm": 0.015144885517656803, + "learning_rate": 4.0071527563853575e-07, + "loss": 0.0027, + "step": 96170 + }, + { + "grad_norm": 0.018926121294498444, + "learning_rate": 3.9862882472477823e-07, + "loss": 0.0027, + "step": 96180 + }, + { + "grad_norm": 0.012049640528857708, + "learning_rate": 3.9654779814239375e-07, + "loss": 0.0012, + "step": 96190 + }, + { + "grad_norm": 0.008641717955470085, + "learning_rate": 3.944721961189668e-07, + "loss": 0.0015, + "step": 96200 + }, + { + "grad_norm": 0.013218880631029606, + "learning_rate": 3.9240201888147697e-07, + "loss": 0.0024, + "step": 96210 + }, + { + "grad_norm": 0.009465521201491356, + "learning_rate": 3.9033726665632096e-07, + "loss": 0.0016, + "step": 96220 + }, + { + "grad_norm": 0.02063337154686451, + "learning_rate": 3.882779396692959e-07, + "loss": 0.0028, + "step": 96230 + }, + { + "grad_norm": 0.011749807745218277, + "learning_rate": 3.8622403814559947e-07, + "loss": 0.0044, + "step": 96240 + }, + { + "grad_norm": 0.011028573848307133, + "learning_rate": 3.841755623098575e-07, + "loss": 0.0022, + "step": 96250 + }, + { + "grad_norm": 0.01861059106886387, + "learning_rate": 3.821325123860742e-07, + "loss": 0.0033, + "step": 96260 + }, + { + "grad_norm": 0.014327337965369225, + "learning_rate": 3.800948885976818e-07, + "loss": 0.0019, + "step": 96270 + }, + { + "grad_norm": 0.01206484716385603, + "learning_rate": 3.780626911675078e-07, + "loss": 0.0016, + "step": 96280 + }, + { + "grad_norm": 0.01709475927054882, + "learning_rate": 3.76035920317791e-07, + "loss": 0.0013, + "step": 96290 + }, + { + "grad_norm": 0.014141629450023174, + "learning_rate": 3.740145762701819e-07, + "loss": 0.0017, + "step": 96300 + }, + { + "grad_norm": 0.015097673051059246, + "learning_rate": 3.7199865924572585e-07, + "loss": 0.0025, + "step": 96310 + }, + { + "grad_norm": 0.015545112080872059, + "learning_rate": 3.699881694648799e-07, + "loss": 0.0019, + "step": 96320 + }, + { + "grad_norm": 0.015448471531271935, + "learning_rate": 3.679831071475126e-07, + "loss": 0.002, + "step": 96330 + }, + { + "grad_norm": 0.00923586543649435, + "learning_rate": 3.65983472512893e-07, + "loss": 0.0033, + "step": 96340 + }, + { + "grad_norm": 0.014951914548873901, + "learning_rate": 3.6398926577969616e-07, + "loss": 0.0018, + "step": 96350 + }, + { + "grad_norm": 0.022590553387999535, + "learning_rate": 3.620004871660032e-07, + "loss": 0.0014, + "step": 96360 + }, + { + "grad_norm": 0.03665937855839729, + "learning_rate": 3.600171368893124e-07, + "loss": 0.0041, + "step": 96370 + }, + { + "grad_norm": 0.011547042056918144, + "learning_rate": 3.580392151665113e-07, + "loss": 0.0013, + "step": 96380 + }, + { + "grad_norm": 0.026691127568483353, + "learning_rate": 3.5606672221391024e-07, + "loss": 0.0023, + "step": 96390 + }, + { + "grad_norm": 0.010644998401403427, + "learning_rate": 3.540996582472089e-07, + "loss": 0.0027, + "step": 96400 + }, + { + "grad_norm": 0.016380775719881058, + "learning_rate": 3.521380234815297e-07, + "loss": 0.0012, + "step": 96410 + }, + { + "grad_norm": 0.011553443036973476, + "learning_rate": 3.5018181813138985e-07, + "loss": 0.0022, + "step": 96420 + }, + { + "grad_norm": 0.01193907205015421, + "learning_rate": 3.482310424107238e-07, + "loss": 0.0015, + "step": 96430 + }, + { + "grad_norm": 0.009215881116688251, + "learning_rate": 3.4628569653285537e-07, + "loss": 0.0016, + "step": 96440 + }, + { + "grad_norm": 0.018148286268115044, + "learning_rate": 3.44345780710531e-07, + "loss": 0.0029, + "step": 96450 + }, + { + "grad_norm": 0.009078916162252426, + "learning_rate": 3.424112951558978e-07, + "loss": 0.0027, + "step": 96460 + }, + { + "grad_norm": 0.03411485627293587, + "learning_rate": 3.40482240080503e-07, + "loss": 0.0028, + "step": 96470 + }, + { + "grad_norm": 0.01903751865029335, + "learning_rate": 3.3855861569531133e-07, + "loss": 0.0031, + "step": 96480 + }, + { + "grad_norm": 0.022544600069522858, + "learning_rate": 3.3664042221068226e-07, + "loss": 0.0032, + "step": 96490 + }, + { + "grad_norm": 0.011744809336960316, + "learning_rate": 3.347276598363924e-07, + "loss": 0.002, + "step": 96500 + }, + { + "grad_norm": 0.01903526298701763, + "learning_rate": 3.328203287816134e-07, + "loss": 0.0027, + "step": 96510 + }, + { + "grad_norm": 0.014492284506559372, + "learning_rate": 3.3091842925493386e-07, + "loss": 0.0018, + "step": 96520 + }, + { + "grad_norm": 0.018707552924752235, + "learning_rate": 3.2902196146433196e-07, + "loss": 0.0014, + "step": 96530 + }, + { + "grad_norm": 0.013643653132021427, + "learning_rate": 3.2713092561720836e-07, + "loss": 0.002, + "step": 96540 + }, + { + "grad_norm": 0.04722658172249794, + "learning_rate": 3.2524532192036996e-07, + "loss": 0.0027, + "step": 96550 + }, + { + "grad_norm": 0.0255085788667202, + "learning_rate": 3.2336515058001836e-07, + "loss": 0.0024, + "step": 96560 + }, + { + "grad_norm": 0.013698605820536613, + "learning_rate": 3.2149041180176143e-07, + "loss": 0.0025, + "step": 96570 + }, + { + "grad_norm": 0.02410028874874115, + "learning_rate": 3.1962110579062954e-07, + "loss": 0.0025, + "step": 96580 + }, + { + "grad_norm": 0.0323580801486969, + "learning_rate": 3.177572327510314e-07, + "loss": 0.0018, + "step": 96590 + }, + { + "grad_norm": 0.03914826735854149, + "learning_rate": 3.158987928868151e-07, + "loss": 0.002, + "step": 96600 + }, + { + "grad_norm": 0.019962674006819725, + "learning_rate": 3.1404578640120144e-07, + "loss": 0.0026, + "step": 96610 + }, + { + "grad_norm": 0.01869111880660057, + "learning_rate": 3.1219821349683933e-07, + "loss": 0.0023, + "step": 96620 + }, + { + "grad_norm": 0.037774428725242615, + "learning_rate": 3.103560743757783e-07, + "loss": 0.0026, + "step": 96630 + }, + { + "grad_norm": 0.010295573621988297, + "learning_rate": 3.0851936923946834e-07, + "loss": 0.0018, + "step": 96640 + }, + { + "grad_norm": 0.011348806321620941, + "learning_rate": 3.06688098288771e-07, + "loss": 0.0025, + "step": 96650 + }, + { + "grad_norm": 0.022146426141262054, + "learning_rate": 3.0486226172394825e-07, + "loss": 0.0035, + "step": 96660 + }, + { + "grad_norm": 0.0392325334250927, + "learning_rate": 3.030418597446738e-07, + "loss": 0.0042, + "step": 96670 + }, + { + "grad_norm": 0.01737319678068161, + "learning_rate": 3.012268925500272e-07, + "loss": 0.0031, + "step": 96680 + }, + { + "grad_norm": 0.00903885904699564, + "learning_rate": 2.994173603384831e-07, + "loss": 0.0023, + "step": 96690 + }, + { + "grad_norm": 0.01896730437874794, + "learning_rate": 2.9761326330793317e-07, + "loss": 0.0021, + "step": 96700 + }, + { + "grad_norm": 0.024497441947460175, + "learning_rate": 2.958146016556751e-07, + "loss": 0.0017, + "step": 96710 + }, + { + "grad_norm": 0.012845862656831741, + "learning_rate": 2.94021375578396e-07, + "loss": 0.0033, + "step": 96720 + }, + { + "grad_norm": 0.007061031647026539, + "learning_rate": 2.9223358527221135e-07, + "loss": 0.0019, + "step": 96730 + }, + { + "grad_norm": 0.027622435241937637, + "learning_rate": 2.904512309326257e-07, + "loss": 0.0029, + "step": 96740 + }, + { + "grad_norm": 0.01738232560455799, + "learning_rate": 2.8867431275455545e-07, + "loss": 0.0023, + "step": 96750 + }, + { + "grad_norm": 0.020008131861686707, + "learning_rate": 2.86902830932323e-07, + "loss": 0.0016, + "step": 96760 + }, + { + "grad_norm": 0.023202622309327126, + "learning_rate": 2.851367856596565e-07, + "loss": 0.0019, + "step": 96770 + }, + { + "grad_norm": 0.016693085432052612, + "learning_rate": 2.83376177129685e-07, + "loss": 0.0015, + "step": 96780 + }, + { + "grad_norm": 0.02554006315767765, + "learning_rate": 2.8162100553494884e-07, + "loss": 0.0021, + "step": 96790 + }, + { + "grad_norm": 0.015999235212802887, + "learning_rate": 2.798712710673834e-07, + "loss": 0.0019, + "step": 96800 + }, + { + "grad_norm": 0.01915118284523487, + "learning_rate": 2.7812697391834676e-07, + "loss": 0.0022, + "step": 96810 + }, + { + "grad_norm": 0.025216352194547653, + "learning_rate": 2.7638811427858627e-07, + "loss": 0.0028, + "step": 96820 + }, + { + "grad_norm": 0.014857148751616478, + "learning_rate": 2.746546923382665e-07, + "loss": 0.0016, + "step": 96830 + }, + { + "grad_norm": 0.013202152214944363, + "learning_rate": 2.729267082869469e-07, + "loss": 0.0023, + "step": 96840 + }, + { + "grad_norm": 0.015763169154524803, + "learning_rate": 2.712041623135986e-07, + "loss": 0.002, + "step": 96850 + }, + { + "grad_norm": 0.02879166789352894, + "learning_rate": 2.694870546065931e-07, + "loss": 0.0028, + "step": 96860 + }, + { + "grad_norm": 0.03848950192332268, + "learning_rate": 2.677753853537246e-07, + "loss": 0.0023, + "step": 96870 + }, + { + "grad_norm": 0.00978169683367014, + "learning_rate": 2.660691547421601e-07, + "loss": 0.0012, + "step": 96880 + }, + { + "grad_norm": 0.029436860233545303, + "learning_rate": 2.6436836295850033e-07, + "loss": 0.0023, + "step": 96890 + }, + { + "grad_norm": 0.040062420070171356, + "learning_rate": 2.6267301018874093e-07, + "loss": 0.0016, + "step": 96900 + }, + { + "grad_norm": 0.031999371945858, + "learning_rate": 2.6098309661828355e-07, + "loss": 0.0025, + "step": 96910 + }, + { + "grad_norm": 0.024298572912812233, + "learning_rate": 2.592986224319305e-07, + "loss": 0.0021, + "step": 96920 + }, + { + "grad_norm": 0.011499284766614437, + "learning_rate": 2.576195878139009e-07, + "loss": 0.0032, + "step": 96930 + }, + { + "grad_norm": 0.02155723236501217, + "learning_rate": 2.5594599294780364e-07, + "loss": 0.0018, + "step": 96940 + }, + { + "grad_norm": 0.009414401836693287, + "learning_rate": 2.542778380166644e-07, + "loss": 0.0031, + "step": 96950 + }, + { + "grad_norm": 0.038715921342372894, + "learning_rate": 2.526151232029095e-07, + "loss": 0.002, + "step": 96960 + }, + { + "grad_norm": 0.03232539817690849, + "learning_rate": 2.5095784868836567e-07, + "loss": 0.0025, + "step": 96970 + }, + { + "grad_norm": 0.01061921939253807, + "learning_rate": 2.493060146542825e-07, + "loss": 0.0014, + "step": 96980 + }, + { + "grad_norm": 0.03037482127547264, + "learning_rate": 2.476596212812876e-07, + "loss": 0.0031, + "step": 96990 + }, + { + "grad_norm": 0.01653354987502098, + "learning_rate": 2.4601866874943703e-07, + "loss": 0.0014, + "step": 97000 + }, + { + "grad_norm": 0.018909813836216927, + "learning_rate": 2.4438315723818163e-07, + "loss": 0.0015, + "step": 97010 + }, + { + "grad_norm": 0.013009687885642052, + "learning_rate": 2.4275308692637833e-07, + "loss": 0.0016, + "step": 97020 + }, + { + "grad_norm": 0.01397312618792057, + "learning_rate": 2.411284579922901e-07, + "loss": 0.0025, + "step": 97030 + }, + { + "grad_norm": 0.008631809614598751, + "learning_rate": 2.395092706135804e-07, + "loss": 0.0033, + "step": 97040 + }, + { + "grad_norm": 0.02838323824107647, + "learning_rate": 2.3789552496731872e-07, + "loss": 0.0025, + "step": 97050 + }, + { + "grad_norm": 0.011577107943594456, + "learning_rate": 2.3628722122999158e-07, + "loss": 0.0024, + "step": 97060 + }, + { + "grad_norm": 0.011532672680914402, + "learning_rate": 2.3468435957747503e-07, + "loss": 0.0028, + "step": 97070 + }, + { + "grad_norm": 0.016192136332392693, + "learning_rate": 2.330869401850566e-07, + "loss": 0.0027, + "step": 97080 + }, + { + "grad_norm": 0.035237640142440796, + "learning_rate": 2.3149496322742437e-07, + "loss": 0.0036, + "step": 97090 + }, + { + "grad_norm": 0.009942259639501572, + "learning_rate": 2.2990842887868346e-07, + "loss": 0.0016, + "step": 97100 + }, + { + "grad_norm": 0.011858155950903893, + "learning_rate": 2.2832733731232292e-07, + "loss": 0.0024, + "step": 97110 + }, + { + "grad_norm": 0.017905831336975098, + "learning_rate": 2.2675168870124886e-07, + "loss": 0.0029, + "step": 97120 + }, + { + "grad_norm": 0.03301175683736801, + "learning_rate": 2.2518148321778455e-07, + "loss": 0.0044, + "step": 97130 + }, + { + "grad_norm": 0.02187752164900303, + "learning_rate": 2.2361672103363706e-07, + "loss": 0.0017, + "step": 97140 + }, + { + "grad_norm": 0.04160372540354729, + "learning_rate": 2.220574023199251e-07, + "loss": 0.0036, + "step": 97150 + }, + { + "grad_norm": 0.012137039564549923, + "learning_rate": 2.2050352724717894e-07, + "loss": 0.0018, + "step": 97160 + }, + { + "grad_norm": 0.014769304543733597, + "learning_rate": 2.1895509598532372e-07, + "loss": 0.0021, + "step": 97170 + }, + { + "grad_norm": 0.00955505482852459, + "learning_rate": 2.1741210870369066e-07, + "loss": 0.0016, + "step": 97180 + }, + { + "grad_norm": 0.021209359169006348, + "learning_rate": 2.1587456557102814e-07, + "loss": 0.0027, + "step": 97190 + }, + { + "grad_norm": 0.023083766922354698, + "learning_rate": 2.143424667554683e-07, + "loss": 0.0014, + "step": 97200 + }, + { + "grad_norm": 0.011135424487292767, + "learning_rate": 2.128158124245716e-07, + "loss": 0.0027, + "step": 97210 + }, + { + "grad_norm": 0.018942750990390778, + "learning_rate": 2.1129460274527668e-07, + "loss": 0.0024, + "step": 97220 + }, + { + "grad_norm": 0.026672612875699997, + "learning_rate": 2.0977883788395046e-07, + "loss": 0.0023, + "step": 97230 + }, + { + "grad_norm": 0.04671815410256386, + "learning_rate": 2.0826851800634928e-07, + "loss": 0.0028, + "step": 97240 + }, + { + "grad_norm": 0.010925612412393093, + "learning_rate": 2.0676364327764653e-07, + "loss": 0.0025, + "step": 97250 + }, + { + "grad_norm": 0.039011452347040176, + "learning_rate": 2.0526421386240502e-07, + "loss": 0.0039, + "step": 97260 + }, + { + "grad_norm": 0.013321821577847004, + "learning_rate": 2.0377022992461025e-07, + "loss": 0.0024, + "step": 97270 + }, + { + "grad_norm": 0.01442747749388218, + "learning_rate": 2.0228169162763156e-07, + "loss": 0.0019, + "step": 97280 + }, + { + "grad_norm": 0.022685369476675987, + "learning_rate": 2.0079859913425536e-07, + "loss": 0.0019, + "step": 97290 + }, + { + "grad_norm": 0.024363648146390915, + "learning_rate": 1.9932095260667417e-07, + "loss": 0.0028, + "step": 97300 + }, + { + "grad_norm": 0.0345594547688961, + "learning_rate": 1.9784875220648092e-07, + "loss": 0.0021, + "step": 97310 + }, + { + "grad_norm": 0.028392981737852097, + "learning_rate": 1.9638199809466907e-07, + "loss": 0.0026, + "step": 97320 + }, + { + "grad_norm": 0.04677630960941315, + "learning_rate": 1.9492069043164364e-07, + "loss": 0.0022, + "step": 97330 + }, + { + "grad_norm": 0.023161573335528374, + "learning_rate": 1.9346482937721567e-07, + "loss": 0.0018, + "step": 97340 + }, + { + "grad_norm": 0.014926702715456486, + "learning_rate": 1.920144150905856e-07, + "loss": 0.0016, + "step": 97350 + }, + { + "grad_norm": 0.02212005853652954, + "learning_rate": 1.9056944773037655e-07, + "loss": 0.0019, + "step": 97360 + }, + { + "grad_norm": 0.013750246725976467, + "learning_rate": 1.8912992745460102e-07, + "loss": 0.0026, + "step": 97370 + }, + { + "grad_norm": 0.03346386179327965, + "learning_rate": 1.876958544206886e-07, + "loss": 0.0018, + "step": 97380 + }, + { + "grad_norm": 0.014786372892558575, + "learning_rate": 1.8626722878546942e-07, + "loss": 0.0025, + "step": 97390 + }, + { + "grad_norm": 0.014020700938999653, + "learning_rate": 1.8484405070516851e-07, + "loss": 0.0018, + "step": 97400 + }, + { + "grad_norm": 0.023198841139674187, + "learning_rate": 1.8342632033542805e-07, + "loss": 0.002, + "step": 97410 + }, + { + "grad_norm": 0.01683477871119976, + "learning_rate": 1.8201403783129066e-07, + "loss": 0.0014, + "step": 97420 + }, + { + "grad_norm": 0.039607200771570206, + "learning_rate": 1.806072033471884e-07, + "loss": 0.0027, + "step": 97430 + }, + { + "grad_norm": 0.00985272228717804, + "learning_rate": 1.79205817036987e-07, + "loss": 0.0021, + "step": 97440 + }, + { + "grad_norm": 0.016940593719482422, + "learning_rate": 1.7780987905393065e-07, + "loss": 0.0022, + "step": 97450 + }, + { + "grad_norm": 0.012388158589601517, + "learning_rate": 1.764193895506805e-07, + "loss": 0.0025, + "step": 97460 + }, + { + "grad_norm": 0.02357078157365322, + "learning_rate": 1.7503434867929824e-07, + "loss": 0.0027, + "step": 97470 + }, + { + "grad_norm": 0.023054730147123337, + "learning_rate": 1.7365475659125165e-07, + "loss": 0.0027, + "step": 97480 + }, + { + "grad_norm": 0.022499412298202515, + "learning_rate": 1.7228061343740332e-07, + "loss": 0.0017, + "step": 97490 + }, + { + "grad_norm": 0.00745259877294302, + "learning_rate": 1.7091191936803308e-07, + "loss": 0.0028, + "step": 97500 + }, + { + "grad_norm": 0.014391738921403885, + "learning_rate": 1.6954867453281563e-07, + "loss": 0.0018, + "step": 97510 + }, + { + "grad_norm": 0.013902370817959309, + "learning_rate": 1.6819087908084285e-07, + "loss": 0.0025, + "step": 97520 + }, + { + "grad_norm": 0.04184846207499504, + "learning_rate": 1.6683853316058485e-07, + "loss": 0.0029, + "step": 97530 + }, + { + "grad_norm": 0.016551073640584946, + "learning_rate": 1.6549163691995106e-07, + "loss": 0.0016, + "step": 97540 + }, + { + "grad_norm": 0.013773885555565357, + "learning_rate": 1.641501905062237e-07, + "loss": 0.0016, + "step": 97550 + }, + { + "grad_norm": 0.010804284363985062, + "learning_rate": 1.628141940661021e-07, + "loss": 0.0027, + "step": 97560 + }, + { + "grad_norm": 0.008572546765208244, + "learning_rate": 1.6148364774569158e-07, + "loss": 0.0027, + "step": 97570 + }, + { + "grad_norm": 0.016624441370368004, + "learning_rate": 1.6015855169050355e-07, + "loss": 0.0025, + "step": 97580 + }, + { + "grad_norm": 0.013223333284258842, + "learning_rate": 1.5883890604543873e-07, + "loss": 0.0029, + "step": 97590 + }, + { + "grad_norm": 0.015262219123542309, + "learning_rate": 1.5752471095482057e-07, + "loss": 0.0028, + "step": 97600 + }, + { + "grad_norm": 0.01898823492228985, + "learning_rate": 1.5621596656235638e-07, + "loss": 0.002, + "step": 97610 + }, + { + "grad_norm": 0.007757059298455715, + "learning_rate": 1.5491267301117608e-07, + "loss": 0.0015, + "step": 97620 + }, + { + "grad_norm": 0.021247586235404015, + "learning_rate": 1.536148304438101e-07, + "loss": 0.0015, + "step": 97630 + }, + { + "grad_norm": 0.028663670644164085, + "learning_rate": 1.5232243900217823e-07, + "loss": 0.0019, + "step": 97640 + }, + { + "grad_norm": 0.014129070565104485, + "learning_rate": 1.51035498827623e-07, + "loss": 0.0018, + "step": 97650 + }, + { + "grad_norm": 0.023976502940058708, + "learning_rate": 1.497540100608763e-07, + "loss": 0.0022, + "step": 97660 + }, + { + "grad_norm": 0.035724375396966934, + "learning_rate": 1.4847797284208153e-07, + "loss": 0.0033, + "step": 97670 + }, + { + "grad_norm": 0.013340819627046585, + "learning_rate": 1.472073873107882e-07, + "loss": 0.0017, + "step": 97680 + }, + { + "grad_norm": 0.01377755869179964, + "learning_rate": 1.459422536059407e-07, + "loss": 0.0016, + "step": 97690 + }, + { + "grad_norm": 0.015574509277939796, + "learning_rate": 1.4468257186589508e-07, + "loss": 0.0022, + "step": 97700 + }, + { + "grad_norm": 0.030702373012900352, + "learning_rate": 1.4342834222840217e-07, + "loss": 0.0025, + "step": 97710 + }, + { + "grad_norm": 0.012414728291332722, + "learning_rate": 1.4217956483063566e-07, + "loss": 0.0022, + "step": 97720 + }, + { + "grad_norm": 0.020460354164242744, + "learning_rate": 1.409362398091474e-07, + "loss": 0.0026, + "step": 97730 + }, + { + "grad_norm": 0.014669341966509819, + "learning_rate": 1.3969836729990638e-07, + "loss": 0.003, + "step": 97740 + }, + { + "grad_norm": 0.012019340880215168, + "learning_rate": 1.384659474382932e-07, + "loss": 0.0014, + "step": 97750 + }, + { + "grad_norm": 0.018670065328478813, + "learning_rate": 1.372389803590779e-07, + "loss": 0.0021, + "step": 97760 + }, + { + "grad_norm": 0.010860530659556389, + "learning_rate": 1.360174661964364e-07, + "loss": 0.0018, + "step": 97770 + }, + { + "grad_norm": 0.01105946023017168, + "learning_rate": 1.3480140508396188e-07, + "loss": 0.0037, + "step": 97780 + }, + { + "grad_norm": 0.016613420099020004, + "learning_rate": 1.335907971546313e-07, + "loss": 0.0026, + "step": 97790 + }, + { + "grad_norm": 0.01703745871782303, + "learning_rate": 1.3238564254083875e-07, + "loss": 0.002, + "step": 97800 + }, + { + "grad_norm": 0.014653868041932583, + "learning_rate": 1.3118594137437323e-07, + "loss": 0.0029, + "step": 97810 + }, + { + "grad_norm": 0.009496936574578285, + "learning_rate": 1.2999169378644094e-07, + "loss": 0.0023, + "step": 97820 + }, + { + "grad_norm": 0.018230758607387543, + "learning_rate": 1.2880289990763738e-07, + "loss": 0.0023, + "step": 97830 + }, + { + "grad_norm": 0.025236696004867554, + "learning_rate": 1.2761955986796968e-07, + "loss": 0.0017, + "step": 97840 + }, + { + "grad_norm": 0.016122110188007355, + "learning_rate": 1.264416737968399e-07, + "loss": 0.0018, + "step": 97850 + }, + { + "grad_norm": 0.03453029319643974, + "learning_rate": 1.2526924182307275e-07, + "loss": 0.002, + "step": 97860 + }, + { + "grad_norm": 0.01363109890371561, + "learning_rate": 1.2410226407487124e-07, + "loss": 0.0027, + "step": 97870 + }, + { + "grad_norm": 0.011456038802862167, + "learning_rate": 1.229407406798555e-07, + "loss": 0.0024, + "step": 97880 + }, + { + "grad_norm": 0.011103127151727676, + "learning_rate": 1.2178467176505726e-07, + "loss": 0.0024, + "step": 97890 + }, + { + "grad_norm": 0.0185333751142025, + "learning_rate": 1.2063405745689205e-07, + "loss": 0.0018, + "step": 97900 + }, + { + "grad_norm": 0.023326801136136055, + "learning_rate": 1.1948889788119812e-07, + "loss": 0.0027, + "step": 97910 + }, + { + "grad_norm": 0.023757247254252434, + "learning_rate": 1.1834919316320303e-07, + "loss": 0.0021, + "step": 97920 + }, + { + "grad_norm": 0.01320461556315422, + "learning_rate": 1.1721494342754048e-07, + "loss": 0.0025, + "step": 97930 + }, + { + "grad_norm": 0.02505657449364662, + "learning_rate": 1.1608614879825563e-07, + "loss": 0.0031, + "step": 97940 + }, + { + "grad_norm": 0.025928186252713203, + "learning_rate": 1.1496280939879422e-07, + "loss": 0.0022, + "step": 97950 + }, + { + "grad_norm": 0.030954308807849884, + "learning_rate": 1.1384492535199687e-07, + "loss": 0.0035, + "step": 97960 + }, + { + "grad_norm": 0.015175669454038143, + "learning_rate": 1.1273249678011578e-07, + "loss": 0.0015, + "step": 97970 + }, + { + "grad_norm": 0.018927140161395073, + "learning_rate": 1.1162552380480362e-07, + "loss": 0.0022, + "step": 97980 + }, + { + "grad_norm": 0.03178715705871582, + "learning_rate": 1.1052400654711359e-07, + "loss": 0.0022, + "step": 97990 + }, + { + "grad_norm": 0.045043494552373886, + "learning_rate": 1.0942794512751597e-07, + "loss": 0.0032, + "step": 98000 + }, + { + "grad_norm": 0.012806781567633152, + "learning_rate": 1.0833733966587045e-07, + "loss": 0.0021, + "step": 98010 + }, + { + "grad_norm": 0.01434660516679287, + "learning_rate": 1.0725219028143718e-07, + "loss": 0.0019, + "step": 98020 + }, + { + "grad_norm": 0.02431434765458107, + "learning_rate": 1.0617249709289345e-07, + "loss": 0.0023, + "step": 98030 + }, + { + "grad_norm": 0.0216518621891737, + "learning_rate": 1.0509826021831703e-07, + "loss": 0.0029, + "step": 98040 + }, + { + "grad_norm": 0.025816448032855988, + "learning_rate": 1.0402947977517508e-07, + "loss": 0.0019, + "step": 98050 + }, + { + "grad_norm": 0.052181970328092575, + "learning_rate": 1.0296615588035185e-07, + "loss": 0.0017, + "step": 98060 + }, + { + "grad_norm": 0.010667874477803707, + "learning_rate": 1.0190828865012659e-07, + "loss": 0.0027, + "step": 98070 + }, + { + "grad_norm": 0.031085258349776268, + "learning_rate": 1.008558782001956e-07, + "loss": 0.0017, + "step": 98080 + }, + { + "grad_norm": 0.023694276809692383, + "learning_rate": 9.980892464563906e-08, + "loss": 0.0021, + "step": 98090 + }, + { + "grad_norm": 0.018886979669332504, + "learning_rate": 9.876742810095985e-08, + "loss": 0.005, + "step": 98100 + }, + { + "grad_norm": 0.018443847075104713, + "learning_rate": 9.77313886800446e-08, + "loss": 0.0015, + "step": 98110 + }, + { + "grad_norm": 0.026740478351712227, + "learning_rate": 9.670080649619717e-08, + "loss": 0.0041, + "step": 98120 + }, + { + "grad_norm": 0.010601402260363102, + "learning_rate": 9.567568166212737e-08, + "loss": 0.0022, + "step": 98130 + }, + { + "grad_norm": 0.017156530171632767, + "learning_rate": 9.465601428992888e-08, + "loss": 0.0018, + "step": 98140 + }, + { + "grad_norm": 0.04575338587164879, + "learning_rate": 9.364180449111803e-08, + "loss": 0.003, + "step": 98150 + }, + { + "grad_norm": 0.012958889827132225, + "learning_rate": 9.263305237661169e-08, + "loss": 0.0029, + "step": 98160 + }, + { + "grad_norm": 0.0055887470953166485, + "learning_rate": 9.162975805671603e-08, + "loss": 0.0022, + "step": 98170 + }, + { + "grad_norm": 0.015043635852634907, + "learning_rate": 9.063192164115442e-08, + "loss": 0.0031, + "step": 98180 + }, + { + "grad_norm": 0.01749999262392521, + "learning_rate": 8.963954323904511e-08, + "loss": 0.002, + "step": 98190 + }, + { + "grad_norm": 0.035876307636499405, + "learning_rate": 8.865262295891796e-08, + "loss": 0.0025, + "step": 98200 + }, + { + "grad_norm": 0.019720301032066345, + "learning_rate": 8.767116090870331e-08, + "loss": 0.0024, + "step": 98210 + }, + { + "grad_norm": 0.0284575242549181, + "learning_rate": 8.669515719572086e-08, + "loss": 0.0016, + "step": 98220 + }, + { + "grad_norm": 0.017857687547802925, + "learning_rate": 8.572461192671855e-08, + "loss": 0.0018, + "step": 98230 + }, + { + "grad_norm": 0.033691976219415665, + "learning_rate": 8.475952520782815e-08, + "loss": 0.0025, + "step": 98240 + }, + { + "grad_norm": 0.02550995908677578, + "learning_rate": 8.379989714458747e-08, + "loss": 0.0034, + "step": 98250 + }, + { + "grad_norm": 0.014965016394853592, + "learning_rate": 8.284572784194034e-08, + "loss": 0.002, + "step": 98260 + }, + { + "grad_norm": 0.02002554014325142, + "learning_rate": 8.189701740424216e-08, + "loss": 0.0013, + "step": 98270 + }, + { + "grad_norm": 0.01978696882724762, + "learning_rate": 8.095376593522663e-08, + "loss": 0.0032, + "step": 98280 + }, + { + "grad_norm": 0.021835807710886, + "learning_rate": 8.001597353806123e-08, + "loss": 0.0018, + "step": 98290 + }, + { + "grad_norm": 0.026928819715976715, + "learning_rate": 7.90836403152917e-08, + "loss": 0.0033, + "step": 98300 + }, + { + "grad_norm": 0.01621377095580101, + "learning_rate": 7.815676636888092e-08, + "loss": 0.0012, + "step": 98310 + }, + { + "grad_norm": 0.019869806244969368, + "learning_rate": 7.723535180019226e-08, + "loss": 0.0014, + "step": 98320 + }, + { + "grad_norm": 0.035163894295692444, + "learning_rate": 7.6319396709984e-08, + "loss": 0.0017, + "step": 98330 + }, + { + "grad_norm": 0.02511213719844818, + "learning_rate": 7.540890119843158e-08, + "loss": 0.0043, + "step": 98340 + }, + { + "grad_norm": 0.015445667318999767, + "learning_rate": 7.450386536509979e-08, + "loss": 0.0023, + "step": 98350 + }, + { + "grad_norm": 0.010322703048586845, + "learning_rate": 7.360428930895947e-08, + "loss": 0.0035, + "step": 98360 + }, + { + "grad_norm": 0.01048996951431036, + "learning_rate": 7.271017312839302e-08, + "loss": 0.0028, + "step": 98370 + }, + { + "grad_norm": 0.015779918059706688, + "learning_rate": 7.182151692118333e-08, + "loss": 0.0031, + "step": 98380 + }, + { + "grad_norm": 0.010847683995962143, + "learning_rate": 7.093832078449713e-08, + "loss": 0.0026, + "step": 98390 + }, + { + "grad_norm": 0.034295812249183655, + "learning_rate": 7.006058481493493e-08, + "loss": 0.0019, + "step": 98400 + }, + { + "grad_norm": 0.054298967123031616, + "learning_rate": 6.918830910847552e-08, + "loss": 0.0047, + "step": 98410 + }, + { + "grad_norm": 0.018000714480876923, + "learning_rate": 6.832149376052033e-08, + "loss": 0.0017, + "step": 98420 + }, + { + "grad_norm": 0.0320492759346962, + "learning_rate": 6.746013886584912e-08, + "loss": 0.0023, + "step": 98430 + }, + { + "grad_norm": 0.029048960655927658, + "learning_rate": 6.660424451866431e-08, + "loss": 0.0015, + "step": 98440 + }, + { + "grad_norm": 0.036208536475896835, + "learning_rate": 6.575381081256327e-08, + "loss": 0.0021, + "step": 98450 + }, + { + "grad_norm": 0.013380326330661774, + "learning_rate": 6.49088378405549e-08, + "loss": 0.0029, + "step": 98460 + }, + { + "grad_norm": 0.022421129047870636, + "learning_rate": 6.406932569503755e-08, + "loss": 0.0029, + "step": 98470 + }, + { + "grad_norm": 0.022376755252480507, + "learning_rate": 6.323527446782662e-08, + "loss": 0.0021, + "step": 98480 + }, + { + "grad_norm": 0.020352311432361603, + "learning_rate": 6.240668425012142e-08, + "loss": 0.003, + "step": 98490 + }, + { + "grad_norm": 0.05672487989068031, + "learning_rate": 6.158355513254388e-08, + "loss": 0.0041, + "step": 98500 + }, + { + "grad_norm": 0.010877455584704876, + "learning_rate": 6.076588720510534e-08, + "loss": 0.0027, + "step": 98510 + }, + { + "grad_norm": 0.014275286346673965, + "learning_rate": 5.995368055722316e-08, + "loss": 0.0019, + "step": 98520 + }, + { + "grad_norm": 0.014037169516086578, + "learning_rate": 5.914693527773185e-08, + "loss": 0.0031, + "step": 98530 + }, + { + "grad_norm": 0.02634992077946663, + "learning_rate": 5.834565145483861e-08, + "loss": 0.002, + "step": 98540 + }, + { + "grad_norm": 0.02535979263484478, + "learning_rate": 5.754982917618446e-08, + "loss": 0.0021, + "step": 98550 + }, + { + "grad_norm": 0.018553579226136208, + "learning_rate": 5.675946852878866e-08, + "loss": 0.0018, + "step": 98560 + }, + { + "grad_norm": 0.016594242304563522, + "learning_rate": 5.59745695990932e-08, + "loss": 0.0026, + "step": 98570 + }, + { + "grad_norm": 0.01409692782908678, + "learning_rate": 5.51951324729294e-08, + "loss": 0.0022, + "step": 98580 + }, + { + "grad_norm": 0.03045891970396042, + "learning_rate": 5.4421157235529093e-08, + "loss": 0.0017, + "step": 98590 + }, + { + "grad_norm": 0.009371398016810417, + "learning_rate": 5.365264397154679e-08, + "loss": 0.0019, + "step": 98600 + }, + { + "grad_norm": 0.02416408248245716, + "learning_rate": 5.288959276501526e-08, + "loss": 0.0017, + "step": 98610 + }, + { + "grad_norm": 0.03576011210680008, + "learning_rate": 5.2132003699378876e-08, + "loss": 0.0034, + "step": 98620 + }, + { + "grad_norm": 0.010911649093031883, + "learning_rate": 5.137987685749912e-08, + "loss": 0.0019, + "step": 98630 + }, + { + "grad_norm": 0.012601627968251705, + "learning_rate": 5.0633212321610227e-08, + "loss": 0.0015, + "step": 98640 + }, + { + "grad_norm": 0.009913766756653786, + "learning_rate": 4.989201017338019e-08, + "loss": 0.0019, + "step": 98650 + }, + { + "grad_norm": 0.019205765798687935, + "learning_rate": 4.9156270493860846e-08, + "loss": 0.0022, + "step": 98660 + }, + { + "grad_norm": 0.01725056767463684, + "learning_rate": 4.842599336351561e-08, + "loss": 0.0031, + "step": 98670 + }, + { + "grad_norm": 0.019649870693683624, + "learning_rate": 4.7701178862197274e-08, + "loss": 0.0016, + "step": 98680 + }, + { + "grad_norm": 0.02679242566227913, + "learning_rate": 4.698182706918131e-08, + "loss": 0.0027, + "step": 98690 + }, + { + "grad_norm": 0.042141590267419815, + "learning_rate": 4.6267938063121466e-08, + "loss": 0.0023, + "step": 98700 + }, + { + "grad_norm": 0.011166684329509735, + "learning_rate": 4.555951192209973e-08, + "loss": 0.0042, + "step": 98710 + }, + { + "grad_norm": 0.009440281428396702, + "learning_rate": 4.485654872358747e-08, + "loss": 0.0025, + "step": 98720 + }, + { + "grad_norm": 0.0074928151443600655, + "learning_rate": 4.415904854445097e-08, + "loss": 0.0038, + "step": 98730 + }, + { + "grad_norm": 0.024868851527571678, + "learning_rate": 4.34670114609792e-08, + "loss": 0.0021, + "step": 98740 + }, + { + "grad_norm": 0.020813066512346268, + "learning_rate": 4.278043754884498e-08, + "loss": 0.0021, + "step": 98750 + }, + { + "grad_norm": 0.017503339797258377, + "learning_rate": 4.209932688312712e-08, + "loss": 0.0025, + "step": 98760 + }, + { + "grad_norm": 0.01170830987393856, + "learning_rate": 4.14236795383216e-08, + "loss": 0.0014, + "step": 98770 + }, + { + "grad_norm": 0.013144508935511112, + "learning_rate": 4.075349558830821e-08, + "loss": 0.0021, + "step": 98780 + }, + { + "grad_norm": 0.01171713788062334, + "learning_rate": 4.008877510638387e-08, + "loss": 0.0017, + "step": 98790 + }, + { + "grad_norm": 0.008830811828374863, + "learning_rate": 3.942951816523488e-08, + "loss": 0.0014, + "step": 98800 + }, + { + "grad_norm": 0.014825209975242615, + "learning_rate": 3.8775724836959125e-08, + "loss": 0.0033, + "step": 98810 + }, + { + "grad_norm": 0.014196877367794514, + "learning_rate": 3.812739519305497e-08, + "loss": 0.0018, + "step": 98820 + }, + { + "grad_norm": 0.050007760524749756, + "learning_rate": 3.748452930442126e-08, + "loss": 0.0021, + "step": 98830 + }, + { + "grad_norm": 0.026592759415507317, + "learning_rate": 3.6847127241362855e-08, + "loss": 0.0022, + "step": 98840 + }, + { + "grad_norm": 0.04446227476000786, + "learning_rate": 3.621518907358512e-08, + "loss": 0.0019, + "step": 98850 + }, + { + "grad_norm": 0.041188888251781464, + "learning_rate": 3.558871487019388e-08, + "loss": 0.0017, + "step": 98860 + }, + { + "grad_norm": 0.03780186176300049, + "learning_rate": 3.4967704699701007e-08, + "loss": 0.0022, + "step": 98870 + }, + { + "grad_norm": 0.02916085347533226, + "learning_rate": 3.435215863001884e-08, + "loss": 0.0019, + "step": 98880 + }, + { + "grad_norm": 0.027421925216913223, + "learning_rate": 3.374207672846019e-08, + "loss": 0.0034, + "step": 98890 + }, + { + "grad_norm": 0.020774826407432556, + "learning_rate": 3.313745906174392e-08, + "loss": 0.0022, + "step": 98900 + }, + { + "grad_norm": 0.04217850789427757, + "learning_rate": 3.253830569599492e-08, + "loss": 0.0023, + "step": 98910 + }, + { + "grad_norm": 0.01291655283421278, + "learning_rate": 3.1944616696727436e-08, + "loss": 0.0051, + "step": 98920 + }, + { + "grad_norm": 0.01636394113302231, + "learning_rate": 3.135639212887287e-08, + "loss": 0.0017, + "step": 98930 + }, + { + "grad_norm": 0.009956923313438892, + "learning_rate": 3.077363205675754e-08, + "loss": 0.0018, + "step": 98940 + }, + { + "grad_norm": 0.023747622966766357, + "learning_rate": 3.0196336544113804e-08, + "loss": 0.0035, + "step": 98950 + }, + { + "grad_norm": 0.015296036377549171, + "learning_rate": 2.9624505654063383e-08, + "loss": 0.0023, + "step": 98960 + }, + { + "grad_norm": 0.022058825939893723, + "learning_rate": 2.90581394491507e-08, + "loss": 0.0016, + "step": 98970 + }, + { + "grad_norm": 0.012159302830696106, + "learning_rate": 2.8497237991309545e-08, + "loss": 0.0026, + "step": 98980 + }, + { + "grad_norm": 0.013720466755330563, + "learning_rate": 2.7941801341879735e-08, + "loss": 0.0024, + "step": 98990 + }, + { + "grad_norm": 0.03724495694041252, + "learning_rate": 2.7391829561601578e-08, + "loss": 0.0026, + "step": 99000 + }, + { + "grad_norm": 0.03218362480401993, + "learning_rate": 2.6847322710621403e-08, + "loss": 0.0025, + "step": 99010 + }, + { + "grad_norm": 0.00876943115144968, + "learning_rate": 2.6308280848486022e-08, + "loss": 0.0026, + "step": 99020 + }, + { + "grad_norm": 0.01903924159705639, + "learning_rate": 2.5774704034137177e-08, + "loss": 0.0024, + "step": 99030 + }, + { + "grad_norm": 0.011983918957412243, + "learning_rate": 2.524659232593929e-08, + "loss": 0.0028, + "step": 99040 + }, + { + "grad_norm": 0.01405230164527893, + "learning_rate": 2.472394578163506e-08, + "loss": 0.0025, + "step": 99050 + }, + { + "grad_norm": 0.018179018050432205, + "learning_rate": 2.4206764458378772e-08, + "loss": 0.0014, + "step": 99060 + }, + { + "grad_norm": 0.007551696617156267, + "learning_rate": 2.3695048412736285e-08, + "loss": 0.0032, + "step": 99070 + }, + { + "grad_norm": 0.014426070265471935, + "learning_rate": 2.3188797700668395e-08, + "loss": 0.0032, + "step": 99080 + }, + { + "grad_norm": 0.012568224221467972, + "learning_rate": 2.268801237753082e-08, + "loss": 0.0029, + "step": 99090 + }, + { + "grad_norm": 0.02342373877763748, + "learning_rate": 2.2192692498090862e-08, + "loss": 0.0027, + "step": 99100 + }, + { + "grad_norm": 0.03252587467432022, + "learning_rate": 2.170283811652185e-08, + "loss": 0.0022, + "step": 99110 + }, + { + "grad_norm": 0.04567780718207359, + "learning_rate": 2.12184492863865e-08, + "loss": 0.0026, + "step": 99120 + }, + { + "grad_norm": 0.027941366657614708, + "learning_rate": 2.073952606066465e-08, + "loss": 0.002, + "step": 99130 + }, + { + "grad_norm": 0.015686433762311935, + "learning_rate": 2.026606849171997e-08, + "loss": 0.0022, + "step": 99140 + }, + { + "grad_norm": 0.014631997793912888, + "learning_rate": 1.9798076631333262e-08, + "loss": 0.0027, + "step": 99150 + }, + { + "grad_norm": 0.009745026007294655, + "learning_rate": 1.933555053069136e-08, + "loss": 0.0022, + "step": 99160 + }, + { + "grad_norm": 0.010025698691606522, + "learning_rate": 1.887849024036492e-08, + "loss": 0.0034, + "step": 99170 + }, + { + "grad_norm": 0.016980309039354324, + "learning_rate": 1.8426895810341736e-08, + "loss": 0.002, + "step": 99180 + }, + { + "grad_norm": 0.037066973745822906, + "learning_rate": 1.798076729000453e-08, + "loss": 0.0019, + "step": 99190 + }, + { + "grad_norm": 0.014872337691485882, + "learning_rate": 1.75401047281476e-08, + "loss": 0.003, + "step": 99200 + }, + { + "grad_norm": 0.02882576175034046, + "learning_rate": 1.7104908172954626e-08, + "loss": 0.0025, + "step": 99210 + }, + { + "grad_norm": 0.013588226400315762, + "learning_rate": 1.6675177672020868e-08, + "loss": 0.0017, + "step": 99220 + }, + { + "grad_norm": 0.02646513655781746, + "learning_rate": 1.6250913272342073e-08, + "loss": 0.0028, + "step": 99230 + }, + { + "grad_norm": 0.042842522263526917, + "learning_rate": 1.5832115020314453e-08, + "loss": 0.0022, + "step": 99240 + }, + { + "grad_norm": 0.013051135465502739, + "learning_rate": 1.5418782961734712e-08, + "loss": 0.0021, + "step": 99250 + }, + { + "grad_norm": 0.012267791666090488, + "learning_rate": 1.501091714181113e-08, + "loss": 0.0032, + "step": 99260 + }, + { + "grad_norm": 0.013776509091258049, + "learning_rate": 1.460851760513582e-08, + "loss": 0.0021, + "step": 99270 + }, + { + "grad_norm": 0.0075170123018324375, + "learning_rate": 1.4211584395723566e-08, + "loss": 0.0028, + "step": 99280 + }, + { + "grad_norm": 0.025714857503771782, + "learning_rate": 1.382011755697854e-08, + "loss": 0.003, + "step": 99290 + }, + { + "grad_norm": 0.017184050753712654, + "learning_rate": 1.3434117131716494e-08, + "loss": 0.0028, + "step": 99300 + }, + { + "grad_norm": 0.008985456079244614, + "learning_rate": 1.3053583162142557e-08, + "loss": 0.0035, + "step": 99310 + }, + { + "grad_norm": 0.01839558221399784, + "learning_rate": 1.2678515689873438e-08, + "loss": 0.0021, + "step": 99320 + }, + { + "grad_norm": 0.01339408103376627, + "learning_rate": 1.2308914755931877e-08, + "loss": 0.0016, + "step": 99330 + }, + { + "grad_norm": 0.011384516023099422, + "learning_rate": 1.1944780400729993e-08, + "loss": 0.0019, + "step": 99340 + }, + { + "grad_norm": 0.014742787927389145, + "learning_rate": 1.1586112664085935e-08, + "loss": 0.0018, + "step": 99350 + }, + { + "grad_norm": 0.022810161113739014, + "learning_rate": 1.1232911585234984e-08, + "loss": 0.0028, + "step": 99360 + }, + { + "grad_norm": 0.029011044651269913, + "learning_rate": 1.0885177202790697e-08, + "loss": 0.0027, + "step": 99370 + }, + { + "grad_norm": 0.011623158119618893, + "learning_rate": 1.0542909554789315e-08, + "loss": 0.0025, + "step": 99380 + }, + { + "grad_norm": 0.014148887246847153, + "learning_rate": 1.0206108678656456e-08, + "loss": 0.0022, + "step": 99390 + }, + { + "grad_norm": 0.015329134650528431, + "learning_rate": 9.874774611223769e-09, + "loss": 0.0017, + "step": 99400 + }, + { + "grad_norm": 0.012992587871849537, + "learning_rate": 9.548907388728933e-09, + "loss": 0.0035, + "step": 99410 + }, + { + "grad_norm": 0.028567232191562653, + "learning_rate": 9.228507046804557e-09, + "loss": 0.0024, + "step": 99420 + }, + { + "grad_norm": 0.0439557209610939, + "learning_rate": 8.91357362048928e-09, + "loss": 0.0028, + "step": 99430 + }, + { + "grad_norm": 0.021205104887485504, + "learning_rate": 8.604107144227769e-09, + "loss": 0.0014, + "step": 99440 + }, + { + "grad_norm": 0.009340949356555939, + "learning_rate": 8.300107651859623e-09, + "loss": 0.0026, + "step": 99450 + }, + { + "grad_norm": 0.00778231443837285, + "learning_rate": 8.001575176630472e-09, + "loss": 0.0023, + "step": 99460 + }, + { + "grad_norm": 0.025941871106624603, + "learning_rate": 7.708509751186422e-09, + "loss": 0.002, + "step": 99470 + }, + { + "grad_norm": 0.025843501091003418, + "learning_rate": 7.420911407579611e-09, + "loss": 0.002, + "step": 99480 + }, + { + "grad_norm": 0.018077561631798744, + "learning_rate": 7.13878017725711e-09, + "loss": 0.0015, + "step": 99490 + }, + { + "grad_norm": 0.02571904845535755, + "learning_rate": 6.8621160910720125e-09, + "loss": 0.0017, + "step": 99500 + }, + { + "grad_norm": 0.010611212812364101, + "learning_rate": 6.59091917928345e-09, + "loss": 0.003, + "step": 99510 + }, + { + "grad_norm": 0.027114199474453926, + "learning_rate": 6.32518947155103e-09, + "loss": 0.0026, + "step": 99520 + }, + { + "grad_norm": 0.011905906721949577, + "learning_rate": 6.064926996929288e-09, + "loss": 0.0021, + "step": 99530 + }, + { + "grad_norm": 0.009000486694276333, + "learning_rate": 5.810131783884343e-09, + "loss": 0.0015, + "step": 99540 + }, + { + "grad_norm": 0.014320729300379753, + "learning_rate": 5.5608038602772415e-09, + "loss": 0.0027, + "step": 99550 + }, + { + "grad_norm": 0.03546932712197304, + "learning_rate": 5.316943253375062e-09, + "loss": 0.0027, + "step": 99560 + }, + { + "grad_norm": 0.03343016281723976, + "learning_rate": 5.07854998984536e-09, + "loss": 0.0023, + "step": 99570 + }, + { + "grad_norm": 0.015170261263847351, + "learning_rate": 4.845624095756174e-09, + "loss": 0.0025, + "step": 99580 + }, + { + "grad_norm": 0.029782574623823166, + "learning_rate": 4.618165596587121e-09, + "loss": 0.0027, + "step": 99590 + }, + { + "grad_norm": 0.0069761499762535095, + "learning_rate": 4.396174517207197e-09, + "loss": 0.0022, + "step": 99600 + }, + { + "grad_norm": 0.008634127676486969, + "learning_rate": 4.179650881896979e-09, + "loss": 0.0019, + "step": 99610 + }, + { + "grad_norm": 0.0299113430082798, + "learning_rate": 3.968594714331975e-09, + "loss": 0.0017, + "step": 99620 + }, + { + "grad_norm": 0.03337327763438225, + "learning_rate": 3.763006037593719e-09, + "loss": 0.0022, + "step": 99630 + }, + { + "grad_norm": 0.015452370047569275, + "learning_rate": 3.5628848741586786e-09, + "loss": 0.0027, + "step": 99640 + }, + { + "grad_norm": 0.026659226045012474, + "learning_rate": 3.368231245926001e-09, + "loss": 0.0041, + "step": 99650 + }, + { + "grad_norm": 0.028154175728559494, + "learning_rate": 3.179045174167561e-09, + "loss": 0.0019, + "step": 99660 + }, + { + "grad_norm": 0.008106652647256851, + "learning_rate": 2.9953266795834657e-09, + "loss": 0.0019, + "step": 99670 + }, + { + "grad_norm": 0.011467207223176956, + "learning_rate": 2.817075782263201e-09, + "loss": 0.0019, + "step": 99680 + }, + { + "grad_norm": 0.026825422421097755, + "learning_rate": 2.6442925016967325e-09, + "loss": 0.0019, + "step": 99690 + }, + { + "grad_norm": 0.018892455846071243, + "learning_rate": 2.4769768567800555e-09, + "loss": 0.0028, + "step": 99700 + }, + { + "grad_norm": 0.05030769482254982, + "learning_rate": 2.315128865809646e-09, + "loss": 0.0026, + "step": 99710 + }, + { + "grad_norm": 0.015323612838983536, + "learning_rate": 2.1587485464880097e-09, + "loss": 0.003, + "step": 99720 + }, + { + "grad_norm": 0.016265323385596275, + "learning_rate": 2.0078359159125813e-09, + "loss": 0.0021, + "step": 99730 + }, + { + "grad_norm": 0.022670771926641464, + "learning_rate": 1.8623909905923776e-09, + "loss": 0.0018, + "step": 99740 + }, + { + "grad_norm": 0.019658870995044708, + "learning_rate": 1.7224137864257916e-09, + "loss": 0.0013, + "step": 99750 + }, + { + "grad_norm": 0.031437166035175323, + "learning_rate": 1.5879043187283505e-09, + "loss": 0.0057, + "step": 99760 + }, + { + "grad_norm": 0.011365072801709175, + "learning_rate": 1.458862602204958e-09, + "loss": 0.0017, + "step": 99770 + }, + { + "grad_norm": 0.011600139550864697, + "learning_rate": 1.3352886509720996e-09, + "loss": 0.0015, + "step": 99780 + }, + { + "grad_norm": 0.02101188525557518, + "learning_rate": 1.2171824785356389e-09, + "loss": 0.0019, + "step": 99790 + }, + { + "grad_norm": 0.06026791036128998, + "learning_rate": 1.1045440978185718e-09, + "loss": 0.0036, + "step": 99800 + }, + { + "grad_norm": 0.019365085288882256, + "learning_rate": 9.973735211332714e-10, + "loss": 0.002, + "step": 99810 + }, + { + "grad_norm": 0.01802242174744606, + "learning_rate": 8.956707602036929e-10, + "loss": 0.0028, + "step": 99820 + }, + { + "grad_norm": 0.07098503410816193, + "learning_rate": 7.994358261542712e-10, + "loss": 0.0031, + "step": 99830 + }, + { + "grad_norm": 0.037890538573265076, + "learning_rate": 7.086687295043692e-10, + "loss": 0.0016, + "step": 99840 + }, + { + "grad_norm": 0.03027118742465973, + "learning_rate": 6.23369480179381e-10, + "loss": 0.0015, + "step": 99850 + }, + { + "grad_norm": 0.04252694174647331, + "learning_rate": 5.43538087510731e-10, + "loss": 0.0022, + "step": 99860 + }, + { + "grad_norm": 0.01779131591320038, + "learning_rate": 4.691745602303233e-10, + "loss": 0.0027, + "step": 99870 + }, + { + "grad_norm": 0.03824256360530853, + "learning_rate": 4.0027890646499034e-10, + "loss": 0.0028, + "step": 99880 + }, + { + "grad_norm": 0.01051343698054552, + "learning_rate": 3.368511337531466e-10, + "loss": 0.0019, + "step": 99890 + }, + { + "grad_norm": 0.010008512064814568, + "learning_rate": 2.788912490281348e-10, + "loss": 0.0017, + "step": 99900 + }, + { + "grad_norm": 0.03866519033908844, + "learning_rate": 2.2639925863487954e-10, + "loss": 0.0019, + "step": 99910 + }, + { + "grad_norm": 0.05177580192685127, + "learning_rate": 1.7937516830213163e-10, + "loss": 0.0021, + "step": 99920 + }, + { + "grad_norm": 0.012847380712628365, + "learning_rate": 1.3781898318687702e-10, + "loss": 0.0025, + "step": 99930 + }, + { + "grad_norm": 0.009968340396881104, + "learning_rate": 1.0173070781882566e-10, + "loss": 0.0021, + "step": 99940 + }, + { + "grad_norm": 0.024153931066393852, + "learning_rate": 7.111034615592261e-11, + "loss": 0.0022, + "step": 99950 + }, + { + "grad_norm": 0.010382208041846752, + "learning_rate": 4.595790153993918e-11, + "loss": 0.0021, + "step": 99960 + }, + { + "grad_norm": 0.016667352989315987, + "learning_rate": 2.6273376729779585e-11, + "loss": 0.0027, + "step": 99970 + }, + { + "grad_norm": 0.04764968901872635, + "learning_rate": 1.2056773868174276e-11, + "loss": 0.0024, + "step": 99980 + }, + { + "grad_norm": 0.02393447980284691, + "learning_rate": 3.3080945149865926e-12, + "loss": 0.0025, + "step": 99990 + }, + { + "grad_norm": 0.021094350144267082, + "learning_rate": 2.7339625008337977e-14, + "loss": 0.0023, + "step": 100000 + } + ], + "logging_steps": 10, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9223372036854775807, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 80, + "trial_name": null, + "trial_params": null +}