diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,21034 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 67.26457399103138, + "eval_steps": 500, + "global_step": 30000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02242152466367713, + "grad_norm": 2.8593437671661377, + "learning_rate": 3.0000000000000004e-07, + "loss": 1.1764, + "step": 10 + }, + { + "epoch": 0.04484304932735426, + "grad_norm": 2.103414535522461, + "learning_rate": 6.333333333333333e-07, + "loss": 1.0972, + "step": 20 + }, + { + "epoch": 0.06726457399103139, + "grad_norm": 1.930586814880371, + "learning_rate": 9.666666666666668e-07, + "loss": 1.1024, + "step": 30 + }, + { + "epoch": 0.08968609865470852, + "grad_norm": 1.4859470129013062, + "learning_rate": 1.3e-06, + "loss": 1.1084, + "step": 40 + }, + { + "epoch": 0.11210762331838565, + "grad_norm": 1.544628381729126, + "learning_rate": 1.6333333333333333e-06, + "loss": 1.0872, + "step": 50 + }, + { + "epoch": 0.13452914798206278, + "grad_norm": 1.40129554271698, + "learning_rate": 1.9666666666666668e-06, + "loss": 1.0849, + "step": 60 + }, + { + "epoch": 0.15695067264573992, + "grad_norm": 1.3877208232879639, + "learning_rate": 2.3e-06, + "loss": 1.0717, + "step": 70 + }, + { + "epoch": 0.17937219730941703, + "grad_norm": 1.3080300092697144, + "learning_rate": 2.6333333333333337e-06, + "loss": 1.0498, + "step": 80 + }, + { + "epoch": 0.20179372197309417, + "grad_norm": 1.0701993703842163, + "learning_rate": 2.966666666666667e-06, + "loss": 1.0515, + "step": 90 + }, + { + "epoch": 0.2242152466367713, + "grad_norm": 1.11537766456604, + "learning_rate": 3.3e-06, + "loss": 1.0447, + "step": 100 + }, + { + "epoch": 0.24663677130044842, + "grad_norm": 1.848969578742981, + "learning_rate": 3.633333333333334e-06, + "loss": 1.0479, + "step": 110 + }, + { + "epoch": 0.26905829596412556, + "grad_norm": 1.3609281778335571, + "learning_rate": 3.966666666666667e-06, + "loss": 1.0183, + "step": 120 + }, + { + "epoch": 0.2914798206278027, + "grad_norm": 1.346229910850525, + "learning_rate": 4.2999999999999995e-06, + "loss": 1.0091, + "step": 130 + }, + { + "epoch": 0.31390134529147984, + "grad_norm": 1.5698864459991455, + "learning_rate": 4.633333333333334e-06, + "loss": 1.002, + "step": 140 + }, + { + "epoch": 0.336322869955157, + "grad_norm": 2.146062135696411, + "learning_rate": 4.966666666666667e-06, + "loss": 1.0162, + "step": 150 + }, + { + "epoch": 0.35874439461883406, + "grad_norm": 1.8605276346206665, + "learning_rate": 5.3e-06, + "loss": 1.007, + "step": 160 + }, + { + "epoch": 0.3811659192825112, + "grad_norm": 2.0874569416046143, + "learning_rate": 5.633333333333333e-06, + "loss": 0.9973, + "step": 170 + }, + { + "epoch": 0.40358744394618834, + "grad_norm": 2.123976469039917, + "learning_rate": 5.9666666666666666e-06, + "loss": 0.9859, + "step": 180 + }, + { + "epoch": 0.4260089686098655, + "grad_norm": 2.0814409255981445, + "learning_rate": 6.300000000000001e-06, + "loss": 0.9805, + "step": 190 + }, + { + "epoch": 0.4484304932735426, + "grad_norm": 2.8240673542022705, + "learning_rate": 6.633333333333333e-06, + "loss": 0.9803, + "step": 200 + }, + { + "epoch": 0.47085201793721976, + "grad_norm": 3.9738168716430664, + "learning_rate": 6.966666666666667e-06, + "loss": 0.9686, + "step": 210 + }, + { + "epoch": 0.49327354260089684, + "grad_norm": 1.5403579473495483, + "learning_rate": 7.2999999999999996e-06, + "loss": 0.9652, + "step": 220 + }, + { + "epoch": 0.515695067264574, + "grad_norm": 3.4330947399139404, + "learning_rate": 7.633333333333334e-06, + "loss": 0.9645, + "step": 230 + }, + { + "epoch": 0.5381165919282511, + "grad_norm": 4.259314060211182, + "learning_rate": 7.966666666666666e-06, + "loss": 0.9513, + "step": 240 + }, + { + "epoch": 0.5605381165919282, + "grad_norm": 4.664320945739746, + "learning_rate": 8.3e-06, + "loss": 0.954, + "step": 250 + }, + { + "epoch": 0.5829596412556054, + "grad_norm": 2.8254787921905518, + "learning_rate": 8.633333333333334e-06, + "loss": 0.9597, + "step": 260 + }, + { + "epoch": 0.6053811659192825, + "grad_norm": 3.9277849197387695, + "learning_rate": 8.966666666666668e-06, + "loss": 0.9324, + "step": 270 + }, + { + "epoch": 0.6278026905829597, + "grad_norm": 3.1486003398895264, + "learning_rate": 9.3e-06, + "loss": 0.9311, + "step": 280 + }, + { + "epoch": 0.6502242152466368, + "grad_norm": 3.5154097080230713, + "learning_rate": 9.633333333333335e-06, + "loss": 0.8379, + "step": 290 + }, + { + "epoch": 0.672645739910314, + "grad_norm": 5.0450825691223145, + "learning_rate": 9.966666666666667e-06, + "loss": 0.7915, + "step": 300 + }, + { + "epoch": 0.695067264573991, + "grad_norm": 6.500288009643555, + "learning_rate": 1.03e-05, + "loss": 0.7542, + "step": 310 + }, + { + "epoch": 0.7174887892376681, + "grad_norm": 5.467141628265381, + "learning_rate": 1.0633333333333334e-05, + "loss": 0.6255, + "step": 320 + }, + { + "epoch": 0.7399103139013453, + "grad_norm": 9.860288619995117, + "learning_rate": 1.0966666666666666e-05, + "loss": 0.5827, + "step": 330 + }, + { + "epoch": 0.7623318385650224, + "grad_norm": 5.387350559234619, + "learning_rate": 1.13e-05, + "loss": 0.4949, + "step": 340 + }, + { + "epoch": 0.7847533632286996, + "grad_norm": 4.816288471221924, + "learning_rate": 1.1633333333333334e-05, + "loss": 0.4853, + "step": 350 + }, + { + "epoch": 0.8071748878923767, + "grad_norm": 5.154407978057861, + "learning_rate": 1.1966666666666668e-05, + "loss": 0.4426, + "step": 360 + }, + { + "epoch": 0.8295964125560538, + "grad_norm": 3.595567226409912, + "learning_rate": 1.23e-05, + "loss": 0.3526, + "step": 370 + }, + { + "epoch": 0.852017937219731, + "grad_norm": 5.783238410949707, + "learning_rate": 1.2633333333333333e-05, + "loss": 0.3561, + "step": 380 + }, + { + "epoch": 0.874439461883408, + "grad_norm": 7.453920841217041, + "learning_rate": 1.2966666666666669e-05, + "loss": 0.3475, + "step": 390 + }, + { + "epoch": 0.8968609865470852, + "grad_norm": 7.180427551269531, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.2934, + "step": 400 + }, + { + "epoch": 0.9192825112107623, + "grad_norm": 6.282733917236328, + "learning_rate": 1.3633333333333334e-05, + "loss": 0.312, + "step": 410 + }, + { + "epoch": 0.9417040358744395, + "grad_norm": 4.615654468536377, + "learning_rate": 1.3966666666666666e-05, + "loss": 0.2717, + "step": 420 + }, + { + "epoch": 0.9641255605381166, + "grad_norm": 4.9121785163879395, + "learning_rate": 1.43e-05, + "loss": 0.2484, + "step": 430 + }, + { + "epoch": 0.9865470852017937, + "grad_norm": 3.356607675552368, + "learning_rate": 1.4633333333333334e-05, + "loss": 0.2486, + "step": 440 + }, + { + "epoch": 1.0089686098654709, + "grad_norm": 6.950911998748779, + "learning_rate": 1.4966666666666668e-05, + "loss": 0.2357, + "step": 450 + }, + { + "epoch": 1.031390134529148, + "grad_norm": 4.933413982391357, + "learning_rate": 1.53e-05, + "loss": 0.1885, + "step": 460 + }, + { + "epoch": 1.053811659192825, + "grad_norm": 3.3198843002319336, + "learning_rate": 1.563333333333333e-05, + "loss": 0.2062, + "step": 470 + }, + { + "epoch": 1.0762331838565022, + "grad_norm": 5.206143856048584, + "learning_rate": 1.5966666666666667e-05, + "loss": 0.1794, + "step": 480 + }, + { + "epoch": 1.0986547085201794, + "grad_norm": 3.2667694091796875, + "learning_rate": 1.63e-05, + "loss": 0.1751, + "step": 490 + }, + { + "epoch": 1.1210762331838564, + "grad_norm": 6.955554485321045, + "learning_rate": 1.6633333333333336e-05, + "loss": 0.151, + "step": 500 + }, + { + "epoch": 1.1434977578475336, + "grad_norm": 4.2592668533325195, + "learning_rate": 1.6966666666666668e-05, + "loss": 0.1907, + "step": 510 + }, + { + "epoch": 1.1659192825112108, + "grad_norm": 3.028491497039795, + "learning_rate": 1.73e-05, + "loss": 0.1214, + "step": 520 + }, + { + "epoch": 1.188340807174888, + "grad_norm": 3.556159257888794, + "learning_rate": 1.7633333333333336e-05, + "loss": 0.1358, + "step": 530 + }, + { + "epoch": 1.210762331838565, + "grad_norm": 3.3941338062286377, + "learning_rate": 1.796666666666667e-05, + "loss": 0.1594, + "step": 540 + }, + { + "epoch": 1.2331838565022422, + "grad_norm": 3.9592978954315186, + "learning_rate": 1.83e-05, + "loss": 0.1116, + "step": 550 + }, + { + "epoch": 1.2556053811659194, + "grad_norm": 1.4032593965530396, + "learning_rate": 1.8633333333333333e-05, + "loss": 0.1282, + "step": 560 + }, + { + "epoch": 1.2780269058295963, + "grad_norm": 2.9294779300689697, + "learning_rate": 1.896666666666667e-05, + "loss": 0.1416, + "step": 570 + }, + { + "epoch": 1.3004484304932735, + "grad_norm": 1.9981155395507812, + "learning_rate": 1.93e-05, + "loss": 0.1191, + "step": 580 + }, + { + "epoch": 1.3228699551569507, + "grad_norm": 2.146605968475342, + "learning_rate": 1.9633333333333334e-05, + "loss": 0.0802, + "step": 590 + }, + { + "epoch": 1.3452914798206277, + "grad_norm": 3.5656702518463135, + "learning_rate": 1.9966666666666666e-05, + "loss": 0.1043, + "step": 600 + }, + { + "epoch": 1.3677130044843049, + "grad_norm": 2.8747925758361816, + "learning_rate": 2.0300000000000002e-05, + "loss": 0.1018, + "step": 610 + }, + { + "epoch": 1.390134529147982, + "grad_norm": 3.261945962905884, + "learning_rate": 2.0633333333333335e-05, + "loss": 0.0934, + "step": 620 + }, + { + "epoch": 1.4125560538116593, + "grad_norm": 2.549551486968994, + "learning_rate": 2.0966666666666667e-05, + "loss": 0.0733, + "step": 630 + }, + { + "epoch": 1.4349775784753362, + "grad_norm": 1.798831820487976, + "learning_rate": 2.13e-05, + "loss": 0.0577, + "step": 640 + }, + { + "epoch": 1.4573991031390134, + "grad_norm": 1.0434660911560059, + "learning_rate": 2.1633333333333332e-05, + "loss": 0.0527, + "step": 650 + }, + { + "epoch": 1.4798206278026906, + "grad_norm": 2.0140838623046875, + "learning_rate": 2.1966666666666668e-05, + "loss": 0.0425, + "step": 660 + }, + { + "epoch": 1.5022421524663678, + "grad_norm": 3.355604648590088, + "learning_rate": 2.23e-05, + "loss": 0.0134, + "step": 670 + }, + { + "epoch": 1.5246636771300448, + "grad_norm": 1.723442792892456, + "learning_rate": 2.2633333333333336e-05, + "loss": 0.0378, + "step": 680 + }, + { + "epoch": 1.547085201793722, + "grad_norm": 2.05584979057312, + "learning_rate": 2.2966666666666668e-05, + "loss": -0.0046, + "step": 690 + }, + { + "epoch": 1.5695067264573992, + "grad_norm": 2.653679132461548, + "learning_rate": 2.3300000000000004e-05, + "loss": 0.0086, + "step": 700 + }, + { + "epoch": 1.5919282511210762, + "grad_norm": 2.301997661590576, + "learning_rate": 2.3633333333333336e-05, + "loss": 0.0248, + "step": 710 + }, + { + "epoch": 1.6143497757847534, + "grad_norm": 1.5578806400299072, + "learning_rate": 2.396666666666667e-05, + "loss": -0.0083, + "step": 720 + }, + { + "epoch": 1.6367713004484306, + "grad_norm": 2.1358554363250732, + "learning_rate": 2.43e-05, + "loss": 0.0191, + "step": 730 + }, + { + "epoch": 1.6591928251121075, + "grad_norm": 1.6231119632720947, + "learning_rate": 2.4633333333333334e-05, + "loss": -0.0065, + "step": 740 + }, + { + "epoch": 1.6816143497757847, + "grad_norm": 2.325103759765625, + "learning_rate": 2.496666666666667e-05, + "loss": -0.003, + "step": 750 + }, + { + "epoch": 1.704035874439462, + "grad_norm": 3.0770046710968018, + "learning_rate": 2.5300000000000002e-05, + "loss": -0.0019, + "step": 760 + }, + { + "epoch": 1.726457399103139, + "grad_norm": 2.482830047607422, + "learning_rate": 2.5633333333333338e-05, + "loss": -0.0017, + "step": 770 + }, + { + "epoch": 1.7488789237668163, + "grad_norm": 2.603203058242798, + "learning_rate": 2.5966666666666667e-05, + "loss": -0.0037, + "step": 780 + }, + { + "epoch": 1.7713004484304933, + "grad_norm": 2.433870553970337, + "learning_rate": 2.6300000000000002e-05, + "loss": -0.019, + "step": 790 + }, + { + "epoch": 1.7937219730941703, + "grad_norm": 1.810959815979004, + "learning_rate": 2.663333333333333e-05, + "loss": -0.0083, + "step": 800 + }, + { + "epoch": 1.8161434977578477, + "grad_norm": 2.2846381664276123, + "learning_rate": 2.6966666666666667e-05, + "loss": -0.0371, + "step": 810 + }, + { + "epoch": 1.8385650224215246, + "grad_norm": 1.4882988929748535, + "learning_rate": 2.7300000000000003e-05, + "loss": -0.0035, + "step": 820 + }, + { + "epoch": 1.8609865470852018, + "grad_norm": 2.27659273147583, + "learning_rate": 2.7633333333333332e-05, + "loss": -0.0064, + "step": 830 + }, + { + "epoch": 1.883408071748879, + "grad_norm": 1.9097706079483032, + "learning_rate": 2.7966666666666668e-05, + "loss": -0.034, + "step": 840 + }, + { + "epoch": 1.905829596412556, + "grad_norm": 1.5221699476242065, + "learning_rate": 2.83e-05, + "loss": -0.0329, + "step": 850 + }, + { + "epoch": 1.9282511210762332, + "grad_norm": 1.5773190259933472, + "learning_rate": 2.8633333333333336e-05, + "loss": -0.0267, + "step": 860 + }, + { + "epoch": 1.9506726457399104, + "grad_norm": 1.5778934955596924, + "learning_rate": 2.8966666666666668e-05, + "loss": -0.0271, + "step": 870 + }, + { + "epoch": 1.9730941704035874, + "grad_norm": 1.805106520652771, + "learning_rate": 2.93e-05, + "loss": -0.0351, + "step": 880 + }, + { + "epoch": 1.9955156950672646, + "grad_norm": 1.7890651226043701, + "learning_rate": 2.9633333333333336e-05, + "loss": -0.0341, + "step": 890 + }, + { + "epoch": 2.0179372197309418, + "grad_norm": 1.9790332317352295, + "learning_rate": 2.9966666666666672e-05, + "loss": -0.0291, + "step": 900 + }, + { + "epoch": 2.0403587443946187, + "grad_norm": 2.211232900619507, + "learning_rate": 3.03e-05, + "loss": -0.0253, + "step": 910 + }, + { + "epoch": 2.062780269058296, + "grad_norm": 1.43012273311615, + "learning_rate": 3.063333333333334e-05, + "loss": -0.0224, + "step": 920 + }, + { + "epoch": 2.085201793721973, + "grad_norm": 1.6110597848892212, + "learning_rate": 3.096666666666666e-05, + "loss": -0.0059, + "step": 930 + }, + { + "epoch": 2.10762331838565, + "grad_norm": 2.448148012161255, + "learning_rate": 3.13e-05, + "loss": -0.0458, + "step": 940 + }, + { + "epoch": 2.1300448430493275, + "grad_norm": 2.1753642559051514, + "learning_rate": 3.1633333333333334e-05, + "loss": -0.043, + "step": 950 + }, + { + "epoch": 2.1524663677130045, + "grad_norm": 2.1201279163360596, + "learning_rate": 3.196666666666667e-05, + "loss": -0.0185, + "step": 960 + }, + { + "epoch": 2.1748878923766815, + "grad_norm": 1.7672967910766602, + "learning_rate": 3.2300000000000006e-05, + "loss": -0.0481, + "step": 970 + }, + { + "epoch": 2.197309417040359, + "grad_norm": 1.9174526929855347, + "learning_rate": 3.263333333333333e-05, + "loss": -0.0441, + "step": 980 + }, + { + "epoch": 2.219730941704036, + "grad_norm": 1.3616913557052612, + "learning_rate": 3.296666666666667e-05, + "loss": -0.0548, + "step": 990 + }, + { + "epoch": 2.242152466367713, + "grad_norm": 1.9164612293243408, + "learning_rate": 3.33e-05, + "loss": -0.0568, + "step": 1000 + }, + { + "epoch": 2.2645739910313902, + "grad_norm": 1.6245977878570557, + "learning_rate": 3.3633333333333335e-05, + "loss": -0.0294, + "step": 1010 + }, + { + "epoch": 2.286995515695067, + "grad_norm": 1.4428335428237915, + "learning_rate": 3.396666666666667e-05, + "loss": -0.0529, + "step": 1020 + }, + { + "epoch": 2.3094170403587446, + "grad_norm": 1.1278951168060303, + "learning_rate": 3.430000000000001e-05, + "loss": -0.0563, + "step": 1030 + }, + { + "epoch": 2.3318385650224216, + "grad_norm": 1.6174685955047607, + "learning_rate": 3.463333333333333e-05, + "loss": -0.0531, + "step": 1040 + }, + { + "epoch": 2.3542600896860986, + "grad_norm": 1.2457331418991089, + "learning_rate": 3.496666666666667e-05, + "loss": -0.0528, + "step": 1050 + }, + { + "epoch": 2.376681614349776, + "grad_norm": 1.038232684135437, + "learning_rate": 3.53e-05, + "loss": -0.0473, + "step": 1060 + }, + { + "epoch": 2.399103139013453, + "grad_norm": 1.268385648727417, + "learning_rate": 3.563333333333334e-05, + "loss": -0.0587, + "step": 1070 + }, + { + "epoch": 2.42152466367713, + "grad_norm": 1.3676249980926514, + "learning_rate": 3.596666666666667e-05, + "loss": -0.0426, + "step": 1080 + }, + { + "epoch": 2.4439461883408073, + "grad_norm": 1.9183441400527954, + "learning_rate": 3.63e-05, + "loss": -0.0484, + "step": 1090 + }, + { + "epoch": 2.4663677130044843, + "grad_norm": 1.083093523979187, + "learning_rate": 3.6633333333333334e-05, + "loss": -0.0511, + "step": 1100 + }, + { + "epoch": 2.4887892376681613, + "grad_norm": 1.5112096071243286, + "learning_rate": 3.6966666666666666e-05, + "loss": -0.057, + "step": 1110 + }, + { + "epoch": 2.5112107623318387, + "grad_norm": 1.961930274963379, + "learning_rate": 3.73e-05, + "loss": -0.0601, + "step": 1120 + }, + { + "epoch": 2.5336322869955157, + "grad_norm": 1.0357246398925781, + "learning_rate": 3.763333333333334e-05, + "loss": -0.0628, + "step": 1130 + }, + { + "epoch": 2.5560538116591927, + "grad_norm": 1.108612298965454, + "learning_rate": 3.796666666666667e-05, + "loss": -0.0702, + "step": 1140 + }, + { + "epoch": 2.57847533632287, + "grad_norm": 1.4638097286224365, + "learning_rate": 3.83e-05, + "loss": -0.054, + "step": 1150 + }, + { + "epoch": 2.600896860986547, + "grad_norm": 1.2864552736282349, + "learning_rate": 3.8633333333333335e-05, + "loss": -0.0632, + "step": 1160 + }, + { + "epoch": 2.623318385650224, + "grad_norm": 1.3219536542892456, + "learning_rate": 3.896666666666667e-05, + "loss": -0.0537, + "step": 1170 + }, + { + "epoch": 2.6457399103139014, + "grad_norm": 1.3292843103408813, + "learning_rate": 3.9300000000000007e-05, + "loss": -0.0622, + "step": 1180 + }, + { + "epoch": 2.6681614349775784, + "grad_norm": 1.9371554851531982, + "learning_rate": 3.963333333333333e-05, + "loss": -0.051, + "step": 1190 + }, + { + "epoch": 2.6905829596412554, + "grad_norm": 1.4588171243667603, + "learning_rate": 3.996666666666667e-05, + "loss": -0.037, + "step": 1200 + }, + { + "epoch": 2.713004484304933, + "grad_norm": 1.726974368095398, + "learning_rate": 4.0300000000000004e-05, + "loss": -0.0283, + "step": 1210 + }, + { + "epoch": 2.7354260089686098, + "grad_norm": 1.7426941394805908, + "learning_rate": 4.0633333333333336e-05, + "loss": -0.0509, + "step": 1220 + }, + { + "epoch": 2.7578475336322867, + "grad_norm": 1.8302170038223267, + "learning_rate": 4.096666666666667e-05, + "loss": -0.0715, + "step": 1230 + }, + { + "epoch": 2.780269058295964, + "grad_norm": 1.3612316846847534, + "learning_rate": 4.13e-05, + "loss": -0.0577, + "step": 1240 + }, + { + "epoch": 2.802690582959641, + "grad_norm": 0.9968832731246948, + "learning_rate": 4.1633333333333333e-05, + "loss": -0.0749, + "step": 1250 + }, + { + "epoch": 2.8251121076233185, + "grad_norm": 1.6731867790222168, + "learning_rate": 4.196666666666667e-05, + "loss": -0.0755, + "step": 1260 + }, + { + "epoch": 2.8475336322869955, + "grad_norm": 1.6570099592208862, + "learning_rate": 4.23e-05, + "loss": -0.0737, + "step": 1270 + }, + { + "epoch": 2.8699551569506725, + "grad_norm": 1.4828505516052246, + "learning_rate": 4.263333333333334e-05, + "loss": -0.0594, + "step": 1280 + }, + { + "epoch": 2.89237668161435, + "grad_norm": 0.97749924659729, + "learning_rate": 4.296666666666666e-05, + "loss": -0.0715, + "step": 1290 + }, + { + "epoch": 2.914798206278027, + "grad_norm": 1.5770204067230225, + "learning_rate": 4.33e-05, + "loss": -0.0617, + "step": 1300 + }, + { + "epoch": 2.9372197309417043, + "grad_norm": 1.0189112424850464, + "learning_rate": 4.3633333333333335e-05, + "loss": -0.0749, + "step": 1310 + }, + { + "epoch": 2.9596412556053813, + "grad_norm": 1.359694480895996, + "learning_rate": 4.396666666666667e-05, + "loss": -0.064, + "step": 1320 + }, + { + "epoch": 2.9820627802690582, + "grad_norm": 1.3608776330947876, + "learning_rate": 4.43e-05, + "loss": -0.0621, + "step": 1330 + }, + { + "epoch": 3.004484304932735, + "grad_norm": 1.6376254558563232, + "learning_rate": 4.463333333333334e-05, + "loss": -0.0414, + "step": 1340 + }, + { + "epoch": 3.0269058295964126, + "grad_norm": 1.42030930519104, + "learning_rate": 4.496666666666667e-05, + "loss": -0.0686, + "step": 1350 + }, + { + "epoch": 3.0493273542600896, + "grad_norm": 1.1046743392944336, + "learning_rate": 4.53e-05, + "loss": -0.0613, + "step": 1360 + }, + { + "epoch": 3.071748878923767, + "grad_norm": 1.3924795389175415, + "learning_rate": 4.5633333333333336e-05, + "loss": -0.0777, + "step": 1370 + }, + { + "epoch": 3.094170403587444, + "grad_norm": 1.115049123764038, + "learning_rate": 4.596666666666667e-05, + "loss": -0.0612, + "step": 1380 + }, + { + "epoch": 3.116591928251121, + "grad_norm": 1.1089330911636353, + "learning_rate": 4.630000000000001e-05, + "loss": -0.0686, + "step": 1390 + }, + { + "epoch": 3.1390134529147984, + "grad_norm": 1.0089328289031982, + "learning_rate": 4.663333333333333e-05, + "loss": -0.0755, + "step": 1400 + }, + { + "epoch": 3.1614349775784754, + "grad_norm": 1.6058446168899536, + "learning_rate": 4.696666666666667e-05, + "loss": -0.0517, + "step": 1410 + }, + { + "epoch": 3.1838565022421523, + "grad_norm": 1.4118638038635254, + "learning_rate": 4.73e-05, + "loss": -0.0669, + "step": 1420 + }, + { + "epoch": 3.2062780269058297, + "grad_norm": 1.276788353919983, + "learning_rate": 4.763333333333334e-05, + "loss": -0.0634, + "step": 1430 + }, + { + "epoch": 3.2286995515695067, + "grad_norm": 1.1224974393844604, + "learning_rate": 4.796666666666667e-05, + "loss": -0.0877, + "step": 1440 + }, + { + "epoch": 3.2511210762331837, + "grad_norm": 0.824547529220581, + "learning_rate": 4.83e-05, + "loss": -0.0711, + "step": 1450 + }, + { + "epoch": 3.273542600896861, + "grad_norm": 1.4268635511398315, + "learning_rate": 4.8633333333333334e-05, + "loss": -0.0816, + "step": 1460 + }, + { + "epoch": 3.295964125560538, + "grad_norm": 1.0582841634750366, + "learning_rate": 4.8966666666666667e-05, + "loss": -0.0935, + "step": 1470 + }, + { + "epoch": 3.318385650224215, + "grad_norm": 1.3871642351150513, + "learning_rate": 4.93e-05, + "loss": -0.0901, + "step": 1480 + }, + { + "epoch": 3.3408071748878925, + "grad_norm": 1.3054825067520142, + "learning_rate": 4.963333333333334e-05, + "loss": -0.0791, + "step": 1490 + }, + { + "epoch": 3.3632286995515694, + "grad_norm": 1.2213541269302368, + "learning_rate": 4.996666666666667e-05, + "loss": -0.0642, + "step": 1500 + }, + { + "epoch": 3.3856502242152464, + "grad_norm": 1.2010600566864014, + "learning_rate": 5.03e-05, + "loss": -0.0903, + "step": 1510 + }, + { + "epoch": 3.408071748878924, + "grad_norm": 1.475408673286438, + "learning_rate": 5.0633333333333335e-05, + "loss": -0.0815, + "step": 1520 + }, + { + "epoch": 3.430493273542601, + "grad_norm": 0.6221925616264343, + "learning_rate": 5.0966666666666674e-05, + "loss": -0.0725, + "step": 1530 + }, + { + "epoch": 3.452914798206278, + "grad_norm": 0.9402658343315125, + "learning_rate": 5.130000000000001e-05, + "loss": -0.089, + "step": 1540 + }, + { + "epoch": 3.475336322869955, + "grad_norm": 1.4921103715896606, + "learning_rate": 5.163333333333333e-05, + "loss": -0.0664, + "step": 1550 + }, + { + "epoch": 3.497757847533632, + "grad_norm": 0.9545705914497375, + "learning_rate": 5.196666666666667e-05, + "loss": -0.0953, + "step": 1560 + }, + { + "epoch": 3.520179372197309, + "grad_norm": 1.2624174356460571, + "learning_rate": 5.2300000000000004e-05, + "loss": -0.0862, + "step": 1570 + }, + { + "epoch": 3.5426008968609866, + "grad_norm": 1.3019720315933228, + "learning_rate": 5.2633333333333336e-05, + "loss": -0.074, + "step": 1580 + }, + { + "epoch": 3.5650224215246635, + "grad_norm": 1.1289862394332886, + "learning_rate": 5.296666666666666e-05, + "loss": -0.0746, + "step": 1590 + }, + { + "epoch": 3.587443946188341, + "grad_norm": 0.8495747447013855, + "learning_rate": 5.330000000000001e-05, + "loss": -0.0718, + "step": 1600 + }, + { + "epoch": 3.609865470852018, + "grad_norm": 0.9497025609016418, + "learning_rate": 5.3633333333333334e-05, + "loss": -0.0639, + "step": 1610 + }, + { + "epoch": 3.6322869955156953, + "grad_norm": 0.7229017019271851, + "learning_rate": 5.3966666666666666e-05, + "loss": -0.0809, + "step": 1620 + }, + { + "epoch": 3.6547085201793723, + "grad_norm": 0.8344466090202332, + "learning_rate": 5.4300000000000005e-05, + "loss": -0.081, + "step": 1630 + }, + { + "epoch": 3.6771300448430493, + "grad_norm": 1.9861055612564087, + "learning_rate": 5.463333333333334e-05, + "loss": -0.0807, + "step": 1640 + }, + { + "epoch": 3.6995515695067267, + "grad_norm": 1.646215796470642, + "learning_rate": 5.496666666666666e-05, + "loss": -0.086, + "step": 1650 + }, + { + "epoch": 3.7219730941704037, + "grad_norm": 0.9487037062644958, + "learning_rate": 5.530000000000001e-05, + "loss": -0.0635, + "step": 1660 + }, + { + "epoch": 3.7443946188340806, + "grad_norm": 1.0273593664169312, + "learning_rate": 5.5633333333333335e-05, + "loss": -0.0893, + "step": 1670 + }, + { + "epoch": 3.766816143497758, + "grad_norm": 1.068937063217163, + "learning_rate": 5.596666666666667e-05, + "loss": -0.0893, + "step": 1680 + }, + { + "epoch": 3.789237668161435, + "grad_norm": 0.9658722281455994, + "learning_rate": 5.63e-05, + "loss": -0.07, + "step": 1690 + }, + { + "epoch": 3.811659192825112, + "grad_norm": 1.0547469854354858, + "learning_rate": 5.663333333333334e-05, + "loss": -0.0902, + "step": 1700 + }, + { + "epoch": 3.8340807174887894, + "grad_norm": 0.9740731716156006, + "learning_rate": 5.696666666666667e-05, + "loss": -0.0811, + "step": 1710 + }, + { + "epoch": 3.8565022421524664, + "grad_norm": 0.8880306482315063, + "learning_rate": 5.73e-05, + "loss": -0.0706, + "step": 1720 + }, + { + "epoch": 3.8789237668161434, + "grad_norm": 1.3421764373779297, + "learning_rate": 5.7633333333333336e-05, + "loss": -0.0893, + "step": 1730 + }, + { + "epoch": 3.901345291479821, + "grad_norm": 1.1800276041030884, + "learning_rate": 5.796666666666667e-05, + "loss": -0.073, + "step": 1740 + }, + { + "epoch": 3.9237668161434978, + "grad_norm": 0.49711787700653076, + "learning_rate": 5.83e-05, + "loss": -0.097, + "step": 1750 + }, + { + "epoch": 3.9461883408071747, + "grad_norm": 0.6228187084197998, + "learning_rate": 5.863333333333334e-05, + "loss": -0.0956, + "step": 1760 + }, + { + "epoch": 3.968609865470852, + "grad_norm": 0.7569265365600586, + "learning_rate": 5.896666666666667e-05, + "loss": -0.0953, + "step": 1770 + }, + { + "epoch": 3.991031390134529, + "grad_norm": 1.0208141803741455, + "learning_rate": 5.93e-05, + "loss": -0.1062, + "step": 1780 + }, + { + "epoch": 4.013452914798206, + "grad_norm": 0.9890314936637878, + "learning_rate": 5.9633333333333344e-05, + "loss": -0.0729, + "step": 1790 + }, + { + "epoch": 4.0358744394618835, + "grad_norm": 0.7891241908073425, + "learning_rate": 5.996666666666667e-05, + "loss": -0.0866, + "step": 1800 + }, + { + "epoch": 4.058295964125561, + "grad_norm": 0.9507431983947754, + "learning_rate": 6.03e-05, + "loss": -0.0904, + "step": 1810 + }, + { + "epoch": 4.0807174887892375, + "grad_norm": 0.9766170978546143, + "learning_rate": 6.063333333333333e-05, + "loss": -0.1001, + "step": 1820 + }, + { + "epoch": 4.103139013452915, + "grad_norm": 0.8405773043632507, + "learning_rate": 6.0966666666666674e-05, + "loss": -0.0799, + "step": 1830 + }, + { + "epoch": 4.125560538116592, + "grad_norm": 1.0838018655776978, + "learning_rate": 6.13e-05, + "loss": -0.0817, + "step": 1840 + }, + { + "epoch": 4.147982062780269, + "grad_norm": 0.7248189449310303, + "learning_rate": 6.163333333333333e-05, + "loss": -0.0834, + "step": 1850 + }, + { + "epoch": 4.170403587443946, + "grad_norm": 0.8020162582397461, + "learning_rate": 6.196666666666668e-05, + "loss": -0.0925, + "step": 1860 + }, + { + "epoch": 4.192825112107624, + "grad_norm": 0.9303234815597534, + "learning_rate": 6.23e-05, + "loss": -0.1034, + "step": 1870 + }, + { + "epoch": 4.2152466367713, + "grad_norm": 0.6722277402877808, + "learning_rate": 6.263333333333333e-05, + "loss": -0.0928, + "step": 1880 + }, + { + "epoch": 4.237668161434978, + "grad_norm": 1.1840920448303223, + "learning_rate": 6.296666666666667e-05, + "loss": -0.0712, + "step": 1890 + }, + { + "epoch": 4.260089686098655, + "grad_norm": 0.7121480107307434, + "learning_rate": 6.330000000000001e-05, + "loss": -0.0797, + "step": 1900 + }, + { + "epoch": 4.2825112107623315, + "grad_norm": 0.9245591759681702, + "learning_rate": 6.363333333333334e-05, + "loss": -0.0881, + "step": 1910 + }, + { + "epoch": 4.304932735426009, + "grad_norm": 0.7961669564247131, + "learning_rate": 6.396666666666667e-05, + "loss": -0.101, + "step": 1920 + }, + { + "epoch": 4.327354260089686, + "grad_norm": 0.8889129161834717, + "learning_rate": 6.43e-05, + "loss": -0.0895, + "step": 1930 + }, + { + "epoch": 4.349775784753363, + "grad_norm": 0.8278612494468689, + "learning_rate": 6.463333333333334e-05, + "loss": -0.1089, + "step": 1940 + }, + { + "epoch": 4.37219730941704, + "grad_norm": 0.6175897717475891, + "learning_rate": 6.496666666666667e-05, + "loss": -0.0917, + "step": 1950 + }, + { + "epoch": 4.394618834080718, + "grad_norm": 0.899528443813324, + "learning_rate": 6.53e-05, + "loss": -0.087, + "step": 1960 + }, + { + "epoch": 4.417040358744394, + "grad_norm": 0.5548889636993408, + "learning_rate": 6.563333333333333e-05, + "loss": -0.086, + "step": 1970 + }, + { + "epoch": 4.439461883408072, + "grad_norm": 0.8355442881584167, + "learning_rate": 6.596666666666667e-05, + "loss": -0.0875, + "step": 1980 + }, + { + "epoch": 4.461883408071749, + "grad_norm": 0.8408334851264954, + "learning_rate": 6.630000000000001e-05, + "loss": -0.0962, + "step": 1990 + }, + { + "epoch": 4.484304932735426, + "grad_norm": 0.6637967824935913, + "learning_rate": 6.663333333333333e-05, + "loss": -0.0929, + "step": 2000 + }, + { + "epoch": 4.506726457399103, + "grad_norm": 1.145219087600708, + "learning_rate": 6.696666666666666e-05, + "loss": -0.0956, + "step": 2010 + }, + { + "epoch": 4.5291479820627805, + "grad_norm": 0.7107167840003967, + "learning_rate": 6.730000000000001e-05, + "loss": -0.1062, + "step": 2020 + }, + { + "epoch": 4.551569506726457, + "grad_norm": 0.6327930092811584, + "learning_rate": 6.763333333333334e-05, + "loss": -0.0984, + "step": 2030 + }, + { + "epoch": 4.573991031390134, + "grad_norm": 0.6914843320846558, + "learning_rate": 6.796666666666666e-05, + "loss": -0.1001, + "step": 2040 + }, + { + "epoch": 4.596412556053812, + "grad_norm": 0.7101470232009888, + "learning_rate": 6.83e-05, + "loss": -0.095, + "step": 2050 + }, + { + "epoch": 4.618834080717489, + "grad_norm": 0.6585995554924011, + "learning_rate": 6.863333333333334e-05, + "loss": -0.0893, + "step": 2060 + }, + { + "epoch": 4.641255605381166, + "grad_norm": 0.6516417860984802, + "learning_rate": 6.896666666666667e-05, + "loss": -0.0834, + "step": 2070 + }, + { + "epoch": 4.663677130044843, + "grad_norm": 0.7963947057723999, + "learning_rate": 6.93e-05, + "loss": -0.0881, + "step": 2080 + }, + { + "epoch": 4.68609865470852, + "grad_norm": 0.9499969482421875, + "learning_rate": 6.963333333333334e-05, + "loss": -0.1002, + "step": 2090 + }, + { + "epoch": 4.708520179372197, + "grad_norm": 0.7943589091300964, + "learning_rate": 6.996666666666667e-05, + "loss": -0.0906, + "step": 2100 + }, + { + "epoch": 4.7309417040358746, + "grad_norm": 0.8600579500198364, + "learning_rate": 7.03e-05, + "loss": -0.0903, + "step": 2110 + }, + { + "epoch": 4.753363228699552, + "grad_norm": 0.9938388466835022, + "learning_rate": 7.063333333333333e-05, + "loss": -0.0798, + "step": 2120 + }, + { + "epoch": 4.7757847533632285, + "grad_norm": 0.7214065194129944, + "learning_rate": 7.096666666666667e-05, + "loss": -0.0833, + "step": 2130 + }, + { + "epoch": 4.798206278026906, + "grad_norm": 0.6525754332542419, + "learning_rate": 7.13e-05, + "loss": -0.104, + "step": 2140 + }, + { + "epoch": 4.820627802690583, + "grad_norm": 0.8375402092933655, + "learning_rate": 7.163333333333334e-05, + "loss": -0.1079, + "step": 2150 + }, + { + "epoch": 4.84304932735426, + "grad_norm": 1.1122022867202759, + "learning_rate": 7.196666666666668e-05, + "loss": -0.0848, + "step": 2160 + }, + { + "epoch": 4.865470852017937, + "grad_norm": 0.9028182625770569, + "learning_rate": 7.23e-05, + "loss": -0.0986, + "step": 2170 + }, + { + "epoch": 4.887892376681615, + "grad_norm": 0.7137097120285034, + "learning_rate": 7.263333333333334e-05, + "loss": -0.1092, + "step": 2180 + }, + { + "epoch": 4.910313901345291, + "grad_norm": 1.0649327039718628, + "learning_rate": 7.296666666666667e-05, + "loss": -0.0996, + "step": 2190 + }, + { + "epoch": 4.932735426008969, + "grad_norm": 0.7981933951377869, + "learning_rate": 7.33e-05, + "loss": -0.1036, + "step": 2200 + }, + { + "epoch": 4.955156950672646, + "grad_norm": 0.8131219148635864, + "learning_rate": 7.363333333333334e-05, + "loss": -0.0928, + "step": 2210 + }, + { + "epoch": 4.977578475336323, + "grad_norm": 0.6674619317054749, + "learning_rate": 7.396666666666667e-05, + "loss": -0.1048, + "step": 2220 + }, + { + "epoch": 5.0, + "grad_norm": 0.7587264776229858, + "learning_rate": 7.43e-05, + "loss": -0.0905, + "step": 2230 + }, + { + "epoch": 5.022421524663677, + "grad_norm": 0.7497463822364807, + "learning_rate": 7.463333333333334e-05, + "loss": -0.1021, + "step": 2240 + }, + { + "epoch": 5.044843049327354, + "grad_norm": 0.6458138823509216, + "learning_rate": 7.496666666666667e-05, + "loss": -0.0926, + "step": 2250 + }, + { + "epoch": 5.067264573991031, + "grad_norm": 0.5432078838348389, + "learning_rate": 7.53e-05, + "loss": -0.0854, + "step": 2260 + }, + { + "epoch": 5.089686098654709, + "grad_norm": 0.9242632985115051, + "learning_rate": 7.563333333333333e-05, + "loss": -0.0836, + "step": 2270 + }, + { + "epoch": 5.112107623318385, + "grad_norm": 0.968149721622467, + "learning_rate": 7.596666666666668e-05, + "loss": -0.0891, + "step": 2280 + }, + { + "epoch": 5.134529147982063, + "grad_norm": 0.791774570941925, + "learning_rate": 7.630000000000001e-05, + "loss": -0.0882, + "step": 2290 + }, + { + "epoch": 5.15695067264574, + "grad_norm": 0.7600582838058472, + "learning_rate": 7.663333333333333e-05, + "loss": -0.0991, + "step": 2300 + }, + { + "epoch": 5.179372197309417, + "grad_norm": 0.8126775622367859, + "learning_rate": 7.696666666666668e-05, + "loss": -0.1048, + "step": 2310 + }, + { + "epoch": 5.201793721973094, + "grad_norm": 1.0526039600372314, + "learning_rate": 7.730000000000001e-05, + "loss": -0.0893, + "step": 2320 + }, + { + "epoch": 5.2242152466367715, + "grad_norm": 0.8687767386436462, + "learning_rate": 7.763333333333334e-05, + "loss": -0.0992, + "step": 2330 + }, + { + "epoch": 5.246636771300448, + "grad_norm": 0.7301262617111206, + "learning_rate": 7.796666666666666e-05, + "loss": -0.0951, + "step": 2340 + }, + { + "epoch": 5.2690582959641254, + "grad_norm": 0.6618801951408386, + "learning_rate": 7.83e-05, + "loss": -0.0997, + "step": 2350 + }, + { + "epoch": 5.291479820627803, + "grad_norm": 0.7612600922584534, + "learning_rate": 7.863333333333334e-05, + "loss": -0.097, + "step": 2360 + }, + { + "epoch": 5.31390134529148, + "grad_norm": 0.5339545607566833, + "learning_rate": 7.896666666666667e-05, + "loss": -0.1007, + "step": 2370 + }, + { + "epoch": 5.336322869955157, + "grad_norm": 0.7369050979614258, + "learning_rate": 7.93e-05, + "loss": -0.1021, + "step": 2380 + }, + { + "epoch": 5.358744394618834, + "grad_norm": 0.7824501991271973, + "learning_rate": 7.963333333333334e-05, + "loss": -0.0842, + "step": 2390 + }, + { + "epoch": 5.381165919282511, + "grad_norm": 0.7174873948097229, + "learning_rate": 7.996666666666667e-05, + "loss": -0.1032, + "step": 2400 + }, + { + "epoch": 5.403587443946188, + "grad_norm": 0.6876048445701599, + "learning_rate": 8.030000000000001e-05, + "loss": -0.0984, + "step": 2410 + }, + { + "epoch": 5.426008968609866, + "grad_norm": 0.8116436004638672, + "learning_rate": 8.063333333333333e-05, + "loss": -0.1003, + "step": 2420 + }, + { + "epoch": 5.448430493273543, + "grad_norm": 0.6764798164367676, + "learning_rate": 8.096666666666667e-05, + "loss": -0.1073, + "step": 2430 + }, + { + "epoch": 5.4708520179372195, + "grad_norm": 0.9937434196472168, + "learning_rate": 8.13e-05, + "loss": -0.1036, + "step": 2440 + }, + { + "epoch": 5.493273542600897, + "grad_norm": 0.8729003667831421, + "learning_rate": 8.163333333333334e-05, + "loss": -0.0998, + "step": 2450 + }, + { + "epoch": 5.515695067264574, + "grad_norm": 0.6114308834075928, + "learning_rate": 8.196666666666668e-05, + "loss": -0.0915, + "step": 2460 + }, + { + "epoch": 5.538116591928251, + "grad_norm": 0.8853445053100586, + "learning_rate": 8.23e-05, + "loss": -0.1121, + "step": 2470 + }, + { + "epoch": 5.560538116591928, + "grad_norm": 0.7466941475868225, + "learning_rate": 8.263333333333334e-05, + "loss": -0.1026, + "step": 2480 + }, + { + "epoch": 5.582959641255606, + "grad_norm": 1.073790431022644, + "learning_rate": 8.296666666666667e-05, + "loss": -0.088, + "step": 2490 + }, + { + "epoch": 5.605381165919282, + "grad_norm": 0.6316530704498291, + "learning_rate": 8.33e-05, + "loss": -0.0938, + "step": 2500 + }, + { + "epoch": 5.62780269058296, + "grad_norm": 0.813811719417572, + "learning_rate": 8.363333333333334e-05, + "loss": -0.0989, + "step": 2510 + }, + { + "epoch": 5.650224215246637, + "grad_norm": 0.9441471695899963, + "learning_rate": 8.396666666666667e-05, + "loss": -0.1118, + "step": 2520 + }, + { + "epoch": 5.672645739910314, + "grad_norm": 0.6153741478919983, + "learning_rate": 8.43e-05, + "loss": -0.1067, + "step": 2530 + }, + { + "epoch": 5.695067264573991, + "grad_norm": 0.7946006059646606, + "learning_rate": 8.463333333333335e-05, + "loss": -0.084, + "step": 2540 + }, + { + "epoch": 5.7174887892376685, + "grad_norm": 0.844622790813446, + "learning_rate": 8.496666666666667e-05, + "loss": -0.1048, + "step": 2550 + }, + { + "epoch": 5.739910313901345, + "grad_norm": 0.8332926630973816, + "learning_rate": 8.53e-05, + "loss": -0.0951, + "step": 2560 + }, + { + "epoch": 5.762331838565022, + "grad_norm": 0.84544837474823, + "learning_rate": 8.563333333333333e-05, + "loss": -0.0953, + "step": 2570 + }, + { + "epoch": 5.7847533632287, + "grad_norm": 0.4595879912376404, + "learning_rate": 8.596666666666668e-05, + "loss": -0.0993, + "step": 2580 + }, + { + "epoch": 5.807174887892376, + "grad_norm": 0.6701271533966064, + "learning_rate": 8.63e-05, + "loss": -0.1111, + "step": 2590 + }, + { + "epoch": 5.829596412556054, + "grad_norm": 0.9394120573997498, + "learning_rate": 8.663333333333333e-05, + "loss": -0.0949, + "step": 2600 + }, + { + "epoch": 5.852017937219731, + "grad_norm": 0.622581422328949, + "learning_rate": 8.696666666666668e-05, + "loss": -0.0963, + "step": 2610 + }, + { + "epoch": 5.874439461883408, + "grad_norm": 0.6261661052703857, + "learning_rate": 8.730000000000001e-05, + "loss": -0.1048, + "step": 2620 + }, + { + "epoch": 5.896860986547085, + "grad_norm": 0.7219703793525696, + "learning_rate": 8.763333333333334e-05, + "loss": -0.0938, + "step": 2630 + }, + { + "epoch": 5.9192825112107625, + "grad_norm": 0.8336865305900574, + "learning_rate": 8.796666666666667e-05, + "loss": -0.0977, + "step": 2640 + }, + { + "epoch": 5.941704035874439, + "grad_norm": 0.4945419430732727, + "learning_rate": 8.83e-05, + "loss": -0.106, + "step": 2650 + }, + { + "epoch": 5.9641255605381165, + "grad_norm": 0.5819895267486572, + "learning_rate": 8.863333333333334e-05, + "loss": -0.1059, + "step": 2660 + }, + { + "epoch": 5.986547085201794, + "grad_norm": 0.9691244959831238, + "learning_rate": 8.896666666666667e-05, + "loss": -0.0931, + "step": 2670 + }, + { + "epoch": 6.00896860986547, + "grad_norm": 0.6417266726493835, + "learning_rate": 8.93e-05, + "loss": -0.1047, + "step": 2680 + }, + { + "epoch": 6.031390134529148, + "grad_norm": 0.576359748840332, + "learning_rate": 8.963333333333333e-05, + "loss": -0.1153, + "step": 2690 + }, + { + "epoch": 6.053811659192825, + "grad_norm": 0.7676611542701721, + "learning_rate": 8.996666666666667e-05, + "loss": -0.1028, + "step": 2700 + }, + { + "epoch": 6.076233183856502, + "grad_norm": 0.6697026491165161, + "learning_rate": 9.030000000000001e-05, + "loss": -0.1144, + "step": 2710 + }, + { + "epoch": 6.098654708520179, + "grad_norm": 0.7314311861991882, + "learning_rate": 9.063333333333333e-05, + "loss": -0.0945, + "step": 2720 + }, + { + "epoch": 6.121076233183857, + "grad_norm": 0.7500597834587097, + "learning_rate": 9.096666666666666e-05, + "loss": -0.1147, + "step": 2730 + }, + { + "epoch": 6.143497757847534, + "grad_norm": 1.0830050706863403, + "learning_rate": 9.130000000000001e-05, + "loss": -0.1027, + "step": 2740 + }, + { + "epoch": 6.165919282511211, + "grad_norm": 0.6779115200042725, + "learning_rate": 9.163333333333334e-05, + "loss": -0.1026, + "step": 2750 + }, + { + "epoch": 6.188340807174888, + "grad_norm": 0.7023285627365112, + "learning_rate": 9.196666666666666e-05, + "loss": -0.1086, + "step": 2760 + }, + { + "epoch": 6.210762331838565, + "grad_norm": 0.6069226861000061, + "learning_rate": 9.230000000000001e-05, + "loss": -0.1092, + "step": 2770 + }, + { + "epoch": 6.233183856502242, + "grad_norm": 0.7797667980194092, + "learning_rate": 9.263333333333334e-05, + "loss": -0.1053, + "step": 2780 + }, + { + "epoch": 6.255605381165919, + "grad_norm": 0.7983332276344299, + "learning_rate": 9.296666666666667e-05, + "loss": -0.099, + "step": 2790 + }, + { + "epoch": 6.278026905829597, + "grad_norm": 0.5318055748939514, + "learning_rate": 9.33e-05, + "loss": -0.1087, + "step": 2800 + }, + { + "epoch": 6.300448430493273, + "grad_norm": 0.44819873571395874, + "learning_rate": 9.363333333333334e-05, + "loss": -0.1175, + "step": 2810 + }, + { + "epoch": 6.322869955156951, + "grad_norm": 0.8027081489562988, + "learning_rate": 9.396666666666667e-05, + "loss": -0.1151, + "step": 2820 + }, + { + "epoch": 6.345291479820628, + "grad_norm": 0.9329254031181335, + "learning_rate": 9.43e-05, + "loss": -0.0936, + "step": 2830 + }, + { + "epoch": 6.367713004484305, + "grad_norm": 0.567365288734436, + "learning_rate": 9.463333333333333e-05, + "loss": -0.1036, + "step": 2840 + }, + { + "epoch": 6.390134529147982, + "grad_norm": 0.6076239347457886, + "learning_rate": 9.496666666666667e-05, + "loss": -0.1074, + "step": 2850 + }, + { + "epoch": 6.4125560538116595, + "grad_norm": 0.49229711294174194, + "learning_rate": 9.53e-05, + "loss": -0.1026, + "step": 2860 + }, + { + "epoch": 6.434977578475336, + "grad_norm": 0.858203649520874, + "learning_rate": 9.563333333333334e-05, + "loss": -0.0947, + "step": 2870 + }, + { + "epoch": 6.457399103139013, + "grad_norm": 0.4436509907245636, + "learning_rate": 9.596666666666668e-05, + "loss": -0.1129, + "step": 2880 + }, + { + "epoch": 6.479820627802691, + "grad_norm": 0.5742620229721069, + "learning_rate": 9.63e-05, + "loss": -0.1032, + "step": 2890 + }, + { + "epoch": 6.502242152466367, + "grad_norm": 0.4815448224544525, + "learning_rate": 9.663333333333334e-05, + "loss": -0.1046, + "step": 2900 + }, + { + "epoch": 6.524663677130045, + "grad_norm": 0.39441612362861633, + "learning_rate": 9.696666666666667e-05, + "loss": -0.11, + "step": 2910 + }, + { + "epoch": 6.547085201793722, + "grad_norm": 0.622872531414032, + "learning_rate": 9.730000000000001e-05, + "loss": -0.1061, + "step": 2920 + }, + { + "epoch": 6.569506726457399, + "grad_norm": 0.786956787109375, + "learning_rate": 9.763333333333334e-05, + "loss": -0.1081, + "step": 2930 + }, + { + "epoch": 6.591928251121076, + "grad_norm": 0.9675863981246948, + "learning_rate": 9.796666666666667e-05, + "loss": -0.0982, + "step": 2940 + }, + { + "epoch": 6.614349775784754, + "grad_norm": 0.8166079521179199, + "learning_rate": 9.83e-05, + "loss": -0.0955, + "step": 2950 + }, + { + "epoch": 6.63677130044843, + "grad_norm": 0.8242588639259338, + "learning_rate": 9.863333333333334e-05, + "loss": -0.1041, + "step": 2960 + }, + { + "epoch": 6.6591928251121075, + "grad_norm": 0.8162567615509033, + "learning_rate": 9.896666666666667e-05, + "loss": -0.1099, + "step": 2970 + }, + { + "epoch": 6.681614349775785, + "grad_norm": 0.9118414521217346, + "learning_rate": 9.93e-05, + "loss": -0.104, + "step": 2980 + }, + { + "epoch": 6.704035874439462, + "grad_norm": 0.9838742017745972, + "learning_rate": 9.963333333333333e-05, + "loss": -0.0979, + "step": 2990 + }, + { + "epoch": 6.726457399103139, + "grad_norm": 0.6396962404251099, + "learning_rate": 9.996666666666668e-05, + "loss": -0.0841, + "step": 3000 + }, + { + "epoch": 6.748878923766816, + "grad_norm": 0.5115916728973389, + "learning_rate": 9.999999384858465e-05, + "loss": -0.1083, + "step": 3010 + }, + { + "epoch": 6.771300448430493, + "grad_norm": 0.6931324601173401, + "learning_rate": 9.999997258443473e-05, + "loss": -0.1084, + "step": 3020 + }, + { + "epoch": 6.79372197309417, + "grad_norm": 0.9119125008583069, + "learning_rate": 9.999993613161331e-05, + "loss": -0.1066, + "step": 3030 + }, + { + "epoch": 6.816143497757848, + "grad_norm": 0.8088449835777283, + "learning_rate": 9.999988449013146e-05, + "loss": -0.1066, + "step": 3040 + }, + { + "epoch": 6.838565022421525, + "grad_norm": 0.7452880144119263, + "learning_rate": 9.99998176600049e-05, + "loss": -0.0955, + "step": 3050 + }, + { + "epoch": 6.860986547085202, + "grad_norm": 0.7163184285163879, + "learning_rate": 9.999973564125389e-05, + "loss": -0.1116, + "step": 3060 + }, + { + "epoch": 6.883408071748879, + "grad_norm": 0.7636846303939819, + "learning_rate": 9.999963843390335e-05, + "loss": -0.1094, + "step": 3070 + }, + { + "epoch": 6.905829596412556, + "grad_norm": 0.7808240652084351, + "learning_rate": 9.999952603798282e-05, + "loss": -0.0807, + "step": 3080 + }, + { + "epoch": 6.928251121076233, + "grad_norm": 0.5670726299285889, + "learning_rate": 9.999939845352646e-05, + "loss": -0.1079, + "step": 3090 + }, + { + "epoch": 6.95067264573991, + "grad_norm": 0.5878632068634033, + "learning_rate": 9.999925568057298e-05, + "loss": -0.1035, + "step": 3100 + }, + { + "epoch": 6.973094170403588, + "grad_norm": 0.6165591478347778, + "learning_rate": 9.999909771916578e-05, + "loss": -0.1118, + "step": 3110 + }, + { + "epoch": 6.995515695067264, + "grad_norm": 0.8269200325012207, + "learning_rate": 9.999892456935285e-05, + "loss": -0.102, + "step": 3120 + }, + { + "epoch": 7.017937219730942, + "grad_norm": 0.5839239358901978, + "learning_rate": 9.999873623118679e-05, + "loss": -0.1099, + "step": 3130 + }, + { + "epoch": 7.040358744394619, + "grad_norm": 0.4625862240791321, + "learning_rate": 9.999853270472479e-05, + "loss": -0.1177, + "step": 3140 + }, + { + "epoch": 7.062780269058296, + "grad_norm": 0.44386714696884155, + "learning_rate": 9.999831399002871e-05, + "loss": -0.1106, + "step": 3150 + }, + { + "epoch": 7.085201793721973, + "grad_norm": 0.6586745977401733, + "learning_rate": 9.999808008716494e-05, + "loss": -0.116, + "step": 3160 + }, + { + "epoch": 7.1076233183856505, + "grad_norm": 0.5604522824287415, + "learning_rate": 9.999783099620459e-05, + "loss": -0.109, + "step": 3170 + }, + { + "epoch": 7.130044843049327, + "grad_norm": 0.5218386054039001, + "learning_rate": 9.999756671722328e-05, + "loss": -0.1189, + "step": 3180 + }, + { + "epoch": 7.1524663677130045, + "grad_norm": 0.7545985579490662, + "learning_rate": 9.99972872503013e-05, + "loss": -0.1164, + "step": 3190 + }, + { + "epoch": 7.174887892376682, + "grad_norm": 1.0072766542434692, + "learning_rate": 9.999699259552359e-05, + "loss": -0.1095, + "step": 3200 + }, + { + "epoch": 7.197309417040358, + "grad_norm": 0.5404777526855469, + "learning_rate": 9.99966827529796e-05, + "loss": -0.1224, + "step": 3210 + }, + { + "epoch": 7.219730941704036, + "grad_norm": 0.6633430123329163, + "learning_rate": 9.999635772276348e-05, + "loss": -0.1202, + "step": 3220 + }, + { + "epoch": 7.242152466367713, + "grad_norm": 0.5383176803588867, + "learning_rate": 9.999601750497396e-05, + "loss": -0.1218, + "step": 3230 + }, + { + "epoch": 7.26457399103139, + "grad_norm": 0.7511604428291321, + "learning_rate": 9.99956620997144e-05, + "loss": -0.1208, + "step": 3240 + }, + { + "epoch": 7.286995515695067, + "grad_norm": 0.5967193841934204, + "learning_rate": 9.999529150709275e-05, + "loss": -0.1168, + "step": 3250 + }, + { + "epoch": 7.309417040358745, + "grad_norm": 0.631586492061615, + "learning_rate": 9.999490572722158e-05, + "loss": -0.1271, + "step": 3260 + }, + { + "epoch": 7.331838565022421, + "grad_norm": 0.7206928730010986, + "learning_rate": 9.99945047602181e-05, + "loss": -0.1167, + "step": 3270 + }, + { + "epoch": 7.354260089686099, + "grad_norm": 0.6484280228614807, + "learning_rate": 9.99940886062041e-05, + "loss": -0.1152, + "step": 3280 + }, + { + "epoch": 7.376681614349776, + "grad_norm": 0.5606514811515808, + "learning_rate": 9.999365726530599e-05, + "loss": -0.1206, + "step": 3290 + }, + { + "epoch": 7.3991031390134525, + "grad_norm": 0.51255863904953, + "learning_rate": 9.999321073765481e-05, + "loss": -0.1035, + "step": 3300 + }, + { + "epoch": 7.42152466367713, + "grad_norm": 0.41374754905700684, + "learning_rate": 9.99927490233862e-05, + "loss": -0.1218, + "step": 3310 + }, + { + "epoch": 7.443946188340807, + "grad_norm": 0.5154651403427124, + "learning_rate": 9.999227212264043e-05, + "loss": -0.1227, + "step": 3320 + }, + { + "epoch": 7.466367713004484, + "grad_norm": 0.6977572441101074, + "learning_rate": 9.999178003556236e-05, + "loss": -0.1153, + "step": 3330 + }, + { + "epoch": 7.488789237668161, + "grad_norm": 0.6290366649627686, + "learning_rate": 9.999127276230146e-05, + "loss": -0.12, + "step": 3340 + }, + { + "epoch": 7.511210762331839, + "grad_norm": 0.5932716727256775, + "learning_rate": 9.999075030301184e-05, + "loss": -0.1208, + "step": 3350 + }, + { + "epoch": 7.533632286995516, + "grad_norm": 0.3738214373588562, + "learning_rate": 9.999021265785221e-05, + "loss": -0.1151, + "step": 3360 + }, + { + "epoch": 7.556053811659193, + "grad_norm": 0.5968793630599976, + "learning_rate": 9.998965982698589e-05, + "loss": -0.1068, + "step": 3370 + }, + { + "epoch": 7.57847533632287, + "grad_norm": 0.5902677178382874, + "learning_rate": 9.998909181058082e-05, + "loss": -0.1227, + "step": 3380 + }, + { + "epoch": 7.600896860986547, + "grad_norm": 0.7086104154586792, + "learning_rate": 9.998850860880953e-05, + "loss": -0.1214, + "step": 3390 + }, + { + "epoch": 7.623318385650224, + "grad_norm": 0.9520558714866638, + "learning_rate": 9.998791022184922e-05, + "loss": -0.1167, + "step": 3400 + }, + { + "epoch": 7.645739910313901, + "grad_norm": 0.7702134847640991, + "learning_rate": 9.99872966498816e-05, + "loss": -0.1233, + "step": 3410 + }, + { + "epoch": 7.668161434977579, + "grad_norm": 0.7153990864753723, + "learning_rate": 9.998666789309313e-05, + "loss": -0.1123, + "step": 3420 + }, + { + "epoch": 7.690582959641255, + "grad_norm": 0.6247199773788452, + "learning_rate": 9.998602395167475e-05, + "loss": -0.1262, + "step": 3430 + }, + { + "epoch": 7.713004484304933, + "grad_norm": 0.8495981097221375, + "learning_rate": 9.998536482582213e-05, + "loss": -0.1047, + "step": 3440 + }, + { + "epoch": 7.73542600896861, + "grad_norm": 0.6043784022331238, + "learning_rate": 9.998469051573544e-05, + "loss": -0.1166, + "step": 3450 + }, + { + "epoch": 7.757847533632287, + "grad_norm": 0.5217121839523315, + "learning_rate": 9.998400102161954e-05, + "loss": -0.1184, + "step": 3460 + }, + { + "epoch": 7.780269058295964, + "grad_norm": 0.6448701024055481, + "learning_rate": 9.998329634368388e-05, + "loss": -0.1218, + "step": 3470 + }, + { + "epoch": 7.802690582959642, + "grad_norm": 0.565177857875824, + "learning_rate": 9.998257648214253e-05, + "loss": -0.1283, + "step": 3480 + }, + { + "epoch": 7.825112107623318, + "grad_norm": 0.7194846272468567, + "learning_rate": 9.998184143721417e-05, + "loss": -0.119, + "step": 3490 + }, + { + "epoch": 7.8475336322869955, + "grad_norm": 0.4791514575481415, + "learning_rate": 9.998109120912206e-05, + "loss": -0.1168, + "step": 3500 + }, + { + "epoch": 7.869955156950673, + "grad_norm": 0.4432215392589569, + "learning_rate": 9.998032579809411e-05, + "loss": -0.1197, + "step": 3510 + }, + { + "epoch": 7.8923766816143495, + "grad_norm": 0.6095607876777649, + "learning_rate": 9.997954520436286e-05, + "loss": -0.111, + "step": 3520 + }, + { + "epoch": 7.914798206278027, + "grad_norm": 0.4287019968032837, + "learning_rate": 9.997874942816538e-05, + "loss": -0.1268, + "step": 3530 + }, + { + "epoch": 7.937219730941704, + "grad_norm": 0.6171551942825317, + "learning_rate": 9.997793846974345e-05, + "loss": -0.1133, + "step": 3540 + }, + { + "epoch": 7.959641255605381, + "grad_norm": 0.6005836129188538, + "learning_rate": 9.997711232934341e-05, + "loss": -0.1249, + "step": 3550 + }, + { + "epoch": 7.982062780269058, + "grad_norm": 0.6053005456924438, + "learning_rate": 9.99762710072162e-05, + "loss": -0.1224, + "step": 3560 + }, + { + "epoch": 8.004484304932735, + "grad_norm": 0.5149649977684021, + "learning_rate": 9.997541450361743e-05, + "loss": -0.1169, + "step": 3570 + }, + { + "epoch": 8.026905829596412, + "grad_norm": 0.5294902920722961, + "learning_rate": 9.997454281880723e-05, + "loss": -0.1159, + "step": 3580 + }, + { + "epoch": 8.04932735426009, + "grad_norm": 0.49333590269088745, + "learning_rate": 9.997365595305044e-05, + "loss": -0.1227, + "step": 3590 + }, + { + "epoch": 8.071748878923767, + "grad_norm": 0.6038964986801147, + "learning_rate": 9.997275390661644e-05, + "loss": -0.1201, + "step": 3600 + }, + { + "epoch": 8.094170403587444, + "grad_norm": 0.5499889254570007, + "learning_rate": 9.997183667977926e-05, + "loss": -0.1092, + "step": 3610 + }, + { + "epoch": 8.116591928251122, + "grad_norm": 0.5781327486038208, + "learning_rate": 9.997090427281752e-05, + "loss": -0.1176, + "step": 3620 + }, + { + "epoch": 8.139013452914797, + "grad_norm": 0.5215573310852051, + "learning_rate": 9.996995668601448e-05, + "loss": -0.1168, + "step": 3630 + }, + { + "epoch": 8.161434977578475, + "grad_norm": 0.5627871751785278, + "learning_rate": 9.996899391965798e-05, + "loss": -0.1311, + "step": 3640 + }, + { + "epoch": 8.183856502242152, + "grad_norm": 0.39359569549560547, + "learning_rate": 9.996801597404048e-05, + "loss": -0.1292, + "step": 3650 + }, + { + "epoch": 8.20627802690583, + "grad_norm": 0.6450111865997314, + "learning_rate": 9.996702284945905e-05, + "loss": -0.1212, + "step": 3660 + }, + { + "epoch": 8.228699551569507, + "grad_norm": 0.5577991008758545, + "learning_rate": 9.996601454621539e-05, + "loss": -0.1225, + "step": 3670 + }, + { + "epoch": 8.251121076233185, + "grad_norm": 0.5759717226028442, + "learning_rate": 9.996499106461577e-05, + "loss": -0.1247, + "step": 3680 + }, + { + "epoch": 8.27354260089686, + "grad_norm": 0.3726295828819275, + "learning_rate": 9.996395240497112e-05, + "loss": -0.1275, + "step": 3690 + }, + { + "epoch": 8.295964125560538, + "grad_norm": 0.5470470786094666, + "learning_rate": 9.996289856759696e-05, + "loss": -0.1266, + "step": 3700 + }, + { + "epoch": 8.318385650224215, + "grad_norm": 0.6610553860664368, + "learning_rate": 9.996182955281342e-05, + "loss": -0.1195, + "step": 3710 + }, + { + "epoch": 8.340807174887892, + "grad_norm": 0.6338909268379211, + "learning_rate": 9.996074536094519e-05, + "loss": -0.1187, + "step": 3720 + }, + { + "epoch": 8.36322869955157, + "grad_norm": 0.3718850314617157, + "learning_rate": 9.995964599232168e-05, + "loss": -0.1257, + "step": 3730 + }, + { + "epoch": 8.385650224215247, + "grad_norm": 0.6951971054077148, + "learning_rate": 9.995853144727683e-05, + "loss": -0.1113, + "step": 3740 + }, + { + "epoch": 8.408071748878923, + "grad_norm": 0.5831679105758667, + "learning_rate": 9.99574017261492e-05, + "loss": -0.1236, + "step": 3750 + }, + { + "epoch": 8.4304932735426, + "grad_norm": 0.4067491888999939, + "learning_rate": 9.995625682928198e-05, + "loss": -0.1254, + "step": 3760 + }, + { + "epoch": 8.452914798206278, + "grad_norm": 0.42632895708084106, + "learning_rate": 9.995509675702295e-05, + "loss": -0.1295, + "step": 3770 + }, + { + "epoch": 8.475336322869955, + "grad_norm": 0.3260636627674103, + "learning_rate": 9.995392150972451e-05, + "loss": -0.1309, + "step": 3780 + }, + { + "epoch": 8.497757847533633, + "grad_norm": 0.37365737557411194, + "learning_rate": 9.995273108774366e-05, + "loss": -0.1213, + "step": 3790 + }, + { + "epoch": 8.52017937219731, + "grad_norm": 0.3300149738788605, + "learning_rate": 9.995152549144205e-05, + "loss": -0.1159, + "step": 3800 + }, + { + "epoch": 8.542600896860986, + "grad_norm": 0.6665132641792297, + "learning_rate": 9.995030472118587e-05, + "loss": -0.123, + "step": 3810 + }, + { + "epoch": 8.565022421524663, + "grad_norm": 0.6764480471611023, + "learning_rate": 9.9949068777346e-05, + "loss": -0.1205, + "step": 3820 + }, + { + "epoch": 8.58744394618834, + "grad_norm": 0.5458853244781494, + "learning_rate": 9.994781766029786e-05, + "loss": -0.1267, + "step": 3830 + }, + { + "epoch": 8.609865470852018, + "grad_norm": 0.5136150121688843, + "learning_rate": 9.994655137042151e-05, + "loss": -0.114, + "step": 3840 + }, + { + "epoch": 8.632286995515695, + "grad_norm": 0.5338091850280762, + "learning_rate": 9.99452699081016e-05, + "loss": -0.1266, + "step": 3850 + }, + { + "epoch": 8.654708520179373, + "grad_norm": 0.3969349265098572, + "learning_rate": 9.994397327372743e-05, + "loss": -0.1183, + "step": 3860 + }, + { + "epoch": 8.67713004484305, + "grad_norm": 0.5371072292327881, + "learning_rate": 9.994266146769286e-05, + "loss": -0.1205, + "step": 3870 + }, + { + "epoch": 8.699551569506726, + "grad_norm": 0.394082635641098, + "learning_rate": 9.994133449039642e-05, + "loss": -0.129, + "step": 3880 + }, + { + "epoch": 8.721973094170403, + "grad_norm": 0.40953928232192993, + "learning_rate": 9.993999234224118e-05, + "loss": -0.125, + "step": 3890 + }, + { + "epoch": 8.74439461883408, + "grad_norm": 0.5386145114898682, + "learning_rate": 9.993863502363485e-05, + "loss": -0.1287, + "step": 3900 + }, + { + "epoch": 8.766816143497758, + "grad_norm": 0.5551478266716003, + "learning_rate": 9.993726253498976e-05, + "loss": -0.1286, + "step": 3910 + }, + { + "epoch": 8.789237668161435, + "grad_norm": 0.6725710034370422, + "learning_rate": 9.993587487672282e-05, + "loss": -0.1292, + "step": 3920 + }, + { + "epoch": 8.811659192825113, + "grad_norm": 0.6091046333312988, + "learning_rate": 9.993447204925558e-05, + "loss": -0.1286, + "step": 3930 + }, + { + "epoch": 8.834080717488789, + "grad_norm": 0.7290878891944885, + "learning_rate": 9.993305405301416e-05, + "loss": -0.1198, + "step": 3940 + }, + { + "epoch": 8.856502242152466, + "grad_norm": 0.44792264699935913, + "learning_rate": 9.993162088842935e-05, + "loss": -0.122, + "step": 3950 + }, + { + "epoch": 8.878923766816143, + "grad_norm": 0.49644070863723755, + "learning_rate": 9.993017255593646e-05, + "loss": -0.1178, + "step": 3960 + }, + { + "epoch": 8.90134529147982, + "grad_norm": 0.5213329195976257, + "learning_rate": 9.992870905597548e-05, + "loss": -0.1248, + "step": 3970 + }, + { + "epoch": 8.923766816143498, + "grad_norm": 0.3881312310695648, + "learning_rate": 9.9927230388991e-05, + "loss": -0.135, + "step": 3980 + }, + { + "epoch": 8.946188340807176, + "grad_norm": 0.4722111225128174, + "learning_rate": 9.992573655543215e-05, + "loss": -0.1306, + "step": 3990 + }, + { + "epoch": 8.968609865470851, + "grad_norm": 0.524956464767456, + "learning_rate": 9.992422755575277e-05, + "loss": -0.1342, + "step": 4000 + }, + { + "epoch": 8.991031390134529, + "grad_norm": 0.5489113926887512, + "learning_rate": 9.992270339041123e-05, + "loss": -0.125, + "step": 4010 + }, + { + "epoch": 9.013452914798206, + "grad_norm": 0.5005899667739868, + "learning_rate": 9.992116405987053e-05, + "loss": -0.131, + "step": 4020 + }, + { + "epoch": 9.035874439461884, + "grad_norm": 0.45033952593803406, + "learning_rate": 9.991960956459828e-05, + "loss": -0.1298, + "step": 4030 + }, + { + "epoch": 9.058295964125561, + "grad_norm": 0.47831815481185913, + "learning_rate": 9.991803990506669e-05, + "loss": -0.12, + "step": 4040 + }, + { + "epoch": 9.080717488789238, + "grad_norm": 0.45893552899360657, + "learning_rate": 9.991645508175258e-05, + "loss": -0.1215, + "step": 4050 + }, + { + "epoch": 9.103139013452914, + "grad_norm": 0.5285263061523438, + "learning_rate": 9.99148550951374e-05, + "loss": -0.1215, + "step": 4060 + }, + { + "epoch": 9.125560538116591, + "grad_norm": 0.725619375705719, + "learning_rate": 9.991323994570716e-05, + "loss": -0.1154, + "step": 4070 + }, + { + "epoch": 9.147982062780269, + "grad_norm": 0.4078959822654724, + "learning_rate": 9.99116096339525e-05, + "loss": -0.1208, + "step": 4080 + }, + { + "epoch": 9.170403587443946, + "grad_norm": 0.4810819625854492, + "learning_rate": 9.990996416036869e-05, + "loss": -0.1274, + "step": 4090 + }, + { + "epoch": 9.192825112107624, + "grad_norm": 0.5880482792854309, + "learning_rate": 9.990830352545555e-05, + "loss": -0.1274, + "step": 4100 + }, + { + "epoch": 9.215246636771301, + "grad_norm": 0.5794179439544678, + "learning_rate": 9.990662772971756e-05, + "loss": -0.1352, + "step": 4110 + }, + { + "epoch": 9.237668161434977, + "grad_norm": 0.6041808724403381, + "learning_rate": 9.990493677366376e-05, + "loss": -0.1272, + "step": 4120 + }, + { + "epoch": 9.260089686098654, + "grad_norm": 0.47625377774238586, + "learning_rate": 9.990323065780786e-05, + "loss": -0.1359, + "step": 4130 + }, + { + "epoch": 9.282511210762332, + "grad_norm": 0.6207675933837891, + "learning_rate": 9.990150938266808e-05, + "loss": -0.1308, + "step": 4140 + }, + { + "epoch": 9.304932735426009, + "grad_norm": 0.42055758833885193, + "learning_rate": 9.989977294876733e-05, + "loss": -0.1317, + "step": 4150 + }, + { + "epoch": 9.327354260089686, + "grad_norm": 0.3935517966747284, + "learning_rate": 9.989802135663308e-05, + "loss": -0.1368, + "step": 4160 + }, + { + "epoch": 9.349775784753364, + "grad_norm": 0.5192646384239197, + "learning_rate": 9.989625460679743e-05, + "loss": -0.1369, + "step": 4170 + }, + { + "epoch": 9.372197309417041, + "grad_norm": 0.5425310730934143, + "learning_rate": 9.989447269979706e-05, + "loss": -0.1333, + "step": 4180 + }, + { + "epoch": 9.394618834080717, + "grad_norm": 0.42103567719459534, + "learning_rate": 9.989267563617328e-05, + "loss": -0.1315, + "step": 4190 + }, + { + "epoch": 9.417040358744394, + "grad_norm": 0.40444204211235046, + "learning_rate": 9.989086341647198e-05, + "loss": -0.1335, + "step": 4200 + }, + { + "epoch": 9.439461883408072, + "grad_norm": 0.43359705805778503, + "learning_rate": 9.988903604124366e-05, + "loss": -0.1326, + "step": 4210 + }, + { + "epoch": 9.461883408071749, + "grad_norm": 0.5441893339157104, + "learning_rate": 9.988719351104343e-05, + "loss": -0.1338, + "step": 4220 + }, + { + "epoch": 9.484304932735427, + "grad_norm": 0.43692800402641296, + "learning_rate": 9.9885335826431e-05, + "loss": -0.134, + "step": 4230 + }, + { + "epoch": 9.506726457399104, + "grad_norm": 0.4971902370452881, + "learning_rate": 9.988346298797071e-05, + "loss": -0.1347, + "step": 4240 + }, + { + "epoch": 9.52914798206278, + "grad_norm": 0.5100210309028625, + "learning_rate": 9.988157499623146e-05, + "loss": -0.1265, + "step": 4250 + }, + { + "epoch": 9.551569506726457, + "grad_norm": 0.49340662360191345, + "learning_rate": 9.987967185178677e-05, + "loss": -0.1379, + "step": 4260 + }, + { + "epoch": 9.573991031390134, + "grad_norm": 0.5542442798614502, + "learning_rate": 9.987775355521476e-05, + "loss": -0.1318, + "step": 4270 + }, + { + "epoch": 9.596412556053812, + "grad_norm": 0.38446134328842163, + "learning_rate": 9.987582010709817e-05, + "loss": -0.1286, + "step": 4280 + }, + { + "epoch": 9.61883408071749, + "grad_norm": 0.5509389638900757, + "learning_rate": 9.987387150802431e-05, + "loss": -0.107, + "step": 4290 + }, + { + "epoch": 9.641255605381167, + "grad_norm": 0.45033392310142517, + "learning_rate": 9.987190775858517e-05, + "loss": -0.1344, + "step": 4300 + }, + { + "epoch": 9.663677130044842, + "grad_norm": 0.4841996133327484, + "learning_rate": 9.98699288593772e-05, + "loss": -0.1278, + "step": 4310 + }, + { + "epoch": 9.68609865470852, + "grad_norm": 0.503036618232727, + "learning_rate": 9.986793481100161e-05, + "loss": -0.1315, + "step": 4320 + }, + { + "epoch": 9.708520179372197, + "grad_norm": 0.5943097472190857, + "learning_rate": 9.986592561406412e-05, + "loss": -0.1264, + "step": 4330 + }, + { + "epoch": 9.730941704035875, + "grad_norm": 0.6215860843658447, + "learning_rate": 9.986390126917503e-05, + "loss": -0.1392, + "step": 4340 + }, + { + "epoch": 9.753363228699552, + "grad_norm": 0.34727153182029724, + "learning_rate": 9.986186177694933e-05, + "loss": -0.1396, + "step": 4350 + }, + { + "epoch": 9.77578475336323, + "grad_norm": 0.6673478484153748, + "learning_rate": 9.985980713800656e-05, + "loss": -0.1197, + "step": 4360 + }, + { + "epoch": 9.798206278026905, + "grad_norm": 0.3921808898448944, + "learning_rate": 9.985773735297084e-05, + "loss": -0.1391, + "step": 4370 + }, + { + "epoch": 9.820627802690582, + "grad_norm": 0.4512217044830322, + "learning_rate": 9.985565242247092e-05, + "loss": -0.1318, + "step": 4380 + }, + { + "epoch": 9.84304932735426, + "grad_norm": 0.3998130261898041, + "learning_rate": 9.985355234714016e-05, + "loss": -0.1211, + "step": 4390 + }, + { + "epoch": 9.865470852017937, + "grad_norm": 0.4130302667617798, + "learning_rate": 9.985143712761652e-05, + "loss": -0.1369, + "step": 4400 + }, + { + "epoch": 9.887892376681615, + "grad_norm": 0.3959788978099823, + "learning_rate": 9.984930676454252e-05, + "loss": -0.1362, + "step": 4410 + }, + { + "epoch": 9.910313901345292, + "grad_norm": 0.3466271460056305, + "learning_rate": 9.984716125856532e-05, + "loss": -0.1365, + "step": 4420 + }, + { + "epoch": 9.932735426008968, + "grad_norm": 0.4647205173969269, + "learning_rate": 9.984500061033667e-05, + "loss": -0.1424, + "step": 4430 + }, + { + "epoch": 9.955156950672645, + "grad_norm": 0.4567134976387024, + "learning_rate": 9.984282482051293e-05, + "loss": -0.1309, + "step": 4440 + }, + { + "epoch": 9.977578475336323, + "grad_norm": 0.3165355324745178, + "learning_rate": 9.9840633889755e-05, + "loss": -0.1327, + "step": 4450 + }, + { + "epoch": 10.0, + "grad_norm": 0.692799985408783, + "learning_rate": 9.983842781872848e-05, + "loss": -0.1266, + "step": 4460 + }, + { + "epoch": 10.022421524663677, + "grad_norm": 0.3778446614742279, + "learning_rate": 9.98362066081035e-05, + "loss": -0.1368, + "step": 4470 + }, + { + "epoch": 10.044843049327355, + "grad_norm": 0.7582626938819885, + "learning_rate": 9.983397025855479e-05, + "loss": -0.1393, + "step": 4480 + }, + { + "epoch": 10.067264573991032, + "grad_norm": 0.5475302338600159, + "learning_rate": 9.983171877076171e-05, + "loss": -0.1343, + "step": 4490 + }, + { + "epoch": 10.089686098654708, + "grad_norm": 0.4849795699119568, + "learning_rate": 9.98294521454082e-05, + "loss": -0.1207, + "step": 4500 + }, + { + "epoch": 10.112107623318385, + "grad_norm": 0.44956883788108826, + "learning_rate": 9.98271703831828e-05, + "loss": -0.1312, + "step": 4510 + }, + { + "epoch": 10.134529147982063, + "grad_norm": 0.42070114612579346, + "learning_rate": 9.982487348477865e-05, + "loss": -0.127, + "step": 4520 + }, + { + "epoch": 10.15695067264574, + "grad_norm": 0.3169249892234802, + "learning_rate": 9.982256145089347e-05, + "loss": -0.1345, + "step": 4530 + }, + { + "epoch": 10.179372197309418, + "grad_norm": 0.42456910014152527, + "learning_rate": 9.982023428222962e-05, + "loss": -0.142, + "step": 4540 + }, + { + "epoch": 10.201793721973095, + "grad_norm": 0.48868677020072937, + "learning_rate": 9.981789197949403e-05, + "loss": -0.1237, + "step": 4550 + }, + { + "epoch": 10.22421524663677, + "grad_norm": 0.6314699053764343, + "learning_rate": 9.98155345433982e-05, + "loss": -0.1328, + "step": 4560 + }, + { + "epoch": 10.246636771300448, + "grad_norm": 0.45935890078544617, + "learning_rate": 9.981316197465831e-05, + "loss": -0.1359, + "step": 4570 + }, + { + "epoch": 10.269058295964125, + "grad_norm": 0.5844445824623108, + "learning_rate": 9.981077427399504e-05, + "loss": -0.1319, + "step": 4580 + }, + { + "epoch": 10.291479820627803, + "grad_norm": 0.5084129571914673, + "learning_rate": 9.980837144213371e-05, + "loss": -0.1358, + "step": 4590 + }, + { + "epoch": 10.31390134529148, + "grad_norm": 0.4576917886734009, + "learning_rate": 9.980595347980426e-05, + "loss": -0.1406, + "step": 4600 + }, + { + "epoch": 10.336322869955158, + "grad_norm": 0.4116396903991699, + "learning_rate": 9.980352038774119e-05, + "loss": -0.1246, + "step": 4610 + }, + { + "epoch": 10.358744394618833, + "grad_norm": 0.3448329269886017, + "learning_rate": 9.98010721666836e-05, + "loss": -0.1249, + "step": 4620 + }, + { + "epoch": 10.38116591928251, + "grad_norm": 0.2976236939430237, + "learning_rate": 9.979860881737523e-05, + "loss": -0.1385, + "step": 4630 + }, + { + "epoch": 10.403587443946188, + "grad_norm": 0.39066582918167114, + "learning_rate": 9.979613034056434e-05, + "loss": -0.135, + "step": 4640 + }, + { + "epoch": 10.426008968609866, + "grad_norm": 0.4522007405757904, + "learning_rate": 9.979363673700386e-05, + "loss": -0.123, + "step": 4650 + }, + { + "epoch": 10.448430493273543, + "grad_norm": 0.3970237672328949, + "learning_rate": 9.979112800745124e-05, + "loss": -0.1233, + "step": 4660 + }, + { + "epoch": 10.47085201793722, + "grad_norm": 0.6112013459205627, + "learning_rate": 9.978860415266861e-05, + "loss": -0.1288, + "step": 4670 + }, + { + "epoch": 10.493273542600896, + "grad_norm": 0.49546417593955994, + "learning_rate": 9.978606517342262e-05, + "loss": -0.1371, + "step": 4680 + }, + { + "epoch": 10.515695067264573, + "grad_norm": 0.4015596807003021, + "learning_rate": 9.978351107048456e-05, + "loss": -0.1266, + "step": 4690 + }, + { + "epoch": 10.538116591928251, + "grad_norm": 0.7407628893852234, + "learning_rate": 9.978094184463029e-05, + "loss": -0.13, + "step": 4700 + }, + { + "epoch": 10.560538116591928, + "grad_norm": 0.44165852665901184, + "learning_rate": 9.977835749664029e-05, + "loss": -0.1286, + "step": 4710 + }, + { + "epoch": 10.582959641255606, + "grad_norm": 0.4933101534843445, + "learning_rate": 9.97757580272996e-05, + "loss": -0.1354, + "step": 4720 + }, + { + "epoch": 10.605381165919283, + "grad_norm": 0.37937578558921814, + "learning_rate": 9.977314343739786e-05, + "loss": -0.141, + "step": 4730 + }, + { + "epoch": 10.62780269058296, + "grad_norm": 0.4293757379055023, + "learning_rate": 9.977051372772934e-05, + "loss": -0.1286, + "step": 4740 + }, + { + "epoch": 10.650224215246636, + "grad_norm": 0.43979141116142273, + "learning_rate": 9.976786889909286e-05, + "loss": -0.1431, + "step": 4750 + }, + { + "epoch": 10.672645739910314, + "grad_norm": 0.5140541195869446, + "learning_rate": 9.976520895229185e-05, + "loss": -0.1408, + "step": 4760 + }, + { + "epoch": 10.695067264573991, + "grad_norm": 0.5759459137916565, + "learning_rate": 9.976253388813433e-05, + "loss": -0.1312, + "step": 4770 + }, + { + "epoch": 10.717488789237668, + "grad_norm": 0.501994788646698, + "learning_rate": 9.975984370743293e-05, + "loss": -0.133, + "step": 4780 + }, + { + "epoch": 10.739910313901346, + "grad_norm": 0.3887803256511688, + "learning_rate": 9.975713841100485e-05, + "loss": -0.1248, + "step": 4790 + }, + { + "epoch": 10.762331838565022, + "grad_norm": 0.5181452631950378, + "learning_rate": 9.975441799967187e-05, + "loss": -0.1339, + "step": 4800 + }, + { + "epoch": 10.784753363228699, + "grad_norm": 0.48668190836906433, + "learning_rate": 9.975168247426039e-05, + "loss": -0.1331, + "step": 4810 + }, + { + "epoch": 10.807174887892376, + "grad_norm": 0.6463840007781982, + "learning_rate": 9.974893183560139e-05, + "loss": -0.1376, + "step": 4820 + }, + { + "epoch": 10.829596412556054, + "grad_norm": 0.5911107063293457, + "learning_rate": 9.974616608453045e-05, + "loss": -0.1345, + "step": 4830 + }, + { + "epoch": 10.852017937219731, + "grad_norm": 0.3955610692501068, + "learning_rate": 9.974338522188772e-05, + "loss": -0.1361, + "step": 4840 + }, + { + "epoch": 10.874439461883409, + "grad_norm": 0.5267197489738464, + "learning_rate": 9.974058924851797e-05, + "loss": -0.139, + "step": 4850 + }, + { + "epoch": 10.896860986547086, + "grad_norm": 0.42466285824775696, + "learning_rate": 9.973777816527051e-05, + "loss": -0.1348, + "step": 4860 + }, + { + "epoch": 10.919282511210762, + "grad_norm": 0.5357335209846497, + "learning_rate": 9.973495197299931e-05, + "loss": -0.131, + "step": 4870 + }, + { + "epoch": 10.941704035874439, + "grad_norm": 0.40197139978408813, + "learning_rate": 9.973211067256287e-05, + "loss": -0.1375, + "step": 4880 + }, + { + "epoch": 10.964125560538116, + "grad_norm": 0.5422472953796387, + "learning_rate": 9.97292542648243e-05, + "loss": -0.1307, + "step": 4890 + }, + { + "epoch": 10.986547085201794, + "grad_norm": 0.37425848841667175, + "learning_rate": 9.972638275065131e-05, + "loss": -0.1377, + "step": 4900 + }, + { + "epoch": 11.008968609865471, + "grad_norm": 0.5298210978507996, + "learning_rate": 9.972349613091621e-05, + "loss": -0.1337, + "step": 4910 + }, + { + "epoch": 11.031390134529149, + "grad_norm": 0.2817547023296356, + "learning_rate": 9.972059440649584e-05, + "loss": -0.1354, + "step": 4920 + }, + { + "epoch": 11.053811659192824, + "grad_norm": 0.3404162526130676, + "learning_rate": 9.971767757827168e-05, + "loss": -0.1406, + "step": 4930 + }, + { + "epoch": 11.076233183856502, + "grad_norm": 0.5789584517478943, + "learning_rate": 9.971474564712982e-05, + "loss": -0.1365, + "step": 4940 + }, + { + "epoch": 11.09865470852018, + "grad_norm": 0.3369409441947937, + "learning_rate": 9.971179861396084e-05, + "loss": -0.1386, + "step": 4950 + }, + { + "epoch": 11.121076233183857, + "grad_norm": 0.5342329740524292, + "learning_rate": 9.970883647966003e-05, + "loss": -0.1291, + "step": 4960 + }, + { + "epoch": 11.143497757847534, + "grad_norm": 0.42882004380226135, + "learning_rate": 9.970585924512717e-05, + "loss": -0.1331, + "step": 4970 + }, + { + "epoch": 11.165919282511211, + "grad_norm": 0.3823615610599518, + "learning_rate": 9.970286691126669e-05, + "loss": -0.1457, + "step": 4980 + }, + { + "epoch": 11.188340807174887, + "grad_norm": 0.4143936336040497, + "learning_rate": 9.969985947898756e-05, + "loss": -0.1342, + "step": 4990 + }, + { + "epoch": 11.210762331838565, + "grad_norm": 0.45572948455810547, + "learning_rate": 9.969683694920337e-05, + "loss": -0.1254, + "step": 5000 + }, + { + "epoch": 11.233183856502242, + "grad_norm": 0.5306189656257629, + "learning_rate": 9.969379932283228e-05, + "loss": -0.1306, + "step": 5010 + }, + { + "epoch": 11.25560538116592, + "grad_norm": 0.5298721194267273, + "learning_rate": 9.969074660079704e-05, + "loss": -0.1331, + "step": 5020 + }, + { + "epoch": 11.278026905829597, + "grad_norm": 0.3914864659309387, + "learning_rate": 9.968767878402501e-05, + "loss": -0.1293, + "step": 5030 + }, + { + "epoch": 11.300448430493274, + "grad_norm": 0.40067964792251587, + "learning_rate": 9.968459587344808e-05, + "loss": -0.1357, + "step": 5040 + }, + { + "epoch": 11.32286995515695, + "grad_norm": 0.4707348644733429, + "learning_rate": 9.968149787000278e-05, + "loss": -0.1344, + "step": 5050 + }, + { + "epoch": 11.345291479820627, + "grad_norm": 0.4549867808818817, + "learning_rate": 9.967838477463018e-05, + "loss": -0.1394, + "step": 5060 + }, + { + "epoch": 11.367713004484305, + "grad_norm": 0.5184374451637268, + "learning_rate": 9.967525658827597e-05, + "loss": -0.1309, + "step": 5070 + }, + { + "epoch": 11.390134529147982, + "grad_norm": 0.5870062708854675, + "learning_rate": 9.967211331189042e-05, + "loss": -0.1299, + "step": 5080 + }, + { + "epoch": 11.41255605381166, + "grad_norm": 0.3453776240348816, + "learning_rate": 9.966895494642834e-05, + "loss": -0.1279, + "step": 5090 + }, + { + "epoch": 11.434977578475337, + "grad_norm": 0.5785014629364014, + "learning_rate": 9.96657814928492e-05, + "loss": -0.1237, + "step": 5100 + }, + { + "epoch": 11.457399103139014, + "grad_norm": 0.34571027755737305, + "learning_rate": 9.966259295211697e-05, + "loss": -0.1352, + "step": 5110 + }, + { + "epoch": 11.47982062780269, + "grad_norm": 0.3148207664489746, + "learning_rate": 9.965938932520028e-05, + "loss": -0.1302, + "step": 5120 + }, + { + "epoch": 11.502242152466367, + "grad_norm": 0.5486571788787842, + "learning_rate": 9.965617061307229e-05, + "loss": -0.1404, + "step": 5130 + }, + { + "epoch": 11.524663677130045, + "grad_norm": 0.3157196342945099, + "learning_rate": 9.965293681671077e-05, + "loss": -0.1315, + "step": 5140 + }, + { + "epoch": 11.547085201793722, + "grad_norm": 0.3966981768608093, + "learning_rate": 9.964968793709804e-05, + "loss": -0.1351, + "step": 5150 + }, + { + "epoch": 11.5695067264574, + "grad_norm": 0.47505366802215576, + "learning_rate": 9.964642397522106e-05, + "loss": -0.1398, + "step": 5160 + }, + { + "epoch": 11.591928251121077, + "grad_norm": 0.34607037901878357, + "learning_rate": 9.96431449320713e-05, + "loss": -0.136, + "step": 5170 + }, + { + "epoch": 11.614349775784753, + "grad_norm": 0.3214232325553894, + "learning_rate": 9.963985080864486e-05, + "loss": -0.1331, + "step": 5180 + }, + { + "epoch": 11.63677130044843, + "grad_norm": 0.405215859413147, + "learning_rate": 9.96365416059424e-05, + "loss": -0.1383, + "step": 5190 + }, + { + "epoch": 11.659192825112108, + "grad_norm": 0.3769086003303528, + "learning_rate": 9.963321732496919e-05, + "loss": -0.1384, + "step": 5200 + }, + { + "epoch": 11.681614349775785, + "grad_norm": 0.4699119031429291, + "learning_rate": 9.962987796673506e-05, + "loss": -0.1308, + "step": 5210 + }, + { + "epoch": 11.704035874439462, + "grad_norm": 0.5318476557731628, + "learning_rate": 9.962652353225438e-05, + "loss": -0.1389, + "step": 5220 + }, + { + "epoch": 11.72645739910314, + "grad_norm": 0.5085122585296631, + "learning_rate": 9.962315402254619e-05, + "loss": -0.1242, + "step": 5230 + }, + { + "epoch": 11.748878923766815, + "grad_norm": 0.47774866223335266, + "learning_rate": 9.9619769438634e-05, + "loss": -0.1381, + "step": 5240 + }, + { + "epoch": 11.771300448430493, + "grad_norm": 0.32737770676612854, + "learning_rate": 9.9616369781546e-05, + "loss": -0.1411, + "step": 5250 + }, + { + "epoch": 11.79372197309417, + "grad_norm": 0.38896140456199646, + "learning_rate": 9.961295505231491e-05, + "loss": -0.1387, + "step": 5260 + }, + { + "epoch": 11.816143497757848, + "grad_norm": 0.44387832283973694, + "learning_rate": 9.960952525197804e-05, + "loss": -0.1406, + "step": 5270 + }, + { + "epoch": 11.838565022421525, + "grad_norm": 0.44631630182266235, + "learning_rate": 9.960608038157724e-05, + "loss": -0.1425, + "step": 5280 + }, + { + "epoch": 11.860986547085203, + "grad_norm": 0.3442399799823761, + "learning_rate": 9.960262044215901e-05, + "loss": -0.1377, + "step": 5290 + }, + { + "epoch": 11.883408071748878, + "grad_norm": 0.31273022294044495, + "learning_rate": 9.959914543477435e-05, + "loss": -0.1324, + "step": 5300 + }, + { + "epoch": 11.905829596412556, + "grad_norm": 0.4650351405143738, + "learning_rate": 9.959565536047892e-05, + "loss": -0.1196, + "step": 5310 + }, + { + "epoch": 11.928251121076233, + "grad_norm": 0.4521084129810333, + "learning_rate": 9.959215022033288e-05, + "loss": -0.1378, + "step": 5320 + }, + { + "epoch": 11.95067264573991, + "grad_norm": 0.3215019702911377, + "learning_rate": 9.9588630015401e-05, + "loss": -0.1263, + "step": 5330 + }, + { + "epoch": 11.973094170403588, + "grad_norm": 0.44157519936561584, + "learning_rate": 9.958509474675264e-05, + "loss": -0.1368, + "step": 5340 + }, + { + "epoch": 11.995515695067265, + "grad_norm": 0.34459158778190613, + "learning_rate": 9.958154441546171e-05, + "loss": -0.1285, + "step": 5350 + }, + { + "epoch": 12.01793721973094, + "grad_norm": 0.44108375906944275, + "learning_rate": 9.957797902260673e-05, + "loss": -0.1329, + "step": 5360 + }, + { + "epoch": 12.040358744394618, + "grad_norm": 0.3333558738231659, + "learning_rate": 9.957439856927073e-05, + "loss": -0.1451, + "step": 5370 + }, + { + "epoch": 12.062780269058296, + "grad_norm": 0.3925810158252716, + "learning_rate": 9.957080305654139e-05, + "loss": -0.1329, + "step": 5380 + }, + { + "epoch": 12.085201793721973, + "grad_norm": 0.4209113121032715, + "learning_rate": 9.956719248551092e-05, + "loss": -0.1338, + "step": 5390 + }, + { + "epoch": 12.10762331838565, + "grad_norm": 0.42234304547309875, + "learning_rate": 9.956356685727612e-05, + "loss": -0.133, + "step": 5400 + }, + { + "epoch": 12.130044843049328, + "grad_norm": 0.3387395441532135, + "learning_rate": 9.955992617293836e-05, + "loss": -0.1309, + "step": 5410 + }, + { + "epoch": 12.152466367713004, + "grad_norm": 0.3341435194015503, + "learning_rate": 9.955627043360358e-05, + "loss": -0.1297, + "step": 5420 + }, + { + "epoch": 12.174887892376681, + "grad_norm": 0.6385065913200378, + "learning_rate": 9.955259964038231e-05, + "loss": -0.1413, + "step": 5430 + }, + { + "epoch": 12.197309417040358, + "grad_norm": 0.5569019913673401, + "learning_rate": 9.954891379438962e-05, + "loss": -0.1317, + "step": 5440 + }, + { + "epoch": 12.219730941704036, + "grad_norm": 0.3742257356643677, + "learning_rate": 9.954521289674519e-05, + "loss": -0.1344, + "step": 5450 + }, + { + "epoch": 12.242152466367713, + "grad_norm": 0.5086711049079895, + "learning_rate": 9.954149694857325e-05, + "loss": -0.1383, + "step": 5460 + }, + { + "epoch": 12.26457399103139, + "grad_norm": 0.6796200275421143, + "learning_rate": 9.953776595100258e-05, + "loss": -0.1294, + "step": 5470 + }, + { + "epoch": 12.286995515695068, + "grad_norm": 0.40828844904899597, + "learning_rate": 9.95340199051666e-05, + "loss": -0.142, + "step": 5480 + }, + { + "epoch": 12.309417040358744, + "grad_norm": 0.443377286195755, + "learning_rate": 9.953025881220325e-05, + "loss": -0.1414, + "step": 5490 + }, + { + "epoch": 12.331838565022421, + "grad_norm": 0.5683825016021729, + "learning_rate": 9.952648267325504e-05, + "loss": -0.1407, + "step": 5500 + }, + { + "epoch": 12.354260089686099, + "grad_norm": 0.5705673694610596, + "learning_rate": 9.952269148946905e-05, + "loss": -0.1368, + "step": 5510 + }, + { + "epoch": 12.376681614349776, + "grad_norm": 0.43344447016716003, + "learning_rate": 9.951888526199697e-05, + "loss": -0.1405, + "step": 5520 + }, + { + "epoch": 12.399103139013453, + "grad_norm": 0.34680911898612976, + "learning_rate": 9.951506399199501e-05, + "loss": -0.137, + "step": 5530 + }, + { + "epoch": 12.42152466367713, + "grad_norm": 0.46449288725852966, + "learning_rate": 9.951122768062399e-05, + "loss": -0.1415, + "step": 5540 + }, + { + "epoch": 12.443946188340806, + "grad_norm": 0.6197992563247681, + "learning_rate": 9.950737632904927e-05, + "loss": -0.1385, + "step": 5550 + }, + { + "epoch": 12.466367713004484, + "grad_norm": 0.5213712453842163, + "learning_rate": 9.950350993844077e-05, + "loss": -0.1396, + "step": 5560 + }, + { + "epoch": 12.488789237668161, + "grad_norm": 0.48573926091194153, + "learning_rate": 9.949962850997303e-05, + "loss": -0.1396, + "step": 5570 + }, + { + "epoch": 12.511210762331839, + "grad_norm": 0.6117490530014038, + "learning_rate": 9.949573204482512e-05, + "loss": -0.1438, + "step": 5580 + }, + { + "epoch": 12.533632286995516, + "grad_norm": 0.42156192660331726, + "learning_rate": 9.949182054418064e-05, + "loss": -0.1424, + "step": 5590 + }, + { + "epoch": 12.556053811659194, + "grad_norm": 0.4734836220741272, + "learning_rate": 9.948789400922787e-05, + "loss": -0.1378, + "step": 5600 + }, + { + "epoch": 12.57847533632287, + "grad_norm": 0.3737230896949768, + "learning_rate": 9.948395244115953e-05, + "loss": -0.1337, + "step": 5610 + }, + { + "epoch": 12.600896860986547, + "grad_norm": 0.48187026381492615, + "learning_rate": 9.9479995841173e-05, + "loss": -0.1372, + "step": 5620 + }, + { + "epoch": 12.623318385650224, + "grad_norm": 0.3334769904613495, + "learning_rate": 9.947602421047017e-05, + "loss": -0.1473, + "step": 5630 + }, + { + "epoch": 12.645739910313901, + "grad_norm": 0.5309147238731384, + "learning_rate": 9.947203755025753e-05, + "loss": -0.137, + "step": 5640 + }, + { + "epoch": 12.668161434977579, + "grad_norm": 0.4597820043563843, + "learning_rate": 9.946803586174611e-05, + "loss": -0.1389, + "step": 5650 + }, + { + "epoch": 12.690582959641256, + "grad_norm": 0.45660680532455444, + "learning_rate": 9.946401914615151e-05, + "loss": -0.1405, + "step": 5660 + }, + { + "epoch": 12.713004484304932, + "grad_norm": 0.4343927800655365, + "learning_rate": 9.945998740469394e-05, + "loss": -0.1451, + "step": 5670 + }, + { + "epoch": 12.73542600896861, + "grad_norm": 0.5114085078239441, + "learning_rate": 9.945594063859809e-05, + "loss": -0.1303, + "step": 5680 + }, + { + "epoch": 12.757847533632287, + "grad_norm": 0.4380705952644348, + "learning_rate": 9.94518788490933e-05, + "loss": -0.1463, + "step": 5690 + }, + { + "epoch": 12.780269058295964, + "grad_norm": 0.42615318298339844, + "learning_rate": 9.944780203741341e-05, + "loss": -0.1375, + "step": 5700 + }, + { + "epoch": 12.802690582959642, + "grad_norm": 0.3518386483192444, + "learning_rate": 9.944371020479686e-05, + "loss": -0.1422, + "step": 5710 + }, + { + "epoch": 12.825112107623319, + "grad_norm": 0.3756152391433716, + "learning_rate": 9.943960335248662e-05, + "loss": -0.1414, + "step": 5720 + }, + { + "epoch": 12.847533632286996, + "grad_norm": 0.46412789821624756, + "learning_rate": 9.943548148173027e-05, + "loss": -0.1335, + "step": 5730 + }, + { + "epoch": 12.869955156950672, + "grad_norm": 0.46839919686317444, + "learning_rate": 9.943134459377992e-05, + "loss": -0.146, + "step": 5740 + }, + { + "epoch": 12.89237668161435, + "grad_norm": 0.458434134721756, + "learning_rate": 9.942719268989222e-05, + "loss": -0.1333, + "step": 5750 + }, + { + "epoch": 12.914798206278027, + "grad_norm": 0.27323615550994873, + "learning_rate": 9.942302577132844e-05, + "loss": -0.1453, + "step": 5760 + }, + { + "epoch": 12.937219730941704, + "grad_norm": 0.5818614959716797, + "learning_rate": 9.941884383935438e-05, + "loss": -0.1418, + "step": 5770 + }, + { + "epoch": 12.959641255605382, + "grad_norm": 0.35653284192085266, + "learning_rate": 9.941464689524039e-05, + "loss": -0.1336, + "step": 5780 + }, + { + "epoch": 12.982062780269057, + "grad_norm": 0.5418176054954529, + "learning_rate": 9.941043494026139e-05, + "loss": -0.1383, + "step": 5790 + }, + { + "epoch": 13.004484304932735, + "grad_norm": 0.4487108290195465, + "learning_rate": 9.940620797569685e-05, + "loss": -0.1352, + "step": 5800 + }, + { + "epoch": 13.026905829596412, + "grad_norm": 0.7404613494873047, + "learning_rate": 9.940196600283082e-05, + "loss": -0.1215, + "step": 5810 + }, + { + "epoch": 13.04932735426009, + "grad_norm": 0.34617137908935547, + "learning_rate": 9.939770902295192e-05, + "loss": -0.1275, + "step": 5820 + }, + { + "epoch": 13.071748878923767, + "grad_norm": 0.41317448019981384, + "learning_rate": 9.939343703735329e-05, + "loss": -0.1361, + "step": 5830 + }, + { + "epoch": 13.094170403587444, + "grad_norm": 0.5754908323287964, + "learning_rate": 9.938915004733264e-05, + "loss": -0.1356, + "step": 5840 + }, + { + "epoch": 13.116591928251122, + "grad_norm": 0.4682377278804779, + "learning_rate": 9.938484805419224e-05, + "loss": -0.135, + "step": 5850 + }, + { + "epoch": 13.139013452914797, + "grad_norm": 0.6941953301429749, + "learning_rate": 9.938053105923894e-05, + "loss": -0.1348, + "step": 5860 + }, + { + "epoch": 13.161434977578475, + "grad_norm": 0.4488303065299988, + "learning_rate": 9.937619906378413e-05, + "loss": -0.1365, + "step": 5870 + }, + { + "epoch": 13.183856502242152, + "grad_norm": 0.3381268084049225, + "learning_rate": 9.937185206914374e-05, + "loss": -0.1395, + "step": 5880 + }, + { + "epoch": 13.20627802690583, + "grad_norm": 0.3932095766067505, + "learning_rate": 9.936749007663829e-05, + "loss": -0.1376, + "step": 5890 + }, + { + "epoch": 13.228699551569507, + "grad_norm": 0.35394659638404846, + "learning_rate": 9.93631130875928e-05, + "loss": -0.1434, + "step": 5900 + }, + { + "epoch": 13.251121076233185, + "grad_norm": 0.3450533151626587, + "learning_rate": 9.935872110333692e-05, + "loss": -0.1343, + "step": 5910 + }, + { + "epoch": 13.27354260089686, + "grad_norm": 0.3863537907600403, + "learning_rate": 9.935431412520484e-05, + "loss": -0.1389, + "step": 5920 + }, + { + "epoch": 13.295964125560538, + "grad_norm": 0.36543577909469604, + "learning_rate": 9.934989215453523e-05, + "loss": -0.1348, + "step": 5930 + }, + { + "epoch": 13.318385650224215, + "grad_norm": 0.4336704909801483, + "learning_rate": 9.934545519267139e-05, + "loss": -0.1391, + "step": 5940 + }, + { + "epoch": 13.340807174887892, + "grad_norm": 0.5602973103523254, + "learning_rate": 9.934100324096117e-05, + "loss": -0.1433, + "step": 5950 + }, + { + "epoch": 13.36322869955157, + "grad_norm": 0.40416571497917175, + "learning_rate": 9.933653630075692e-05, + "loss": -0.1401, + "step": 5960 + }, + { + "epoch": 13.385650224215247, + "grad_norm": 0.48348987102508545, + "learning_rate": 9.93320543734156e-05, + "loss": -0.1429, + "step": 5970 + }, + { + "epoch": 13.408071748878923, + "grad_norm": 0.5168440341949463, + "learning_rate": 9.932755746029871e-05, + "loss": -0.1458, + "step": 5980 + }, + { + "epoch": 13.4304932735426, + "grad_norm": 0.28034335374832153, + "learning_rate": 9.932304556277228e-05, + "loss": -0.1379, + "step": 5990 + }, + { + "epoch": 13.452914798206278, + "grad_norm": 0.41921067237854004, + "learning_rate": 9.93185186822069e-05, + "loss": -0.1387, + "step": 6000 + }, + { + "epoch": 13.475336322869955, + "grad_norm": 0.34593984484672546, + "learning_rate": 9.931397681997773e-05, + "loss": -0.1413, + "step": 6010 + }, + { + "epoch": 13.497757847533633, + "grad_norm": 0.5669784545898438, + "learning_rate": 9.930941997746446e-05, + "loss": -0.1447, + "step": 6020 + }, + { + "epoch": 13.52017937219731, + "grad_norm": 0.34188055992126465, + "learning_rate": 9.930484815605134e-05, + "loss": -0.1392, + "step": 6030 + }, + { + "epoch": 13.542600896860986, + "grad_norm": 0.5301915407180786, + "learning_rate": 9.930026135712717e-05, + "loss": -0.1351, + "step": 6040 + }, + { + "epoch": 13.565022421524663, + "grad_norm": 0.49475425481796265, + "learning_rate": 9.92956595820853e-05, + "loss": -0.1415, + "step": 6050 + }, + { + "epoch": 13.58744394618834, + "grad_norm": 0.44758179783821106, + "learning_rate": 9.929104283232362e-05, + "loss": -0.1449, + "step": 6060 + }, + { + "epoch": 13.609865470852018, + "grad_norm": 0.4410509765148163, + "learning_rate": 9.92864111092446e-05, + "loss": -0.1454, + "step": 6070 + }, + { + "epoch": 13.632286995515695, + "grad_norm": 0.3272562623023987, + "learning_rate": 9.92817644142552e-05, + "loss": -0.1492, + "step": 6080 + }, + { + "epoch": 13.654708520179373, + "grad_norm": 0.6473413705825806, + "learning_rate": 9.927710274876698e-05, + "loss": -0.1422, + "step": 6090 + }, + { + "epoch": 13.67713004484305, + "grad_norm": 0.45129796862602234, + "learning_rate": 9.927242611419603e-05, + "loss": -0.1384, + "step": 6100 + }, + { + "epoch": 13.699551569506726, + "grad_norm": 0.5076704025268555, + "learning_rate": 9.926773451196301e-05, + "loss": -0.1347, + "step": 6110 + }, + { + "epoch": 13.721973094170403, + "grad_norm": 0.34707728028297424, + "learning_rate": 9.926302794349306e-05, + "loss": -0.1426, + "step": 6120 + }, + { + "epoch": 13.74439461883408, + "grad_norm": 0.42725563049316406, + "learning_rate": 9.925830641021594e-05, + "loss": -0.1323, + "step": 6130 + }, + { + "epoch": 13.766816143497758, + "grad_norm": 0.48327404260635376, + "learning_rate": 9.925356991356593e-05, + "loss": -0.1417, + "step": 6140 + }, + { + "epoch": 13.789237668161435, + "grad_norm": 0.3898431956768036, + "learning_rate": 9.924881845498184e-05, + "loss": -0.1401, + "step": 6150 + }, + { + "epoch": 13.811659192825113, + "grad_norm": 0.47031664848327637, + "learning_rate": 9.924405203590705e-05, + "loss": -0.1397, + "step": 6160 + }, + { + "epoch": 13.834080717488789, + "grad_norm": 0.35833191871643066, + "learning_rate": 9.923927065778946e-05, + "loss": -0.1455, + "step": 6170 + }, + { + "epoch": 13.856502242152466, + "grad_norm": 0.4074867367744446, + "learning_rate": 9.923447432208154e-05, + "loss": -0.1392, + "step": 6180 + }, + { + "epoch": 13.878923766816143, + "grad_norm": 0.5107132196426392, + "learning_rate": 9.922966303024027e-05, + "loss": -0.1446, + "step": 6190 + }, + { + "epoch": 13.90134529147982, + "grad_norm": 0.4251682162284851, + "learning_rate": 9.922483678372721e-05, + "loss": -0.1366, + "step": 6200 + }, + { + "epoch": 13.923766816143498, + "grad_norm": 0.3804589509963989, + "learning_rate": 9.921999558400845e-05, + "loss": -0.1468, + "step": 6210 + }, + { + "epoch": 13.946188340807176, + "grad_norm": 0.35110732913017273, + "learning_rate": 9.92151394325546e-05, + "loss": -0.1306, + "step": 6220 + }, + { + "epoch": 13.968609865470851, + "grad_norm": 0.44491109251976013, + "learning_rate": 9.921026833084084e-05, + "loss": -0.1392, + "step": 6230 + }, + { + "epoch": 13.991031390134529, + "grad_norm": 0.4939764440059662, + "learning_rate": 9.920538228034689e-05, + "loss": -0.1458, + "step": 6240 + }, + { + "epoch": 14.013452914798206, + "grad_norm": 0.37471622228622437, + "learning_rate": 9.920048128255699e-05, + "loss": -0.1413, + "step": 6250 + }, + { + "epoch": 14.035874439461884, + "grad_norm": 0.48630228638648987, + "learning_rate": 9.919556533895995e-05, + "loss": -0.1317, + "step": 6260 + }, + { + "epoch": 14.058295964125561, + "grad_norm": 0.5187991261482239, + "learning_rate": 9.919063445104907e-05, + "loss": -0.1382, + "step": 6270 + }, + { + "epoch": 14.080717488789238, + "grad_norm": 0.39143308997154236, + "learning_rate": 9.918568862032227e-05, + "loss": -0.1414, + "step": 6280 + }, + { + "epoch": 14.103139013452914, + "grad_norm": 0.42336800694465637, + "learning_rate": 9.918072784828194e-05, + "loss": -0.1377, + "step": 6290 + }, + { + "epoch": 14.125560538116591, + "grad_norm": 0.4480321407318115, + "learning_rate": 9.917575213643501e-05, + "loss": -0.1397, + "step": 6300 + }, + { + "epoch": 14.147982062780269, + "grad_norm": 0.36302435398101807, + "learning_rate": 9.917076148629302e-05, + "loss": -0.1451, + "step": 6310 + }, + { + "epoch": 14.170403587443946, + "grad_norm": 0.39850515127182007, + "learning_rate": 9.916575589937196e-05, + "loss": -0.1403, + "step": 6320 + }, + { + "epoch": 14.192825112107624, + "grad_norm": 0.45773690938949585, + "learning_rate": 9.916073537719239e-05, + "loss": -0.141, + "step": 6330 + }, + { + "epoch": 14.215246636771301, + "grad_norm": 0.4140288233757019, + "learning_rate": 9.915569992127944e-05, + "loss": -0.1397, + "step": 6340 + }, + { + "epoch": 14.237668161434977, + "grad_norm": 0.4811212718486786, + "learning_rate": 9.915064953316273e-05, + "loss": -0.1425, + "step": 6350 + }, + { + "epoch": 14.260089686098654, + "grad_norm": 0.2676592469215393, + "learning_rate": 9.914558421437645e-05, + "loss": -0.1404, + "step": 6360 + }, + { + "epoch": 14.282511210762332, + "grad_norm": 0.6000551581382751, + "learning_rate": 9.914050396645929e-05, + "loss": -0.1406, + "step": 6370 + }, + { + "epoch": 14.304932735426009, + "grad_norm": 0.42349889874458313, + "learning_rate": 9.913540879095452e-05, + "loss": -0.1387, + "step": 6380 + }, + { + "epoch": 14.327354260089686, + "grad_norm": 0.4587444067001343, + "learning_rate": 9.913029868940987e-05, + "loss": -0.134, + "step": 6390 + }, + { + "epoch": 14.349775784753364, + "grad_norm": 0.639128565788269, + "learning_rate": 9.912517366337772e-05, + "loss": -0.1407, + "step": 6400 + }, + { + "epoch": 14.372197309417041, + "grad_norm": 0.3033718764781952, + "learning_rate": 9.912003371441487e-05, + "loss": -0.1372, + "step": 6410 + }, + { + "epoch": 14.394618834080717, + "grad_norm": 0.44374650716781616, + "learning_rate": 9.911487884408271e-05, + "loss": -0.1449, + "step": 6420 + }, + { + "epoch": 14.417040358744394, + "grad_norm": 0.27006521821022034, + "learning_rate": 9.910970905394719e-05, + "loss": -0.1401, + "step": 6430 + }, + { + "epoch": 14.439461883408072, + "grad_norm": 0.38786303997039795, + "learning_rate": 9.91045243455787e-05, + "loss": -0.1489, + "step": 6440 + }, + { + "epoch": 14.461883408071749, + "grad_norm": 0.498124897480011, + "learning_rate": 9.909932472055225e-05, + "loss": -0.1382, + "step": 6450 + }, + { + "epoch": 14.484304932735427, + "grad_norm": 0.453251451253891, + "learning_rate": 9.909411018044734e-05, + "loss": -0.1504, + "step": 6460 + }, + { + "epoch": 14.506726457399104, + "grad_norm": 0.3343622088432312, + "learning_rate": 9.908888072684802e-05, + "loss": -0.1467, + "step": 6470 + }, + { + "epoch": 14.52914798206278, + "grad_norm": 0.4836690425872803, + "learning_rate": 9.908363636134285e-05, + "loss": -0.1518, + "step": 6480 + }, + { + "epoch": 14.551569506726457, + "grad_norm": 0.438132107257843, + "learning_rate": 9.907837708552493e-05, + "loss": -0.1492, + "step": 6490 + }, + { + "epoch": 14.573991031390134, + "grad_norm": 0.4381200075149536, + "learning_rate": 9.90731029009919e-05, + "loss": -0.1448, + "step": 6500 + }, + { + "epoch": 14.596412556053812, + "grad_norm": 0.33745649456977844, + "learning_rate": 9.906781380934589e-05, + "loss": -0.133, + "step": 6510 + }, + { + "epoch": 14.61883408071749, + "grad_norm": 0.41227295994758606, + "learning_rate": 9.906250981219362e-05, + "loss": -0.1366, + "step": 6520 + }, + { + "epoch": 14.641255605381167, + "grad_norm": 0.3250630795955658, + "learning_rate": 9.905719091114628e-05, + "loss": -0.1495, + "step": 6530 + }, + { + "epoch": 14.663677130044842, + "grad_norm": 0.42704418301582336, + "learning_rate": 9.905185710781964e-05, + "loss": -0.1372, + "step": 6540 + }, + { + "epoch": 14.68609865470852, + "grad_norm": 0.3337622582912445, + "learning_rate": 9.904650840383392e-05, + "loss": -0.1433, + "step": 6550 + }, + { + "epoch": 14.708520179372197, + "grad_norm": 0.29745057225227356, + "learning_rate": 9.904114480081397e-05, + "loss": -0.149, + "step": 6560 + }, + { + "epoch": 14.730941704035875, + "grad_norm": 0.4344085156917572, + "learning_rate": 9.903576630038906e-05, + "loss": -0.1403, + "step": 6570 + }, + { + "epoch": 14.753363228699552, + "grad_norm": 0.2361091673374176, + "learning_rate": 9.903037290419309e-05, + "loss": -0.1494, + "step": 6580 + }, + { + "epoch": 14.77578475336323, + "grad_norm": 0.34881117939949036, + "learning_rate": 9.902496461386439e-05, + "loss": -0.1414, + "step": 6590 + }, + { + "epoch": 14.798206278026905, + "grad_norm": 0.47468218207359314, + "learning_rate": 9.901954143104588e-05, + "loss": -0.147, + "step": 6600 + }, + { + "epoch": 14.820627802690582, + "grad_norm": 0.47375693917274475, + "learning_rate": 9.901410335738496e-05, + "loss": -0.1532, + "step": 6610 + }, + { + "epoch": 14.84304932735426, + "grad_norm": 0.49154558777809143, + "learning_rate": 9.900865039453358e-05, + "loss": -0.1337, + "step": 6620 + }, + { + "epoch": 14.865470852017937, + "grad_norm": 0.44578391313552856, + "learning_rate": 9.900318254414821e-05, + "loss": -0.1393, + "step": 6630 + }, + { + "epoch": 14.887892376681615, + "grad_norm": 0.2762792706489563, + "learning_rate": 9.899769980788985e-05, + "loss": -0.1443, + "step": 6640 + }, + { + "epoch": 14.910313901345292, + "grad_norm": 0.29444819688796997, + "learning_rate": 9.899220218742398e-05, + "loss": -0.1509, + "step": 6650 + }, + { + "epoch": 14.932735426008968, + "grad_norm": 0.4439631998538971, + "learning_rate": 9.898668968442066e-05, + "loss": -0.1415, + "step": 6660 + }, + { + "epoch": 14.955156950672645, + "grad_norm": 0.35893431305885315, + "learning_rate": 9.898116230055443e-05, + "loss": -0.1421, + "step": 6670 + }, + { + "epoch": 14.977578475336323, + "grad_norm": 0.3123100697994232, + "learning_rate": 9.897562003750437e-05, + "loss": -0.1478, + "step": 6680 + }, + { + "epoch": 15.0, + "grad_norm": 0.522997260093689, + "learning_rate": 9.897006289695407e-05, + "loss": -0.1403, + "step": 6690 + }, + { + "epoch": 15.022421524663677, + "grad_norm": 0.2685193717479706, + "learning_rate": 9.896449088059164e-05, + "loss": -0.1427, + "step": 6700 + }, + { + "epoch": 15.044843049327355, + "grad_norm": 0.42428380250930786, + "learning_rate": 9.89589039901097e-05, + "loss": -0.1458, + "step": 6710 + }, + { + "epoch": 15.067264573991032, + "grad_norm": 0.3141867518424988, + "learning_rate": 9.895330222720542e-05, + "loss": -0.1421, + "step": 6720 + }, + { + "epoch": 15.089686098654708, + "grad_norm": 0.3226049840450287, + "learning_rate": 9.894768559358047e-05, + "loss": -0.1331, + "step": 6730 + }, + { + "epoch": 15.112107623318385, + "grad_norm": 0.3568348288536072, + "learning_rate": 9.894205409094101e-05, + "loss": -0.1333, + "step": 6740 + }, + { + "epoch": 15.134529147982063, + "grad_norm": 0.41922834515571594, + "learning_rate": 9.893640772099777e-05, + "loss": -0.1443, + "step": 6750 + }, + { + "epoch": 15.15695067264574, + "grad_norm": 0.37045833468437195, + "learning_rate": 9.893074648546595e-05, + "loss": -0.1352, + "step": 6760 + }, + { + "epoch": 15.179372197309418, + "grad_norm": 0.4902575612068176, + "learning_rate": 9.892507038606528e-05, + "loss": -0.142, + "step": 6770 + }, + { + "epoch": 15.201793721973095, + "grad_norm": 0.3224155306816101, + "learning_rate": 9.891937942452003e-05, + "loss": -0.1425, + "step": 6780 + }, + { + "epoch": 15.22421524663677, + "grad_norm": 0.33972200751304626, + "learning_rate": 9.891367360255895e-05, + "loss": -0.1462, + "step": 6790 + }, + { + "epoch": 15.246636771300448, + "grad_norm": 0.38146016001701355, + "learning_rate": 9.890795292191532e-05, + "loss": -0.1249, + "step": 6800 + }, + { + "epoch": 15.269058295964125, + "grad_norm": 0.47818154096603394, + "learning_rate": 9.890221738432694e-05, + "loss": -0.1367, + "step": 6810 + }, + { + "epoch": 15.291479820627803, + "grad_norm": 0.4736446142196655, + "learning_rate": 9.88964669915361e-05, + "loss": -0.1432, + "step": 6820 + }, + { + "epoch": 15.31390134529148, + "grad_norm": 0.4243835210800171, + "learning_rate": 9.889070174528963e-05, + "loss": -0.1396, + "step": 6830 + }, + { + "epoch": 15.336322869955158, + "grad_norm": 0.32086607813835144, + "learning_rate": 9.888492164733883e-05, + "loss": -0.1447, + "step": 6840 + }, + { + "epoch": 15.358744394618833, + "grad_norm": 0.4105203449726105, + "learning_rate": 9.88791266994396e-05, + "loss": -0.1423, + "step": 6850 + }, + { + "epoch": 15.38116591928251, + "grad_norm": 0.49435123801231384, + "learning_rate": 9.887331690335223e-05, + "loss": -0.1443, + "step": 6860 + }, + { + "epoch": 15.403587443946188, + "grad_norm": 0.4403253197669983, + "learning_rate": 9.886749226084163e-05, + "loss": -0.1461, + "step": 6870 + }, + { + "epoch": 15.426008968609866, + "grad_norm": 0.48111993074417114, + "learning_rate": 9.886165277367714e-05, + "loss": -0.1324, + "step": 6880 + }, + { + "epoch": 15.448430493273543, + "grad_norm": 0.3069981038570404, + "learning_rate": 9.885579844363265e-05, + "loss": -0.1466, + "step": 6890 + }, + { + "epoch": 15.47085201793722, + "grad_norm": 0.46991047263145447, + "learning_rate": 9.884992927248656e-05, + "loss": -0.1475, + "step": 6900 + }, + { + "epoch": 15.493273542600896, + "grad_norm": 0.5131191611289978, + "learning_rate": 9.884404526202178e-05, + "loss": -0.1424, + "step": 6910 + }, + { + "epoch": 15.515695067264573, + "grad_norm": 0.4218796193599701, + "learning_rate": 9.883814641402568e-05, + "loss": -0.1415, + "step": 6920 + }, + { + "epoch": 15.538116591928251, + "grad_norm": 0.38680392503738403, + "learning_rate": 9.88322327302902e-05, + "loss": -0.1507, + "step": 6930 + }, + { + "epoch": 15.560538116591928, + "grad_norm": 0.24988237023353577, + "learning_rate": 9.882630421261176e-05, + "loss": -0.1491, + "step": 6940 + }, + { + "epoch": 15.582959641255606, + "grad_norm": 0.23891040682792664, + "learning_rate": 9.88203608627913e-05, + "loss": -0.144, + "step": 6950 + }, + { + "epoch": 15.605381165919283, + "grad_norm": 0.35642877221107483, + "learning_rate": 9.881440268263422e-05, + "loss": -0.1417, + "step": 6960 + }, + { + "epoch": 15.62780269058296, + "grad_norm": 0.5908177495002747, + "learning_rate": 9.880842967395048e-05, + "loss": -0.1428, + "step": 6970 + }, + { + "epoch": 15.650224215246636, + "grad_norm": 0.44157925248146057, + "learning_rate": 9.880244183855452e-05, + "loss": -0.1454, + "step": 6980 + }, + { + "epoch": 15.672645739910314, + "grad_norm": 0.5988585352897644, + "learning_rate": 9.879643917826527e-05, + "loss": -0.1278, + "step": 6990 + }, + { + "epoch": 15.695067264573991, + "grad_norm": 0.5100188255310059, + "learning_rate": 9.87904216949062e-05, + "loss": -0.1413, + "step": 7000 + }, + { + "epoch": 15.717488789237668, + "grad_norm": 0.4657297134399414, + "learning_rate": 9.878438939030526e-05, + "loss": -0.1381, + "step": 7010 + }, + { + "epoch": 15.739910313901346, + "grad_norm": 0.332792729139328, + "learning_rate": 9.877834226629489e-05, + "loss": -0.1458, + "step": 7020 + }, + { + "epoch": 15.762331838565022, + "grad_norm": 0.4815354347229004, + "learning_rate": 9.877228032471206e-05, + "loss": -0.1469, + "step": 7030 + }, + { + "epoch": 15.784753363228699, + "grad_norm": 0.2665725648403168, + "learning_rate": 9.876620356739823e-05, + "loss": -0.1464, + "step": 7040 + }, + { + "epoch": 15.807174887892376, + "grad_norm": 0.3742716908454895, + "learning_rate": 9.876011199619935e-05, + "loss": -0.1461, + "step": 7050 + }, + { + "epoch": 15.829596412556054, + "grad_norm": 0.434566855430603, + "learning_rate": 9.875400561296589e-05, + "loss": -0.1477, + "step": 7060 + }, + { + "epoch": 15.852017937219731, + "grad_norm": 0.3979370594024658, + "learning_rate": 9.874788441955278e-05, + "loss": -0.1383, + "step": 7070 + }, + { + "epoch": 15.874439461883409, + "grad_norm": 0.5311012864112854, + "learning_rate": 9.874174841781951e-05, + "loss": -0.1389, + "step": 7080 + }, + { + "epoch": 15.896860986547086, + "grad_norm": 0.4379273056983948, + "learning_rate": 9.873559760963003e-05, + "loss": -0.1438, + "step": 7090 + }, + { + "epoch": 15.919282511210762, + "grad_norm": 0.40004923939704895, + "learning_rate": 9.872943199685278e-05, + "loss": -0.1426, + "step": 7100 + }, + { + "epoch": 15.941704035874439, + "grad_norm": 0.3735175132751465, + "learning_rate": 9.872325158136071e-05, + "loss": -0.1432, + "step": 7110 + }, + { + "epoch": 15.964125560538116, + "grad_norm": 0.4588760435581207, + "learning_rate": 9.871705636503128e-05, + "loss": -0.1426, + "step": 7120 + }, + { + "epoch": 15.986547085201794, + "grad_norm": 0.44648829102516174, + "learning_rate": 9.871084634974641e-05, + "loss": -0.1384, + "step": 7130 + }, + { + "epoch": 16.00896860986547, + "grad_norm": 0.576957106590271, + "learning_rate": 9.870462153739257e-05, + "loss": -0.1359, + "step": 7140 + }, + { + "epoch": 16.031390134529147, + "grad_norm": 0.4325287640094757, + "learning_rate": 9.869838192986067e-05, + "loss": -0.1418, + "step": 7150 + }, + { + "epoch": 16.053811659192824, + "grad_norm": 0.3753845989704132, + "learning_rate": 9.869212752904616e-05, + "loss": -0.1396, + "step": 7160 + }, + { + "epoch": 16.076233183856502, + "grad_norm": 0.3643123209476471, + "learning_rate": 9.868585833684894e-05, + "loss": -0.1457, + "step": 7170 + }, + { + "epoch": 16.09865470852018, + "grad_norm": 0.347683310508728, + "learning_rate": 9.867957435517342e-05, + "loss": -0.1462, + "step": 7180 + }, + { + "epoch": 16.121076233183857, + "grad_norm": 0.30135977268218994, + "learning_rate": 9.867327558592854e-05, + "loss": -0.1422, + "step": 7190 + }, + { + "epoch": 16.143497757847534, + "grad_norm": 0.392813116312027, + "learning_rate": 9.866696203102766e-05, + "loss": -0.1332, + "step": 7200 + }, + { + "epoch": 16.16591928251121, + "grad_norm": 0.38795000314712524, + "learning_rate": 9.86606336923887e-05, + "loss": -0.148, + "step": 7210 + }, + { + "epoch": 16.18834080717489, + "grad_norm": 0.28879621624946594, + "learning_rate": 9.865429057193403e-05, + "loss": -0.1472, + "step": 7220 + }, + { + "epoch": 16.210762331838566, + "grad_norm": 0.49020159244537354, + "learning_rate": 9.864793267159053e-05, + "loss": -0.1399, + "step": 7230 + }, + { + "epoch": 16.233183856502244, + "grad_norm": 0.47808998823165894, + "learning_rate": 9.864155999328957e-05, + "loss": -0.1454, + "step": 7240 + }, + { + "epoch": 16.255605381165918, + "grad_norm": 0.3638955056667328, + "learning_rate": 9.8635172538967e-05, + "loss": -0.1423, + "step": 7250 + }, + { + "epoch": 16.278026905829595, + "grad_norm": 0.4358319938182831, + "learning_rate": 9.862877031056312e-05, + "loss": -0.1468, + "step": 7260 + }, + { + "epoch": 16.300448430493272, + "grad_norm": 0.3548693358898163, + "learning_rate": 9.862235331002279e-05, + "loss": -0.1395, + "step": 7270 + }, + { + "epoch": 16.32286995515695, + "grad_norm": 0.6182172894477844, + "learning_rate": 9.861592153929533e-05, + "loss": -0.1464, + "step": 7280 + }, + { + "epoch": 16.345291479820627, + "grad_norm": 0.3771834969520569, + "learning_rate": 9.860947500033455e-05, + "loss": -0.1487, + "step": 7290 + }, + { + "epoch": 16.367713004484305, + "grad_norm": 0.38616496324539185, + "learning_rate": 9.86030136950987e-05, + "loss": -0.1401, + "step": 7300 + }, + { + "epoch": 16.390134529147982, + "grad_norm": 0.44089990854263306, + "learning_rate": 9.85965376255506e-05, + "loss": -0.15, + "step": 7310 + }, + { + "epoch": 16.41255605381166, + "grad_norm": 0.3979402184486389, + "learning_rate": 9.859004679365747e-05, + "loss": -0.1489, + "step": 7320 + }, + { + "epoch": 16.434977578475337, + "grad_norm": 0.31395456194877625, + "learning_rate": 9.858354120139108e-05, + "loss": -0.149, + "step": 7330 + }, + { + "epoch": 16.457399103139014, + "grad_norm": 0.3987247049808502, + "learning_rate": 9.857702085072764e-05, + "loss": -0.1458, + "step": 7340 + }, + { + "epoch": 16.47982062780269, + "grad_norm": 0.4228513538837433, + "learning_rate": 9.857048574364787e-05, + "loss": -0.1473, + "step": 7350 + }, + { + "epoch": 16.50224215246637, + "grad_norm": 0.2852928042411804, + "learning_rate": 9.856393588213698e-05, + "loss": -0.14, + "step": 7360 + }, + { + "epoch": 16.524663677130043, + "grad_norm": 0.3443228304386139, + "learning_rate": 9.855737126818458e-05, + "loss": -0.142, + "step": 7370 + }, + { + "epoch": 16.54708520179372, + "grad_norm": 0.3560522198677063, + "learning_rate": 9.855079190378491e-05, + "loss": -0.1426, + "step": 7380 + }, + { + "epoch": 16.569506726457398, + "grad_norm": 0.42066729068756104, + "learning_rate": 9.854419779093655e-05, + "loss": -0.1475, + "step": 7390 + }, + { + "epoch": 16.591928251121075, + "grad_norm": 0.3639271557331085, + "learning_rate": 9.853758893164264e-05, + "loss": -0.1474, + "step": 7400 + }, + { + "epoch": 16.614349775784753, + "grad_norm": 0.3005513846874237, + "learning_rate": 9.853096532791078e-05, + "loss": -0.1502, + "step": 7410 + }, + { + "epoch": 16.63677130044843, + "grad_norm": 0.3646838366985321, + "learning_rate": 9.852432698175304e-05, + "loss": -0.1369, + "step": 7420 + }, + { + "epoch": 16.659192825112108, + "grad_norm": 0.37552177906036377, + "learning_rate": 9.851767389518597e-05, + "loss": -0.1427, + "step": 7430 + }, + { + "epoch": 16.681614349775785, + "grad_norm": 0.2886190116405487, + "learning_rate": 9.85110060702306e-05, + "loss": -0.1491, + "step": 7440 + }, + { + "epoch": 16.704035874439462, + "grad_norm": 0.3688383400440216, + "learning_rate": 9.850432350891245e-05, + "loss": -0.1483, + "step": 7450 + }, + { + "epoch": 16.72645739910314, + "grad_norm": 0.24576137959957123, + "learning_rate": 9.84976262132615e-05, + "loss": -0.151, + "step": 7460 + }, + { + "epoch": 16.748878923766817, + "grad_norm": 0.260562926530838, + "learning_rate": 9.849091418531222e-05, + "loss": -0.1479, + "step": 7470 + }, + { + "epoch": 16.771300448430495, + "grad_norm": 0.6725562810897827, + "learning_rate": 9.848418742710353e-05, + "loss": -0.1425, + "step": 7480 + }, + { + "epoch": 16.793721973094172, + "grad_norm": 0.2998345196247101, + "learning_rate": 9.847744594067885e-05, + "loss": -0.1466, + "step": 7490 + }, + { + "epoch": 16.816143497757846, + "grad_norm": 0.5050772428512573, + "learning_rate": 9.847068972808607e-05, + "loss": -0.1399, + "step": 7500 + }, + { + "epoch": 16.838565022421523, + "grad_norm": 0.4619629979133606, + "learning_rate": 9.846391879137756e-05, + "loss": -0.1476, + "step": 7510 + }, + { + "epoch": 16.8609865470852, + "grad_norm": 0.31068533658981323, + "learning_rate": 9.845713313261012e-05, + "loss": -0.1329, + "step": 7520 + }, + { + "epoch": 16.883408071748878, + "grad_norm": 0.3498291075229645, + "learning_rate": 9.845033275384505e-05, + "loss": -0.1332, + "step": 7530 + }, + { + "epoch": 16.905829596412556, + "grad_norm": 0.38093793392181396, + "learning_rate": 9.844351765714818e-05, + "loss": -0.1405, + "step": 7540 + }, + { + "epoch": 16.928251121076233, + "grad_norm": 0.6441341042518616, + "learning_rate": 9.843668784458971e-05, + "loss": -0.142, + "step": 7550 + }, + { + "epoch": 16.95067264573991, + "grad_norm": 0.5038818717002869, + "learning_rate": 9.842984331824437e-05, + "loss": -0.1442, + "step": 7560 + }, + { + "epoch": 16.973094170403588, + "grad_norm": 0.3514540195465088, + "learning_rate": 9.842298408019133e-05, + "loss": -0.1549, + "step": 7570 + }, + { + "epoch": 16.995515695067265, + "grad_norm": 0.5086261630058289, + "learning_rate": 9.841611013251429e-05, + "loss": -0.1405, + "step": 7580 + }, + { + "epoch": 17.017937219730943, + "grad_norm": 0.35050278902053833, + "learning_rate": 9.840922147730133e-05, + "loss": -0.1427, + "step": 7590 + }, + { + "epoch": 17.04035874439462, + "grad_norm": 0.5817588567733765, + "learning_rate": 9.840231811664506e-05, + "loss": -0.1452, + "step": 7600 + }, + { + "epoch": 17.062780269058297, + "grad_norm": 0.4485257565975189, + "learning_rate": 9.839540005264252e-05, + "loss": -0.1473, + "step": 7610 + }, + { + "epoch": 17.08520179372197, + "grad_norm": 0.4109024405479431, + "learning_rate": 9.838846728739527e-05, + "loss": -0.1521, + "step": 7620 + }, + { + "epoch": 17.10762331838565, + "grad_norm": 0.31952735781669617, + "learning_rate": 9.838151982300927e-05, + "loss": -0.1331, + "step": 7630 + }, + { + "epoch": 17.130044843049326, + "grad_norm": 0.42983272671699524, + "learning_rate": 9.8374557661595e-05, + "loss": -0.1448, + "step": 7640 + }, + { + "epoch": 17.152466367713004, + "grad_norm": 0.41342827677726746, + "learning_rate": 9.836758080526735e-05, + "loss": -0.1368, + "step": 7650 + }, + { + "epoch": 17.17488789237668, + "grad_norm": 0.42036131024360657, + "learning_rate": 9.836058925614575e-05, + "loss": -0.1448, + "step": 7660 + }, + { + "epoch": 17.19730941704036, + "grad_norm": 0.3506125509738922, + "learning_rate": 9.8353583016354e-05, + "loss": -0.148, + "step": 7670 + }, + { + "epoch": 17.219730941704036, + "grad_norm": 0.38504672050476074, + "learning_rate": 9.834656208802044e-05, + "loss": -0.1448, + "step": 7680 + }, + { + "epoch": 17.242152466367713, + "grad_norm": 0.49738579988479614, + "learning_rate": 9.833952647327784e-05, + "loss": -0.1448, + "step": 7690 + }, + { + "epoch": 17.26457399103139, + "grad_norm": 0.3485558331012726, + "learning_rate": 9.833247617426342e-05, + "loss": -0.1467, + "step": 7700 + }, + { + "epoch": 17.286995515695068, + "grad_norm": 0.2884437143802643, + "learning_rate": 9.832541119311889e-05, + "loss": -0.1505, + "step": 7710 + }, + { + "epoch": 17.309417040358746, + "grad_norm": 0.5279978513717651, + "learning_rate": 9.83183315319904e-05, + "loss": -0.1406, + "step": 7720 + }, + { + "epoch": 17.331838565022423, + "grad_norm": 0.41244232654571533, + "learning_rate": 9.831123719302855e-05, + "loss": -0.145, + "step": 7730 + }, + { + "epoch": 17.3542600896861, + "grad_norm": 0.4663519859313965, + "learning_rate": 9.830412817838842e-05, + "loss": -0.1485, + "step": 7740 + }, + { + "epoch": 17.376681614349774, + "grad_norm": 0.3285347521305084, + "learning_rate": 9.829700449022956e-05, + "loss": -0.1483, + "step": 7750 + }, + { + "epoch": 17.39910313901345, + "grad_norm": 0.3750251829624176, + "learning_rate": 9.828986613071593e-05, + "loss": -0.1507, + "step": 7760 + }, + { + "epoch": 17.42152466367713, + "grad_norm": 0.44817596673965454, + "learning_rate": 9.828271310201601e-05, + "loss": -0.1507, + "step": 7770 + }, + { + "epoch": 17.443946188340806, + "grad_norm": 0.4599224030971527, + "learning_rate": 9.827554540630268e-05, + "loss": -0.1478, + "step": 7780 + }, + { + "epoch": 17.466367713004484, + "grad_norm": 0.3315490484237671, + "learning_rate": 9.826836304575329e-05, + "loss": -0.1401, + "step": 7790 + }, + { + "epoch": 17.48878923766816, + "grad_norm": 0.6005570292472839, + "learning_rate": 9.826116602254966e-05, + "loss": -0.1535, + "step": 7800 + }, + { + "epoch": 17.51121076233184, + "grad_norm": 0.37412339448928833, + "learning_rate": 9.825395433887805e-05, + "loss": -0.144, + "step": 7810 + }, + { + "epoch": 17.533632286995516, + "grad_norm": 0.7111011147499084, + "learning_rate": 9.824672799692917e-05, + "loss": -0.1448, + "step": 7820 + }, + { + "epoch": 17.556053811659194, + "grad_norm": 0.35042911767959595, + "learning_rate": 9.823948699889823e-05, + "loss": -0.1439, + "step": 7830 + }, + { + "epoch": 17.57847533632287, + "grad_norm": 0.3671702742576599, + "learning_rate": 9.823223134698483e-05, + "loss": -0.1531, + "step": 7840 + }, + { + "epoch": 17.60089686098655, + "grad_norm": 0.35516810417175293, + "learning_rate": 9.822496104339303e-05, + "loss": -0.1471, + "step": 7850 + }, + { + "epoch": 17.623318385650226, + "grad_norm": 0.37065747380256653, + "learning_rate": 9.821767609033138e-05, + "loss": -0.1478, + "step": 7860 + }, + { + "epoch": 17.6457399103139, + "grad_norm": 0.3782210648059845, + "learning_rate": 9.821037649001284e-05, + "loss": -0.1452, + "step": 7870 + }, + { + "epoch": 17.668161434977577, + "grad_norm": 0.5246129035949707, + "learning_rate": 9.820306224465486e-05, + "loss": -0.1432, + "step": 7880 + }, + { + "epoch": 17.690582959641254, + "grad_norm": 0.2789180278778076, + "learning_rate": 9.819573335647928e-05, + "loss": -0.1469, + "step": 7890 + }, + { + "epoch": 17.713004484304932, + "grad_norm": 0.5000686645507812, + "learning_rate": 9.818838982771246e-05, + "loss": -0.1348, + "step": 7900 + }, + { + "epoch": 17.73542600896861, + "grad_norm": 0.4750319719314575, + "learning_rate": 9.818103166058514e-05, + "loss": -0.1467, + "step": 7910 + }, + { + "epoch": 17.757847533632287, + "grad_norm": 0.4161461889743805, + "learning_rate": 9.817365885733254e-05, + "loss": -0.1491, + "step": 7920 + }, + { + "epoch": 17.780269058295964, + "grad_norm": 0.32106813788414, + "learning_rate": 9.816627142019434e-05, + "loss": -0.1486, + "step": 7930 + }, + { + "epoch": 17.80269058295964, + "grad_norm": 0.38808733224868774, + "learning_rate": 9.815886935141463e-05, + "loss": -0.1408, + "step": 7940 + }, + { + "epoch": 17.82511210762332, + "grad_norm": 0.4597635269165039, + "learning_rate": 9.8151452653242e-05, + "loss": -0.1507, + "step": 7950 + }, + { + "epoch": 17.847533632286996, + "grad_norm": 0.4778316617012024, + "learning_rate": 9.814402132792939e-05, + "loss": -0.1442, + "step": 7960 + }, + { + "epoch": 17.869955156950674, + "grad_norm": 0.3494867980480194, + "learning_rate": 9.813657537773428e-05, + "loss": -0.1474, + "step": 7970 + }, + { + "epoch": 17.89237668161435, + "grad_norm": 0.3963158428668976, + "learning_rate": 9.812911480491854e-05, + "loss": -0.1522, + "step": 7980 + }, + { + "epoch": 17.91479820627803, + "grad_norm": 0.43599948287010193, + "learning_rate": 9.81216396117485e-05, + "loss": -0.1487, + "step": 7990 + }, + { + "epoch": 17.937219730941703, + "grad_norm": 0.40678876638412476, + "learning_rate": 9.811414980049491e-05, + "loss": -0.1458, + "step": 8000 + }, + { + "epoch": 17.95964125560538, + "grad_norm": 0.3506334722042084, + "learning_rate": 9.810664537343301e-05, + "loss": -0.1492, + "step": 8010 + }, + { + "epoch": 17.982062780269057, + "grad_norm": 0.36121049523353577, + "learning_rate": 9.809912633284243e-05, + "loss": -0.1493, + "step": 8020 + }, + { + "epoch": 18.004484304932735, + "grad_norm": 0.4958936274051666, + "learning_rate": 9.809159268100725e-05, + "loss": -0.1478, + "step": 8030 + }, + { + "epoch": 18.026905829596412, + "grad_norm": 0.3769684135913849, + "learning_rate": 9.808404442021599e-05, + "loss": -0.1507, + "step": 8040 + }, + { + "epoch": 18.04932735426009, + "grad_norm": 0.3307076394557953, + "learning_rate": 9.807648155276163e-05, + "loss": -0.1498, + "step": 8050 + }, + { + "epoch": 18.071748878923767, + "grad_norm": 0.39467060565948486, + "learning_rate": 9.806890408094156e-05, + "loss": -0.1435, + "step": 8060 + }, + { + "epoch": 18.094170403587444, + "grad_norm": 0.3977019190788269, + "learning_rate": 9.806131200705761e-05, + "loss": -0.1477, + "step": 8070 + }, + { + "epoch": 18.116591928251122, + "grad_norm": 0.4118204116821289, + "learning_rate": 9.805370533341605e-05, + "loss": -0.144, + "step": 8080 + }, + { + "epoch": 18.1390134529148, + "grad_norm": 0.3472743630409241, + "learning_rate": 9.804608406232762e-05, + "loss": -0.1476, + "step": 8090 + }, + { + "epoch": 18.161434977578477, + "grad_norm": 0.38154056668281555, + "learning_rate": 9.803844819610741e-05, + "loss": -0.1496, + "step": 8100 + }, + { + "epoch": 18.183856502242154, + "grad_norm": 0.3991706967353821, + "learning_rate": 9.803079773707504e-05, + "loss": -0.1488, + "step": 8110 + }, + { + "epoch": 18.206278026905828, + "grad_norm": 0.4238066077232361, + "learning_rate": 9.802313268755447e-05, + "loss": -0.1361, + "step": 8120 + }, + { + "epoch": 18.228699551569505, + "grad_norm": 0.4431650638580322, + "learning_rate": 9.801545304987419e-05, + "loss": -0.1459, + "step": 8130 + }, + { + "epoch": 18.251121076233183, + "grad_norm": 0.3234190344810486, + "learning_rate": 9.800775882636704e-05, + "loss": -0.1435, + "step": 8140 + }, + { + "epoch": 18.27354260089686, + "grad_norm": 0.5565519332885742, + "learning_rate": 9.800005001937034e-05, + "loss": -0.1499, + "step": 8150 + }, + { + "epoch": 18.295964125560538, + "grad_norm": 0.38941383361816406, + "learning_rate": 9.79923266312258e-05, + "loss": -0.1412, + "step": 8160 + }, + { + "epoch": 18.318385650224215, + "grad_norm": 0.3040974736213684, + "learning_rate": 9.79845886642796e-05, + "loss": -0.1416, + "step": 8170 + }, + { + "epoch": 18.340807174887892, + "grad_norm": 0.3186461925506592, + "learning_rate": 9.797683612088233e-05, + "loss": -0.1503, + "step": 8180 + }, + { + "epoch": 18.36322869955157, + "grad_norm": 0.24295033514499664, + "learning_rate": 9.796906900338898e-05, + "loss": -0.145, + "step": 8190 + }, + { + "epoch": 18.385650224215247, + "grad_norm": 0.45465269684791565, + "learning_rate": 9.796128731415903e-05, + "loss": -0.1396, + "step": 8200 + }, + { + "epoch": 18.408071748878925, + "grad_norm": 0.42499369382858276, + "learning_rate": 9.795349105555634e-05, + "loss": -0.148, + "step": 8210 + }, + { + "epoch": 18.430493273542602, + "grad_norm": 0.3152267336845398, + "learning_rate": 9.794568022994922e-05, + "loss": -0.1429, + "step": 8220 + }, + { + "epoch": 18.45291479820628, + "grad_norm": 0.4164282977581024, + "learning_rate": 9.793785483971034e-05, + "loss": -0.1497, + "step": 8230 + }, + { + "epoch": 18.475336322869953, + "grad_norm": 0.41417235136032104, + "learning_rate": 9.793001488721691e-05, + "loss": -0.1475, + "step": 8240 + }, + { + "epoch": 18.49775784753363, + "grad_norm": 0.38555487990379333, + "learning_rate": 9.792216037485047e-05, + "loss": -0.1498, + "step": 8250 + }, + { + "epoch": 18.52017937219731, + "grad_norm": 0.36903348565101624, + "learning_rate": 9.791429130499704e-05, + "loss": -0.149, + "step": 8260 + }, + { + "epoch": 18.542600896860986, + "grad_norm": 0.35281145572662354, + "learning_rate": 9.790640768004698e-05, + "loss": -0.151, + "step": 8270 + }, + { + "epoch": 18.565022421524663, + "grad_norm": 0.43504226207733154, + "learning_rate": 9.789850950239518e-05, + "loss": -0.1459, + "step": 8280 + }, + { + "epoch": 18.58744394618834, + "grad_norm": 0.36855021119117737, + "learning_rate": 9.789059677444089e-05, + "loss": -0.1449, + "step": 8290 + }, + { + "epoch": 18.609865470852018, + "grad_norm": 0.27555161714553833, + "learning_rate": 9.788266949858776e-05, + "loss": -0.155, + "step": 8300 + }, + { + "epoch": 18.632286995515695, + "grad_norm": 0.2508240044116974, + "learning_rate": 9.787472767724392e-05, + "loss": -0.1589, + "step": 8310 + }, + { + "epoch": 18.654708520179373, + "grad_norm": 0.4933019280433655, + "learning_rate": 9.786677131282185e-05, + "loss": -0.1528, + "step": 8320 + }, + { + "epoch": 18.67713004484305, + "grad_norm": 0.3513035178184509, + "learning_rate": 9.785880040773853e-05, + "loss": -0.1479, + "step": 8330 + }, + { + "epoch": 18.699551569506728, + "grad_norm": 0.46134039759635925, + "learning_rate": 9.785081496441527e-05, + "loss": -0.1408, + "step": 8340 + }, + { + "epoch": 18.721973094170405, + "grad_norm": 0.5622433423995972, + "learning_rate": 9.784281498527785e-05, + "loss": -0.1451, + "step": 8350 + }, + { + "epoch": 18.744394618834082, + "grad_norm": 0.37520289421081543, + "learning_rate": 9.783480047275646e-05, + "loss": -0.1381, + "step": 8360 + }, + { + "epoch": 18.766816143497756, + "grad_norm": 0.420406311750412, + "learning_rate": 9.78267714292857e-05, + "loss": -0.1467, + "step": 8370 + }, + { + "epoch": 18.789237668161434, + "grad_norm": 0.36217883229255676, + "learning_rate": 9.781872785730454e-05, + "loss": -0.1452, + "step": 8380 + }, + { + "epoch": 18.81165919282511, + "grad_norm": 0.5043547749519348, + "learning_rate": 9.781066975925646e-05, + "loss": -0.1499, + "step": 8390 + }, + { + "epoch": 18.83408071748879, + "grad_norm": 0.38572177290916443, + "learning_rate": 9.780259713758928e-05, + "loss": -0.15, + "step": 8400 + }, + { + "epoch": 18.856502242152466, + "grad_norm": 0.37154078483581543, + "learning_rate": 9.779450999475524e-05, + "loss": -0.1526, + "step": 8410 + }, + { + "epoch": 18.878923766816143, + "grad_norm": 0.40827256441116333, + "learning_rate": 9.7786408333211e-05, + "loss": -0.1549, + "step": 8420 + }, + { + "epoch": 18.90134529147982, + "grad_norm": 0.33836227655410767, + "learning_rate": 9.777829215541764e-05, + "loss": -0.147, + "step": 8430 + }, + { + "epoch": 18.923766816143498, + "grad_norm": 0.3863454759120941, + "learning_rate": 9.777016146384064e-05, + "loss": -0.1552, + "step": 8440 + }, + { + "epoch": 18.946188340807176, + "grad_norm": 0.36673277616500854, + "learning_rate": 9.776201626094988e-05, + "loss": -0.1516, + "step": 8450 + }, + { + "epoch": 18.968609865470853, + "grad_norm": 0.45818132162094116, + "learning_rate": 9.775385654921965e-05, + "loss": -0.1517, + "step": 8460 + }, + { + "epoch": 18.99103139013453, + "grad_norm": 0.20308354496955872, + "learning_rate": 9.774568233112868e-05, + "loss": -0.1514, + "step": 8470 + }, + { + "epoch": 19.013452914798208, + "grad_norm": 0.5334926247596741, + "learning_rate": 9.773749360916007e-05, + "loss": -0.1459, + "step": 8480 + }, + { + "epoch": 19.03587443946188, + "grad_norm": 0.46697017550468445, + "learning_rate": 9.772929038580134e-05, + "loss": -0.1465, + "step": 8490 + }, + { + "epoch": 19.05829596412556, + "grad_norm": 0.48255059123039246, + "learning_rate": 9.772107266354439e-05, + "loss": -0.1458, + "step": 8500 + }, + { + "epoch": 19.080717488789237, + "grad_norm": 0.3152186870574951, + "learning_rate": 9.77128404448856e-05, + "loss": -0.1388, + "step": 8510 + }, + { + "epoch": 19.103139013452914, + "grad_norm": 0.27127742767333984, + "learning_rate": 9.770459373232565e-05, + "loss": -0.1526, + "step": 8520 + }, + { + "epoch": 19.12556053811659, + "grad_norm": 0.3602830469608307, + "learning_rate": 9.769633252836969e-05, + "loss": -0.1455, + "step": 8530 + }, + { + "epoch": 19.14798206278027, + "grad_norm": 0.4560525417327881, + "learning_rate": 9.768805683552724e-05, + "loss": -0.154, + "step": 8540 + }, + { + "epoch": 19.170403587443946, + "grad_norm": 0.40758687257766724, + "learning_rate": 9.767976665631228e-05, + "loss": -0.1509, + "step": 8550 + }, + { + "epoch": 19.192825112107624, + "grad_norm": 0.33155369758605957, + "learning_rate": 9.767146199324311e-05, + "loss": -0.1525, + "step": 8560 + }, + { + "epoch": 19.2152466367713, + "grad_norm": 0.3178279995918274, + "learning_rate": 9.766314284884249e-05, + "loss": -0.1561, + "step": 8570 + }, + { + "epoch": 19.23766816143498, + "grad_norm": 0.307321697473526, + "learning_rate": 9.765480922563752e-05, + "loss": -0.1473, + "step": 8580 + }, + { + "epoch": 19.260089686098656, + "grad_norm": 0.3881741762161255, + "learning_rate": 9.764646112615978e-05, + "loss": -0.1469, + "step": 8590 + }, + { + "epoch": 19.282511210762333, + "grad_norm": 0.4811601936817169, + "learning_rate": 9.763809855294517e-05, + "loss": -0.1503, + "step": 8600 + }, + { + "epoch": 19.304932735426007, + "grad_norm": 0.32523617148399353, + "learning_rate": 9.762972150853404e-05, + "loss": -0.155, + "step": 8610 + }, + { + "epoch": 19.327354260089685, + "grad_norm": 0.43628576397895813, + "learning_rate": 9.762132999547111e-05, + "loss": -0.1535, + "step": 8620 + }, + { + "epoch": 19.349775784753362, + "grad_norm": 0.4906386137008667, + "learning_rate": 9.761292401630549e-05, + "loss": -0.1423, + "step": 8630 + }, + { + "epoch": 19.37219730941704, + "grad_norm": 0.2537840008735657, + "learning_rate": 9.76045035735907e-05, + "loss": -0.1562, + "step": 8640 + }, + { + "epoch": 19.394618834080717, + "grad_norm": 0.2508658170700073, + "learning_rate": 9.759606866988464e-05, + "loss": -0.15, + "step": 8650 + }, + { + "epoch": 19.417040358744394, + "grad_norm": 0.2687319219112396, + "learning_rate": 9.758761930774963e-05, + "loss": -0.155, + "step": 8660 + }, + { + "epoch": 19.43946188340807, + "grad_norm": 0.2957753837108612, + "learning_rate": 9.757915548975235e-05, + "loss": -0.154, + "step": 8670 + }, + { + "epoch": 19.46188340807175, + "grad_norm": 0.3133358657360077, + "learning_rate": 9.757067721846389e-05, + "loss": -0.1522, + "step": 8680 + }, + { + "epoch": 19.484304932735427, + "grad_norm": 0.3139364719390869, + "learning_rate": 9.756218449645971e-05, + "loss": -0.1541, + "step": 8690 + }, + { + "epoch": 19.506726457399104, + "grad_norm": 0.37894997000694275, + "learning_rate": 9.75536773263197e-05, + "loss": -0.1512, + "step": 8700 + }, + { + "epoch": 19.52914798206278, + "grad_norm": 0.2883453369140625, + "learning_rate": 9.75451557106281e-05, + "loss": -0.1575, + "step": 8710 + }, + { + "epoch": 19.55156950672646, + "grad_norm": 0.45228996872901917, + "learning_rate": 9.753661965197354e-05, + "loss": -0.1549, + "step": 8720 + }, + { + "epoch": 19.573991031390136, + "grad_norm": 0.36380866169929504, + "learning_rate": 9.752806915294908e-05, + "loss": -0.1513, + "step": 8730 + }, + { + "epoch": 19.59641255605381, + "grad_norm": 0.38044577836990356, + "learning_rate": 9.75195042161521e-05, + "loss": -0.1538, + "step": 8740 + }, + { + "epoch": 19.618834080717487, + "grad_norm": 0.34413594007492065, + "learning_rate": 9.751092484418442e-05, + "loss": -0.1563, + "step": 8750 + }, + { + "epoch": 19.641255605381165, + "grad_norm": 0.3051753044128418, + "learning_rate": 9.750233103965224e-05, + "loss": -0.1519, + "step": 8760 + }, + { + "epoch": 19.663677130044842, + "grad_norm": 0.3534361720085144, + "learning_rate": 9.749372280516611e-05, + "loss": -0.1512, + "step": 8770 + }, + { + "epoch": 19.68609865470852, + "grad_norm": 0.39732399582862854, + "learning_rate": 9.748510014334097e-05, + "loss": -0.1473, + "step": 8780 + }, + { + "epoch": 19.708520179372197, + "grad_norm": 0.3173627257347107, + "learning_rate": 9.747646305679621e-05, + "loss": -0.1437, + "step": 8790 + }, + { + "epoch": 19.730941704035875, + "grad_norm": 0.4300193190574646, + "learning_rate": 9.74678115481555e-05, + "loss": -0.1405, + "step": 8800 + }, + { + "epoch": 19.753363228699552, + "grad_norm": 0.37849265336990356, + "learning_rate": 9.745914562004696e-05, + "loss": -0.1524, + "step": 8810 + }, + { + "epoch": 19.77578475336323, + "grad_norm": 0.27750951051712036, + "learning_rate": 9.745046527510307e-05, + "loss": -0.1498, + "step": 8820 + }, + { + "epoch": 19.798206278026907, + "grad_norm": 0.41608482599258423, + "learning_rate": 9.744177051596068e-05, + "loss": -0.1491, + "step": 8830 + }, + { + "epoch": 19.820627802690584, + "grad_norm": 0.4230055809020996, + "learning_rate": 9.743306134526105e-05, + "loss": -0.1488, + "step": 8840 + }, + { + "epoch": 19.84304932735426, + "grad_norm": 0.42788711190223694, + "learning_rate": 9.742433776564977e-05, + "loss": -0.1497, + "step": 8850 + }, + { + "epoch": 19.865470852017935, + "grad_norm": 0.3009362518787384, + "learning_rate": 9.741559977977683e-05, + "loss": -0.1465, + "step": 8860 + }, + { + "epoch": 19.887892376681613, + "grad_norm": 0.404155969619751, + "learning_rate": 9.740684739029661e-05, + "loss": -0.1496, + "step": 8870 + }, + { + "epoch": 19.91031390134529, + "grad_norm": 0.3590388298034668, + "learning_rate": 9.739808059986789e-05, + "loss": -0.1428, + "step": 8880 + }, + { + "epoch": 19.932735426008968, + "grad_norm": 0.31942737102508545, + "learning_rate": 9.738929941115373e-05, + "loss": -0.1511, + "step": 8890 + }, + { + "epoch": 19.955156950672645, + "grad_norm": 0.2565781772136688, + "learning_rate": 9.738050382682167e-05, + "loss": -0.1596, + "step": 8900 + }, + { + "epoch": 19.977578475336323, + "grad_norm": 0.31280317902565, + "learning_rate": 9.737169384954355e-05, + "loss": -0.151, + "step": 8910 + }, + { + "epoch": 20.0, + "grad_norm": 0.33799511194229126, + "learning_rate": 9.736286948199562e-05, + "loss": -0.1512, + "step": 8920 + }, + { + "epoch": 20.022421524663677, + "grad_norm": 0.3139895498752594, + "learning_rate": 9.735403072685848e-05, + "loss": -0.1534, + "step": 8930 + }, + { + "epoch": 20.044843049327355, + "grad_norm": 0.4513043165206909, + "learning_rate": 9.734517758681712e-05, + "loss": -0.1537, + "step": 8940 + }, + { + "epoch": 20.067264573991032, + "grad_norm": 0.2668008506298065, + "learning_rate": 9.733631006456088e-05, + "loss": -0.1547, + "step": 8950 + }, + { + "epoch": 20.08968609865471, + "grad_norm": 0.39221641421318054, + "learning_rate": 9.732742816278348e-05, + "loss": -0.1499, + "step": 8960 + }, + { + "epoch": 20.112107623318387, + "grad_norm": 0.5126624703407288, + "learning_rate": 9.731853188418302e-05, + "loss": -0.151, + "step": 8970 + }, + { + "epoch": 20.134529147982065, + "grad_norm": 0.33716070652008057, + "learning_rate": 9.730962123146194e-05, + "loss": -0.1475, + "step": 8980 + }, + { + "epoch": 20.15695067264574, + "grad_norm": 0.35100382566452026, + "learning_rate": 9.730069620732709e-05, + "loss": -0.1501, + "step": 8990 + }, + { + "epoch": 20.179372197309416, + "grad_norm": 0.49209025502204895, + "learning_rate": 9.72917568144896e-05, + "loss": -0.1532, + "step": 9000 + }, + { + "epoch": 20.201793721973093, + "grad_norm": 0.29349321126937866, + "learning_rate": 9.728280305566509e-05, + "loss": -0.1476, + "step": 9010 + }, + { + "epoch": 20.22421524663677, + "grad_norm": 0.32228338718414307, + "learning_rate": 9.727383493357343e-05, + "loss": -0.149, + "step": 9020 + }, + { + "epoch": 20.246636771300448, + "grad_norm": 0.3500288426876068, + "learning_rate": 9.726485245093891e-05, + "loss": -0.1492, + "step": 9030 + }, + { + "epoch": 20.269058295964125, + "grad_norm": 0.2908211350440979, + "learning_rate": 9.725585561049018e-05, + "loss": -0.1529, + "step": 9040 + }, + { + "epoch": 20.291479820627803, + "grad_norm": 0.3950963318347931, + "learning_rate": 9.724684441496022e-05, + "loss": -0.1463, + "step": 9050 + }, + { + "epoch": 20.31390134529148, + "grad_norm": 0.4016164243221283, + "learning_rate": 9.72378188670864e-05, + "loss": -0.153, + "step": 9060 + }, + { + "epoch": 20.336322869955158, + "grad_norm": 0.2954925298690796, + "learning_rate": 9.722877896961047e-05, + "loss": -0.1559, + "step": 9070 + }, + { + "epoch": 20.358744394618835, + "grad_norm": 0.40775996446609497, + "learning_rate": 9.721972472527848e-05, + "loss": -0.1524, + "step": 9080 + }, + { + "epoch": 20.381165919282513, + "grad_norm": 0.4318101704120636, + "learning_rate": 9.721065613684089e-05, + "loss": -0.1557, + "step": 9090 + }, + { + "epoch": 20.40358744394619, + "grad_norm": 0.3081878423690796, + "learning_rate": 9.72015732070525e-05, + "loss": -0.1533, + "step": 9100 + }, + { + "epoch": 20.426008968609864, + "grad_norm": 0.32915064692497253, + "learning_rate": 9.719247593867244e-05, + "loss": -0.1539, + "step": 9110 + }, + { + "epoch": 20.44843049327354, + "grad_norm": 0.48887255787849426, + "learning_rate": 9.718336433446423e-05, + "loss": -0.1535, + "step": 9120 + }, + { + "epoch": 20.47085201793722, + "grad_norm": 0.3587469160556793, + "learning_rate": 9.717423839719574e-05, + "loss": -0.1533, + "step": 9130 + }, + { + "epoch": 20.493273542600896, + "grad_norm": 0.3774474263191223, + "learning_rate": 9.71650981296392e-05, + "loss": -0.1524, + "step": 9140 + }, + { + "epoch": 20.515695067264573, + "grad_norm": 0.42100727558135986, + "learning_rate": 9.715594353457118e-05, + "loss": -0.1522, + "step": 9150 + }, + { + "epoch": 20.53811659192825, + "grad_norm": 0.43216344714164734, + "learning_rate": 9.714677461477257e-05, + "loss": -0.1525, + "step": 9160 + }, + { + "epoch": 20.56053811659193, + "grad_norm": 0.4878748655319214, + "learning_rate": 9.713759137302869e-05, + "loss": -0.1512, + "step": 9170 + }, + { + "epoch": 20.582959641255606, + "grad_norm": 0.47214987874031067, + "learning_rate": 9.712839381212914e-05, + "loss": -0.1509, + "step": 9180 + }, + { + "epoch": 20.605381165919283, + "grad_norm": 0.4317784905433655, + "learning_rate": 9.71191819348679e-05, + "loss": -0.153, + "step": 9190 + }, + { + "epoch": 20.62780269058296, + "grad_norm": 0.35719963908195496, + "learning_rate": 9.710995574404331e-05, + "loss": -0.1508, + "step": 9200 + }, + { + "epoch": 20.650224215246638, + "grad_norm": 0.38945600390434265, + "learning_rate": 9.710071524245802e-05, + "loss": -0.1473, + "step": 9210 + }, + { + "epoch": 20.672645739910315, + "grad_norm": 0.5090315341949463, + "learning_rate": 9.709146043291906e-05, + "loss": -0.1558, + "step": 9220 + }, + { + "epoch": 20.695067264573993, + "grad_norm": 0.4249480962753296, + "learning_rate": 9.70821913182378e-05, + "loss": -0.1501, + "step": 9230 + }, + { + "epoch": 20.717488789237667, + "grad_norm": 0.46184009313583374, + "learning_rate": 9.707290790122995e-05, + "loss": -0.1502, + "step": 9240 + }, + { + "epoch": 20.739910313901344, + "grad_norm": 0.2899419665336609, + "learning_rate": 9.706361018471557e-05, + "loss": -0.1542, + "step": 9250 + }, + { + "epoch": 20.76233183856502, + "grad_norm": 0.3382757008075714, + "learning_rate": 9.705429817151906e-05, + "loss": -0.1539, + "step": 9260 + }, + { + "epoch": 20.7847533632287, + "grad_norm": 0.5067382454872131, + "learning_rate": 9.704497186446917e-05, + "loss": -0.1549, + "step": 9270 + }, + { + "epoch": 20.807174887892376, + "grad_norm": 0.4223986566066742, + "learning_rate": 9.703563126639896e-05, + "loss": -0.1469, + "step": 9280 + }, + { + "epoch": 20.829596412556054, + "grad_norm": 0.6038903594017029, + "learning_rate": 9.70262763801459e-05, + "loss": -0.1517, + "step": 9290 + }, + { + "epoch": 20.85201793721973, + "grad_norm": 0.41037464141845703, + "learning_rate": 9.701690720855171e-05, + "loss": -0.1418, + "step": 9300 + }, + { + "epoch": 20.87443946188341, + "grad_norm": 0.4043366611003876, + "learning_rate": 9.700752375446253e-05, + "loss": -0.1469, + "step": 9310 + }, + { + "epoch": 20.896860986547086, + "grad_norm": 0.44924139976501465, + "learning_rate": 9.69981260207288e-05, + "loss": -0.134, + "step": 9320 + }, + { + "epoch": 20.919282511210763, + "grad_norm": 0.43172746896743774, + "learning_rate": 9.698871401020529e-05, + "loss": -0.1512, + "step": 9330 + }, + { + "epoch": 20.94170403587444, + "grad_norm": 0.36728566884994507, + "learning_rate": 9.697928772575112e-05, + "loss": -0.1488, + "step": 9340 + }, + { + "epoch": 20.96412556053812, + "grad_norm": 0.34617680311203003, + "learning_rate": 9.696984717022976e-05, + "loss": -0.1547, + "step": 9350 + }, + { + "epoch": 20.986547085201792, + "grad_norm": 0.24771800637245178, + "learning_rate": 9.6960392346509e-05, + "loss": -0.1561, + "step": 9360 + }, + { + "epoch": 21.00896860986547, + "grad_norm": 0.26959189772605896, + "learning_rate": 9.695092325746097e-05, + "loss": -0.143, + "step": 9370 + }, + { + "epoch": 21.031390134529147, + "grad_norm": 0.48624128103256226, + "learning_rate": 9.694143990596211e-05, + "loss": -0.1566, + "step": 9380 + }, + { + "epoch": 21.053811659192824, + "grad_norm": 0.33356571197509766, + "learning_rate": 9.693194229489325e-05, + "loss": -0.1407, + "step": 9390 + }, + { + "epoch": 21.076233183856502, + "grad_norm": 0.43587177991867065, + "learning_rate": 9.692243042713944e-05, + "loss": -0.1543, + "step": 9400 + }, + { + "epoch": 21.09865470852018, + "grad_norm": 0.3069794476032257, + "learning_rate": 9.691290430559022e-05, + "loss": -0.1512, + "step": 9410 + }, + { + "epoch": 21.121076233183857, + "grad_norm": 0.36221641302108765, + "learning_rate": 9.690336393313932e-05, + "loss": -0.1532, + "step": 9420 + }, + { + "epoch": 21.143497757847534, + "grad_norm": 0.4281330108642578, + "learning_rate": 9.689380931268487e-05, + "loss": -0.1487, + "step": 9430 + }, + { + "epoch": 21.16591928251121, + "grad_norm": 0.2188316285610199, + "learning_rate": 9.688424044712932e-05, + "loss": -0.1531, + "step": 9440 + }, + { + "epoch": 21.18834080717489, + "grad_norm": 0.5221356749534607, + "learning_rate": 9.687465733937942e-05, + "loss": -0.1408, + "step": 9450 + }, + { + "epoch": 21.210762331838566, + "grad_norm": 0.49432098865509033, + "learning_rate": 9.686505999234627e-05, + "loss": -0.1516, + "step": 9460 + }, + { + "epoch": 21.233183856502244, + "grad_norm": 0.3468703031539917, + "learning_rate": 9.685544840894529e-05, + "loss": -0.1523, + "step": 9470 + }, + { + "epoch": 21.255605381165918, + "grad_norm": 0.39836764335632324, + "learning_rate": 9.684582259209624e-05, + "loss": -0.1539, + "step": 9480 + }, + { + "epoch": 21.278026905829595, + "grad_norm": 0.32755807042121887, + "learning_rate": 9.683618254472317e-05, + "loss": -0.1505, + "step": 9490 + }, + { + "epoch": 21.300448430493272, + "grad_norm": 0.3890736401081085, + "learning_rate": 9.682652826975449e-05, + "loss": -0.155, + "step": 9500 + }, + { + "epoch": 21.32286995515695, + "grad_norm": 0.3597727417945862, + "learning_rate": 9.681685977012291e-05, + "loss": -0.1556, + "step": 9510 + }, + { + "epoch": 21.345291479820627, + "grad_norm": 0.2754303216934204, + "learning_rate": 9.680717704876546e-05, + "loss": -0.1552, + "step": 9520 + }, + { + "epoch": 21.367713004484305, + "grad_norm": 0.49162185192108154, + "learning_rate": 9.679748010862349e-05, + "loss": -0.1522, + "step": 9530 + }, + { + "epoch": 21.390134529147982, + "grad_norm": 0.3282163143157959, + "learning_rate": 9.678776895264267e-05, + "loss": -0.1531, + "step": 9540 + }, + { + "epoch": 21.41255605381166, + "grad_norm": 0.20829305052757263, + "learning_rate": 9.6778043583773e-05, + "loss": -0.1462, + "step": 9550 + }, + { + "epoch": 21.434977578475337, + "grad_norm": 0.36380285024642944, + "learning_rate": 9.67683040049688e-05, + "loss": -0.156, + "step": 9560 + }, + { + "epoch": 21.457399103139014, + "grad_norm": 0.34435367584228516, + "learning_rate": 9.675855021918869e-05, + "loss": -0.1499, + "step": 9570 + }, + { + "epoch": 21.47982062780269, + "grad_norm": 0.3567222058773041, + "learning_rate": 9.674878222939561e-05, + "loss": -0.1579, + "step": 9580 + }, + { + "epoch": 21.50224215246637, + "grad_norm": 0.27858251333236694, + "learning_rate": 9.673900003855681e-05, + "loss": -0.1547, + "step": 9590 + }, + { + "epoch": 21.524663677130043, + "grad_norm": 0.35440191626548767, + "learning_rate": 9.672920364964389e-05, + "loss": -0.147, + "step": 9600 + }, + { + "epoch": 21.54708520179372, + "grad_norm": 0.4171942174434662, + "learning_rate": 9.671939306563269e-05, + "loss": -0.159, + "step": 9610 + }, + { + "epoch": 21.569506726457398, + "grad_norm": 0.40914157032966614, + "learning_rate": 9.670956828950345e-05, + "loss": -0.1519, + "step": 9620 + }, + { + "epoch": 21.591928251121075, + "grad_norm": 0.24180780351161957, + "learning_rate": 9.669972932424065e-05, + "loss": -0.1549, + "step": 9630 + }, + { + "epoch": 21.614349775784753, + "grad_norm": 0.3062634766101837, + "learning_rate": 9.668987617283312e-05, + "loss": -0.161, + "step": 9640 + }, + { + "epoch": 21.63677130044843, + "grad_norm": 0.353843092918396, + "learning_rate": 9.668000883827397e-05, + "loss": -0.1492, + "step": 9650 + }, + { + "epoch": 21.659192825112108, + "grad_norm": 0.34160053730010986, + "learning_rate": 9.667012732356067e-05, + "loss": -0.1496, + "step": 9660 + }, + { + "epoch": 21.681614349775785, + "grad_norm": 0.36376428604125977, + "learning_rate": 9.666023163169493e-05, + "loss": -0.1325, + "step": 9670 + }, + { + "epoch": 21.704035874439462, + "grad_norm": 0.23108938336372375, + "learning_rate": 9.665032176568281e-05, + "loss": -0.1527, + "step": 9680 + }, + { + "epoch": 21.72645739910314, + "grad_norm": 0.45940613746643066, + "learning_rate": 9.664039772853469e-05, + "loss": -0.1497, + "step": 9690 + }, + { + "epoch": 21.748878923766817, + "grad_norm": 0.2690742015838623, + "learning_rate": 9.663045952326518e-05, + "loss": -0.1542, + "step": 9700 + }, + { + "epoch": 21.771300448430495, + "grad_norm": 0.3285200893878937, + "learning_rate": 9.662050715289328e-05, + "loss": -0.1588, + "step": 9710 + }, + { + "epoch": 21.793721973094172, + "grad_norm": 0.31207072734832764, + "learning_rate": 9.661054062044226e-05, + "loss": -0.1497, + "step": 9720 + }, + { + "epoch": 21.816143497757846, + "grad_norm": 0.28012242913246155, + "learning_rate": 9.660055992893968e-05, + "loss": -0.1544, + "step": 9730 + }, + { + "epoch": 21.838565022421523, + "grad_norm": 0.34153294563293457, + "learning_rate": 9.659056508141739e-05, + "loss": -0.1523, + "step": 9740 + }, + { + "epoch": 21.8609865470852, + "grad_norm": 0.30360570549964905, + "learning_rate": 9.658055608091161e-05, + "loss": -0.1484, + "step": 9750 + }, + { + "epoch": 21.883408071748878, + "grad_norm": 0.3625519275665283, + "learning_rate": 9.657053293046276e-05, + "loss": -0.1539, + "step": 9760 + }, + { + "epoch": 21.905829596412556, + "grad_norm": 0.36781805753707886, + "learning_rate": 9.656049563311564e-05, + "loss": -0.1578, + "step": 9770 + }, + { + "epoch": 21.928251121076233, + "grad_norm": 0.4035934805870056, + "learning_rate": 9.655044419191929e-05, + "loss": -0.1468, + "step": 9780 + }, + { + "epoch": 21.95067264573991, + "grad_norm": 0.35047218203544617, + "learning_rate": 9.654037860992711e-05, + "loss": -0.1528, + "step": 9790 + }, + { + "epoch": 21.973094170403588, + "grad_norm": 0.39028260111808777, + "learning_rate": 9.653029889019672e-05, + "loss": -0.1487, + "step": 9800 + }, + { + "epoch": 21.995515695067265, + "grad_norm": 0.42121487855911255, + "learning_rate": 9.65202050357901e-05, + "loss": -0.1481, + "step": 9810 + }, + { + "epoch": 22.017937219730943, + "grad_norm": 0.27110958099365234, + "learning_rate": 9.651009704977347e-05, + "loss": -0.1521, + "step": 9820 + }, + { + "epoch": 22.04035874439462, + "grad_norm": 0.24624665081501007, + "learning_rate": 9.649997493521738e-05, + "loss": -0.1536, + "step": 9830 + }, + { + "epoch": 22.062780269058297, + "grad_norm": 0.39658305048942566, + "learning_rate": 9.64898386951967e-05, + "loss": -0.1551, + "step": 9840 + }, + { + "epoch": 22.08520179372197, + "grad_norm": 0.3667217493057251, + "learning_rate": 9.647968833279049e-05, + "loss": -0.1508, + "step": 9850 + }, + { + "epoch": 22.10762331838565, + "grad_norm": 0.5370233654975891, + "learning_rate": 9.646952385108218e-05, + "loss": -0.1442, + "step": 9860 + }, + { + "epoch": 22.130044843049326, + "grad_norm": 0.33894026279449463, + "learning_rate": 9.645934525315951e-05, + "loss": -0.1553, + "step": 9870 + }, + { + "epoch": 22.152466367713004, + "grad_norm": 0.3294115364551544, + "learning_rate": 9.644915254211442e-05, + "loss": -0.1516, + "step": 9880 + }, + { + "epoch": 22.17488789237668, + "grad_norm": 0.3883153200149536, + "learning_rate": 9.643894572104321e-05, + "loss": -0.1547, + "step": 9890 + }, + { + "epoch": 22.19730941704036, + "grad_norm": 0.3979186713695526, + "learning_rate": 9.642872479304644e-05, + "loss": -0.1533, + "step": 9900 + }, + { + "epoch": 22.219730941704036, + "grad_norm": 0.35962560772895813, + "learning_rate": 9.641848976122895e-05, + "loss": -0.1532, + "step": 9910 + }, + { + "epoch": 22.242152466367713, + "grad_norm": 0.3910924196243286, + "learning_rate": 9.64082406286999e-05, + "loss": -0.1585, + "step": 9920 + }, + { + "epoch": 22.26457399103139, + "grad_norm": 0.456172913312912, + "learning_rate": 9.639797739857269e-05, + "loss": -0.1404, + "step": 9930 + }, + { + "epoch": 22.286995515695068, + "grad_norm": 0.33727023005485535, + "learning_rate": 9.638770007396498e-05, + "loss": -0.1503, + "step": 9940 + }, + { + "epoch": 22.309417040358746, + "grad_norm": 0.6126788854598999, + "learning_rate": 9.63774086579988e-05, + "loss": -0.1431, + "step": 9950 + }, + { + "epoch": 22.331838565022423, + "grad_norm": 0.33403530716896057, + "learning_rate": 9.63671031538004e-05, + "loss": -0.152, + "step": 9960 + }, + { + "epoch": 22.3542600896861, + "grad_norm": 0.4271731674671173, + "learning_rate": 9.635678356450031e-05, + "loss": -0.1483, + "step": 9970 + }, + { + "epoch": 22.376681614349774, + "grad_norm": 0.36427968740463257, + "learning_rate": 9.634644989323336e-05, + "loss": -0.1519, + "step": 9980 + }, + { + "epoch": 22.39910313901345, + "grad_norm": 0.3520902693271637, + "learning_rate": 9.633610214313861e-05, + "loss": -0.1507, + "step": 9990 + }, + { + "epoch": 22.42152466367713, + "grad_norm": 0.541022002696991, + "learning_rate": 9.632574031735951e-05, + "loss": -0.1503, + "step": 10000 + }, + { + "epoch": 22.443946188340806, + "grad_norm": 0.39497697353363037, + "learning_rate": 9.631536441904364e-05, + "loss": -0.902, + "step": 10010 + }, + { + "epoch": 22.466367713004484, + "grad_norm": 0.40074920654296875, + "learning_rate": 9.630497445134293e-05, + "loss": -0.8969, + "step": 10020 + }, + { + "epoch": 22.48878923766816, + "grad_norm": 0.3763134777545929, + "learning_rate": 9.62945704174136e-05, + "loss": -0.9027, + "step": 10030 + }, + { + "epoch": 22.51121076233184, + "grad_norm": 0.332136869430542, + "learning_rate": 9.628415232041612e-05, + "loss": -0.8972, + "step": 10040 + }, + { + "epoch": 22.533632286995516, + "grad_norm": 0.35423576831817627, + "learning_rate": 9.627372016351524e-05, + "loss": -0.899, + "step": 10050 + }, + { + "epoch": 22.556053811659194, + "grad_norm": 0.35406294465065, + "learning_rate": 9.626327394987995e-05, + "loss": -0.9015, + "step": 10060 + }, + { + "epoch": 22.57847533632287, + "grad_norm": 0.3356623947620392, + "learning_rate": 9.625281368268355e-05, + "loss": -0.9003, + "step": 10070 + }, + { + "epoch": 22.60089686098655, + "grad_norm": 0.30513498187065125, + "learning_rate": 9.624233936510357e-05, + "loss": -0.8992, + "step": 10080 + }, + { + "epoch": 22.623318385650226, + "grad_norm": 0.535525381565094, + "learning_rate": 9.623185100032187e-05, + "loss": -0.8932, + "step": 10090 + }, + { + "epoch": 22.6457399103139, + "grad_norm": 0.3641398251056671, + "learning_rate": 9.62213485915245e-05, + "loss": -0.9052, + "step": 10100 + }, + { + "epoch": 22.668161434977577, + "grad_norm": 0.3717898726463318, + "learning_rate": 9.621083214190186e-05, + "loss": -0.9053, + "step": 10110 + }, + { + "epoch": 22.690582959641254, + "grad_norm": 0.3344902992248535, + "learning_rate": 9.62003016546485e-05, + "loss": -0.8956, + "step": 10120 + }, + { + "epoch": 22.713004484304932, + "grad_norm": 0.3509817123413086, + "learning_rate": 9.618975713296339e-05, + "loss": -0.9054, + "step": 10130 + }, + { + "epoch": 22.73542600896861, + "grad_norm": 0.3332061171531677, + "learning_rate": 9.61791985800496e-05, + "loss": -0.9077, + "step": 10140 + }, + { + "epoch": 22.757847533632287, + "grad_norm": 0.3414807915687561, + "learning_rate": 9.616862599911458e-05, + "loss": -0.9001, + "step": 10150 + }, + { + "epoch": 22.780269058295964, + "grad_norm": 0.38627079129219055, + "learning_rate": 9.615803939337e-05, + "loss": -0.9029, + "step": 10160 + }, + { + "epoch": 22.80269058295964, + "grad_norm": 0.32742059230804443, + "learning_rate": 9.614743876603178e-05, + "loss": -0.9025, + "step": 10170 + }, + { + "epoch": 22.82511210762332, + "grad_norm": 0.27908211946487427, + "learning_rate": 9.613682412032013e-05, + "loss": -0.9008, + "step": 10180 + }, + { + "epoch": 22.847533632286996, + "grad_norm": 0.3084784746170044, + "learning_rate": 9.612619545945947e-05, + "loss": -0.8936, + "step": 10190 + }, + { + "epoch": 22.869955156950674, + "grad_norm": 0.2877829968929291, + "learning_rate": 9.611555278667852e-05, + "loss": -0.9059, + "step": 10200 + }, + { + "epoch": 22.89237668161435, + "grad_norm": 0.34476345777511597, + "learning_rate": 9.610489610521024e-05, + "loss": -0.9064, + "step": 10210 + }, + { + "epoch": 22.91479820627803, + "grad_norm": 0.306196004152298, + "learning_rate": 9.609422541829187e-05, + "loss": -0.9056, + "step": 10220 + }, + { + "epoch": 22.937219730941703, + "grad_norm": 0.30029621720314026, + "learning_rate": 9.608354072916486e-05, + "loss": -0.8955, + "step": 10230 + }, + { + "epoch": 22.95964125560538, + "grad_norm": 0.3177456557750702, + "learning_rate": 9.607284204107493e-05, + "loss": -0.9015, + "step": 10240 + }, + { + "epoch": 22.982062780269057, + "grad_norm": 0.29705825448036194, + "learning_rate": 9.606212935727208e-05, + "loss": -0.9105, + "step": 10250 + }, + { + "epoch": 23.004484304932735, + "grad_norm": 0.2765927016735077, + "learning_rate": 9.605140268101052e-05, + "loss": -0.9029, + "step": 10260 + }, + { + "epoch": 23.026905829596412, + "grad_norm": 0.5229991674423218, + "learning_rate": 9.604066201554875e-05, + "loss": -0.9106, + "step": 10270 + }, + { + "epoch": 23.04932735426009, + "grad_norm": 0.48376336693763733, + "learning_rate": 9.60299073641495e-05, + "loss": -0.9069, + "step": 10280 + }, + { + "epoch": 23.071748878923767, + "grad_norm": 0.46750903129577637, + "learning_rate": 9.601913873007974e-05, + "loss": -0.9109, + "step": 10290 + }, + { + "epoch": 23.094170403587444, + "grad_norm": 0.4025031328201294, + "learning_rate": 9.60083561166107e-05, + "loss": -0.9039, + "step": 10300 + }, + { + "epoch": 23.116591928251122, + "grad_norm": 0.24459242820739746, + "learning_rate": 9.599755952701783e-05, + "loss": -0.8994, + "step": 10310 + }, + { + "epoch": 23.1390134529148, + "grad_norm": 0.3454064726829529, + "learning_rate": 9.598674896458089e-05, + "loss": -0.9017, + "step": 10320 + }, + { + "epoch": 23.161434977578477, + "grad_norm": 0.37398090958595276, + "learning_rate": 9.597592443258383e-05, + "loss": -0.9125, + "step": 10330 + }, + { + "epoch": 23.183856502242154, + "grad_norm": 0.3410985767841339, + "learning_rate": 9.596508593431483e-05, + "loss": -0.9127, + "step": 10340 + }, + { + "epoch": 23.206278026905828, + "grad_norm": 0.2891652584075928, + "learning_rate": 9.59542334730664e-05, + "loss": -0.9069, + "step": 10350 + }, + { + "epoch": 23.228699551569505, + "grad_norm": 0.44529563188552856, + "learning_rate": 9.594336705213516e-05, + "loss": -0.9075, + "step": 10360 + }, + { + "epoch": 23.251121076233183, + "grad_norm": 0.4068090617656708, + "learning_rate": 9.593248667482208e-05, + "loss": -0.905, + "step": 10370 + }, + { + "epoch": 23.27354260089686, + "grad_norm": 0.4685012698173523, + "learning_rate": 9.592159234443233e-05, + "loss": -0.8982, + "step": 10380 + }, + { + "epoch": 23.295964125560538, + "grad_norm": 0.41862234473228455, + "learning_rate": 9.59106840642753e-05, + "loss": -0.9002, + "step": 10390 + }, + { + "epoch": 23.318385650224215, + "grad_norm": 0.4689313769340515, + "learning_rate": 9.589976183766467e-05, + "loss": -0.9055, + "step": 10400 + }, + { + "epoch": 23.340807174887892, + "grad_norm": 0.34581458568573, + "learning_rate": 9.58888256679183e-05, + "loss": -0.9035, + "step": 10410 + }, + { + "epoch": 23.36322869955157, + "grad_norm": 0.32967501878738403, + "learning_rate": 9.587787555835832e-05, + "loss": -0.9073, + "step": 10420 + }, + { + "epoch": 23.385650224215247, + "grad_norm": 0.32086995244026184, + "learning_rate": 9.586691151231107e-05, + "loss": -0.9045, + "step": 10430 + }, + { + "epoch": 23.408071748878925, + "grad_norm": 0.37028780579566956, + "learning_rate": 9.585593353310715e-05, + "loss": -0.9059, + "step": 10440 + }, + { + "epoch": 23.430493273542602, + "grad_norm": 0.44641268253326416, + "learning_rate": 9.58449416240814e-05, + "loss": -0.9005, + "step": 10450 + }, + { + "epoch": 23.45291479820628, + "grad_norm": 0.3039346933364868, + "learning_rate": 9.583393578857283e-05, + "loss": -0.9089, + "step": 10460 + }, + { + "epoch": 23.475336322869953, + "grad_norm": 0.3138388395309448, + "learning_rate": 9.582291602992474e-05, + "loss": -0.9101, + "step": 10470 + }, + { + "epoch": 23.49775784753363, + "grad_norm": 0.30921462178230286, + "learning_rate": 9.581188235148466e-05, + "loss": -0.9078, + "step": 10480 + }, + { + "epoch": 23.52017937219731, + "grad_norm": 0.31045618653297424, + "learning_rate": 9.58008347566043e-05, + "loss": -0.9131, + "step": 10490 + }, + { + "epoch": 23.542600896860986, + "grad_norm": 0.38736358284950256, + "learning_rate": 9.578977324863965e-05, + "loss": -0.9106, + "step": 10500 + }, + { + "epoch": 23.565022421524663, + "grad_norm": 0.30927184224128723, + "learning_rate": 9.577869783095089e-05, + "loss": -0.9081, + "step": 10510 + }, + { + "epoch": 23.58744394618834, + "grad_norm": 0.5841875672340393, + "learning_rate": 9.576760850690245e-05, + "loss": -0.9046, + "step": 10520 + }, + { + "epoch": 23.609865470852018, + "grad_norm": 0.39133453369140625, + "learning_rate": 9.575650527986298e-05, + "loss": -0.8976, + "step": 10530 + }, + { + "epoch": 23.632286995515695, + "grad_norm": 0.3489258885383606, + "learning_rate": 9.574538815320531e-05, + "loss": -0.911, + "step": 10540 + }, + { + "epoch": 23.654708520179373, + "grad_norm": 0.43078863620758057, + "learning_rate": 9.573425713030656e-05, + "loss": -0.9046, + "step": 10550 + }, + { + "epoch": 23.67713004484305, + "grad_norm": 0.3849266469478607, + "learning_rate": 9.572311221454806e-05, + "loss": -0.9088, + "step": 10560 + }, + { + "epoch": 23.699551569506728, + "grad_norm": 0.4304797947406769, + "learning_rate": 9.57119534093153e-05, + "loss": -0.9028, + "step": 10570 + }, + { + "epoch": 23.721973094170405, + "grad_norm": 0.40824976563453674, + "learning_rate": 9.570078071799806e-05, + "loss": -0.9078, + "step": 10580 + }, + { + "epoch": 23.744394618834082, + "grad_norm": 0.46336665749549866, + "learning_rate": 9.568959414399028e-05, + "loss": -0.9049, + "step": 10590 + }, + { + "epoch": 23.766816143497756, + "grad_norm": 0.4592741131782532, + "learning_rate": 9.567839369069018e-05, + "loss": -0.906, + "step": 10600 + }, + { + "epoch": 23.789237668161434, + "grad_norm": 0.3190271556377411, + "learning_rate": 9.566717936150013e-05, + "loss": -0.9069, + "step": 10610 + }, + { + "epoch": 23.81165919282511, + "grad_norm": 0.3867676258087158, + "learning_rate": 9.565595115982678e-05, + "loss": -0.9108, + "step": 10620 + }, + { + "epoch": 23.83408071748879, + "grad_norm": 0.3338158428668976, + "learning_rate": 9.564470908908094e-05, + "loss": -0.9021, + "step": 10630 + }, + { + "epoch": 23.856502242152466, + "grad_norm": 0.29101449251174927, + "learning_rate": 9.563345315267764e-05, + "loss": -0.905, + "step": 10640 + }, + { + "epoch": 23.878923766816143, + "grad_norm": 0.3146497905254364, + "learning_rate": 9.562218335403616e-05, + "loss": -0.9083, + "step": 10650 + }, + { + "epoch": 23.90134529147982, + "grad_norm": 0.39916086196899414, + "learning_rate": 9.561089969657999e-05, + "loss": -0.9069, + "step": 10660 + }, + { + "epoch": 23.923766816143498, + "grad_norm": 0.2847098708152771, + "learning_rate": 9.559960218373673e-05, + "loss": -0.9008, + "step": 10670 + }, + { + "epoch": 23.946188340807176, + "grad_norm": 0.43520551919937134, + "learning_rate": 9.558829081893836e-05, + "loss": -0.9078, + "step": 10680 + }, + { + "epoch": 23.968609865470853, + "grad_norm": 0.3230048716068268, + "learning_rate": 9.55769656056209e-05, + "loss": -0.9103, + "step": 10690 + }, + { + "epoch": 23.99103139013453, + "grad_norm": 0.29481083154678345, + "learning_rate": 9.556562654722469e-05, + "loss": -0.9065, + "step": 10700 + }, + { + "epoch": 24.013452914798208, + "grad_norm": 0.26944732666015625, + "learning_rate": 9.555427364719422e-05, + "loss": -0.905, + "step": 10710 + }, + { + "epoch": 24.03587443946188, + "grad_norm": 0.28496167063713074, + "learning_rate": 9.55429069089782e-05, + "loss": -0.9107, + "step": 10720 + }, + { + "epoch": 24.05829596412556, + "grad_norm": 0.2915290892124176, + "learning_rate": 9.553152633602956e-05, + "loss": -0.9054, + "step": 10730 + }, + { + "epoch": 24.080717488789237, + "grad_norm": 0.37023282051086426, + "learning_rate": 9.552013193180543e-05, + "loss": -0.9061, + "step": 10740 + }, + { + "epoch": 24.103139013452914, + "grad_norm": 0.3414650857448578, + "learning_rate": 9.550872369976707e-05, + "loss": -0.9058, + "step": 10750 + }, + { + "epoch": 24.12556053811659, + "grad_norm": 0.5056911110877991, + "learning_rate": 9.549730164338007e-05, + "loss": -0.9043, + "step": 10760 + }, + { + "epoch": 24.14798206278027, + "grad_norm": 0.3480328917503357, + "learning_rate": 9.548586576611408e-05, + "loss": -0.9056, + "step": 10770 + }, + { + "epoch": 24.170403587443946, + "grad_norm": 0.4265042841434479, + "learning_rate": 9.54744160714431e-05, + "loss": -0.9111, + "step": 10780 + }, + { + "epoch": 24.192825112107624, + "grad_norm": 0.27866604924201965, + "learning_rate": 9.546295256284516e-05, + "loss": -0.914, + "step": 10790 + }, + { + "epoch": 24.2152466367713, + "grad_norm": 0.39435917139053345, + "learning_rate": 9.545147524380265e-05, + "loss": -0.9054, + "step": 10800 + }, + { + "epoch": 24.23766816143498, + "grad_norm": 0.3099474012851715, + "learning_rate": 9.543998411780201e-05, + "loss": -0.9046, + "step": 10810 + }, + { + "epoch": 24.260089686098656, + "grad_norm": 0.3095472455024719, + "learning_rate": 9.542847918833397e-05, + "loss": -0.9083, + "step": 10820 + }, + { + "epoch": 24.282511210762333, + "grad_norm": 0.37621191143989563, + "learning_rate": 9.541696045889343e-05, + "loss": -0.907, + "step": 10830 + }, + { + "epoch": 24.304932735426007, + "grad_norm": 0.3313165605068207, + "learning_rate": 9.540542793297947e-05, + "loss": -0.9125, + "step": 10840 + }, + { + "epoch": 24.327354260089685, + "grad_norm": 0.37017086148262024, + "learning_rate": 9.539388161409537e-05, + "loss": -0.9073, + "step": 10850 + }, + { + "epoch": 24.349775784753362, + "grad_norm": 0.5257201194763184, + "learning_rate": 9.538232150574857e-05, + "loss": -0.9054, + "step": 10860 + }, + { + "epoch": 24.37219730941704, + "grad_norm": 0.38192591071128845, + "learning_rate": 9.537074761145076e-05, + "loss": -0.9133, + "step": 10870 + }, + { + "epoch": 24.394618834080717, + "grad_norm": 0.3359403908252716, + "learning_rate": 9.535915993471778e-05, + "loss": -0.9042, + "step": 10880 + }, + { + "epoch": 24.417040358744394, + "grad_norm": 0.3578924536705017, + "learning_rate": 9.534755847906964e-05, + "loss": -0.8944, + "step": 10890 + }, + { + "epoch": 24.43946188340807, + "grad_norm": 0.4758501350879669, + "learning_rate": 9.533594324803057e-05, + "loss": -0.9044, + "step": 10900 + }, + { + "epoch": 24.46188340807175, + "grad_norm": 0.37898942828178406, + "learning_rate": 9.532431424512895e-05, + "loss": -0.9158, + "step": 10910 + }, + { + "epoch": 24.484304932735427, + "grad_norm": 0.3528904914855957, + "learning_rate": 9.531267147389741e-05, + "loss": -0.9077, + "step": 10920 + }, + { + "epoch": 24.506726457399104, + "grad_norm": 0.40027233958244324, + "learning_rate": 9.530101493787266e-05, + "loss": -0.9078, + "step": 10930 + }, + { + "epoch": 24.52914798206278, + "grad_norm": 0.25946858525276184, + "learning_rate": 9.528934464059571e-05, + "loss": -0.9157, + "step": 10940 + }, + { + "epoch": 24.55156950672646, + "grad_norm": 0.24238799512386322, + "learning_rate": 9.527766058561163e-05, + "loss": -0.9162, + "step": 10950 + }, + { + "epoch": 24.573991031390136, + "grad_norm": 0.3949044346809387, + "learning_rate": 9.526596277646976e-05, + "loss": -0.9085, + "step": 10960 + }, + { + "epoch": 24.59641255605381, + "grad_norm": 0.4821436107158661, + "learning_rate": 9.525425121672358e-05, + "loss": -0.9085, + "step": 10970 + }, + { + "epoch": 24.618834080717487, + "grad_norm": 0.31393250823020935, + "learning_rate": 9.524252590993074e-05, + "loss": -0.9135, + "step": 10980 + }, + { + "epoch": 24.641255605381165, + "grad_norm": 0.3033902049064636, + "learning_rate": 9.523078685965309e-05, + "loss": -0.9101, + "step": 10990 + }, + { + "epoch": 24.663677130044842, + "grad_norm": 0.4256376326084137, + "learning_rate": 9.521903406945664e-05, + "loss": -0.9097, + "step": 11000 + }, + { + "epoch": 24.68609865470852, + "grad_norm": 0.338595986366272, + "learning_rate": 9.520726754291158e-05, + "loss": -0.9095, + "step": 11010 + }, + { + "epoch": 24.708520179372197, + "grad_norm": 0.40982699394226074, + "learning_rate": 9.519548728359227e-05, + "loss": -0.9095, + "step": 11020 + }, + { + "epoch": 24.730941704035875, + "grad_norm": 0.2863597571849823, + "learning_rate": 9.518369329507726e-05, + "loss": -0.9175, + "step": 11030 + }, + { + "epoch": 24.753363228699552, + "grad_norm": 0.4347987174987793, + "learning_rate": 9.51718855809492e-05, + "loss": -0.915, + "step": 11040 + }, + { + "epoch": 24.77578475336323, + "grad_norm": 0.35975390672683716, + "learning_rate": 9.516006414479502e-05, + "loss": -0.9084, + "step": 11050 + }, + { + "epoch": 24.798206278026907, + "grad_norm": 0.3416312634944916, + "learning_rate": 9.514822899020572e-05, + "loss": -0.9032, + "step": 11060 + }, + { + "epoch": 24.820627802690584, + "grad_norm": 0.37649017572402954, + "learning_rate": 9.513638012077654e-05, + "loss": -0.9092, + "step": 11070 + }, + { + "epoch": 24.84304932735426, + "grad_norm": 0.28062692284584045, + "learning_rate": 9.512451754010683e-05, + "loss": -0.9097, + "step": 11080 + }, + { + "epoch": 24.865470852017935, + "grad_norm": 0.4890730381011963, + "learning_rate": 9.511264125180013e-05, + "loss": -0.9096, + "step": 11090 + }, + { + "epoch": 24.887892376681613, + "grad_norm": 0.4169207513332367, + "learning_rate": 9.510075125946414e-05, + "loss": -0.9082, + "step": 11100 + }, + { + "epoch": 24.91031390134529, + "grad_norm": 0.34404414892196655, + "learning_rate": 9.508884756671075e-05, + "loss": -0.9084, + "step": 11110 + }, + { + "epoch": 24.932735426008968, + "grad_norm": 0.577639102935791, + "learning_rate": 9.507693017715596e-05, + "loss": -0.9111, + "step": 11120 + }, + { + "epoch": 24.955156950672645, + "grad_norm": 0.30200865864753723, + "learning_rate": 9.506499909441997e-05, + "loss": -0.914, + "step": 11130 + }, + { + "epoch": 24.977578475336323, + "grad_norm": 0.3452973961830139, + "learning_rate": 9.505305432212713e-05, + "loss": -0.9149, + "step": 11140 + }, + { + "epoch": 25.0, + "grad_norm": 0.33578911423683167, + "learning_rate": 9.504109586390595e-05, + "loss": -0.9122, + "step": 11150 + }, + { + "epoch": 25.022421524663677, + "grad_norm": 0.31501349806785583, + "learning_rate": 9.502912372338908e-05, + "loss": -0.9133, + "step": 11160 + }, + { + "epoch": 25.044843049327355, + "grad_norm": 0.289038747549057, + "learning_rate": 9.501713790421335e-05, + "loss": -0.913, + "step": 11170 + }, + { + "epoch": 25.067264573991032, + "grad_norm": 0.3616143465042114, + "learning_rate": 9.500513841001974e-05, + "loss": -0.9162, + "step": 11180 + }, + { + "epoch": 25.08968609865471, + "grad_norm": 0.30189332365989685, + "learning_rate": 9.499312524445336e-05, + "loss": -0.907, + "step": 11190 + }, + { + "epoch": 25.112107623318387, + "grad_norm": 0.38619035482406616, + "learning_rate": 9.498109841116351e-05, + "loss": -0.9088, + "step": 11200 + }, + { + "epoch": 25.134529147982065, + "grad_norm": 0.3183898627758026, + "learning_rate": 9.496905791380363e-05, + "loss": -0.9038, + "step": 11210 + }, + { + "epoch": 25.15695067264574, + "grad_norm": 0.4056541621685028, + "learning_rate": 9.495700375603129e-05, + "loss": -0.9137, + "step": 11220 + }, + { + "epoch": 25.179372197309416, + "grad_norm": 0.38408130407333374, + "learning_rate": 9.494493594150822e-05, + "loss": -0.91, + "step": 11230 + }, + { + "epoch": 25.201793721973093, + "grad_norm": 0.34165987372398376, + "learning_rate": 9.493285447390032e-05, + "loss": -0.9116, + "step": 11240 + }, + { + "epoch": 25.22421524663677, + "grad_norm": 0.3454553186893463, + "learning_rate": 9.492075935687761e-05, + "loss": -0.9152, + "step": 11250 + }, + { + "epoch": 25.246636771300448, + "grad_norm": 0.3005537688732147, + "learning_rate": 9.490865059411427e-05, + "loss": -0.9094, + "step": 11260 + }, + { + "epoch": 25.269058295964125, + "grad_norm": 0.3826751410961151, + "learning_rate": 9.489652818928863e-05, + "loss": -0.9082, + "step": 11270 + }, + { + "epoch": 25.291479820627803, + "grad_norm": 0.2913769483566284, + "learning_rate": 9.488439214608315e-05, + "loss": -0.8988, + "step": 11280 + }, + { + "epoch": 25.31390134529148, + "grad_norm": 0.31094294786453247, + "learning_rate": 9.487224246818444e-05, + "loss": -0.916, + "step": 11290 + }, + { + "epoch": 25.336322869955158, + "grad_norm": 0.37170153856277466, + "learning_rate": 9.486007915928325e-05, + "loss": -0.9142, + "step": 11300 + }, + { + "epoch": 25.358744394618835, + "grad_norm": 0.302129328250885, + "learning_rate": 9.484790222307448e-05, + "loss": -0.9117, + "step": 11310 + }, + { + "epoch": 25.381165919282513, + "grad_norm": 0.24703694880008698, + "learning_rate": 9.483571166325716e-05, + "loss": -0.9084, + "step": 11320 + }, + { + "epoch": 25.40358744394619, + "grad_norm": 0.42365914583206177, + "learning_rate": 9.482350748353444e-05, + "loss": -0.9118, + "step": 11330 + }, + { + "epoch": 25.426008968609864, + "grad_norm": 0.4141514301300049, + "learning_rate": 9.481128968761363e-05, + "loss": -0.9114, + "step": 11340 + }, + { + "epoch": 25.44843049327354, + "grad_norm": 0.3376246392726898, + "learning_rate": 9.479905827920621e-05, + "loss": -0.9145, + "step": 11350 + }, + { + "epoch": 25.47085201793722, + "grad_norm": 0.37286052107810974, + "learning_rate": 9.478681326202773e-05, + "loss": -0.9124, + "step": 11360 + }, + { + "epoch": 25.493273542600896, + "grad_norm": 0.2434249073266983, + "learning_rate": 9.477455463979791e-05, + "loss": -0.9137, + "step": 11370 + }, + { + "epoch": 25.515695067264573, + "grad_norm": 0.2924674153327942, + "learning_rate": 9.476228241624059e-05, + "loss": -0.9058, + "step": 11380 + }, + { + "epoch": 25.53811659192825, + "grad_norm": 0.3925395905971527, + "learning_rate": 9.474999659508374e-05, + "loss": -0.9156, + "step": 11390 + }, + { + "epoch": 25.56053811659193, + "grad_norm": 0.21067224442958832, + "learning_rate": 9.47376971800595e-05, + "loss": -0.9177, + "step": 11400 + }, + { + "epoch": 25.582959641255606, + "grad_norm": 0.291964590549469, + "learning_rate": 9.472538417490409e-05, + "loss": -0.9066, + "step": 11410 + }, + { + "epoch": 25.605381165919283, + "grad_norm": 0.2999967336654663, + "learning_rate": 9.471305758335784e-05, + "loss": -0.9119, + "step": 11420 + }, + { + "epoch": 25.62780269058296, + "grad_norm": 0.377257764339447, + "learning_rate": 9.47007174091653e-05, + "loss": -0.9053, + "step": 11430 + }, + { + "epoch": 25.650224215246638, + "grad_norm": 0.37965986132621765, + "learning_rate": 9.468836365607507e-05, + "loss": -0.9088, + "step": 11440 + }, + { + "epoch": 25.672645739910315, + "grad_norm": 0.5806931853294373, + "learning_rate": 9.467599632783988e-05, + "loss": -0.9023, + "step": 11450 + }, + { + "epoch": 25.695067264573993, + "grad_norm": 0.4209405183792114, + "learning_rate": 9.466361542821662e-05, + "loss": -0.9092, + "step": 11460 + }, + { + "epoch": 25.717488789237667, + "grad_norm": 0.2759377658367157, + "learning_rate": 9.465122096096625e-05, + "loss": -0.9118, + "step": 11470 + }, + { + "epoch": 25.739910313901344, + "grad_norm": 0.30908429622650146, + "learning_rate": 9.463881292985391e-05, + "loss": -0.9055, + "step": 11480 + }, + { + "epoch": 25.76233183856502, + "grad_norm": 0.4614003896713257, + "learning_rate": 9.462639133864881e-05, + "loss": -0.9098, + "step": 11490 + }, + { + "epoch": 25.7847533632287, + "grad_norm": 0.26457321643829346, + "learning_rate": 9.461395619112432e-05, + "loss": -0.9104, + "step": 11500 + }, + { + "epoch": 25.807174887892376, + "grad_norm": 0.2847163677215576, + "learning_rate": 9.460150749105791e-05, + "loss": -0.9166, + "step": 11510 + }, + { + "epoch": 25.829596412556054, + "grad_norm": 0.3794891834259033, + "learning_rate": 9.458904524223116e-05, + "loss": -0.9095, + "step": 11520 + }, + { + "epoch": 25.85201793721973, + "grad_norm": 0.35596925020217896, + "learning_rate": 9.457656944842976e-05, + "loss": -0.9049, + "step": 11530 + }, + { + "epoch": 25.87443946188341, + "grad_norm": 0.28232648968696594, + "learning_rate": 9.456408011344353e-05, + "loss": -0.916, + "step": 11540 + }, + { + "epoch": 25.896860986547086, + "grad_norm": 0.42311301827430725, + "learning_rate": 9.455157724106643e-05, + "loss": -0.9091, + "step": 11550 + }, + { + "epoch": 25.919282511210763, + "grad_norm": 0.3409591615200043, + "learning_rate": 9.453906083509647e-05, + "loss": -0.9087, + "step": 11560 + }, + { + "epoch": 25.94170403587444, + "grad_norm": 0.2551672160625458, + "learning_rate": 9.45265308993358e-05, + "loss": -0.9156, + "step": 11570 + }, + { + "epoch": 25.96412556053812, + "grad_norm": 0.3147939145565033, + "learning_rate": 9.451398743759071e-05, + "loss": -0.9071, + "step": 11580 + }, + { + "epoch": 25.986547085201792, + "grad_norm": 0.33599209785461426, + "learning_rate": 9.450143045367156e-05, + "loss": -0.9147, + "step": 11590 + }, + { + "epoch": 26.00896860986547, + "grad_norm": 0.34047916531562805, + "learning_rate": 9.448885995139283e-05, + "loss": -0.9163, + "step": 11600 + }, + { + "epoch": 26.031390134529147, + "grad_norm": 0.3458956182003021, + "learning_rate": 9.44762759345731e-05, + "loss": -0.9139, + "step": 11610 + }, + { + "epoch": 26.053811659192824, + "grad_norm": 0.4638904929161072, + "learning_rate": 9.446367840703509e-05, + "loss": -0.9137, + "step": 11620 + }, + { + "epoch": 26.076233183856502, + "grad_norm": 0.3526851236820221, + "learning_rate": 9.445106737260556e-05, + "loss": -0.9168, + "step": 11630 + }, + { + "epoch": 26.09865470852018, + "grad_norm": 0.30327871441841125, + "learning_rate": 9.443844283511543e-05, + "loss": -0.9078, + "step": 11640 + }, + { + "epoch": 26.121076233183857, + "grad_norm": 0.3292253613471985, + "learning_rate": 9.442580479839968e-05, + "loss": -0.9055, + "step": 11650 + }, + { + "epoch": 26.143497757847534, + "grad_norm": 0.3690853714942932, + "learning_rate": 9.441315326629745e-05, + "loss": -0.9066, + "step": 11660 + }, + { + "epoch": 26.16591928251121, + "grad_norm": 0.2991633415222168, + "learning_rate": 9.44004882426519e-05, + "loss": -0.914, + "step": 11670 + }, + { + "epoch": 26.18834080717489, + "grad_norm": 0.35100042819976807, + "learning_rate": 9.438780973131037e-05, + "loss": -0.9103, + "step": 11680 + }, + { + "epoch": 26.210762331838566, + "grad_norm": 0.4001748263835907, + "learning_rate": 9.437511773612423e-05, + "loss": -0.9059, + "step": 11690 + }, + { + "epoch": 26.233183856502244, + "grad_norm": 0.3147962689399719, + "learning_rate": 9.436241226094896e-05, + "loss": -0.9034, + "step": 11700 + }, + { + "epoch": 26.255605381165918, + "grad_norm": 0.3209475874900818, + "learning_rate": 9.434969330964418e-05, + "loss": -0.9077, + "step": 11710 + }, + { + "epoch": 26.278026905829595, + "grad_norm": 0.3981972932815552, + "learning_rate": 9.433696088607356e-05, + "loss": -0.91, + "step": 11720 + }, + { + "epoch": 26.300448430493272, + "grad_norm": 0.2145272195339203, + "learning_rate": 9.432421499410486e-05, + "loss": -0.9115, + "step": 11730 + }, + { + "epoch": 26.32286995515695, + "grad_norm": 0.29702043533325195, + "learning_rate": 9.431145563760998e-05, + "loss": -0.9133, + "step": 11740 + }, + { + "epoch": 26.345291479820627, + "grad_norm": 0.33235159516334534, + "learning_rate": 9.429868282046484e-05, + "loss": -0.9017, + "step": 11750 + }, + { + "epoch": 26.367713004484305, + "grad_norm": 0.3273705542087555, + "learning_rate": 9.428589654654951e-05, + "loss": -0.9048, + "step": 11760 + }, + { + "epoch": 26.390134529147982, + "grad_norm": 0.41600629687309265, + "learning_rate": 9.42730968197481e-05, + "loss": -0.9129, + "step": 11770 + }, + { + "epoch": 26.41255605381166, + "grad_norm": 0.33123400807380676, + "learning_rate": 9.426028364394883e-05, + "loss": -0.9129, + "step": 11780 + }, + { + "epoch": 26.434977578475337, + "grad_norm": 0.3565439283847809, + "learning_rate": 9.424745702304402e-05, + "loss": -0.9134, + "step": 11790 + }, + { + "epoch": 26.457399103139014, + "grad_norm": 0.29011279344558716, + "learning_rate": 9.423461696093006e-05, + "loss": -0.9115, + "step": 11800 + }, + { + "epoch": 26.47982062780269, + "grad_norm": 0.36061882972717285, + "learning_rate": 9.422176346150741e-05, + "loss": -0.9137, + "step": 11810 + }, + { + "epoch": 26.50224215246637, + "grad_norm": 0.4031906723976135, + "learning_rate": 9.420889652868063e-05, + "loss": -0.9083, + "step": 11820 + }, + { + "epoch": 26.524663677130043, + "grad_norm": 0.2828062176704407, + "learning_rate": 9.419601616635836e-05, + "loss": -0.9126, + "step": 11830 + }, + { + "epoch": 26.54708520179372, + "grad_norm": 0.35729753971099854, + "learning_rate": 9.418312237845331e-05, + "loss": -0.91, + "step": 11840 + }, + { + "epoch": 26.569506726457398, + "grad_norm": 0.3586178421974182, + "learning_rate": 9.417021516888225e-05, + "loss": -0.9146, + "step": 11850 + }, + { + "epoch": 26.591928251121075, + "grad_norm": 0.3569405972957611, + "learning_rate": 9.415729454156608e-05, + "loss": -0.9129, + "step": 11860 + }, + { + "epoch": 26.614349775784753, + "grad_norm": 0.2718236744403839, + "learning_rate": 9.414436050042973e-05, + "loss": -0.91, + "step": 11870 + }, + { + "epoch": 26.63677130044843, + "grad_norm": 0.3212621510028839, + "learning_rate": 9.413141304940223e-05, + "loss": -0.9146, + "step": 11880 + }, + { + "epoch": 26.659192825112108, + "grad_norm": 0.28784963488578796, + "learning_rate": 9.411845219241666e-05, + "loss": -0.9189, + "step": 11890 + }, + { + "epoch": 26.681614349775785, + "grad_norm": 0.2426450103521347, + "learning_rate": 9.410547793341021e-05, + "loss": -0.9151, + "step": 11900 + }, + { + "epoch": 26.704035874439462, + "grad_norm": 0.40537673234939575, + "learning_rate": 9.409249027632408e-05, + "loss": -0.9163, + "step": 11910 + }, + { + "epoch": 26.72645739910314, + "grad_norm": 0.38228410482406616, + "learning_rate": 9.407948922510362e-05, + "loss": -0.9189, + "step": 11920 + }, + { + "epoch": 26.748878923766817, + "grad_norm": 0.3028300404548645, + "learning_rate": 9.406647478369817e-05, + "loss": -0.9202, + "step": 11930 + }, + { + "epoch": 26.771300448430495, + "grad_norm": 0.2576909363269806, + "learning_rate": 9.405344695606118e-05, + "loss": -0.9152, + "step": 11940 + }, + { + "epoch": 26.793721973094172, + "grad_norm": 0.22031927108764648, + "learning_rate": 9.404040574615018e-05, + "loss": -0.9191, + "step": 11950 + }, + { + "epoch": 26.816143497757846, + "grad_norm": 0.3300761282444, + "learning_rate": 9.402735115792674e-05, + "loss": -0.9152, + "step": 11960 + }, + { + "epoch": 26.838565022421523, + "grad_norm": 0.38877567648887634, + "learning_rate": 9.401428319535649e-05, + "loss": -0.9213, + "step": 11970 + }, + { + "epoch": 26.8609865470852, + "grad_norm": 0.3095521032810211, + "learning_rate": 9.400120186240912e-05, + "loss": -0.9095, + "step": 11980 + }, + { + "epoch": 26.883408071748878, + "grad_norm": 0.24908879399299622, + "learning_rate": 9.398810716305844e-05, + "loss": -0.9176, + "step": 11990 + }, + { + "epoch": 26.905829596412556, + "grad_norm": 0.23141808807849884, + "learning_rate": 9.397499910128222e-05, + "loss": -0.914, + "step": 12000 + }, + { + "epoch": 26.928251121076233, + "grad_norm": 0.36644047498703003, + "learning_rate": 9.396187768106237e-05, + "loss": -0.9241, + "step": 12010 + }, + { + "epoch": 26.95067264573991, + "grad_norm": 0.39666977524757385, + "learning_rate": 9.394874290638482e-05, + "loss": -0.9138, + "step": 12020 + }, + { + "epoch": 26.973094170403588, + "grad_norm": 0.336577445268631, + "learning_rate": 9.393559478123959e-05, + "loss": -0.9126, + "step": 12030 + }, + { + "epoch": 26.995515695067265, + "grad_norm": 0.2846103012561798, + "learning_rate": 9.39224333096207e-05, + "loss": -0.9134, + "step": 12040 + }, + { + "epoch": 27.017937219730943, + "grad_norm": 0.347154825925827, + "learning_rate": 9.390925849552629e-05, + "loss": -0.9138, + "step": 12050 + }, + { + "epoch": 27.04035874439462, + "grad_norm": 0.2981351912021637, + "learning_rate": 9.389607034295849e-05, + "loss": -0.9167, + "step": 12060 + }, + { + "epoch": 27.062780269058297, + "grad_norm": 0.4142342507839203, + "learning_rate": 9.388286885592355e-05, + "loss": -0.9093, + "step": 12070 + }, + { + "epoch": 27.08520179372197, + "grad_norm": 0.29503151774406433, + "learning_rate": 9.386965403843168e-05, + "loss": -0.9178, + "step": 12080 + }, + { + "epoch": 27.10762331838565, + "grad_norm": 0.43355247378349304, + "learning_rate": 9.385642589449726e-05, + "loss": -0.9164, + "step": 12090 + }, + { + "epoch": 27.130044843049326, + "grad_norm": 0.3097057640552521, + "learning_rate": 9.38431844281386e-05, + "loss": -0.9182, + "step": 12100 + }, + { + "epoch": 27.152466367713004, + "grad_norm": 0.48980122804641724, + "learning_rate": 9.38299296433781e-05, + "loss": -0.9138, + "step": 12110 + }, + { + "epoch": 27.17488789237668, + "grad_norm": 0.3387424647808075, + "learning_rate": 9.381666154424226e-05, + "loss": -0.9163, + "step": 12120 + }, + { + "epoch": 27.19730941704036, + "grad_norm": 0.3252856731414795, + "learning_rate": 9.380338013476157e-05, + "loss": -0.9206, + "step": 12130 + }, + { + "epoch": 27.219730941704036, + "grad_norm": 0.38695573806762695, + "learning_rate": 9.379008541897054e-05, + "loss": -0.9167, + "step": 12140 + }, + { + "epoch": 27.242152466367713, + "grad_norm": 0.3669487535953522, + "learning_rate": 9.377677740090777e-05, + "loss": -0.9095, + "step": 12150 + }, + { + "epoch": 27.26457399103139, + "grad_norm": 0.28167369961738586, + "learning_rate": 9.376345608461588e-05, + "loss": -0.9127, + "step": 12160 + }, + { + "epoch": 27.286995515695068, + "grad_norm": 0.3692166805267334, + "learning_rate": 9.375012147414155e-05, + "loss": -0.9161, + "step": 12170 + }, + { + "epoch": 27.309417040358746, + "grad_norm": 0.24995048344135284, + "learning_rate": 9.373677357353545e-05, + "loss": -0.9161, + "step": 12180 + }, + { + "epoch": 27.331838565022423, + "grad_norm": 0.27312639355659485, + "learning_rate": 9.372341238685237e-05, + "loss": -0.9171, + "step": 12190 + }, + { + "epoch": 27.3542600896861, + "grad_norm": 0.2675544023513794, + "learning_rate": 9.371003791815102e-05, + "loss": -0.9182, + "step": 12200 + }, + { + "epoch": 27.376681614349774, + "grad_norm": 0.45887669920921326, + "learning_rate": 9.369665017149429e-05, + "loss": -0.9196, + "step": 12210 + }, + { + "epoch": 27.39910313901345, + "grad_norm": 0.2845950424671173, + "learning_rate": 9.368324915094895e-05, + "loss": -0.9179, + "step": 12220 + }, + { + "epoch": 27.42152466367713, + "grad_norm": 0.22675950825214386, + "learning_rate": 9.366983486058591e-05, + "loss": -0.9128, + "step": 12230 + }, + { + "epoch": 27.443946188340806, + "grad_norm": 0.3913901448249817, + "learning_rate": 9.365640730448009e-05, + "loss": -0.915, + "step": 12240 + }, + { + "epoch": 27.466367713004484, + "grad_norm": 0.4278331995010376, + "learning_rate": 9.36429664867104e-05, + "loss": -0.9165, + "step": 12250 + }, + { + "epoch": 27.48878923766816, + "grad_norm": 0.3721156120300293, + "learning_rate": 9.362951241135982e-05, + "loss": -0.9146, + "step": 12260 + }, + { + "epoch": 27.51121076233184, + "grad_norm": 0.32532668113708496, + "learning_rate": 9.361604508251534e-05, + "loss": -0.9191, + "step": 12270 + }, + { + "epoch": 27.533632286995516, + "grad_norm": 0.28706297278404236, + "learning_rate": 9.360256450426799e-05, + "loss": -0.9198, + "step": 12280 + }, + { + "epoch": 27.556053811659194, + "grad_norm": 0.41861578822135925, + "learning_rate": 9.358907068071279e-05, + "loss": -0.9182, + "step": 12290 + }, + { + "epoch": 27.57847533632287, + "grad_norm": 0.28058797121047974, + "learning_rate": 9.357556361594882e-05, + "loss": -0.9216, + "step": 12300 + }, + { + "epoch": 27.60089686098655, + "grad_norm": 0.3528953194618225, + "learning_rate": 9.356204331407917e-05, + "loss": -0.9126, + "step": 12310 + }, + { + "epoch": 27.623318385650226, + "grad_norm": 0.3609015643596649, + "learning_rate": 9.354850977921094e-05, + "loss": -0.9094, + "step": 12320 + }, + { + "epoch": 27.6457399103139, + "grad_norm": 0.2650313377380371, + "learning_rate": 9.353496301545529e-05, + "loss": -0.9181, + "step": 12330 + }, + { + "epoch": 27.668161434977577, + "grad_norm": 0.3588651716709137, + "learning_rate": 9.352140302692733e-05, + "loss": -0.9174, + "step": 12340 + }, + { + "epoch": 27.690582959641254, + "grad_norm": 0.3093268573284149, + "learning_rate": 9.350782981774627e-05, + "loss": -0.9151, + "step": 12350 + }, + { + "epoch": 27.713004484304932, + "grad_norm": 0.33353927731513977, + "learning_rate": 9.349424339203526e-05, + "loss": -0.9155, + "step": 12360 + }, + { + "epoch": 27.73542600896861, + "grad_norm": 0.24967317283153534, + "learning_rate": 9.34806437539215e-05, + "loss": -0.9162, + "step": 12370 + }, + { + "epoch": 27.757847533632287, + "grad_norm": 0.2110190987586975, + "learning_rate": 9.346703090753622e-05, + "loss": -0.9141, + "step": 12380 + }, + { + "epoch": 27.780269058295964, + "grad_norm": 0.329297810792923, + "learning_rate": 9.345340485701461e-05, + "loss": -0.9163, + "step": 12390 + }, + { + "epoch": 27.80269058295964, + "grad_norm": 0.27016681432724, + "learning_rate": 9.343976560649595e-05, + "loss": -0.923, + "step": 12400 + }, + { + "epoch": 27.82511210762332, + "grad_norm": 0.2541598677635193, + "learning_rate": 9.342611316012344e-05, + "loss": -0.9174, + "step": 12410 + }, + { + "epoch": 27.847533632286996, + "grad_norm": 0.35386043787002563, + "learning_rate": 9.341244752204437e-05, + "loss": -0.9134, + "step": 12420 + }, + { + "epoch": 27.869955156950674, + "grad_norm": 0.2688843309879303, + "learning_rate": 9.339876869640995e-05, + "loss": -0.9203, + "step": 12430 + }, + { + "epoch": 27.89237668161435, + "grad_norm": 0.44577986001968384, + "learning_rate": 9.33850766873755e-05, + "loss": -0.9131, + "step": 12440 + }, + { + "epoch": 27.91479820627803, + "grad_norm": 0.4206576347351074, + "learning_rate": 9.337137149910028e-05, + "loss": -0.9122, + "step": 12450 + }, + { + "epoch": 27.937219730941703, + "grad_norm": 0.32301777601242065, + "learning_rate": 9.335765313574753e-05, + "loss": -0.9184, + "step": 12460 + }, + { + "epoch": 27.95964125560538, + "grad_norm": 0.32683244347572327, + "learning_rate": 9.334392160148457e-05, + "loss": -0.916, + "step": 12470 + }, + { + "epoch": 27.982062780269057, + "grad_norm": 0.371198832988739, + "learning_rate": 9.333017690048264e-05, + "loss": -0.9203, + "step": 12480 + }, + { + "epoch": 28.004484304932735, + "grad_norm": 0.3090057075023651, + "learning_rate": 9.331641903691706e-05, + "loss": -0.9204, + "step": 12490 + }, + { + "epoch": 28.026905829596412, + "grad_norm": 0.3740040957927704, + "learning_rate": 9.330264801496707e-05, + "loss": -0.9171, + "step": 12500 + }, + { + "epoch": 28.04932735426009, + "grad_norm": 0.28480422496795654, + "learning_rate": 9.328886383881594e-05, + "loss": -0.9178, + "step": 12510 + }, + { + "epoch": 28.071748878923767, + "grad_norm": 0.37324297428131104, + "learning_rate": 9.327506651265095e-05, + "loss": -0.9155, + "step": 12520 + }, + { + "epoch": 28.094170403587444, + "grad_norm": 0.3195306062698364, + "learning_rate": 9.326125604066338e-05, + "loss": -0.9143, + "step": 12530 + }, + { + "epoch": 28.116591928251122, + "grad_norm": 0.2823876440525055, + "learning_rate": 9.324743242704847e-05, + "loss": -0.9151, + "step": 12540 + }, + { + "epoch": 28.1390134529148, + "grad_norm": 0.39894670248031616, + "learning_rate": 9.323359567600546e-05, + "loss": -0.9131, + "step": 12550 + }, + { + "epoch": 28.161434977578477, + "grad_norm": 0.4327967166900635, + "learning_rate": 9.321974579173761e-05, + "loss": -0.9185, + "step": 12560 + }, + { + "epoch": 28.183856502242154, + "grad_norm": 0.47411197423934937, + "learning_rate": 9.320588277845213e-05, + "loss": -0.9058, + "step": 12570 + }, + { + "epoch": 28.206278026905828, + "grad_norm": 0.33808550238609314, + "learning_rate": 9.319200664036026e-05, + "loss": -0.9149, + "step": 12580 + }, + { + "epoch": 28.228699551569505, + "grad_norm": 0.4508349895477295, + "learning_rate": 9.31781173816772e-05, + "loss": -0.9188, + "step": 12590 + }, + { + "epoch": 28.251121076233183, + "grad_norm": 0.31824764609336853, + "learning_rate": 9.316421500662212e-05, + "loss": -0.9186, + "step": 12600 + }, + { + "epoch": 28.27354260089686, + "grad_norm": 0.33747681975364685, + "learning_rate": 9.31502995194182e-05, + "loss": -0.9185, + "step": 12610 + }, + { + "epoch": 28.295964125560538, + "grad_norm": 0.2726086378097534, + "learning_rate": 9.31363709242926e-05, + "loss": -0.9168, + "step": 12620 + }, + { + "epoch": 28.318385650224215, + "grad_norm": 0.3468644320964813, + "learning_rate": 9.312242922547647e-05, + "loss": -0.925, + "step": 12630 + }, + { + "epoch": 28.340807174887892, + "grad_norm": 0.32118675112724304, + "learning_rate": 9.310847442720492e-05, + "loss": -0.9187, + "step": 12640 + }, + { + "epoch": 28.36322869955157, + "grad_norm": 0.2283705770969391, + "learning_rate": 9.309450653371706e-05, + "loss": -0.9218, + "step": 12650 + }, + { + "epoch": 28.385650224215247, + "grad_norm": 0.2248518019914627, + "learning_rate": 9.308052554925595e-05, + "loss": -0.9215, + "step": 12660 + }, + { + "epoch": 28.408071748878925, + "grad_norm": 0.36839810013771057, + "learning_rate": 9.306653147806867e-05, + "loss": -0.9164, + "step": 12670 + }, + { + "epoch": 28.430493273542602, + "grad_norm": 0.3701472580432892, + "learning_rate": 9.305252432440622e-05, + "loss": -0.92, + "step": 12680 + }, + { + "epoch": 28.45291479820628, + "grad_norm": 0.26981160044670105, + "learning_rate": 9.303850409252361e-05, + "loss": -0.924, + "step": 12690 + }, + { + "epoch": 28.475336322869953, + "grad_norm": 0.33668628334999084, + "learning_rate": 9.302447078667985e-05, + "loss": -0.9247, + "step": 12700 + }, + { + "epoch": 28.49775784753363, + "grad_norm": 0.4705911874771118, + "learning_rate": 9.301042441113783e-05, + "loss": -0.9214, + "step": 12710 + }, + { + "epoch": 28.52017937219731, + "grad_norm": 0.36379149556159973, + "learning_rate": 9.299636497016451e-05, + "loss": -0.9154, + "step": 12720 + }, + { + "epoch": 28.542600896860986, + "grad_norm": 0.3932231366634369, + "learning_rate": 9.298229246803076e-05, + "loss": -0.9186, + "step": 12730 + }, + { + "epoch": 28.565022421524663, + "grad_norm": 0.2753264009952545, + "learning_rate": 9.296820690901144e-05, + "loss": -0.9175, + "step": 12740 + }, + { + "epoch": 28.58744394618834, + "grad_norm": 0.34328505396842957, + "learning_rate": 9.295410829738539e-05, + "loss": -0.9177, + "step": 12750 + }, + { + "epoch": 28.609865470852018, + "grad_norm": 0.3263484537601471, + "learning_rate": 9.293999663743535e-05, + "loss": -0.919, + "step": 12760 + }, + { + "epoch": 28.632286995515695, + "grad_norm": 0.39114364981651306, + "learning_rate": 9.292587193344813e-05, + "loss": -0.9139, + "step": 12770 + }, + { + "epoch": 28.654708520179373, + "grad_norm": 0.3675188422203064, + "learning_rate": 9.291173418971437e-05, + "loss": -0.9208, + "step": 12780 + }, + { + "epoch": 28.67713004484305, + "grad_norm": 0.30521902441978455, + "learning_rate": 9.28975834105288e-05, + "loss": -0.9162, + "step": 12790 + }, + { + "epoch": 28.699551569506728, + "grad_norm": 0.322161465883255, + "learning_rate": 9.288341960019004e-05, + "loss": -0.917, + "step": 12800 + }, + { + "epoch": 28.721973094170405, + "grad_norm": 0.3625313639640808, + "learning_rate": 9.286924276300067e-05, + "loss": -0.9155, + "step": 12810 + }, + { + "epoch": 28.744394618834082, + "grad_norm": 0.3582245111465454, + "learning_rate": 9.285505290326726e-05, + "loss": -0.9188, + "step": 12820 + }, + { + "epoch": 28.766816143497756, + "grad_norm": 0.41856780648231506, + "learning_rate": 9.284085002530027e-05, + "loss": -0.9197, + "step": 12830 + }, + { + "epoch": 28.789237668161434, + "grad_norm": 0.26263391971588135, + "learning_rate": 9.282663413341422e-05, + "loss": -0.9156, + "step": 12840 + }, + { + "epoch": 28.81165919282511, + "grad_norm": 0.4794764220714569, + "learning_rate": 9.281240523192747e-05, + "loss": -0.9221, + "step": 12850 + }, + { + "epoch": 28.83408071748879, + "grad_norm": 0.29088762402534485, + "learning_rate": 9.279816332516242e-05, + "loss": -0.9225, + "step": 12860 + }, + { + "epoch": 28.856502242152466, + "grad_norm": 0.365097314119339, + "learning_rate": 9.278390841744536e-05, + "loss": -0.916, + "step": 12870 + }, + { + "epoch": 28.878923766816143, + "grad_norm": 0.2648821473121643, + "learning_rate": 9.276964051310658e-05, + "loss": -0.9208, + "step": 12880 + }, + { + "epoch": 28.90134529147982, + "grad_norm": 0.40552395582199097, + "learning_rate": 9.275535961648027e-05, + "loss": -0.925, + "step": 12890 + }, + { + "epoch": 28.923766816143498, + "grad_norm": 0.2953842282295227, + "learning_rate": 9.274106573190459e-05, + "loss": -0.9225, + "step": 12900 + }, + { + "epoch": 28.946188340807176, + "grad_norm": 0.3714725375175476, + "learning_rate": 9.272675886372168e-05, + "loss": -0.9221, + "step": 12910 + }, + { + "epoch": 28.968609865470853, + "grad_norm": 0.24683542549610138, + "learning_rate": 9.271243901627754e-05, + "loss": -0.9176, + "step": 12920 + }, + { + "epoch": 28.99103139013453, + "grad_norm": 0.3508479595184326, + "learning_rate": 9.269810619392219e-05, + "loss": -0.9204, + "step": 12930 + }, + { + "epoch": 29.013452914798208, + "grad_norm": 0.2971908748149872, + "learning_rate": 9.268376040100955e-05, + "loss": -0.9151, + "step": 12940 + }, + { + "epoch": 29.03587443946188, + "grad_norm": 0.32165706157684326, + "learning_rate": 9.266940164189752e-05, + "loss": -0.925, + "step": 12950 + }, + { + "epoch": 29.05829596412556, + "grad_norm": 0.2997313439846039, + "learning_rate": 9.265502992094787e-05, + "loss": -0.9176, + "step": 12960 + }, + { + "epoch": 29.080717488789237, + "grad_norm": 0.496072381734848, + "learning_rate": 9.264064524252638e-05, + "loss": -0.922, + "step": 12970 + }, + { + "epoch": 29.103139013452914, + "grad_norm": 0.2188308835029602, + "learning_rate": 9.262624761100271e-05, + "loss": -0.9163, + "step": 12980 + }, + { + "epoch": 29.12556053811659, + "grad_norm": 0.30036213994026184, + "learning_rate": 9.261183703075051e-05, + "loss": -0.919, + "step": 12990 + }, + { + "epoch": 29.14798206278027, + "grad_norm": 0.48414120078086853, + "learning_rate": 9.259741350614733e-05, + "loss": -0.9189, + "step": 13000 + }, + { + "epoch": 29.170403587443946, + "grad_norm": 0.21723142266273499, + "learning_rate": 9.258297704157464e-05, + "loss": -0.9154, + "step": 13010 + }, + { + "epoch": 29.192825112107624, + "grad_norm": 0.3483704626560211, + "learning_rate": 9.256852764141786e-05, + "loss": -0.9176, + "step": 13020 + }, + { + "epoch": 29.2152466367713, + "grad_norm": 0.437795490026474, + "learning_rate": 9.255406531006634e-05, + "loss": -0.9186, + "step": 13030 + }, + { + "epoch": 29.23766816143498, + "grad_norm": 0.2974001467227936, + "learning_rate": 9.253959005191335e-05, + "loss": -0.9067, + "step": 13040 + }, + { + "epoch": 29.260089686098656, + "grad_norm": 0.34824642539024353, + "learning_rate": 9.25251018713561e-05, + "loss": -0.9108, + "step": 13050 + }, + { + "epoch": 29.282511210762333, + "grad_norm": 0.33399802446365356, + "learning_rate": 9.251060077279571e-05, + "loss": -0.9204, + "step": 13060 + }, + { + "epoch": 29.304932735426007, + "grad_norm": 0.3387065827846527, + "learning_rate": 9.249608676063724e-05, + "loss": -0.9167, + "step": 13070 + }, + { + "epoch": 29.327354260089685, + "grad_norm": 0.2584705352783203, + "learning_rate": 9.248155983928964e-05, + "loss": -0.9223, + "step": 13080 + }, + { + "epoch": 29.349775784753362, + "grad_norm": 0.30113348364830017, + "learning_rate": 9.246702001316583e-05, + "loss": -0.9199, + "step": 13090 + }, + { + "epoch": 29.37219730941704, + "grad_norm": 0.2692442238330841, + "learning_rate": 9.245246728668262e-05, + "loss": -0.9223, + "step": 13100 + }, + { + "epoch": 29.394618834080717, + "grad_norm": 0.45194578170776367, + "learning_rate": 9.243790166426073e-05, + "loss": -0.9169, + "step": 13110 + }, + { + "epoch": 29.417040358744394, + "grad_norm": 0.2599581778049469, + "learning_rate": 9.242332315032484e-05, + "loss": -0.92, + "step": 13120 + }, + { + "epoch": 29.43946188340807, + "grad_norm": 0.34691497683525085, + "learning_rate": 9.240873174930349e-05, + "loss": -0.9124, + "step": 13130 + }, + { + "epoch": 29.46188340807175, + "grad_norm": 0.27784621715545654, + "learning_rate": 9.239412746562917e-05, + "loss": -0.9171, + "step": 13140 + }, + { + "epoch": 29.484304932735427, + "grad_norm": 0.3393186032772064, + "learning_rate": 9.237951030373828e-05, + "loss": -0.9135, + "step": 13150 + }, + { + "epoch": 29.506726457399104, + "grad_norm": 0.3796761631965637, + "learning_rate": 9.236488026807113e-05, + "loss": -0.9135, + "step": 13160 + }, + { + "epoch": 29.52914798206278, + "grad_norm": 0.38325297832489014, + "learning_rate": 9.235023736307193e-05, + "loss": -0.9215, + "step": 13170 + }, + { + "epoch": 29.55156950672646, + "grad_norm": 0.35062000155448914, + "learning_rate": 9.233558159318881e-05, + "loss": -0.9182, + "step": 13180 + }, + { + "epoch": 29.573991031390136, + "grad_norm": 0.5471120476722717, + "learning_rate": 9.232091296287382e-05, + "loss": -0.9199, + "step": 13190 + }, + { + "epoch": 29.59641255605381, + "grad_norm": 0.2555099129676819, + "learning_rate": 9.230623147658288e-05, + "loss": -0.9148, + "step": 13200 + }, + { + "epoch": 29.618834080717487, + "grad_norm": 0.2633056640625, + "learning_rate": 9.229153713877586e-05, + "loss": -0.9219, + "step": 13210 + }, + { + "epoch": 29.641255605381165, + "grad_norm": 0.2777378261089325, + "learning_rate": 9.227682995391649e-05, + "loss": -0.9152, + "step": 13220 + }, + { + "epoch": 29.663677130044842, + "grad_norm": 0.3721217215061188, + "learning_rate": 9.226210992647243e-05, + "loss": -0.9175, + "step": 13230 + }, + { + "epoch": 29.68609865470852, + "grad_norm": 0.3994000256061554, + "learning_rate": 9.224737706091525e-05, + "loss": -0.9232, + "step": 13240 + }, + { + "epoch": 29.708520179372197, + "grad_norm": 0.3485909104347229, + "learning_rate": 9.223263136172039e-05, + "loss": -0.9154, + "step": 13250 + }, + { + "epoch": 29.730941704035875, + "grad_norm": 0.38182657957077026, + "learning_rate": 9.22178728333672e-05, + "loss": -0.9196, + "step": 13260 + }, + { + "epoch": 29.753363228699552, + "grad_norm": 0.308361679315567, + "learning_rate": 9.220310148033897e-05, + "loss": -0.9208, + "step": 13270 + }, + { + "epoch": 29.77578475336323, + "grad_norm": 0.3606289327144623, + "learning_rate": 9.21883173071228e-05, + "loss": -0.9192, + "step": 13280 + }, + { + "epoch": 29.798206278026907, + "grad_norm": 0.2532016336917877, + "learning_rate": 9.217352031820976e-05, + "loss": -0.9275, + "step": 13290 + }, + { + "epoch": 29.820627802690584, + "grad_norm": 0.3232014775276184, + "learning_rate": 9.215871051809477e-05, + "loss": -0.9198, + "step": 13300 + }, + { + "epoch": 29.84304932735426, + "grad_norm": 0.5056620836257935, + "learning_rate": 9.214388791127666e-05, + "loss": -0.9164, + "step": 13310 + }, + { + "epoch": 29.865470852017935, + "grad_norm": 0.3066166341304779, + "learning_rate": 9.212905250225814e-05, + "loss": -0.9163, + "step": 13320 + }, + { + "epoch": 29.887892376681613, + "grad_norm": 0.29935941100120544, + "learning_rate": 9.211420429554583e-05, + "loss": -0.918, + "step": 13330 + }, + { + "epoch": 29.91031390134529, + "grad_norm": 0.39903610944747925, + "learning_rate": 9.209934329565022e-05, + "loss": -0.9198, + "step": 13340 + }, + { + "epoch": 29.932735426008968, + "grad_norm": 0.3712812066078186, + "learning_rate": 9.208446950708568e-05, + "loss": -0.9117, + "step": 13350 + }, + { + "epoch": 29.955156950672645, + "grad_norm": 0.3505795896053314, + "learning_rate": 9.20695829343705e-05, + "loss": -0.9188, + "step": 13360 + }, + { + "epoch": 29.977578475336323, + "grad_norm": 0.2937847673892975, + "learning_rate": 9.205468358202678e-05, + "loss": -0.923, + "step": 13370 + }, + { + "epoch": 30.0, + "grad_norm": 0.3111177384853363, + "learning_rate": 9.203977145458059e-05, + "loss": -0.9136, + "step": 13380 + }, + { + "epoch": 30.022421524663677, + "grad_norm": 0.3796631693840027, + "learning_rate": 9.202484655656182e-05, + "loss": -0.9225, + "step": 13390 + }, + { + "epoch": 30.044843049327355, + "grad_norm": 0.3488638401031494, + "learning_rate": 9.200990889250427e-05, + "loss": -0.924, + "step": 13400 + }, + { + "epoch": 30.067264573991032, + "grad_norm": 0.21926052868366241, + "learning_rate": 9.19949584669456e-05, + "loss": -0.9248, + "step": 13410 + }, + { + "epoch": 30.08968609865471, + "grad_norm": 0.36466798186302185, + "learning_rate": 9.197999528442738e-05, + "loss": -0.9225, + "step": 13420 + }, + { + "epoch": 30.112107623318387, + "grad_norm": 0.2644979953765869, + "learning_rate": 9.196501934949499e-05, + "loss": -0.9197, + "step": 13430 + }, + { + "epoch": 30.134529147982065, + "grad_norm": 0.4186928868293762, + "learning_rate": 9.195003066669776e-05, + "loss": -0.9202, + "step": 13440 + }, + { + "epoch": 30.15695067264574, + "grad_norm": 0.29420220851898193, + "learning_rate": 9.193502924058884e-05, + "loss": -0.9225, + "step": 13450 + }, + { + "epoch": 30.179372197309416, + "grad_norm": 0.3544923663139343, + "learning_rate": 9.192001507572526e-05, + "loss": -0.9188, + "step": 13460 + }, + { + "epoch": 30.201793721973093, + "grad_norm": 0.4123791754245758, + "learning_rate": 9.190498817666793e-05, + "loss": -0.914, + "step": 13470 + }, + { + "epoch": 30.22421524663677, + "grad_norm": 0.3862922787666321, + "learning_rate": 9.188994854798163e-05, + "loss": -0.9198, + "step": 13480 + }, + { + "epoch": 30.246636771300448, + "grad_norm": 0.2799808382987976, + "learning_rate": 9.187489619423499e-05, + "loss": -0.9212, + "step": 13490 + }, + { + "epoch": 30.269058295964125, + "grad_norm": 0.4355735182762146, + "learning_rate": 9.185983112000056e-05, + "loss": -0.9212, + "step": 13500 + }, + { + "epoch": 30.291479820627803, + "grad_norm": 0.31680071353912354, + "learning_rate": 9.184475332985464e-05, + "loss": -0.9264, + "step": 13510 + }, + { + "epoch": 30.31390134529148, + "grad_norm": 0.2161516398191452, + "learning_rate": 9.182966282837754e-05, + "loss": -0.9239, + "step": 13520 + }, + { + "epoch": 30.336322869955158, + "grad_norm": 0.32340583205223083, + "learning_rate": 9.18145596201533e-05, + "loss": -0.9235, + "step": 13530 + }, + { + "epoch": 30.358744394618835, + "grad_norm": 0.2672184109687805, + "learning_rate": 9.179944370976991e-05, + "loss": -0.9211, + "step": 13540 + }, + { + "epoch": 30.381165919282513, + "grad_norm": 0.27936050295829773, + "learning_rate": 9.178431510181918e-05, + "loss": -0.923, + "step": 13550 + }, + { + "epoch": 30.40358744394619, + "grad_norm": 0.3451181948184967, + "learning_rate": 9.176917380089675e-05, + "loss": -0.9214, + "step": 13560 + }, + { + "epoch": 30.426008968609864, + "grad_norm": 0.3330938518047333, + "learning_rate": 9.175401981160219e-05, + "loss": -0.9174, + "step": 13570 + }, + { + "epoch": 30.44843049327354, + "grad_norm": 0.3088288903236389, + "learning_rate": 9.173885313853885e-05, + "loss": -0.9222, + "step": 13580 + }, + { + "epoch": 30.47085201793722, + "grad_norm": 0.47405558824539185, + "learning_rate": 9.172367378631398e-05, + "loss": -0.9139, + "step": 13590 + }, + { + "epoch": 30.493273542600896, + "grad_norm": 0.5123191475868225, + "learning_rate": 9.170848175953866e-05, + "loss": -0.9116, + "step": 13600 + }, + { + "epoch": 30.515695067264573, + "grad_norm": 0.4678470194339752, + "learning_rate": 9.169327706282784e-05, + "loss": -0.9228, + "step": 13610 + }, + { + "epoch": 30.53811659192825, + "grad_norm": 0.37054911255836487, + "learning_rate": 9.167805970080029e-05, + "loss": -0.9249, + "step": 13620 + }, + { + "epoch": 30.56053811659193, + "grad_norm": 0.4416565001010895, + "learning_rate": 9.166282967807864e-05, + "loss": -0.9209, + "step": 13630 + }, + { + "epoch": 30.582959641255606, + "grad_norm": 0.265085905790329, + "learning_rate": 9.16475869992894e-05, + "loss": -0.9251, + "step": 13640 + }, + { + "epoch": 30.605381165919283, + "grad_norm": 0.4337123930454254, + "learning_rate": 9.163233166906284e-05, + "loss": -0.9279, + "step": 13650 + }, + { + "epoch": 30.62780269058296, + "grad_norm": 0.37562403082847595, + "learning_rate": 9.161706369203317e-05, + "loss": -0.9206, + "step": 13660 + }, + { + "epoch": 30.650224215246638, + "grad_norm": 0.282855749130249, + "learning_rate": 9.16017830728384e-05, + "loss": -0.9294, + "step": 13670 + }, + { + "epoch": 30.672645739910315, + "grad_norm": 0.2935281991958618, + "learning_rate": 9.158648981612035e-05, + "loss": -0.9212, + "step": 13680 + }, + { + "epoch": 30.695067264573993, + "grad_norm": 0.3311089277267456, + "learning_rate": 9.157118392652472e-05, + "loss": -0.923, + "step": 13690 + }, + { + "epoch": 30.717488789237667, + "grad_norm": 0.31151139736175537, + "learning_rate": 9.155586540870104e-05, + "loss": -0.9242, + "step": 13700 + }, + { + "epoch": 30.739910313901344, + "grad_norm": 0.3509134352207184, + "learning_rate": 9.154053426730267e-05, + "loss": -0.9212, + "step": 13710 + }, + { + "epoch": 30.76233183856502, + "grad_norm": 0.370948851108551, + "learning_rate": 9.15251905069868e-05, + "loss": -0.9151, + "step": 13720 + }, + { + "epoch": 30.7847533632287, + "grad_norm": 0.31699782609939575, + "learning_rate": 9.150983413241446e-05, + "loss": -0.9231, + "step": 13730 + }, + { + "epoch": 30.807174887892376, + "grad_norm": 0.2630338966846466, + "learning_rate": 9.149446514825051e-05, + "loss": -0.918, + "step": 13740 + }, + { + "epoch": 30.829596412556054, + "grad_norm": 0.4896722733974457, + "learning_rate": 9.147908355916365e-05, + "loss": -0.9157, + "step": 13750 + }, + { + "epoch": 30.85201793721973, + "grad_norm": 0.4682144820690155, + "learning_rate": 9.146368936982642e-05, + "loss": -0.9105, + "step": 13760 + }, + { + "epoch": 30.87443946188341, + "grad_norm": 0.3720111846923828, + "learning_rate": 9.144828258491511e-05, + "loss": -0.9188, + "step": 13770 + }, + { + "epoch": 30.896860986547086, + "grad_norm": 0.37507951259613037, + "learning_rate": 9.143286320910996e-05, + "loss": -0.9192, + "step": 13780 + }, + { + "epoch": 30.919282511210763, + "grad_norm": 0.3739001452922821, + "learning_rate": 9.141743124709491e-05, + "loss": -0.9217, + "step": 13790 + }, + { + "epoch": 30.94170403587444, + "grad_norm": 0.30726689100265503, + "learning_rate": 9.140198670355784e-05, + "loss": -0.9209, + "step": 13800 + }, + { + "epoch": 30.96412556053812, + "grad_norm": 0.28899431228637695, + "learning_rate": 9.138652958319034e-05, + "loss": -0.915, + "step": 13810 + }, + { + "epoch": 30.986547085201792, + "grad_norm": 0.40835466980934143, + "learning_rate": 9.137105989068791e-05, + "loss": -0.9208, + "step": 13820 + }, + { + "epoch": 31.00896860986547, + "grad_norm": 0.3711370825767517, + "learning_rate": 9.135557763074983e-05, + "loss": -0.9125, + "step": 13830 + }, + { + "epoch": 31.031390134529147, + "grad_norm": 0.5705682039260864, + "learning_rate": 9.13400828080792e-05, + "loss": -0.9155, + "step": 13840 + }, + { + "epoch": 31.053811659192824, + "grad_norm": 0.3063352406024933, + "learning_rate": 9.132457542738292e-05, + "loss": -0.9231, + "step": 13850 + }, + { + "epoch": 31.076233183856502, + "grad_norm": 0.25500252842903137, + "learning_rate": 9.130905549337174e-05, + "loss": -0.9279, + "step": 13860 + }, + { + "epoch": 31.09865470852018, + "grad_norm": 0.34163400530815125, + "learning_rate": 9.129352301076021e-05, + "loss": -0.9223, + "step": 13870 + }, + { + "epoch": 31.121076233183857, + "grad_norm": 0.3389946520328522, + "learning_rate": 9.127797798426668e-05, + "loss": -0.9216, + "step": 13880 + }, + { + "epoch": 31.143497757847534, + "grad_norm": 0.29954996705055237, + "learning_rate": 9.126242041861333e-05, + "loss": -0.9118, + "step": 13890 + }, + { + "epoch": 31.16591928251121, + "grad_norm": 0.2647561728954315, + "learning_rate": 9.124685031852611e-05, + "loss": -0.9203, + "step": 13900 + }, + { + "epoch": 31.18834080717489, + "grad_norm": 0.2108236700296402, + "learning_rate": 9.123126768873482e-05, + "loss": -0.924, + "step": 13910 + }, + { + "epoch": 31.210762331838566, + "grad_norm": 0.42317041754722595, + "learning_rate": 9.121567253397308e-05, + "loss": -0.9193, + "step": 13920 + }, + { + "epoch": 31.233183856502244, + "grad_norm": 0.2735370993614197, + "learning_rate": 9.120006485897824e-05, + "loss": -0.9255, + "step": 13930 + }, + { + "epoch": 31.255605381165918, + "grad_norm": 0.4073503017425537, + "learning_rate": 9.118444466849152e-05, + "loss": -0.9166, + "step": 13940 + }, + { + "epoch": 31.278026905829595, + "grad_norm": 0.23344986140727997, + "learning_rate": 9.116881196725793e-05, + "loss": -0.9234, + "step": 13950 + }, + { + "epoch": 31.300448430493272, + "grad_norm": 0.2877860367298126, + "learning_rate": 9.115316676002627e-05, + "loss": -0.9236, + "step": 13960 + }, + { + "epoch": 31.32286995515695, + "grad_norm": 0.2719196081161499, + "learning_rate": 9.113750905154911e-05, + "loss": -0.9233, + "step": 13970 + }, + { + "epoch": 31.345291479820627, + "grad_norm": 0.252864271402359, + "learning_rate": 9.112183884658289e-05, + "loss": -0.9188, + "step": 13980 + }, + { + "epoch": 31.367713004484305, + "grad_norm": 0.31721436977386475, + "learning_rate": 9.11061561498878e-05, + "loss": -0.929, + "step": 13990 + }, + { + "epoch": 31.390134529147982, + "grad_norm": 0.3555852770805359, + "learning_rate": 9.109046096622779e-05, + "loss": -0.9191, + "step": 14000 + }, + { + "epoch": 31.41255605381166, + "grad_norm": 0.3753679692745209, + "learning_rate": 9.107475330037069e-05, + "loss": -0.9202, + "step": 14010 + }, + { + "epoch": 31.434977578475337, + "grad_norm": 0.32429221272468567, + "learning_rate": 9.105903315708806e-05, + "loss": -0.9231, + "step": 14020 + }, + { + "epoch": 31.457399103139014, + "grad_norm": 0.328401118516922, + "learning_rate": 9.104330054115524e-05, + "loss": -0.9289, + "step": 14030 + }, + { + "epoch": 31.47982062780269, + "grad_norm": 0.29300743341445923, + "learning_rate": 9.102755545735141e-05, + "loss": -0.9262, + "step": 14040 + }, + { + "epoch": 31.50224215246637, + "grad_norm": 0.2657925486564636, + "learning_rate": 9.10117979104595e-05, + "loss": -0.9217, + "step": 14050 + }, + { + "epoch": 31.524663677130043, + "grad_norm": 0.3787411153316498, + "learning_rate": 9.099602790526624e-05, + "loss": -0.9212, + "step": 14060 + }, + { + "epoch": 31.54708520179372, + "grad_norm": 0.35104966163635254, + "learning_rate": 9.098024544656212e-05, + "loss": -0.9272, + "step": 14070 + }, + { + "epoch": 31.569506726457398, + "grad_norm": 0.26933345198631287, + "learning_rate": 9.096445053914148e-05, + "loss": -0.9161, + "step": 14080 + }, + { + "epoch": 31.591928251121075, + "grad_norm": 0.2855548560619354, + "learning_rate": 9.094864318780236e-05, + "loss": -0.9224, + "step": 14090 + }, + { + "epoch": 31.614349775784753, + "grad_norm": 0.365366667509079, + "learning_rate": 9.093282339734663e-05, + "loss": -0.9117, + "step": 14100 + }, + { + "epoch": 31.63677130044843, + "grad_norm": 0.43187418580055237, + "learning_rate": 9.091699117257992e-05, + "loss": -0.9258, + "step": 14110 + }, + { + "epoch": 31.659192825112108, + "grad_norm": 0.2635927200317383, + "learning_rate": 9.090114651831163e-05, + "loss": -0.9261, + "step": 14120 + }, + { + "epoch": 31.681614349775785, + "grad_norm": 0.3329140543937683, + "learning_rate": 9.088528943935497e-05, + "loss": -0.9166, + "step": 14130 + }, + { + "epoch": 31.704035874439462, + "grad_norm": 0.30886566638946533, + "learning_rate": 9.086941994052689e-05, + "loss": -0.9258, + "step": 14140 + }, + { + "epoch": 31.72645739910314, + "grad_norm": 0.31512004137039185, + "learning_rate": 9.085353802664813e-05, + "loss": -0.9264, + "step": 14150 + }, + { + "epoch": 31.748878923766817, + "grad_norm": 0.22691188752651215, + "learning_rate": 9.08376437025432e-05, + "loss": -0.9276, + "step": 14160 + }, + { + "epoch": 31.771300448430495, + "grad_norm": 0.3032132089138031, + "learning_rate": 9.082173697304035e-05, + "loss": -0.9237, + "step": 14170 + }, + { + "epoch": 31.793721973094172, + "grad_norm": 0.27247750759124756, + "learning_rate": 9.080581784297166e-05, + "loss": -0.9283, + "step": 14180 + }, + { + "epoch": 31.816143497757846, + "grad_norm": 0.32305848598480225, + "learning_rate": 9.078988631717291e-05, + "loss": -0.9268, + "step": 14190 + }, + { + "epoch": 31.838565022421523, + "grad_norm": 0.2620583176612854, + "learning_rate": 9.077394240048369e-05, + "loss": -0.9255, + "step": 14200 + }, + { + "epoch": 31.8609865470852, + "grad_norm": 0.21238577365875244, + "learning_rate": 9.075798609774736e-05, + "loss": -0.9219, + "step": 14210 + }, + { + "epoch": 31.883408071748878, + "grad_norm": 0.2968014180660248, + "learning_rate": 9.0742017413811e-05, + "loss": -0.9245, + "step": 14220 + }, + { + "epoch": 31.905829596412556, + "grad_norm": 0.2889772057533264, + "learning_rate": 9.072603635352548e-05, + "loss": -0.9249, + "step": 14230 + }, + { + "epoch": 31.928251121076233, + "grad_norm": 0.4065931439399719, + "learning_rate": 9.071004292174541e-05, + "loss": -0.924, + "step": 14240 + }, + { + "epoch": 31.95067264573991, + "grad_norm": 0.3581114709377289, + "learning_rate": 9.06940371233292e-05, + "loss": -0.9264, + "step": 14250 + }, + { + "epoch": 31.973094170403588, + "grad_norm": 0.2461351603269577, + "learning_rate": 9.067801896313898e-05, + "loss": -0.9225, + "step": 14260 + }, + { + "epoch": 31.995515695067265, + "grad_norm": 0.22863945364952087, + "learning_rate": 9.066198844604064e-05, + "loss": -0.9212, + "step": 14270 + }, + { + "epoch": 32.01793721973094, + "grad_norm": 0.2278958261013031, + "learning_rate": 9.06459455769038e-05, + "loss": -0.9223, + "step": 14280 + }, + { + "epoch": 32.04035874439462, + "grad_norm": 0.2785201966762543, + "learning_rate": 9.062989036060193e-05, + "loss": -0.9147, + "step": 14290 + }, + { + "epoch": 32.062780269058294, + "grad_norm": 0.24673567712306976, + "learning_rate": 9.061382280201212e-05, + "loss": -0.9264, + "step": 14300 + }, + { + "epoch": 32.08520179372197, + "grad_norm": 0.37557631731033325, + "learning_rate": 9.059774290601528e-05, + "loss": -0.9128, + "step": 14310 + }, + { + "epoch": 32.10762331838565, + "grad_norm": 0.21983958780765533, + "learning_rate": 9.058165067749606e-05, + "loss": -0.9201, + "step": 14320 + }, + { + "epoch": 32.130044843049326, + "grad_norm": 0.4068288803100586, + "learning_rate": 9.056554612134288e-05, + "loss": -0.9216, + "step": 14330 + }, + { + "epoch": 32.152466367713004, + "grad_norm": 0.35283055901527405, + "learning_rate": 9.054942924244785e-05, + "loss": -0.924, + "step": 14340 + }, + { + "epoch": 32.17488789237668, + "grad_norm": 0.3083736002445221, + "learning_rate": 9.053330004570686e-05, + "loss": -0.9249, + "step": 14350 + }, + { + "epoch": 32.19730941704036, + "grad_norm": 0.35125863552093506, + "learning_rate": 9.051715853601955e-05, + "loss": -0.9201, + "step": 14360 + }, + { + "epoch": 32.219730941704036, + "grad_norm": 0.1978391855955124, + "learning_rate": 9.050100471828926e-05, + "loss": -0.9194, + "step": 14370 + }, + { + "epoch": 32.24215246636771, + "grad_norm": 0.2501949369907379, + "learning_rate": 9.048483859742311e-05, + "loss": -0.9162, + "step": 14380 + }, + { + "epoch": 32.26457399103139, + "grad_norm": 0.3641577363014221, + "learning_rate": 9.046866017833193e-05, + "loss": -0.9225, + "step": 14390 + }, + { + "epoch": 32.28699551569507, + "grad_norm": 0.3121744990348816, + "learning_rate": 9.045246946593029e-05, + "loss": -0.9223, + "step": 14400 + }, + { + "epoch": 32.309417040358746, + "grad_norm": 0.3109759986400604, + "learning_rate": 9.043626646513652e-05, + "loss": -0.9273, + "step": 14410 + }, + { + "epoch": 32.33183856502242, + "grad_norm": 0.2895858883857727, + "learning_rate": 9.042005118087267e-05, + "loss": -0.9276, + "step": 14420 + }, + { + "epoch": 32.3542600896861, + "grad_norm": 0.35008570551872253, + "learning_rate": 9.040382361806448e-05, + "loss": -0.9227, + "step": 14430 + }, + { + "epoch": 32.37668161434978, + "grad_norm": 0.306016743183136, + "learning_rate": 9.038758378164148e-05, + "loss": -0.9221, + "step": 14440 + }, + { + "epoch": 32.399103139013455, + "grad_norm": 0.36174288392066956, + "learning_rate": 9.037133167653691e-05, + "loss": -0.9233, + "step": 14450 + }, + { + "epoch": 32.42152466367713, + "grad_norm": 0.2340325266122818, + "learning_rate": 9.035506730768771e-05, + "loss": -0.9228, + "step": 14460 + }, + { + "epoch": 32.44394618834081, + "grad_norm": 0.4145239591598511, + "learning_rate": 9.033879068003458e-05, + "loss": -0.9209, + "step": 14470 + }, + { + "epoch": 32.46636771300449, + "grad_norm": 0.26040810346603394, + "learning_rate": 9.032250179852193e-05, + "loss": -0.9238, + "step": 14480 + }, + { + "epoch": 32.488789237668165, + "grad_norm": 0.2663280665874481, + "learning_rate": 9.030620066809787e-05, + "loss": -0.9261, + "step": 14490 + }, + { + "epoch": 32.511210762331835, + "grad_norm": 0.4596174955368042, + "learning_rate": 9.028988729371428e-05, + "loss": -0.9242, + "step": 14500 + }, + { + "epoch": 32.53363228699551, + "grad_norm": 0.3813193142414093, + "learning_rate": 9.027356168032673e-05, + "loss": -0.9221, + "step": 14510 + }, + { + "epoch": 32.55605381165919, + "grad_norm": 0.44436606764793396, + "learning_rate": 9.02572238328945e-05, + "loss": -0.9211, + "step": 14520 + }, + { + "epoch": 32.57847533632287, + "grad_norm": 0.35546717047691345, + "learning_rate": 9.02408737563806e-05, + "loss": -0.9199, + "step": 14530 + }, + { + "epoch": 32.600896860986545, + "grad_norm": 0.31155091524124146, + "learning_rate": 9.022451145575174e-05, + "loss": -0.9237, + "step": 14540 + }, + { + "epoch": 32.62331838565022, + "grad_norm": 0.37279269099235535, + "learning_rate": 9.02081369359784e-05, + "loss": -0.9232, + "step": 14550 + }, + { + "epoch": 32.6457399103139, + "grad_norm": 0.2524457573890686, + "learning_rate": 9.019175020203465e-05, + "loss": -0.9308, + "step": 14560 + }, + { + "epoch": 32.66816143497758, + "grad_norm": 0.2478761076927185, + "learning_rate": 9.017535125889842e-05, + "loss": -0.9277, + "step": 14570 + }, + { + "epoch": 32.690582959641254, + "grad_norm": 0.3067675530910492, + "learning_rate": 9.015894011155124e-05, + "loss": -0.9234, + "step": 14580 + }, + { + "epoch": 32.71300448430493, + "grad_norm": 0.30992552638053894, + "learning_rate": 9.014251676497838e-05, + "loss": -0.9218, + "step": 14590 + }, + { + "epoch": 32.73542600896861, + "grad_norm": 0.1971282958984375, + "learning_rate": 9.012608122416884e-05, + "loss": -0.9271, + "step": 14600 + }, + { + "epoch": 32.75784753363229, + "grad_norm": 0.26075685024261475, + "learning_rate": 9.010963349411529e-05, + "loss": -0.9226, + "step": 14610 + }, + { + "epoch": 32.780269058295964, + "grad_norm": 0.2696171998977661, + "learning_rate": 9.00931735798141e-05, + "loss": -0.9203, + "step": 14620 + }, + { + "epoch": 32.80269058295964, + "grad_norm": 0.35224592685699463, + "learning_rate": 9.00767014862654e-05, + "loss": -0.925, + "step": 14630 + }, + { + "epoch": 32.82511210762332, + "grad_norm": 0.26489338278770447, + "learning_rate": 9.006021721847295e-05, + "loss": -0.9255, + "step": 14640 + }, + { + "epoch": 32.847533632286996, + "grad_norm": 0.39274778962135315, + "learning_rate": 9.004372078144423e-05, + "loss": -0.921, + "step": 14650 + }, + { + "epoch": 32.869955156950674, + "grad_norm": 0.3881152272224426, + "learning_rate": 9.002721218019043e-05, + "loss": -0.9173, + "step": 14660 + }, + { + "epoch": 32.89237668161435, + "grad_norm": 0.44582271575927734, + "learning_rate": 9.001069141972642e-05, + "loss": -0.9263, + "step": 14670 + }, + { + "epoch": 32.91479820627803, + "grad_norm": 0.30409085750579834, + "learning_rate": 8.99941585050708e-05, + "loss": -0.9239, + "step": 14680 + }, + { + "epoch": 32.937219730941706, + "grad_norm": 0.473486989736557, + "learning_rate": 8.997761344124578e-05, + "loss": -0.9252, + "step": 14690 + }, + { + "epoch": 32.95964125560538, + "grad_norm": 0.35115140676498413, + "learning_rate": 8.996105623327737e-05, + "loss": -0.9265, + "step": 14700 + }, + { + "epoch": 32.98206278026906, + "grad_norm": 0.41377493739128113, + "learning_rate": 8.994448688619517e-05, + "loss": -0.927, + "step": 14710 + }, + { + "epoch": 33.00448430493274, + "grad_norm": 0.4658704102039337, + "learning_rate": 8.992790540503253e-05, + "loss": -0.922, + "step": 14720 + }, + { + "epoch": 33.026905829596416, + "grad_norm": 0.2852845788002014, + "learning_rate": 8.991131179482648e-05, + "loss": -0.9272, + "step": 14730 + }, + { + "epoch": 33.04932735426009, + "grad_norm": 0.42247694730758667, + "learning_rate": 8.989470606061768e-05, + "loss": -0.9201, + "step": 14740 + }, + { + "epoch": 33.07174887892376, + "grad_norm": 0.25571495294570923, + "learning_rate": 8.987808820745056e-05, + "loss": -0.9207, + "step": 14750 + }, + { + "epoch": 33.09417040358744, + "grad_norm": 0.4212527573108673, + "learning_rate": 8.986145824037315e-05, + "loss": -0.924, + "step": 14760 + }, + { + "epoch": 33.11659192825112, + "grad_norm": 0.26904720067977905, + "learning_rate": 8.984481616443721e-05, + "loss": -0.9126, + "step": 14770 + }, + { + "epoch": 33.139013452914796, + "grad_norm": 0.32521116733551025, + "learning_rate": 8.982816198469815e-05, + "loss": -0.9227, + "step": 14780 + }, + { + "epoch": 33.16143497757847, + "grad_norm": 0.2772562503814697, + "learning_rate": 8.98114957062151e-05, + "loss": -0.9203, + "step": 14790 + }, + { + "epoch": 33.18385650224215, + "grad_norm": 0.3203803598880768, + "learning_rate": 8.97948173340508e-05, + "loss": -0.9287, + "step": 14800 + }, + { + "epoch": 33.20627802690583, + "grad_norm": 0.27189645171165466, + "learning_rate": 8.977812687327172e-05, + "loss": -0.9299, + "step": 14810 + }, + { + "epoch": 33.228699551569505, + "grad_norm": 0.34724366664886475, + "learning_rate": 8.976142432894798e-05, + "loss": -0.9235, + "step": 14820 + }, + { + "epoch": 33.25112107623318, + "grad_norm": 0.30170387029647827, + "learning_rate": 8.974470970615336e-05, + "loss": -0.9189, + "step": 14830 + }, + { + "epoch": 33.27354260089686, + "grad_norm": 0.2948788106441498, + "learning_rate": 8.972798300996534e-05, + "loss": -0.9224, + "step": 14840 + }, + { + "epoch": 33.29596412556054, + "grad_norm": 0.32176586985588074, + "learning_rate": 8.971124424546504e-05, + "loss": -0.9247, + "step": 14850 + }, + { + "epoch": 33.318385650224215, + "grad_norm": 0.3420780599117279, + "learning_rate": 8.969449341773724e-05, + "loss": -0.9226, + "step": 14860 + }, + { + "epoch": 33.34080717488789, + "grad_norm": 0.25319796800613403, + "learning_rate": 8.967773053187042e-05, + "loss": -0.9186, + "step": 14870 + }, + { + "epoch": 33.36322869955157, + "grad_norm": 0.23691703379154205, + "learning_rate": 8.966095559295668e-05, + "loss": -0.9257, + "step": 14880 + }, + { + "epoch": 33.38565022421525, + "grad_norm": 0.3341802656650543, + "learning_rate": 8.964416860609184e-05, + "loss": -0.9228, + "step": 14890 + }, + { + "epoch": 33.408071748878925, + "grad_norm": 0.3300033509731293, + "learning_rate": 8.962736957637532e-05, + "loss": -0.9292, + "step": 14900 + }, + { + "epoch": 33.4304932735426, + "grad_norm": 0.26305627822875977, + "learning_rate": 8.96105585089102e-05, + "loss": -0.927, + "step": 14910 + }, + { + "epoch": 33.45291479820628, + "grad_norm": 0.414024293422699, + "learning_rate": 8.959373540880329e-05, + "loss": -0.9165, + "step": 14920 + }, + { + "epoch": 33.47533632286996, + "grad_norm": 0.3884960412979126, + "learning_rate": 8.957690028116495e-05, + "loss": -0.917, + "step": 14930 + }, + { + "epoch": 33.497757847533634, + "grad_norm": 0.2846263647079468, + "learning_rate": 8.956005313110928e-05, + "loss": -0.9254, + "step": 14940 + }, + { + "epoch": 33.52017937219731, + "grad_norm": 0.5254570841789246, + "learning_rate": 8.9543193963754e-05, + "loss": -0.9218, + "step": 14950 + }, + { + "epoch": 33.54260089686099, + "grad_norm": 0.4713185727596283, + "learning_rate": 8.952632278422048e-05, + "loss": -0.9252, + "step": 14960 + }, + { + "epoch": 33.56502242152467, + "grad_norm": 0.39015620946884155, + "learning_rate": 8.95094395976337e-05, + "loss": -0.9164, + "step": 14970 + }, + { + "epoch": 33.587443946188344, + "grad_norm": 0.23587678372859955, + "learning_rate": 8.949254440912239e-05, + "loss": -0.9222, + "step": 14980 + }, + { + "epoch": 33.609865470852014, + "grad_norm": 0.30806994438171387, + "learning_rate": 8.94756372238188e-05, + "loss": -0.9204, + "step": 14990 + }, + { + "epoch": 33.63228699551569, + "grad_norm": 0.29809868335723877, + "learning_rate": 8.945871804685892e-05, + "loss": -0.926, + "step": 15000 + }, + { + "epoch": 33.65470852017937, + "grad_norm": 0.2575610876083374, + "learning_rate": 8.944178688338236e-05, + "loss": -0.9284, + "step": 15010 + }, + { + "epoch": 33.67713004484305, + "grad_norm": 0.3311762809753418, + "learning_rate": 8.942484373853233e-05, + "loss": -0.9255, + "step": 15020 + }, + { + "epoch": 33.699551569506724, + "grad_norm": 0.4039982855319977, + "learning_rate": 8.940788861745572e-05, + "loss": -0.9214, + "step": 15030 + }, + { + "epoch": 33.7219730941704, + "grad_norm": 0.25952044129371643, + "learning_rate": 8.939092152530308e-05, + "loss": -0.9247, + "step": 15040 + }, + { + "epoch": 33.74439461883408, + "grad_norm": 0.30484646558761597, + "learning_rate": 8.937394246722853e-05, + "loss": -0.924, + "step": 15050 + }, + { + "epoch": 33.766816143497756, + "grad_norm": 0.3383737802505493, + "learning_rate": 8.935695144838984e-05, + "loss": -0.9204, + "step": 15060 + }, + { + "epoch": 33.789237668161434, + "grad_norm": 0.45294034481048584, + "learning_rate": 8.933994847394849e-05, + "loss": -0.923, + "step": 15070 + }, + { + "epoch": 33.81165919282511, + "grad_norm": 0.3865688443183899, + "learning_rate": 8.932293354906949e-05, + "loss": -0.9247, + "step": 15080 + }, + { + "epoch": 33.83408071748879, + "grad_norm": 0.3100355863571167, + "learning_rate": 8.930590667892153e-05, + "loss": -0.9219, + "step": 15090 + }, + { + "epoch": 33.856502242152466, + "grad_norm": 0.31335097551345825, + "learning_rate": 8.928886786867696e-05, + "loss": -0.9235, + "step": 15100 + }, + { + "epoch": 33.87892376681614, + "grad_norm": 0.3539462685585022, + "learning_rate": 8.927181712351168e-05, + "loss": -0.9278, + "step": 15110 + }, + { + "epoch": 33.90134529147982, + "grad_norm": 0.29777008295059204, + "learning_rate": 8.925475444860527e-05, + "loss": -0.9245, + "step": 15120 + }, + { + "epoch": 33.9237668161435, + "grad_norm": 0.28348252177238464, + "learning_rate": 8.923767984914092e-05, + "loss": -0.9329, + "step": 15130 + }, + { + "epoch": 33.946188340807176, + "grad_norm": 0.27758803963661194, + "learning_rate": 8.922059333030545e-05, + "loss": -0.9175, + "step": 15140 + }, + { + "epoch": 33.96860986547085, + "grad_norm": 0.21998494863510132, + "learning_rate": 8.920349489728928e-05, + "loss": -0.9247, + "step": 15150 + }, + { + "epoch": 33.99103139013453, + "grad_norm": 0.38339394330978394, + "learning_rate": 8.918638455528646e-05, + "loss": -0.9182, + "step": 15160 + }, + { + "epoch": 34.01345291479821, + "grad_norm": 0.2567254602909088, + "learning_rate": 8.916926230949468e-05, + "loss": -0.9263, + "step": 15170 + }, + { + "epoch": 34.035874439461885, + "grad_norm": 0.19863934814929962, + "learning_rate": 8.915212816511522e-05, + "loss": -0.9275, + "step": 15180 + }, + { + "epoch": 34.05829596412556, + "grad_norm": 0.3606928288936615, + "learning_rate": 8.913498212735296e-05, + "loss": -0.925, + "step": 15190 + }, + { + "epoch": 34.08071748878924, + "grad_norm": 0.2160269021987915, + "learning_rate": 8.911782420141643e-05, + "loss": -0.9253, + "step": 15200 + }, + { + "epoch": 34.10313901345292, + "grad_norm": 0.28120502829551697, + "learning_rate": 8.910065439251775e-05, + "loss": -0.9263, + "step": 15210 + }, + { + "epoch": 34.125560538116595, + "grad_norm": 0.40550974011421204, + "learning_rate": 8.908347270587268e-05, + "loss": -0.9274, + "step": 15220 + }, + { + "epoch": 34.14798206278027, + "grad_norm": 0.6087920069694519, + "learning_rate": 8.906627914670054e-05, + "loss": -0.9227, + "step": 15230 + }, + { + "epoch": 34.17040358744394, + "grad_norm": 0.23678921163082123, + "learning_rate": 8.904907372022427e-05, + "loss": -0.9299, + "step": 15240 + }, + { + "epoch": 34.19282511210762, + "grad_norm": 0.39266088604927063, + "learning_rate": 8.903185643167042e-05, + "loss": -0.9258, + "step": 15250 + }, + { + "epoch": 34.2152466367713, + "grad_norm": 0.3132597506046295, + "learning_rate": 8.901462728626919e-05, + "loss": -0.9284, + "step": 15260 + }, + { + "epoch": 34.237668161434975, + "grad_norm": 0.357388973236084, + "learning_rate": 8.899738628925429e-05, + "loss": -0.9228, + "step": 15270 + }, + { + "epoch": 34.26008968609865, + "grad_norm": 0.3456941246986389, + "learning_rate": 8.898013344586312e-05, + "loss": -0.9276, + "step": 15280 + }, + { + "epoch": 34.28251121076233, + "grad_norm": 0.30235555768013, + "learning_rate": 8.896286876133661e-05, + "loss": -0.9212, + "step": 15290 + }, + { + "epoch": 34.30493273542601, + "grad_norm": 0.3421819508075714, + "learning_rate": 8.894559224091933e-05, + "loss": -0.9268, + "step": 15300 + }, + { + "epoch": 34.327354260089685, + "grad_norm": 0.37653571367263794, + "learning_rate": 8.892830388985942e-05, + "loss": -0.9254, + "step": 15310 + }, + { + "epoch": 34.34977578475336, + "grad_norm": 0.23827512562274933, + "learning_rate": 8.891100371340864e-05, + "loss": -0.93, + "step": 15320 + }, + { + "epoch": 34.37219730941704, + "grad_norm": 0.3839574158191681, + "learning_rate": 8.889369171682231e-05, + "loss": -0.9218, + "step": 15330 + }, + { + "epoch": 34.39461883408072, + "grad_norm": 0.31716543436050415, + "learning_rate": 8.887636790535936e-05, + "loss": -0.9257, + "step": 15340 + }, + { + "epoch": 34.417040358744394, + "grad_norm": 0.3721698820590973, + "learning_rate": 8.885903228428231e-05, + "loss": -0.9239, + "step": 15350 + }, + { + "epoch": 34.43946188340807, + "grad_norm": 0.4132646322250366, + "learning_rate": 8.884168485885727e-05, + "loss": -0.9245, + "step": 15360 + }, + { + "epoch": 34.46188340807175, + "grad_norm": 0.3445843756198883, + "learning_rate": 8.882432563435393e-05, + "loss": -0.9216, + "step": 15370 + }, + { + "epoch": 34.48430493273543, + "grad_norm": 0.32519710063934326, + "learning_rate": 8.880695461604556e-05, + "loss": -0.9259, + "step": 15380 + }, + { + "epoch": 34.506726457399104, + "grad_norm": 0.3143320083618164, + "learning_rate": 8.878957180920901e-05, + "loss": -0.9251, + "step": 15390 + }, + { + "epoch": 34.52914798206278, + "grad_norm": 0.38420990109443665, + "learning_rate": 8.877217721912473e-05, + "loss": -0.9264, + "step": 15400 + }, + { + "epoch": 34.55156950672646, + "grad_norm": 0.3493296802043915, + "learning_rate": 8.875477085107673e-05, + "loss": -0.924, + "step": 15410 + }, + { + "epoch": 34.573991031390136, + "grad_norm": 0.34606531262397766, + "learning_rate": 8.87373527103526e-05, + "loss": -0.9229, + "step": 15420 + }, + { + "epoch": 34.596412556053814, + "grad_norm": 0.22544297575950623, + "learning_rate": 8.871992280224353e-05, + "loss": -0.9279, + "step": 15430 + }, + { + "epoch": 34.61883408071749, + "grad_norm": 0.33344265818595886, + "learning_rate": 8.870248113204422e-05, + "loss": -0.9266, + "step": 15440 + }, + { + "epoch": 34.64125560538117, + "grad_norm": 0.25298869609832764, + "learning_rate": 8.868502770505306e-05, + "loss": -0.9238, + "step": 15450 + }, + { + "epoch": 34.663677130044846, + "grad_norm": 0.21550607681274414, + "learning_rate": 8.86675625265719e-05, + "loss": -0.9269, + "step": 15460 + }, + { + "epoch": 34.68609865470852, + "grad_norm": 0.28388136625289917, + "learning_rate": 8.865008560190618e-05, + "loss": -0.9291, + "step": 15470 + }, + { + "epoch": 34.7085201793722, + "grad_norm": 0.41405361890792847, + "learning_rate": 8.863259693636496e-05, + "loss": -0.9208, + "step": 15480 + }, + { + "epoch": 34.73094170403587, + "grad_norm": 0.4656422734260559, + "learning_rate": 8.861509653526083e-05, + "loss": -0.9246, + "step": 15490 + }, + { + "epoch": 34.75336322869955, + "grad_norm": 0.29272013902664185, + "learning_rate": 8.859758440390993e-05, + "loss": -0.9266, + "step": 15500 + }, + { + "epoch": 34.775784753363226, + "grad_norm": 0.33499565720558167, + "learning_rate": 8.858006054763202e-05, + "loss": -0.9273, + "step": 15510 + }, + { + "epoch": 34.7982062780269, + "grad_norm": 0.4644489884376526, + "learning_rate": 8.856252497175035e-05, + "loss": -0.9276, + "step": 15520 + }, + { + "epoch": 34.82062780269058, + "grad_norm": 0.20196761190891266, + "learning_rate": 8.854497768159178e-05, + "loss": -0.9328, + "step": 15530 + }, + { + "epoch": 34.84304932735426, + "grad_norm": 0.39608263969421387, + "learning_rate": 8.852741868248671e-05, + "loss": -0.9267, + "step": 15540 + }, + { + "epoch": 34.865470852017935, + "grad_norm": 0.29710856080055237, + "learning_rate": 8.85098479797691e-05, + "loss": -0.9232, + "step": 15550 + }, + { + "epoch": 34.88789237668161, + "grad_norm": 0.4058378338813782, + "learning_rate": 8.849226557877646e-05, + "loss": -0.9246, + "step": 15560 + }, + { + "epoch": 34.91031390134529, + "grad_norm": 0.314578652381897, + "learning_rate": 8.84746714848499e-05, + "loss": -0.9281, + "step": 15570 + }, + { + "epoch": 34.93273542600897, + "grad_norm": 0.28228721022605896, + "learning_rate": 8.845706570333397e-05, + "loss": -0.9293, + "step": 15580 + }, + { + "epoch": 34.955156950672645, + "grad_norm": 0.2868085205554962, + "learning_rate": 8.84394482395769e-05, + "loss": -0.9209, + "step": 15590 + }, + { + "epoch": 34.97757847533632, + "grad_norm": 0.31324562430381775, + "learning_rate": 8.842181909893038e-05, + "loss": -0.9321, + "step": 15600 + }, + { + "epoch": 35.0, + "grad_norm": 0.34221145510673523, + "learning_rate": 8.840417828674969e-05, + "loss": -0.9265, + "step": 15610 + }, + { + "epoch": 35.02242152466368, + "grad_norm": 0.21527695655822754, + "learning_rate": 8.838652580839364e-05, + "loss": -0.9236, + "step": 15620 + }, + { + "epoch": 35.044843049327355, + "grad_norm": 0.3586133122444153, + "learning_rate": 8.836886166922458e-05, + "loss": -0.9156, + "step": 15630 + }, + { + "epoch": 35.06726457399103, + "grad_norm": 0.24530191719532013, + "learning_rate": 8.835118587460844e-05, + "loss": -0.9274, + "step": 15640 + }, + { + "epoch": 35.08968609865471, + "grad_norm": 0.3282879590988159, + "learning_rate": 8.83334984299146e-05, + "loss": -0.9187, + "step": 15650 + }, + { + "epoch": 35.11210762331839, + "grad_norm": 0.3155154883861542, + "learning_rate": 8.83157993405161e-05, + "loss": -0.9174, + "step": 15660 + }, + { + "epoch": 35.134529147982065, + "grad_norm": 0.28764188289642334, + "learning_rate": 8.829808861178943e-05, + "loss": -0.9277, + "step": 15670 + }, + { + "epoch": 35.15695067264574, + "grad_norm": 0.24011920392513275, + "learning_rate": 8.828036624911464e-05, + "loss": -0.9241, + "step": 15680 + }, + { + "epoch": 35.17937219730942, + "grad_norm": 0.2596661150455475, + "learning_rate": 8.826263225787532e-05, + "loss": -0.9273, + "step": 15690 + }, + { + "epoch": 35.2017937219731, + "grad_norm": 0.42742836475372314, + "learning_rate": 8.824488664345858e-05, + "loss": -0.9283, + "step": 15700 + }, + { + "epoch": 35.224215246636774, + "grad_norm": 0.2790358066558838, + "learning_rate": 8.822712941125508e-05, + "loss": -0.9301, + "step": 15710 + }, + { + "epoch": 35.24663677130045, + "grad_norm": 0.2431556135416031, + "learning_rate": 8.820936056665898e-05, + "loss": -0.9243, + "step": 15720 + }, + { + "epoch": 35.26905829596413, + "grad_norm": 0.31711000204086304, + "learning_rate": 8.819158011506801e-05, + "loss": -0.9276, + "step": 15730 + }, + { + "epoch": 35.2914798206278, + "grad_norm": 0.3139680027961731, + "learning_rate": 8.81737880618834e-05, + "loss": -0.9294, + "step": 15740 + }, + { + "epoch": 35.31390134529148, + "grad_norm": 0.1876002699136734, + "learning_rate": 8.815598441250987e-05, + "loss": -0.9266, + "step": 15750 + }, + { + "epoch": 35.336322869955154, + "grad_norm": 0.3171076476573944, + "learning_rate": 8.813816917235576e-05, + "loss": -0.9286, + "step": 15760 + }, + { + "epoch": 35.35874439461883, + "grad_norm": 0.371596097946167, + "learning_rate": 8.812034234683282e-05, + "loss": -0.9251, + "step": 15770 + }, + { + "epoch": 35.38116591928251, + "grad_norm": 0.2931230664253235, + "learning_rate": 8.810250394135637e-05, + "loss": -0.9281, + "step": 15780 + }, + { + "epoch": 35.403587443946186, + "grad_norm": 0.2966974675655365, + "learning_rate": 8.808465396134529e-05, + "loss": -0.9213, + "step": 15790 + }, + { + "epoch": 35.426008968609864, + "grad_norm": 0.38863906264305115, + "learning_rate": 8.806679241222189e-05, + "loss": -0.9295, + "step": 15800 + }, + { + "epoch": 35.44843049327354, + "grad_norm": 0.3326304256916046, + "learning_rate": 8.804891929941203e-05, + "loss": -0.9204, + "step": 15810 + }, + { + "epoch": 35.47085201793722, + "grad_norm": 0.30911362171173096, + "learning_rate": 8.803103462834514e-05, + "loss": -0.9233, + "step": 15820 + }, + { + "epoch": 35.493273542600896, + "grad_norm": 0.36213335394859314, + "learning_rate": 8.801313840445408e-05, + "loss": -0.9258, + "step": 15830 + }, + { + "epoch": 35.51569506726457, + "grad_norm": 0.22208049893379211, + "learning_rate": 8.799523063317524e-05, + "loss": -0.9247, + "step": 15840 + }, + { + "epoch": 35.53811659192825, + "grad_norm": 0.2902582585811615, + "learning_rate": 8.797731131994854e-05, + "loss": -0.9302, + "step": 15850 + }, + { + "epoch": 35.56053811659193, + "grad_norm": 0.2860928475856781, + "learning_rate": 8.795938047021739e-05, + "loss": -0.9216, + "step": 15860 + }, + { + "epoch": 35.582959641255606, + "grad_norm": 0.26721855998039246, + "learning_rate": 8.794143808942872e-05, + "loss": -0.9246, + "step": 15870 + }, + { + "epoch": 35.60538116591928, + "grad_norm": 0.33788201212882996, + "learning_rate": 8.792348418303296e-05, + "loss": -0.9241, + "step": 15880 + }, + { + "epoch": 35.62780269058296, + "grad_norm": 0.33920973539352417, + "learning_rate": 8.790551875648398e-05, + "loss": -0.9183, + "step": 15890 + }, + { + "epoch": 35.65022421524664, + "grad_norm": 0.38164055347442627, + "learning_rate": 8.788754181523926e-05, + "loss": -0.9235, + "step": 15900 + }, + { + "epoch": 35.672645739910315, + "grad_norm": 0.28862419724464417, + "learning_rate": 8.78695533647597e-05, + "loss": -0.9251, + "step": 15910 + }, + { + "epoch": 35.69506726457399, + "grad_norm": 0.31335359811782837, + "learning_rate": 8.785155341050972e-05, + "loss": -0.9168, + "step": 15920 + }, + { + "epoch": 35.71748878923767, + "grad_norm": 0.3062214255332947, + "learning_rate": 8.783354195795721e-05, + "loss": -0.9259, + "step": 15930 + }, + { + "epoch": 35.73991031390135, + "grad_norm": 0.36063310503959656, + "learning_rate": 8.78155190125736e-05, + "loss": -0.9244, + "step": 15940 + }, + { + "epoch": 35.762331838565025, + "grad_norm": 0.3445437550544739, + "learning_rate": 8.779748457983378e-05, + "loss": -0.9218, + "step": 15950 + }, + { + "epoch": 35.7847533632287, + "grad_norm": 0.30902060866355896, + "learning_rate": 8.777943866521612e-05, + "loss": -0.9153, + "step": 15960 + }, + { + "epoch": 35.80717488789238, + "grad_norm": 0.25495126843452454, + "learning_rate": 8.77613812742025e-05, + "loss": -0.9316, + "step": 15970 + }, + { + "epoch": 35.82959641255606, + "grad_norm": 0.37317922711372375, + "learning_rate": 8.774331241227829e-05, + "loss": -0.9208, + "step": 15980 + }, + { + "epoch": 35.85201793721973, + "grad_norm": 0.2844065725803375, + "learning_rate": 8.772523208493232e-05, + "loss": -0.9265, + "step": 15990 + }, + { + "epoch": 35.874439461883405, + "grad_norm": 0.38463765382766724, + "learning_rate": 8.770714029765692e-05, + "loss": -0.9214, + "step": 16000 + }, + { + "epoch": 35.89686098654708, + "grad_norm": 0.32231348752975464, + "learning_rate": 8.768903705594789e-05, + "loss": -0.9235, + "step": 16010 + }, + { + "epoch": 35.91928251121076, + "grad_norm": 0.2787666618824005, + "learning_rate": 8.767092236530453e-05, + "loss": -0.9263, + "step": 16020 + }, + { + "epoch": 35.94170403587444, + "grad_norm": 0.3749614953994751, + "learning_rate": 8.76527962312296e-05, + "loss": -0.9221, + "step": 16030 + }, + { + "epoch": 35.964125560538115, + "grad_norm": 0.29022929072380066, + "learning_rate": 8.763465865922934e-05, + "loss": -0.9261, + "step": 16040 + }, + { + "epoch": 35.98654708520179, + "grad_norm": 0.27556782960891724, + "learning_rate": 8.761650965481347e-05, + "loss": -0.9242, + "step": 16050 + }, + { + "epoch": 36.00896860986547, + "grad_norm": 0.3211730122566223, + "learning_rate": 8.759834922349516e-05, + "loss": -0.9267, + "step": 16060 + }, + { + "epoch": 36.03139013452915, + "grad_norm": 0.3373742699623108, + "learning_rate": 8.758017737079108e-05, + "loss": -0.9262, + "step": 16070 + }, + { + "epoch": 36.053811659192824, + "grad_norm": 0.5570143461227417, + "learning_rate": 8.756199410222137e-05, + "loss": -0.9291, + "step": 16080 + }, + { + "epoch": 36.0762331838565, + "grad_norm": 0.3173503577709198, + "learning_rate": 8.754379942330963e-05, + "loss": -0.9284, + "step": 16090 + }, + { + "epoch": 36.09865470852018, + "grad_norm": 0.21547873318195343, + "learning_rate": 8.75255933395829e-05, + "loss": -0.9276, + "step": 16100 + }, + { + "epoch": 36.12107623318386, + "grad_norm": 0.326244056224823, + "learning_rate": 8.750737585657171e-05, + "loss": -0.9229, + "step": 16110 + }, + { + "epoch": 36.143497757847534, + "grad_norm": 0.2898331582546234, + "learning_rate": 8.748914697981008e-05, + "loss": -0.9265, + "step": 16120 + }, + { + "epoch": 36.16591928251121, + "grad_norm": 0.3394623398780823, + "learning_rate": 8.747090671483542e-05, + "loss": -0.9259, + "step": 16130 + }, + { + "epoch": 36.18834080717489, + "grad_norm": 0.24108390510082245, + "learning_rate": 8.745265506718869e-05, + "loss": -0.9324, + "step": 16140 + }, + { + "epoch": 36.210762331838566, + "grad_norm": 0.23422852158546448, + "learning_rate": 8.74343920424142e-05, + "loss": -0.9293, + "step": 16150 + }, + { + "epoch": 36.233183856502244, + "grad_norm": 0.4379168748855591, + "learning_rate": 8.741611764605982e-05, + "loss": -0.9272, + "step": 16160 + }, + { + "epoch": 36.25560538116592, + "grad_norm": 0.23874209821224213, + "learning_rate": 8.739783188367682e-05, + "loss": -0.9284, + "step": 16170 + }, + { + "epoch": 36.2780269058296, + "grad_norm": 0.38031384348869324, + "learning_rate": 8.737953476081991e-05, + "loss": -0.9316, + "step": 16180 + }, + { + "epoch": 36.300448430493276, + "grad_norm": 0.2814386188983917, + "learning_rate": 8.73612262830473e-05, + "loss": -0.9274, + "step": 16190 + }, + { + "epoch": 36.32286995515695, + "grad_norm": 0.3545658588409424, + "learning_rate": 8.734290645592061e-05, + "loss": -0.9304, + "step": 16200 + }, + { + "epoch": 36.34529147982063, + "grad_norm": 0.30190518498420715, + "learning_rate": 8.732457528500493e-05, + "loss": -0.9288, + "step": 16210 + }, + { + "epoch": 36.36771300448431, + "grad_norm": 0.3206964135169983, + "learning_rate": 8.730623277586875e-05, + "loss": -0.9292, + "step": 16220 + }, + { + "epoch": 36.390134529147986, + "grad_norm": 0.23844139277935028, + "learning_rate": 8.72878789340841e-05, + "loss": -0.9303, + "step": 16230 + }, + { + "epoch": 36.412556053811656, + "grad_norm": 0.23416011035442352, + "learning_rate": 8.726951376522635e-05, + "loss": -0.9248, + "step": 16240 + }, + { + "epoch": 36.43497757847533, + "grad_norm": 0.29617324471473694, + "learning_rate": 8.725113727487435e-05, + "loss": -0.9263, + "step": 16250 + }, + { + "epoch": 36.45739910313901, + "grad_norm": 0.3127666413784027, + "learning_rate": 8.723274946861042e-05, + "loss": -0.9307, + "step": 16260 + }, + { + "epoch": 36.47982062780269, + "grad_norm": 0.3408046066761017, + "learning_rate": 8.721435035202026e-05, + "loss": -0.9293, + "step": 16270 + }, + { + "epoch": 36.502242152466366, + "grad_norm": 0.509017288684845, + "learning_rate": 8.719593993069306e-05, + "loss": -0.9199, + "step": 16280 + }, + { + "epoch": 36.52466367713004, + "grad_norm": 0.36451613903045654, + "learning_rate": 8.717751821022139e-05, + "loss": -0.9275, + "step": 16290 + }, + { + "epoch": 36.54708520179372, + "grad_norm": 0.3447364866733551, + "learning_rate": 8.715908519620134e-05, + "loss": -0.926, + "step": 16300 + }, + { + "epoch": 36.5695067264574, + "grad_norm": 0.2636013925075531, + "learning_rate": 8.71406408942323e-05, + "loss": -0.9253, + "step": 16310 + }, + { + "epoch": 36.591928251121075, + "grad_norm": 0.2295209914445877, + "learning_rate": 8.712218530991723e-05, + "loss": -0.9299, + "step": 16320 + }, + { + "epoch": 36.61434977578475, + "grad_norm": 0.375614732503891, + "learning_rate": 8.710371844886241e-05, + "loss": -0.9295, + "step": 16330 + }, + { + "epoch": 36.63677130044843, + "grad_norm": 0.40126511454582214, + "learning_rate": 8.708524031667758e-05, + "loss": -0.9272, + "step": 16340 + }, + { + "epoch": 36.65919282511211, + "grad_norm": 0.44090715050697327, + "learning_rate": 8.706675091897592e-05, + "loss": -0.9245, + "step": 16350 + }, + { + "epoch": 36.681614349775785, + "grad_norm": 0.3283575773239136, + "learning_rate": 8.704825026137404e-05, + "loss": -0.9239, + "step": 16360 + }, + { + "epoch": 36.70403587443946, + "grad_norm": 0.24735936522483826, + "learning_rate": 8.702973834949192e-05, + "loss": -0.9226, + "step": 16370 + }, + { + "epoch": 36.72645739910314, + "grad_norm": 0.35984480381011963, + "learning_rate": 8.701121518895301e-05, + "loss": -0.9282, + "step": 16380 + }, + { + "epoch": 36.74887892376682, + "grad_norm": 0.4155043065547943, + "learning_rate": 8.699268078538414e-05, + "loss": -0.927, + "step": 16390 + }, + { + "epoch": 36.771300448430495, + "grad_norm": 0.3467298448085785, + "learning_rate": 8.69741351444156e-05, + "loss": -0.9305, + "step": 16400 + }, + { + "epoch": 36.79372197309417, + "grad_norm": 0.24807853996753693, + "learning_rate": 8.695557827168101e-05, + "loss": -0.9313, + "step": 16410 + }, + { + "epoch": 36.81614349775785, + "grad_norm": 0.33253175020217896, + "learning_rate": 8.693701017281753e-05, + "loss": -0.9257, + "step": 16420 + }, + { + "epoch": 36.83856502242153, + "grad_norm": 0.49798324704170227, + "learning_rate": 8.691843085346563e-05, + "loss": -0.9233, + "step": 16430 + }, + { + "epoch": 36.860986547085204, + "grad_norm": 0.31307074427604675, + "learning_rate": 8.689984031926919e-05, + "loss": -0.9306, + "step": 16440 + }, + { + "epoch": 36.88340807174888, + "grad_norm": 0.19431275129318237, + "learning_rate": 8.688123857587555e-05, + "loss": -0.9288, + "step": 16450 + }, + { + "epoch": 36.90582959641256, + "grad_norm": 0.2634895443916321, + "learning_rate": 8.686262562893544e-05, + "loss": -0.9237, + "step": 16460 + }, + { + "epoch": 36.92825112107624, + "grad_norm": 0.3272128701210022, + "learning_rate": 8.684400148410294e-05, + "loss": -0.9334, + "step": 16470 + }, + { + "epoch": 36.95067264573991, + "grad_norm": 0.24905414879322052, + "learning_rate": 8.682536614703562e-05, + "loss": -0.9293, + "step": 16480 + }, + { + "epoch": 36.973094170403584, + "grad_norm": 0.2867535650730133, + "learning_rate": 8.680671962339437e-05, + "loss": -0.9298, + "step": 16490 + }, + { + "epoch": 36.99551569506726, + "grad_norm": 0.28927698731422424, + "learning_rate": 8.678806191884352e-05, + "loss": -0.9308, + "step": 16500 + }, + { + "epoch": 37.01793721973094, + "grad_norm": 0.23407325148582458, + "learning_rate": 8.67693930390508e-05, + "loss": -0.9329, + "step": 16510 + }, + { + "epoch": 37.04035874439462, + "grad_norm": 0.27185651659965515, + "learning_rate": 8.67507129896873e-05, + "loss": -0.9296, + "step": 16520 + }, + { + "epoch": 37.062780269058294, + "grad_norm": 0.38487711548805237, + "learning_rate": 8.673202177642757e-05, + "loss": -0.922, + "step": 16530 + }, + { + "epoch": 37.08520179372197, + "grad_norm": 0.32057318091392517, + "learning_rate": 8.671331940494945e-05, + "loss": -0.9201, + "step": 16540 + }, + { + "epoch": 37.10762331838565, + "grad_norm": 0.28782588243484497, + "learning_rate": 8.669460588093427e-05, + "loss": -0.9245, + "step": 16550 + }, + { + "epoch": 37.130044843049326, + "grad_norm": 0.18032851815223694, + "learning_rate": 8.667588121006667e-05, + "loss": -0.9301, + "step": 16560 + }, + { + "epoch": 37.152466367713004, + "grad_norm": 0.29489782452583313, + "learning_rate": 8.665714539803475e-05, + "loss": -0.9251, + "step": 16570 + }, + { + "epoch": 37.17488789237668, + "grad_norm": 0.3840877115726471, + "learning_rate": 8.663839845052993e-05, + "loss": -0.9333, + "step": 16580 + }, + { + "epoch": 37.19730941704036, + "grad_norm": 0.36690565943717957, + "learning_rate": 8.661964037324703e-05, + "loss": -0.9303, + "step": 16590 + }, + { + "epoch": 37.219730941704036, + "grad_norm": 0.27224695682525635, + "learning_rate": 8.660087117188427e-05, + "loss": -0.9361, + "step": 16600 + }, + { + "epoch": 37.24215246636771, + "grad_norm": 0.39522096514701843, + "learning_rate": 8.658209085214325e-05, + "loss": -0.9277, + "step": 16610 + }, + { + "epoch": 37.26457399103139, + "grad_norm": 0.34995999932289124, + "learning_rate": 8.656329941972891e-05, + "loss": -0.9249, + "step": 16620 + }, + { + "epoch": 37.28699551569507, + "grad_norm": 0.29652178287506104, + "learning_rate": 8.654449688034963e-05, + "loss": -0.9274, + "step": 16630 + }, + { + "epoch": 37.309417040358746, + "grad_norm": 0.39177408814430237, + "learning_rate": 8.652568323971706e-05, + "loss": -0.9284, + "step": 16640 + }, + { + "epoch": 37.33183856502242, + "grad_norm": 0.3051941692829132, + "learning_rate": 8.650685850354636e-05, + "loss": -0.931, + "step": 16650 + }, + { + "epoch": 37.3542600896861, + "grad_norm": 0.2997993230819702, + "learning_rate": 8.648802267755593e-05, + "loss": -0.9267, + "step": 16660 + }, + { + "epoch": 37.37668161434978, + "grad_norm": 0.3595852255821228, + "learning_rate": 8.646917576746764e-05, + "loss": -0.9302, + "step": 16670 + }, + { + "epoch": 37.399103139013455, + "grad_norm": 0.2779640853404999, + "learning_rate": 8.645031777900666e-05, + "loss": -0.9296, + "step": 16680 + }, + { + "epoch": 37.42152466367713, + "grad_norm": 0.3427213132381439, + "learning_rate": 8.643144871790154e-05, + "loss": -0.9246, + "step": 16690 + }, + { + "epoch": 37.44394618834081, + "grad_norm": 0.2871159613132477, + "learning_rate": 8.641256858988424e-05, + "loss": -0.9283, + "step": 16700 + }, + { + "epoch": 37.46636771300449, + "grad_norm": 0.32238146662712097, + "learning_rate": 8.639367740069e-05, + "loss": -0.9291, + "step": 16710 + }, + { + "epoch": 37.488789237668165, + "grad_norm": 0.3046554923057556, + "learning_rate": 8.63747751560575e-05, + "loss": -0.9275, + "step": 16720 + }, + { + "epoch": 37.511210762331835, + "grad_norm": 0.32385382056236267, + "learning_rate": 8.635586186172871e-05, + "loss": -0.9291, + "step": 16730 + }, + { + "epoch": 37.53363228699551, + "grad_norm": 0.3193691372871399, + "learning_rate": 8.633693752344902e-05, + "loss": -0.9342, + "step": 16740 + }, + { + "epoch": 37.55605381165919, + "grad_norm": 0.30222660303115845, + "learning_rate": 8.631800214696713e-05, + "loss": -0.9313, + "step": 16750 + }, + { + "epoch": 37.57847533632287, + "grad_norm": 0.3445499539375305, + "learning_rate": 8.629905573803511e-05, + "loss": -0.9275, + "step": 16760 + }, + { + "epoch": 37.600896860986545, + "grad_norm": 0.3023505210876465, + "learning_rate": 8.628009830240839e-05, + "loss": -0.9301, + "step": 16770 + }, + { + "epoch": 37.62331838565022, + "grad_norm": 0.21157652139663696, + "learning_rate": 8.626112984584571e-05, + "loss": -0.9273, + "step": 16780 + }, + { + "epoch": 37.6457399103139, + "grad_norm": 0.2475896179676056, + "learning_rate": 8.62421503741092e-05, + "loss": -0.9316, + "step": 16790 + }, + { + "epoch": 37.66816143497758, + "grad_norm": 0.17867588996887207, + "learning_rate": 8.622315989296432e-05, + "loss": -0.9356, + "step": 16800 + }, + { + "epoch": 37.690582959641254, + "grad_norm": 0.26811450719833374, + "learning_rate": 8.62041584081799e-05, + "loss": -0.932, + "step": 16810 + }, + { + "epoch": 37.71300448430493, + "grad_norm": 0.20382876694202423, + "learning_rate": 8.618514592552807e-05, + "loss": -0.9306, + "step": 16820 + }, + { + "epoch": 37.73542600896861, + "grad_norm": 0.3256577253341675, + "learning_rate": 8.616612245078431e-05, + "loss": -0.9334, + "step": 16830 + }, + { + "epoch": 37.75784753363229, + "grad_norm": 0.23230019211769104, + "learning_rate": 8.614708798972746e-05, + "loss": -0.9319, + "step": 16840 + }, + { + "epoch": 37.780269058295964, + "grad_norm": 0.24286550283432007, + "learning_rate": 8.61280425481397e-05, + "loss": -0.935, + "step": 16850 + }, + { + "epoch": 37.80269058295964, + "grad_norm": 0.3238298296928406, + "learning_rate": 8.61089861318065e-05, + "loss": -0.9329, + "step": 16860 + }, + { + "epoch": 37.82511210762332, + "grad_norm": 0.18577899038791656, + "learning_rate": 8.608991874651673e-05, + "loss": -0.9344, + "step": 16870 + }, + { + "epoch": 37.847533632286996, + "grad_norm": 0.3343936800956726, + "learning_rate": 8.607084039806255e-05, + "loss": -0.9273, + "step": 16880 + }, + { + "epoch": 37.869955156950674, + "grad_norm": 0.4396905303001404, + "learning_rate": 8.605175109223944e-05, + "loss": -0.9312, + "step": 16890 + }, + { + "epoch": 37.89237668161435, + "grad_norm": 0.3336085379123688, + "learning_rate": 8.603265083484624e-05, + "loss": -0.9287, + "step": 16900 + }, + { + "epoch": 37.91479820627803, + "grad_norm": 0.16193850338459015, + "learning_rate": 8.60135396316851e-05, + "loss": -0.9343, + "step": 16910 + }, + { + "epoch": 37.937219730941706, + "grad_norm": 0.26219886541366577, + "learning_rate": 8.599441748856152e-05, + "loss": -0.9337, + "step": 16920 + }, + { + "epoch": 37.95964125560538, + "grad_norm": 0.3126435875892639, + "learning_rate": 8.597528441128427e-05, + "loss": -0.9295, + "step": 16930 + }, + { + "epoch": 37.98206278026906, + "grad_norm": 0.2742380201816559, + "learning_rate": 8.595614040566549e-05, + "loss": -0.9333, + "step": 16940 + }, + { + "epoch": 38.00448430493274, + "grad_norm": 0.33945050835609436, + "learning_rate": 8.593698547752063e-05, + "loss": -0.9302, + "step": 16950 + }, + { + "epoch": 38.026905829596416, + "grad_norm": 0.4219784140586853, + "learning_rate": 8.591781963266843e-05, + "loss": -0.9206, + "step": 16960 + }, + { + "epoch": 38.04932735426009, + "grad_norm": 0.2324623167514801, + "learning_rate": 8.5898642876931e-05, + "loss": -0.933, + "step": 16970 + }, + { + "epoch": 38.07174887892376, + "grad_norm": 0.2651047110557556, + "learning_rate": 8.587945521613369e-05, + "loss": -0.9225, + "step": 16980 + }, + { + "epoch": 38.09417040358744, + "grad_norm": 0.43815743923187256, + "learning_rate": 8.586025665610524e-05, + "loss": -0.9292, + "step": 16990 + }, + { + "epoch": 38.11659192825112, + "grad_norm": 0.2629507780075073, + "learning_rate": 8.584104720267765e-05, + "loss": -0.9288, + "step": 17000 + }, + { + "epoch": 38.139013452914796, + "grad_norm": 0.3800317645072937, + "learning_rate": 8.582182686168625e-05, + "loss": -0.9289, + "step": 17010 + }, + { + "epoch": 38.16143497757847, + "grad_norm": 0.2636982500553131, + "learning_rate": 8.580259563896967e-05, + "loss": -0.9313, + "step": 17020 + }, + { + "epoch": 38.18385650224215, + "grad_norm": 0.29340487718582153, + "learning_rate": 8.578335354036983e-05, + "loss": -0.9298, + "step": 17030 + }, + { + "epoch": 38.20627802690583, + "grad_norm": 0.3253284990787506, + "learning_rate": 8.576410057173201e-05, + "loss": -0.9293, + "step": 17040 + }, + { + "epoch": 38.228699551569505, + "grad_norm": 0.2839791476726532, + "learning_rate": 8.574483673890474e-05, + "loss": -0.9322, + "step": 17050 + }, + { + "epoch": 38.25112107623318, + "grad_norm": 0.26633772253990173, + "learning_rate": 8.572556204773983e-05, + "loss": -0.9315, + "step": 17060 + }, + { + "epoch": 38.27354260089686, + "grad_norm": 0.2180199772119522, + "learning_rate": 8.570627650409246e-05, + "loss": -0.9318, + "step": 17070 + }, + { + "epoch": 38.29596412556054, + "grad_norm": 0.21240994334220886, + "learning_rate": 8.568698011382107e-05, + "loss": -0.9303, + "step": 17080 + }, + { + "epoch": 38.318385650224215, + "grad_norm": 0.3649972379207611, + "learning_rate": 8.566767288278738e-05, + "loss": -0.925, + "step": 17090 + }, + { + "epoch": 38.34080717488789, + "grad_norm": 0.21529492735862732, + "learning_rate": 8.56483548168564e-05, + "loss": -0.9298, + "step": 17100 + }, + { + "epoch": 38.36322869955157, + "grad_norm": 0.3167644739151001, + "learning_rate": 8.562902592189648e-05, + "loss": -0.9329, + "step": 17110 + }, + { + "epoch": 38.38565022421525, + "grad_norm": 0.3545401990413666, + "learning_rate": 8.560968620377921e-05, + "loss": -0.9307, + "step": 17120 + }, + { + "epoch": 38.408071748878925, + "grad_norm": 0.2791231870651245, + "learning_rate": 8.559033566837951e-05, + "loss": -0.9283, + "step": 17130 + }, + { + "epoch": 38.4304932735426, + "grad_norm": 0.291636198759079, + "learning_rate": 8.557097432157551e-05, + "loss": -0.9243, + "step": 17140 + }, + { + "epoch": 38.45291479820628, + "grad_norm": 0.16629265248775482, + "learning_rate": 8.555160216924872e-05, + "loss": -0.9335, + "step": 17150 + }, + { + "epoch": 38.47533632286996, + "grad_norm": 0.3381149470806122, + "learning_rate": 8.55322192172839e-05, + "loss": -0.9314, + "step": 17160 + }, + { + "epoch": 38.497757847533634, + "grad_norm": 0.1389753669500351, + "learning_rate": 8.551282547156902e-05, + "loss": -0.936, + "step": 17170 + }, + { + "epoch": 38.52017937219731, + "grad_norm": 0.24679869413375854, + "learning_rate": 8.549342093799544e-05, + "loss": -0.9295, + "step": 17180 + }, + { + "epoch": 38.54260089686099, + "grad_norm": 0.2571127116680145, + "learning_rate": 8.547400562245773e-05, + "loss": -0.929, + "step": 17190 + }, + { + "epoch": 38.56502242152467, + "grad_norm": 0.3855926990509033, + "learning_rate": 8.545457953085374e-05, + "loss": -0.9293, + "step": 17200 + }, + { + "epoch": 38.587443946188344, + "grad_norm": 0.3185344934463501, + "learning_rate": 8.543514266908463e-05, + "loss": -0.9313, + "step": 17210 + }, + { + "epoch": 38.609865470852014, + "grad_norm": 0.2702292799949646, + "learning_rate": 8.541569504305478e-05, + "loss": -0.9297, + "step": 17220 + }, + { + "epoch": 38.63228699551569, + "grad_norm": 0.28036409616470337, + "learning_rate": 8.539623665867187e-05, + "loss": -0.9281, + "step": 17230 + }, + { + "epoch": 38.65470852017937, + "grad_norm": 0.26934078335762024, + "learning_rate": 8.537676752184685e-05, + "loss": -0.9307, + "step": 17240 + }, + { + "epoch": 38.67713004484305, + "grad_norm": 0.2687683701515198, + "learning_rate": 8.53572876384939e-05, + "loss": -0.9292, + "step": 17250 + }, + { + "epoch": 38.699551569506724, + "grad_norm": 0.37631383538246155, + "learning_rate": 8.533779701453056e-05, + "loss": -0.9313, + "step": 17260 + }, + { + "epoch": 38.7219730941704, + "grad_norm": 0.27772393822669983, + "learning_rate": 8.53182956558775e-05, + "loss": -0.9303, + "step": 17270 + }, + { + "epoch": 38.74439461883408, + "grad_norm": 0.3390691578388214, + "learning_rate": 8.529878356845877e-05, + "loss": -0.9353, + "step": 17280 + }, + { + "epoch": 38.766816143497756, + "grad_norm": 0.3271861970424652, + "learning_rate": 8.527926075820158e-05, + "loss": -0.9283, + "step": 17290 + }, + { + "epoch": 38.789237668161434, + "grad_norm": 0.21468676626682281, + "learning_rate": 8.525972723103648e-05, + "loss": -0.9281, + "step": 17300 + }, + { + "epoch": 38.81165919282511, + "grad_norm": 0.17510917782783508, + "learning_rate": 8.524018299289722e-05, + "loss": -0.9344, + "step": 17310 + }, + { + "epoch": 38.83408071748879, + "grad_norm": 0.4188162088394165, + "learning_rate": 8.522062804972083e-05, + "loss": -0.9306, + "step": 17320 + }, + { + "epoch": 38.856502242152466, + "grad_norm": 0.27556756138801575, + "learning_rate": 8.520106240744759e-05, + "loss": -0.9301, + "step": 17330 + }, + { + "epoch": 38.87892376681614, + "grad_norm": 0.30930328369140625, + "learning_rate": 8.518148607202102e-05, + "loss": -0.927, + "step": 17340 + }, + { + "epoch": 38.90134529147982, + "grad_norm": 0.30675771832466125, + "learning_rate": 8.51618990493879e-05, + "loss": -0.929, + "step": 17350 + }, + { + "epoch": 38.9237668161435, + "grad_norm": 0.3221587836742401, + "learning_rate": 8.514230134549823e-05, + "loss": -0.926, + "step": 17360 + }, + { + "epoch": 38.946188340807176, + "grad_norm": 0.2392353117465973, + "learning_rate": 8.51226929663053e-05, + "loss": -0.9312, + "step": 17370 + }, + { + "epoch": 38.96860986547085, + "grad_norm": 0.40025192499160767, + "learning_rate": 8.51030739177656e-05, + "loss": -0.9294, + "step": 17380 + }, + { + "epoch": 38.99103139013453, + "grad_norm": 0.3208850622177124, + "learning_rate": 8.508344420583889e-05, + "loss": -0.9266, + "step": 17390 + }, + { + "epoch": 39.01345291479821, + "grad_norm": 0.28203725814819336, + "learning_rate": 8.506380383648816e-05, + "loss": -0.9344, + "step": 17400 + }, + { + "epoch": 39.035874439461885, + "grad_norm": 0.2716729938983917, + "learning_rate": 8.504415281567963e-05, + "loss": -0.9351, + "step": 17410 + }, + { + "epoch": 39.05829596412556, + "grad_norm": 0.31244656443595886, + "learning_rate": 8.502449114938275e-05, + "loss": -0.9356, + "step": 17420 + }, + { + "epoch": 39.08071748878924, + "grad_norm": 0.2646550238132477, + "learning_rate": 8.500481884357025e-05, + "loss": -0.9303, + "step": 17430 + }, + { + "epoch": 39.10313901345292, + "grad_norm": 0.31927329301834106, + "learning_rate": 8.498513590421801e-05, + "loss": -0.9297, + "step": 17440 + }, + { + "epoch": 39.125560538116595, + "grad_norm": 0.20884522795677185, + "learning_rate": 8.496544233730522e-05, + "loss": -0.9292, + "step": 17450 + }, + { + "epoch": 39.14798206278027, + "grad_norm": 0.3008347451686859, + "learning_rate": 8.494573814881426e-05, + "loss": -0.9281, + "step": 17460 + }, + { + "epoch": 39.17040358744394, + "grad_norm": 0.2383633404970169, + "learning_rate": 8.492602334473074e-05, + "loss": -0.9366, + "step": 17470 + }, + { + "epoch": 39.19282511210762, + "grad_norm": 0.27863746881484985, + "learning_rate": 8.49062979310435e-05, + "loss": -0.9265, + "step": 17480 + }, + { + "epoch": 39.2152466367713, + "grad_norm": 0.41002461314201355, + "learning_rate": 8.488656191374458e-05, + "loss": -0.9263, + "step": 17490 + }, + { + "epoch": 39.237668161434975, + "grad_norm": 0.3654491901397705, + "learning_rate": 8.48668152988293e-05, + "loss": -0.929, + "step": 17500 + }, + { + "epoch": 39.26008968609865, + "grad_norm": 0.24138902127742767, + "learning_rate": 8.484705809229612e-05, + "loss": -0.9293, + "step": 17510 + }, + { + "epoch": 39.28251121076233, + "grad_norm": 0.3346310555934906, + "learning_rate": 8.482729030014677e-05, + "loss": -0.9254, + "step": 17520 + }, + { + "epoch": 39.30493273542601, + "grad_norm": 0.3086315393447876, + "learning_rate": 8.48075119283862e-05, + "loss": -0.9337, + "step": 17530 + }, + { + "epoch": 39.327354260089685, + "grad_norm": 0.3267945647239685, + "learning_rate": 8.478772298302254e-05, + "loss": -0.9323, + "step": 17540 + }, + { + "epoch": 39.34977578475336, + "grad_norm": 0.33552008867263794, + "learning_rate": 8.476792347006716e-05, + "loss": -0.9259, + "step": 17550 + }, + { + "epoch": 39.37219730941704, + "grad_norm": 0.2967265844345093, + "learning_rate": 8.474811339553462e-05, + "loss": -0.9325, + "step": 17560 + }, + { + "epoch": 39.39461883408072, + "grad_norm": 0.4678538739681244, + "learning_rate": 8.47282927654427e-05, + "loss": -0.9206, + "step": 17570 + }, + { + "epoch": 39.417040358744394, + "grad_norm": 0.39328524470329285, + "learning_rate": 8.470846158581238e-05, + "loss": -0.9301, + "step": 17580 + }, + { + "epoch": 39.43946188340807, + "grad_norm": 0.35651785135269165, + "learning_rate": 8.468861986266787e-05, + "loss": -0.9287, + "step": 17590 + }, + { + "epoch": 39.46188340807175, + "grad_norm": 0.31886956095695496, + "learning_rate": 8.466876760203654e-05, + "loss": -0.9308, + "step": 17600 + }, + { + "epoch": 39.48430493273543, + "grad_norm": 0.32590994238853455, + "learning_rate": 8.464890480994898e-05, + "loss": -0.9229, + "step": 17610 + }, + { + "epoch": 39.506726457399104, + "grad_norm": 0.21029162406921387, + "learning_rate": 8.462903149243899e-05, + "loss": -0.9325, + "step": 17620 + }, + { + "epoch": 39.52914798206278, + "grad_norm": 0.2612999379634857, + "learning_rate": 8.460914765554357e-05, + "loss": -0.9353, + "step": 17630 + }, + { + "epoch": 39.55156950672646, + "grad_norm": 0.3182608187198639, + "learning_rate": 8.458925330530288e-05, + "loss": -0.9222, + "step": 17640 + }, + { + "epoch": 39.573991031390136, + "grad_norm": 0.39490923285484314, + "learning_rate": 8.456934844776032e-05, + "loss": -0.9284, + "step": 17650 + }, + { + "epoch": 39.596412556053814, + "grad_norm": 0.49377697706222534, + "learning_rate": 8.454943308896246e-05, + "loss": -0.929, + "step": 17660 + }, + { + "epoch": 39.61883408071749, + "grad_norm": 0.21955029666423798, + "learning_rate": 8.452950723495905e-05, + "loss": -0.933, + "step": 17670 + }, + { + "epoch": 39.64125560538117, + "grad_norm": 0.3629653751850128, + "learning_rate": 8.450957089180303e-05, + "loss": -0.9309, + "step": 17680 + }, + { + "epoch": 39.663677130044846, + "grad_norm": 0.35816091299057007, + "learning_rate": 8.448962406555055e-05, + "loss": -0.9302, + "step": 17690 + }, + { + "epoch": 39.68609865470852, + "grad_norm": 0.21947892010211945, + "learning_rate": 8.446966676226093e-05, + "loss": -0.9255, + "step": 17700 + }, + { + "epoch": 39.7085201793722, + "grad_norm": 0.41866499185562134, + "learning_rate": 8.444969898799667e-05, + "loss": -0.9258, + "step": 17710 + }, + { + "epoch": 39.73094170403587, + "grad_norm": 0.2837367057800293, + "learning_rate": 8.442972074882343e-05, + "loss": -0.9306, + "step": 17720 + }, + { + "epoch": 39.75336322869955, + "grad_norm": 0.41895338892936707, + "learning_rate": 8.44097320508101e-05, + "loss": -0.9289, + "step": 17730 + }, + { + "epoch": 39.775784753363226, + "grad_norm": 0.26582616567611694, + "learning_rate": 8.43897329000287e-05, + "loss": -0.9339, + "step": 17740 + }, + { + "epoch": 39.7982062780269, + "grad_norm": 0.3698517084121704, + "learning_rate": 8.436972330255448e-05, + "loss": -0.9314, + "step": 17750 + }, + { + "epoch": 39.82062780269058, + "grad_norm": 0.24483126401901245, + "learning_rate": 8.434970326446579e-05, + "loss": -0.929, + "step": 17760 + }, + { + "epoch": 39.84304932735426, + "grad_norm": 0.2169337272644043, + "learning_rate": 8.432967279184418e-05, + "loss": -0.9304, + "step": 17770 + }, + { + "epoch": 39.865470852017935, + "grad_norm": 0.3756512999534607, + "learning_rate": 8.430963189077441e-05, + "loss": -0.9327, + "step": 17780 + }, + { + "epoch": 39.88789237668161, + "grad_norm": 0.22426767647266388, + "learning_rate": 8.428958056734437e-05, + "loss": -0.9339, + "step": 17790 + }, + { + "epoch": 39.91031390134529, + "grad_norm": 0.250882089138031, + "learning_rate": 8.426951882764513e-05, + "loss": -0.9179, + "step": 17800 + }, + { + "epoch": 39.93273542600897, + "grad_norm": 0.35034507513046265, + "learning_rate": 8.424944667777089e-05, + "loss": -0.9349, + "step": 17810 + }, + { + "epoch": 39.955156950672645, + "grad_norm": 0.3083525598049164, + "learning_rate": 8.422936412381905e-05, + "loss": -0.9288, + "step": 17820 + }, + { + "epoch": 39.97757847533632, + "grad_norm": 0.26392197608947754, + "learning_rate": 8.420927117189017e-05, + "loss": -0.934, + "step": 17830 + }, + { + "epoch": 40.0, + "grad_norm": 0.27126771211624146, + "learning_rate": 8.418916782808795e-05, + "loss": -0.9282, + "step": 17840 + }, + { + "epoch": 40.02242152466368, + "grad_norm": 0.34720250964164734, + "learning_rate": 8.416905409851926e-05, + "loss": -0.932, + "step": 17850 + }, + { + "epoch": 40.044843049327355, + "grad_norm": 0.24327591061592102, + "learning_rate": 8.41489299892941e-05, + "loss": -0.9341, + "step": 17860 + }, + { + "epoch": 40.06726457399103, + "grad_norm": 0.31343477964401245, + "learning_rate": 8.412879550652566e-05, + "loss": -0.9315, + "step": 17870 + }, + { + "epoch": 40.08968609865471, + "grad_norm": 0.3110867142677307, + "learning_rate": 8.410865065633029e-05, + "loss": -0.9329, + "step": 17880 + }, + { + "epoch": 40.11210762331839, + "grad_norm": 0.3076401650905609, + "learning_rate": 8.408849544482742e-05, + "loss": -0.9267, + "step": 17890 + }, + { + "epoch": 40.134529147982065, + "grad_norm": 0.22461189329624176, + "learning_rate": 8.406832987813968e-05, + "loss": -0.9303, + "step": 17900 + }, + { + "epoch": 40.15695067264574, + "grad_norm": 0.2548610270023346, + "learning_rate": 8.404815396239286e-05, + "loss": -0.9309, + "step": 17910 + }, + { + "epoch": 40.17937219730942, + "grad_norm": 0.22903475165367126, + "learning_rate": 8.402796770371587e-05, + "loss": -0.9315, + "step": 17920 + }, + { + "epoch": 40.2017937219731, + "grad_norm": 0.2812703251838684, + "learning_rate": 8.400777110824071e-05, + "loss": -0.935, + "step": 17930 + }, + { + "epoch": 40.224215246636774, + "grad_norm": 0.33381474018096924, + "learning_rate": 8.398756418210263e-05, + "loss": -0.9341, + "step": 17940 + }, + { + "epoch": 40.24663677130045, + "grad_norm": 0.2545645833015442, + "learning_rate": 8.396734693143993e-05, + "loss": -0.9333, + "step": 17950 + }, + { + "epoch": 40.26905829596413, + "grad_norm": 0.22923561930656433, + "learning_rate": 8.39471193623941e-05, + "loss": -0.9264, + "step": 17960 + }, + { + "epoch": 40.2914798206278, + "grad_norm": 0.304362416267395, + "learning_rate": 8.392688148110974e-05, + "loss": -0.931, + "step": 17970 + }, + { + "epoch": 40.31390134529148, + "grad_norm": 0.25496160984039307, + "learning_rate": 8.390663329373456e-05, + "loss": -0.9343, + "step": 17980 + }, + { + "epoch": 40.336322869955154, + "grad_norm": 0.3753626346588135, + "learning_rate": 8.388637480641944e-05, + "loss": -0.9241, + "step": 17990 + }, + { + "epoch": 40.35874439461883, + "grad_norm": 0.2534741461277008, + "learning_rate": 8.386610602531837e-05, + "loss": -0.9294, + "step": 18000 + }, + { + "epoch": 40.38116591928251, + "grad_norm": 0.2728371024131775, + "learning_rate": 8.384582695658847e-05, + "loss": -0.9354, + "step": 18010 + }, + { + "epoch": 40.403587443946186, + "grad_norm": 0.22527305781841278, + "learning_rate": 8.382553760638999e-05, + "loss": -0.9314, + "step": 18020 + }, + { + "epoch": 40.426008968609864, + "grad_norm": 0.3054051697254181, + "learning_rate": 8.380523798088631e-05, + "loss": -0.9337, + "step": 18030 + }, + { + "epoch": 40.44843049327354, + "grad_norm": 0.24451106786727905, + "learning_rate": 8.378492808624389e-05, + "loss": -0.9347, + "step": 18040 + }, + { + "epoch": 40.47085201793722, + "grad_norm": 0.23088446259498596, + "learning_rate": 8.376460792863237e-05, + "loss": -0.9302, + "step": 18050 + }, + { + "epoch": 40.493273542600896, + "grad_norm": 0.26295748353004456, + "learning_rate": 8.374427751422444e-05, + "loss": -0.9363, + "step": 18060 + }, + { + "epoch": 40.51569506726457, + "grad_norm": 0.371719092130661, + "learning_rate": 8.3723936849196e-05, + "loss": -0.9346, + "step": 18070 + }, + { + "epoch": 40.53811659192825, + "grad_norm": 0.2284904271364212, + "learning_rate": 8.370358593972595e-05, + "loss": -0.9344, + "step": 18080 + }, + { + "epoch": 40.56053811659193, + "grad_norm": 0.29511064291000366, + "learning_rate": 8.36832247919964e-05, + "loss": -0.9345, + "step": 18090 + }, + { + "epoch": 40.582959641255606, + "grad_norm": 0.2116822600364685, + "learning_rate": 8.36628534121925e-05, + "loss": -0.9308, + "step": 18100 + }, + { + "epoch": 40.60538116591928, + "grad_norm": 0.3374122083187103, + "learning_rate": 8.364247180650254e-05, + "loss": -0.9294, + "step": 18110 + }, + { + "epoch": 40.62780269058296, + "grad_norm": 0.25542381405830383, + "learning_rate": 8.362207998111794e-05, + "loss": -0.9372, + "step": 18120 + }, + { + "epoch": 40.65022421524664, + "grad_norm": 0.26859161257743835, + "learning_rate": 8.360167794223318e-05, + "loss": -0.9362, + "step": 18130 + }, + { + "epoch": 40.672645739910315, + "grad_norm": 0.3831384778022766, + "learning_rate": 8.358126569604586e-05, + "loss": -0.9327, + "step": 18140 + }, + { + "epoch": 40.69506726457399, + "grad_norm": 0.26578962802886963, + "learning_rate": 8.356084324875668e-05, + "loss": -0.9281, + "step": 18150 + }, + { + "epoch": 40.71748878923767, + "grad_norm": 0.2699836194515228, + "learning_rate": 8.354041060656945e-05, + "loss": -0.934, + "step": 18160 + }, + { + "epoch": 40.73991031390135, + "grad_norm": 0.2911466658115387, + "learning_rate": 8.351996777569106e-05, + "loss": -0.9343, + "step": 18170 + }, + { + "epoch": 40.762331838565025, + "grad_norm": 0.32907187938690186, + "learning_rate": 8.349951476233148e-05, + "loss": -0.9311, + "step": 18180 + }, + { + "epoch": 40.7847533632287, + "grad_norm": 0.23675550520420074, + "learning_rate": 8.347905157270386e-05, + "loss": -0.9312, + "step": 18190 + }, + { + "epoch": 40.80717488789238, + "grad_norm": 0.433622807264328, + "learning_rate": 8.345857821302432e-05, + "loss": -0.9319, + "step": 18200 + }, + { + "epoch": 40.82959641255606, + "grad_norm": 0.30761057138442993, + "learning_rate": 8.343809468951213e-05, + "loss": -0.9308, + "step": 18210 + }, + { + "epoch": 40.85201793721973, + "grad_norm": 0.29424577951431274, + "learning_rate": 8.341760100838965e-05, + "loss": -0.9255, + "step": 18220 + }, + { + "epoch": 40.874439461883405, + "grad_norm": 0.32380610704421997, + "learning_rate": 8.339709717588233e-05, + "loss": -0.9288, + "step": 18230 + }, + { + "epoch": 40.89686098654708, + "grad_norm": 0.26302459836006165, + "learning_rate": 8.33765831982187e-05, + "loss": -0.9323, + "step": 18240 + }, + { + "epoch": 40.91928251121076, + "grad_norm": 0.20418976247310638, + "learning_rate": 8.335605908163035e-05, + "loss": -0.9374, + "step": 18250 + }, + { + "epoch": 40.94170403587444, + "grad_norm": 0.3421807885169983, + "learning_rate": 8.333552483235196e-05, + "loss": -0.9376, + "step": 18260 + }, + { + "epoch": 40.964125560538115, + "grad_norm": 0.21428199112415314, + "learning_rate": 8.33149804566213e-05, + "loss": -0.9309, + "step": 18270 + }, + { + "epoch": 40.98654708520179, + "grad_norm": 0.22264863550662994, + "learning_rate": 8.329442596067921e-05, + "loss": -0.9326, + "step": 18280 + }, + { + "epoch": 41.00896860986547, + "grad_norm": 0.30846813321113586, + "learning_rate": 8.32738613507696e-05, + "loss": -0.9303, + "step": 18290 + }, + { + "epoch": 41.03139013452915, + "grad_norm": 0.46894508600234985, + "learning_rate": 8.325328663313946e-05, + "loss": -0.9271, + "step": 18300 + }, + { + "epoch": 41.053811659192824, + "grad_norm": 0.315557599067688, + "learning_rate": 8.323270181403884e-05, + "loss": -0.9341, + "step": 18310 + }, + { + "epoch": 41.0762331838565, + "grad_norm": 0.36100658774375916, + "learning_rate": 8.321210689972086e-05, + "loss": -0.9309, + "step": 18320 + }, + { + "epoch": 41.09865470852018, + "grad_norm": 0.3233073651790619, + "learning_rate": 8.319150189644174e-05, + "loss": -0.9306, + "step": 18330 + }, + { + "epoch": 41.12107623318386, + "grad_norm": 0.3214190602302551, + "learning_rate": 8.31708868104607e-05, + "loss": -0.921, + "step": 18340 + }, + { + "epoch": 41.143497757847534, + "grad_norm": 0.2659228444099426, + "learning_rate": 8.315026164804007e-05, + "loss": -0.9295, + "step": 18350 + }, + { + "epoch": 41.16591928251121, + "grad_norm": 0.3405274748802185, + "learning_rate": 8.312962641544524e-05, + "loss": -0.9304, + "step": 18360 + }, + { + "epoch": 41.18834080717489, + "grad_norm": 0.37642502784729004, + "learning_rate": 8.310898111894465e-05, + "loss": -0.9339, + "step": 18370 + }, + { + "epoch": 41.210762331838566, + "grad_norm": 0.253567099571228, + "learning_rate": 8.308832576480977e-05, + "loss": -0.935, + "step": 18380 + }, + { + "epoch": 41.233183856502244, + "grad_norm": 0.23065438866615295, + "learning_rate": 8.306766035931519e-05, + "loss": -0.9293, + "step": 18390 + }, + { + "epoch": 41.25560538116592, + "grad_norm": 0.22270303964614868, + "learning_rate": 8.304698490873847e-05, + "loss": -0.934, + "step": 18400 + }, + { + "epoch": 41.2780269058296, + "grad_norm": 0.2418278604745865, + "learning_rate": 8.30262994193603e-05, + "loss": -0.9343, + "step": 18410 + }, + { + "epoch": 41.300448430493276, + "grad_norm": 0.26782581210136414, + "learning_rate": 8.300560389746438e-05, + "loss": -0.9362, + "step": 18420 + }, + { + "epoch": 41.32286995515695, + "grad_norm": 0.25779828429222107, + "learning_rate": 8.298489834933745e-05, + "loss": -0.9293, + "step": 18430 + }, + { + "epoch": 41.34529147982063, + "grad_norm": 0.2762886881828308, + "learning_rate": 8.296418278126934e-05, + "loss": -0.93, + "step": 18440 + }, + { + "epoch": 41.36771300448431, + "grad_norm": 0.19411498308181763, + "learning_rate": 8.294345719955284e-05, + "loss": -0.9351, + "step": 18450 + }, + { + "epoch": 41.390134529147986, + "grad_norm": 0.3129475712776184, + "learning_rate": 8.29227216104839e-05, + "loss": -0.9311, + "step": 18460 + }, + { + "epoch": 41.412556053811656, + "grad_norm": 0.2961171567440033, + "learning_rate": 8.290197602036137e-05, + "loss": -0.9291, + "step": 18470 + }, + { + "epoch": 41.43497757847533, + "grad_norm": 0.20735158026218414, + "learning_rate": 8.288122043548725e-05, + "loss": -0.936, + "step": 18480 + }, + { + "epoch": 41.45739910313901, + "grad_norm": 0.3774130344390869, + "learning_rate": 8.286045486216657e-05, + "loss": -0.9275, + "step": 18490 + }, + { + "epoch": 41.47982062780269, + "grad_norm": 0.34955835342407227, + "learning_rate": 8.283967930670733e-05, + "loss": -0.9346, + "step": 18500 + }, + { + "epoch": 41.502242152466366, + "grad_norm": 0.325006365776062, + "learning_rate": 8.281889377542058e-05, + "loss": -0.9329, + "step": 18510 + }, + { + "epoch": 41.52466367713004, + "grad_norm": 0.31326737999916077, + "learning_rate": 8.279809827462045e-05, + "loss": -0.937, + "step": 18520 + }, + { + "epoch": 41.54708520179372, + "grad_norm": 0.20646001398563385, + "learning_rate": 8.277729281062402e-05, + "loss": -0.9316, + "step": 18530 + }, + { + "epoch": 41.5695067264574, + "grad_norm": 0.2168074995279312, + "learning_rate": 8.27564773897515e-05, + "loss": -0.9353, + "step": 18540 + }, + { + "epoch": 41.591928251121075, + "grad_norm": 0.27699580788612366, + "learning_rate": 8.273565201832602e-05, + "loss": -0.9327, + "step": 18550 + }, + { + "epoch": 41.61434977578475, + "grad_norm": 0.21844574809074402, + "learning_rate": 8.27148167026738e-05, + "loss": -0.9342, + "step": 18560 + }, + { + "epoch": 41.63677130044843, + "grad_norm": 0.3621934652328491, + "learning_rate": 8.269397144912405e-05, + "loss": -0.9331, + "step": 18570 + }, + { + "epoch": 41.65919282511211, + "grad_norm": 0.22428187727928162, + "learning_rate": 8.267311626400899e-05, + "loss": -0.937, + "step": 18580 + }, + { + "epoch": 41.681614349775785, + "grad_norm": 0.39169493317604065, + "learning_rate": 8.26522511536639e-05, + "loss": -0.9267, + "step": 18590 + }, + { + "epoch": 41.70403587443946, + "grad_norm": 0.2611638903617859, + "learning_rate": 8.263137612442706e-05, + "loss": -0.9328, + "step": 18600 + }, + { + "epoch": 41.72645739910314, + "grad_norm": 0.3789830803871155, + "learning_rate": 8.261049118263971e-05, + "loss": -0.9373, + "step": 18610 + }, + { + "epoch": 41.74887892376682, + "grad_norm": 0.2299080342054367, + "learning_rate": 8.258959633464619e-05, + "loss": -0.9342, + "step": 18620 + }, + { + "epoch": 41.771300448430495, + "grad_norm": 0.22028808295726776, + "learning_rate": 8.256869158679377e-05, + "loss": -0.9353, + "step": 18630 + }, + { + "epoch": 41.79372197309417, + "grad_norm": 0.25002843141555786, + "learning_rate": 8.254777694543278e-05, + "loss": -0.9345, + "step": 18640 + }, + { + "epoch": 41.81614349775785, + "grad_norm": 0.186080664396286, + "learning_rate": 8.252685241691651e-05, + "loss": -0.9345, + "step": 18650 + }, + { + "epoch": 41.83856502242153, + "grad_norm": 0.2360316514968872, + "learning_rate": 8.250591800760133e-05, + "loss": -0.9294, + "step": 18660 + }, + { + "epoch": 41.860986547085204, + "grad_norm": 0.3576846420764923, + "learning_rate": 8.248497372384649e-05, + "loss": -0.9273, + "step": 18670 + }, + { + "epoch": 41.88340807174888, + "grad_norm": 0.2686362564563751, + "learning_rate": 8.246401957201437e-05, + "loss": -0.9317, + "step": 18680 + }, + { + "epoch": 41.90582959641256, + "grad_norm": 0.30236777663230896, + "learning_rate": 8.244305555847027e-05, + "loss": -0.9318, + "step": 18690 + }, + { + "epoch": 41.92825112107624, + "grad_norm": 0.16032728552818298, + "learning_rate": 8.24220816895825e-05, + "loss": -0.9371, + "step": 18700 + }, + { + "epoch": 41.95067264573991, + "grad_norm": 0.28171008825302124, + "learning_rate": 8.240109797172237e-05, + "loss": -0.9316, + "step": 18710 + }, + { + "epoch": 41.973094170403584, + "grad_norm": 0.47334668040275574, + "learning_rate": 8.238010441126416e-05, + "loss": -0.9285, + "step": 18720 + }, + { + "epoch": 41.99551569506726, + "grad_norm": 0.4033391773700714, + "learning_rate": 8.23591010145852e-05, + "loss": -0.9275, + "step": 18730 + }, + { + "epoch": 42.01793721973094, + "grad_norm": 0.3348526656627655, + "learning_rate": 8.233808778806571e-05, + "loss": -0.932, + "step": 18740 + }, + { + "epoch": 42.04035874439462, + "grad_norm": 0.32192957401275635, + "learning_rate": 8.231706473808903e-05, + "loss": -0.93, + "step": 18750 + }, + { + "epoch": 42.062780269058294, + "grad_norm": 0.4141150116920471, + "learning_rate": 8.229603187104133e-05, + "loss": -0.9251, + "step": 18760 + }, + { + "epoch": 42.08520179372197, + "grad_norm": 0.3866988718509674, + "learning_rate": 8.22749891933119e-05, + "loss": -0.931, + "step": 18770 + }, + { + "epoch": 42.10762331838565, + "grad_norm": 0.4237884283065796, + "learning_rate": 8.225393671129291e-05, + "loss": -0.9263, + "step": 18780 + }, + { + "epoch": 42.130044843049326, + "grad_norm": 0.25204333662986755, + "learning_rate": 8.223287443137957e-05, + "loss": -0.9356, + "step": 18790 + }, + { + "epoch": 42.152466367713004, + "grad_norm": 0.3180689513683319, + "learning_rate": 8.221180235997004e-05, + "loss": -0.9319, + "step": 18800 + }, + { + "epoch": 42.17488789237668, + "grad_norm": 0.24467802047729492, + "learning_rate": 8.219072050346544e-05, + "loss": -0.9352, + "step": 18810 + }, + { + "epoch": 42.19730941704036, + "grad_norm": 0.26554933190345764, + "learning_rate": 8.216962886826992e-05, + "loss": -0.9335, + "step": 18820 + }, + { + "epoch": 42.219730941704036, + "grad_norm": 0.3160099387168884, + "learning_rate": 8.214852746079054e-05, + "loss": -0.9303, + "step": 18830 + }, + { + "epoch": 42.24215246636771, + "grad_norm": 0.33025026321411133, + "learning_rate": 8.212741628743732e-05, + "loss": -0.9338, + "step": 18840 + }, + { + "epoch": 42.26457399103139, + "grad_norm": 0.36837559938430786, + "learning_rate": 8.210629535462333e-05, + "loss": -0.9331, + "step": 18850 + }, + { + "epoch": 42.28699551569507, + "grad_norm": 0.3484347462654114, + "learning_rate": 8.208516466876453e-05, + "loss": -0.9354, + "step": 18860 + }, + { + "epoch": 42.309417040358746, + "grad_norm": 0.24684381484985352, + "learning_rate": 8.206402423627986e-05, + "loss": -0.9383, + "step": 18870 + }, + { + "epoch": 42.33183856502242, + "grad_norm": 0.4418052136898041, + "learning_rate": 8.204287406359124e-05, + "loss": -0.921, + "step": 18880 + }, + { + "epoch": 42.3542600896861, + "grad_norm": 0.26991766691207886, + "learning_rate": 8.20217141571235e-05, + "loss": -0.9343, + "step": 18890 + }, + { + "epoch": 42.37668161434978, + "grad_norm": 0.23885668814182281, + "learning_rate": 8.200054452330449e-05, + "loss": -0.9386, + "step": 18900 + }, + { + "epoch": 42.399103139013455, + "grad_norm": 0.33922651410102844, + "learning_rate": 8.197936516856499e-05, + "loss": -0.9316, + "step": 18910 + }, + { + "epoch": 42.42152466367713, + "grad_norm": 0.2915500998497009, + "learning_rate": 8.195817609933871e-05, + "loss": -0.9325, + "step": 18920 + }, + { + "epoch": 42.44394618834081, + "grad_norm": 0.32896098494529724, + "learning_rate": 8.193697732206233e-05, + "loss": -0.9346, + "step": 18930 + }, + { + "epoch": 42.46636771300449, + "grad_norm": 0.22656439244747162, + "learning_rate": 8.19157688431755e-05, + "loss": -0.9325, + "step": 18940 + }, + { + "epoch": 42.488789237668165, + "grad_norm": 0.33335933089256287, + "learning_rate": 8.189455066912077e-05, + "loss": -0.9358, + "step": 18950 + }, + { + "epoch": 42.511210762331835, + "grad_norm": 0.25275737047195435, + "learning_rate": 8.187332280634369e-05, + "loss": -0.9335, + "step": 18960 + }, + { + "epoch": 42.53363228699551, + "grad_norm": 0.1794552057981491, + "learning_rate": 8.18520852612927e-05, + "loss": -0.9346, + "step": 18970 + }, + { + "epoch": 42.55605381165919, + "grad_norm": 0.2551756501197815, + "learning_rate": 8.183083804041921e-05, + "loss": -0.9285, + "step": 18980 + }, + { + "epoch": 42.57847533632287, + "grad_norm": 0.2781985104084015, + "learning_rate": 8.180958115017757e-05, + "loss": -0.934, + "step": 18990 + }, + { + "epoch": 42.600896860986545, + "grad_norm": 0.3870719373226166, + "learning_rate": 8.178831459702505e-05, + "loss": -0.9271, + "step": 19000 + }, + { + "epoch": 42.62331838565022, + "grad_norm": 0.2892410457134247, + "learning_rate": 8.17670383874219e-05, + "loss": -0.9301, + "step": 19010 + }, + { + "epoch": 42.6457399103139, + "grad_norm": 0.27418723702430725, + "learning_rate": 8.174575252783124e-05, + "loss": -0.9361, + "step": 19020 + }, + { + "epoch": 42.66816143497758, + "grad_norm": 0.30322960019111633, + "learning_rate": 8.172445702471914e-05, + "loss": -0.9278, + "step": 19030 + }, + { + "epoch": 42.690582959641254, + "grad_norm": 0.26010799407958984, + "learning_rate": 8.170315188455466e-05, + "loss": -0.9303, + "step": 19040 + }, + { + "epoch": 42.71300448430493, + "grad_norm": 0.3083159625530243, + "learning_rate": 8.168183711380969e-05, + "loss": -0.9289, + "step": 19050 + }, + { + "epoch": 42.73542600896861, + "grad_norm": 0.23232223093509674, + "learning_rate": 8.166051271895913e-05, + "loss": -0.9314, + "step": 19060 + }, + { + "epoch": 42.75784753363229, + "grad_norm": 0.3707640469074249, + "learning_rate": 8.163917870648075e-05, + "loss": -0.9249, + "step": 19070 + }, + { + "epoch": 42.780269058295964, + "grad_norm": 0.28856101632118225, + "learning_rate": 8.161783508285526e-05, + "loss": -0.9335, + "step": 19080 + }, + { + "epoch": 42.80269058295964, + "grad_norm": 0.2842434048652649, + "learning_rate": 8.159648185456628e-05, + "loss": -0.9302, + "step": 19090 + }, + { + "epoch": 42.82511210762332, + "grad_norm": 0.3058185279369354, + "learning_rate": 8.157511902810038e-05, + "loss": -0.9288, + "step": 19100 + }, + { + "epoch": 42.847533632286996, + "grad_norm": 0.3985295593738556, + "learning_rate": 8.155374660994701e-05, + "loss": -0.9316, + "step": 19110 + }, + { + "epoch": 42.869955156950674, + "grad_norm": 0.2431056946516037, + "learning_rate": 8.153236460659857e-05, + "loss": -0.9296, + "step": 19120 + }, + { + "epoch": 42.89237668161435, + "grad_norm": 0.22357337176799774, + "learning_rate": 8.151097302455031e-05, + "loss": -0.9366, + "step": 19130 + }, + { + "epoch": 42.91479820627803, + "grad_norm": 0.3467863202095032, + "learning_rate": 8.148957187030044e-05, + "loss": -0.9299, + "step": 19140 + }, + { + "epoch": 42.937219730941706, + "grad_norm": 0.2755807936191559, + "learning_rate": 8.146816115035006e-05, + "loss": -0.9334, + "step": 19150 + }, + { + "epoch": 42.95964125560538, + "grad_norm": 0.20655953884124756, + "learning_rate": 8.14467408712032e-05, + "loss": -0.9288, + "step": 19160 + }, + { + "epoch": 42.98206278026906, + "grad_norm": 0.2704418897628784, + "learning_rate": 8.142531103936678e-05, + "loss": -0.9326, + "step": 19170 + }, + { + "epoch": 43.00448430493274, + "grad_norm": 0.1802455633878708, + "learning_rate": 8.14038716613506e-05, + "loss": -0.9416, + "step": 19180 + }, + { + "epoch": 43.026905829596416, + "grad_norm": 0.27828484773635864, + "learning_rate": 8.138242274366736e-05, + "loss": -0.9349, + "step": 19190 + }, + { + "epoch": 43.04932735426009, + "grad_norm": 0.32386693358421326, + "learning_rate": 8.136096429283271e-05, + "loss": -0.9322, + "step": 19200 + }, + { + "epoch": 43.07174887892376, + "grad_norm": 0.3949166238307953, + "learning_rate": 8.133949631536515e-05, + "loss": -0.9311, + "step": 19210 + }, + { + "epoch": 43.09417040358744, + "grad_norm": 0.2973581850528717, + "learning_rate": 8.131801881778607e-05, + "loss": -0.9293, + "step": 19220 + }, + { + "epoch": 43.11659192825112, + "grad_norm": 0.2462926208972931, + "learning_rate": 8.129653180661978e-05, + "loss": -0.9298, + "step": 19230 + }, + { + "epoch": 43.139013452914796, + "grad_norm": 0.3184420168399811, + "learning_rate": 8.127503528839346e-05, + "loss": -0.9286, + "step": 19240 + }, + { + "epoch": 43.16143497757847, + "grad_norm": 0.3716786503791809, + "learning_rate": 8.125352926963721e-05, + "loss": -0.9281, + "step": 19250 + }, + { + "epoch": 43.18385650224215, + "grad_norm": 0.2744639217853546, + "learning_rate": 8.123201375688395e-05, + "loss": -0.9357, + "step": 19260 + }, + { + "epoch": 43.20627802690583, + "grad_norm": 0.33672037720680237, + "learning_rate": 8.121048875666954e-05, + "loss": -0.9317, + "step": 19270 + }, + { + "epoch": 43.228699551569505, + "grad_norm": 0.2687162160873413, + "learning_rate": 8.118895427553274e-05, + "loss": -0.9341, + "step": 19280 + }, + { + "epoch": 43.25112107623318, + "grad_norm": 0.24715793132781982, + "learning_rate": 8.116741032001511e-05, + "loss": -0.9288, + "step": 19290 + }, + { + "epoch": 43.27354260089686, + "grad_norm": 0.28739386796951294, + "learning_rate": 8.114585689666114e-05, + "loss": -0.9332, + "step": 19300 + }, + { + "epoch": 43.29596412556054, + "grad_norm": 0.3275057077407837, + "learning_rate": 8.112429401201821e-05, + "loss": -0.935, + "step": 19310 + }, + { + "epoch": 43.318385650224215, + "grad_norm": 0.27506208419799805, + "learning_rate": 8.110272167263656e-05, + "loss": -0.9329, + "step": 19320 + }, + { + "epoch": 43.34080717488789, + "grad_norm": 0.29415270686149597, + "learning_rate": 8.108113988506929e-05, + "loss": -0.9311, + "step": 19330 + }, + { + "epoch": 43.36322869955157, + "grad_norm": 0.2255401462316513, + "learning_rate": 8.105954865587235e-05, + "loss": -0.9327, + "step": 19340 + }, + { + "epoch": 43.38565022421525, + "grad_norm": 0.24453198909759521, + "learning_rate": 8.103794799160463e-05, + "loss": -0.9344, + "step": 19350 + }, + { + "epoch": 43.408071748878925, + "grad_norm": 0.314414381980896, + "learning_rate": 8.101633789882781e-05, + "loss": -0.9308, + "step": 19360 + }, + { + "epoch": 43.4304932735426, + "grad_norm": 0.2618437111377716, + "learning_rate": 8.099471838410648e-05, + "loss": -0.9338, + "step": 19370 + }, + { + "epoch": 43.45291479820628, + "grad_norm": 0.2783154845237732, + "learning_rate": 8.097308945400806e-05, + "loss": -0.9364, + "step": 19380 + }, + { + "epoch": 43.47533632286996, + "grad_norm": 0.2837678790092468, + "learning_rate": 8.095145111510288e-05, + "loss": -0.9338, + "step": 19390 + }, + { + "epoch": 43.497757847533634, + "grad_norm": 0.3694780468940735, + "learning_rate": 8.092980337396406e-05, + "loss": -0.9367, + "step": 19400 + }, + { + "epoch": 43.52017937219731, + "grad_norm": 0.1854865998029709, + "learning_rate": 8.090814623716763e-05, + "loss": -0.932, + "step": 19410 + }, + { + "epoch": 43.54260089686099, + "grad_norm": 0.28076085448265076, + "learning_rate": 8.088647971129246e-05, + "loss": -0.9318, + "step": 19420 + }, + { + "epoch": 43.56502242152467, + "grad_norm": 0.23648104071617126, + "learning_rate": 8.086480380292026e-05, + "loss": -0.9332, + "step": 19430 + }, + { + "epoch": 43.587443946188344, + "grad_norm": 0.23824667930603027, + "learning_rate": 8.084311851863562e-05, + "loss": -0.9336, + "step": 19440 + }, + { + "epoch": 43.609865470852014, + "grad_norm": 0.31286174058914185, + "learning_rate": 8.082142386502591e-05, + "loss": -0.9348, + "step": 19450 + }, + { + "epoch": 43.63228699551569, + "grad_norm": 0.28690433502197266, + "learning_rate": 8.079971984868145e-05, + "loss": -0.9308, + "step": 19460 + }, + { + "epoch": 43.65470852017937, + "grad_norm": 0.27319157123565674, + "learning_rate": 8.077800647619532e-05, + "loss": -0.9358, + "step": 19470 + }, + { + "epoch": 43.67713004484305, + "grad_norm": 0.2829558253288269, + "learning_rate": 8.075628375416345e-05, + "loss": -0.9347, + "step": 19480 + }, + { + "epoch": 43.699551569506724, + "grad_norm": 0.3472268581390381, + "learning_rate": 8.073455168918464e-05, + "loss": -0.9329, + "step": 19490 + }, + { + "epoch": 43.7219730941704, + "grad_norm": 0.2622247636318207, + "learning_rate": 8.071281028786055e-05, + "loss": -0.9353, + "step": 19500 + }, + { + "epoch": 43.74439461883408, + "grad_norm": 0.30596843361854553, + "learning_rate": 8.069105955679562e-05, + "loss": -0.9351, + "step": 19510 + }, + { + "epoch": 43.766816143497756, + "grad_norm": 0.2934272885322571, + "learning_rate": 8.066929950259713e-05, + "loss": -0.9329, + "step": 19520 + }, + { + "epoch": 43.789237668161434, + "grad_norm": 0.26862266659736633, + "learning_rate": 8.064753013187522e-05, + "loss": -0.9319, + "step": 19530 + }, + { + "epoch": 43.81165919282511, + "grad_norm": 0.2544781267642975, + "learning_rate": 8.062575145124289e-05, + "loss": -0.9365, + "step": 19540 + }, + { + "epoch": 43.83408071748879, + "grad_norm": 0.28753867745399475, + "learning_rate": 8.060396346731587e-05, + "loss": -0.9331, + "step": 19550 + }, + { + "epoch": 43.856502242152466, + "grad_norm": 0.2954162359237671, + "learning_rate": 8.058216618671281e-05, + "loss": -0.9355, + "step": 19560 + }, + { + "epoch": 43.87892376681614, + "grad_norm": 0.24427980184555054, + "learning_rate": 8.056035961605514e-05, + "loss": -0.9381, + "step": 19570 + }, + { + "epoch": 43.90134529147982, + "grad_norm": 0.22344431281089783, + "learning_rate": 8.05385437619671e-05, + "loss": -0.9268, + "step": 19580 + }, + { + "epoch": 43.9237668161435, + "grad_norm": 0.28415167331695557, + "learning_rate": 8.05167186310758e-05, + "loss": -0.9367, + "step": 19590 + }, + { + "epoch": 43.946188340807176, + "grad_norm": 0.25981977581977844, + "learning_rate": 8.049488423001113e-05, + "loss": -0.9351, + "step": 19600 + }, + { + "epoch": 43.96860986547085, + "grad_norm": 0.24905571341514587, + "learning_rate": 8.047304056540581e-05, + "loss": -0.9327, + "step": 19610 + }, + { + "epoch": 43.99103139013453, + "grad_norm": 0.2725323736667633, + "learning_rate": 8.045118764389534e-05, + "loss": -0.9311, + "step": 19620 + }, + { + "epoch": 44.01345291479821, + "grad_norm": 0.31143108010292053, + "learning_rate": 8.042932547211809e-05, + "loss": -0.9377, + "step": 19630 + }, + { + "epoch": 44.035874439461885, + "grad_norm": 0.24100081622600555, + "learning_rate": 8.04074540567152e-05, + "loss": -0.9357, + "step": 19640 + }, + { + "epoch": 44.05829596412556, + "grad_norm": 0.2326710969209671, + "learning_rate": 8.038557340433063e-05, + "loss": -0.9252, + "step": 19650 + }, + { + "epoch": 44.08071748878924, + "grad_norm": 0.22147676348686218, + "learning_rate": 8.036368352161115e-05, + "loss": -0.9335, + "step": 19660 + }, + { + "epoch": 44.10313901345292, + "grad_norm": 0.23062512278556824, + "learning_rate": 8.034178441520633e-05, + "loss": -0.9304, + "step": 19670 + }, + { + "epoch": 44.125560538116595, + "grad_norm": 0.34962594509124756, + "learning_rate": 8.031987609176852e-05, + "loss": -0.9276, + "step": 19680 + }, + { + "epoch": 44.14798206278027, + "grad_norm": 0.20888669788837433, + "learning_rate": 8.02979585579529e-05, + "loss": -0.9294, + "step": 19690 + }, + { + "epoch": 44.17040358744394, + "grad_norm": 0.37111350893974304, + "learning_rate": 8.027603182041745e-05, + "loss": -0.9238, + "step": 19700 + }, + { + "epoch": 44.19282511210762, + "grad_norm": 0.20616325736045837, + "learning_rate": 8.025409588582292e-05, + "loss": -0.9287, + "step": 19710 + }, + { + "epoch": 44.2152466367713, + "grad_norm": 0.3901020884513855, + "learning_rate": 8.023215076083288e-05, + "loss": -0.933, + "step": 19720 + }, + { + "epoch": 44.237668161434975, + "grad_norm": 0.3827168345451355, + "learning_rate": 8.021019645211367e-05, + "loss": -0.9331, + "step": 19730 + }, + { + "epoch": 44.26008968609865, + "grad_norm": 0.34767869114875793, + "learning_rate": 8.018823296633441e-05, + "loss": -0.9334, + "step": 19740 + }, + { + "epoch": 44.28251121076233, + "grad_norm": 0.22030960023403168, + "learning_rate": 8.016626031016708e-05, + "loss": -0.9305, + "step": 19750 + }, + { + "epoch": 44.30493273542601, + "grad_norm": 0.2541322708129883, + "learning_rate": 8.014427849028636e-05, + "loss": -0.9359, + "step": 19760 + }, + { + "epoch": 44.327354260089685, + "grad_norm": 0.35388725996017456, + "learning_rate": 8.012228751336974e-05, + "loss": -0.9331, + "step": 19770 + }, + { + "epoch": 44.34977578475336, + "grad_norm": 0.31503167748451233, + "learning_rate": 8.01002873860975e-05, + "loss": -0.9261, + "step": 19780 + }, + { + "epoch": 44.37219730941704, + "grad_norm": 0.21673107147216797, + "learning_rate": 8.00782781151527e-05, + "loss": -0.9368, + "step": 19790 + }, + { + "epoch": 44.39461883408072, + "grad_norm": 0.22922196984291077, + "learning_rate": 8.005625970722119e-05, + "loss": -0.9325, + "step": 19800 + }, + { + "epoch": 44.417040358744394, + "grad_norm": 0.32719898223876953, + "learning_rate": 8.003423216899158e-05, + "loss": -0.9367, + "step": 19810 + }, + { + "epoch": 44.43946188340807, + "grad_norm": 0.2807775139808655, + "learning_rate": 8.001219550715522e-05, + "loss": -0.9279, + "step": 19820 + }, + { + "epoch": 44.46188340807175, + "grad_norm": 0.21141266822814941, + "learning_rate": 7.999014972840632e-05, + "loss": -0.928, + "step": 19830 + }, + { + "epoch": 44.48430493273543, + "grad_norm": 0.23529034852981567, + "learning_rate": 7.996809483944174e-05, + "loss": -0.9356, + "step": 19840 + }, + { + "epoch": 44.506726457399104, + "grad_norm": 0.27313554286956787, + "learning_rate": 7.994603084696124e-05, + "loss": -0.9375, + "step": 19850 + }, + { + "epoch": 44.52914798206278, + "grad_norm": 0.19842705130577087, + "learning_rate": 7.992395775766724e-05, + "loss": -0.9392, + "step": 19860 + }, + { + "epoch": 44.55156950672646, + "grad_norm": 0.24465739727020264, + "learning_rate": 7.990187557826497e-05, + "loss": -0.9273, + "step": 19870 + }, + { + "epoch": 44.573991031390136, + "grad_norm": 0.23004570603370667, + "learning_rate": 7.987978431546242e-05, + "loss": -0.9308, + "step": 19880 + }, + { + "epoch": 44.596412556053814, + "grad_norm": 0.24528633058071136, + "learning_rate": 7.985768397597031e-05, + "loss": -0.9315, + "step": 19890 + }, + { + "epoch": 44.61883408071749, + "grad_norm": 0.34733515977859497, + "learning_rate": 7.983557456650216e-05, + "loss": -0.9279, + "step": 19900 + }, + { + "epoch": 44.64125560538117, + "grad_norm": 0.2939070463180542, + "learning_rate": 7.981345609377422e-05, + "loss": -0.9327, + "step": 19910 + }, + { + "epoch": 44.663677130044846, + "grad_norm": 0.33379632234573364, + "learning_rate": 7.97913285645055e-05, + "loss": -0.9355, + "step": 19920 + }, + { + "epoch": 44.68609865470852, + "grad_norm": 0.48819705843925476, + "learning_rate": 7.976919198541776e-05, + "loss": -0.9365, + "step": 19930 + }, + { + "epoch": 44.7085201793722, + "grad_norm": 0.35564056038856506, + "learning_rate": 7.974704636323548e-05, + "loss": -0.9327, + "step": 19940 + }, + { + "epoch": 44.73094170403587, + "grad_norm": 0.2515212893486023, + "learning_rate": 7.972489170468597e-05, + "loss": -0.9386, + "step": 19950 + }, + { + "epoch": 44.75336322869955, + "grad_norm": 0.22536852955818176, + "learning_rate": 7.970272801649918e-05, + "loss": -0.9303, + "step": 19960 + }, + { + "epoch": 44.775784753363226, + "grad_norm": 0.3715771734714508, + "learning_rate": 7.96805553054079e-05, + "loss": -0.9318, + "step": 19970 + }, + { + "epoch": 44.7982062780269, + "grad_norm": 0.3229493200778961, + "learning_rate": 7.965837357814756e-05, + "loss": -0.9332, + "step": 19980 + }, + { + "epoch": 44.82062780269058, + "grad_norm": 0.1629324108362198, + "learning_rate": 7.963618284145643e-05, + "loss": -0.9303, + "step": 19990 + }, + { + "epoch": 44.84304932735426, + "grad_norm": 0.22044049203395844, + "learning_rate": 7.961398310207544e-05, + "loss": -0.9336, + "step": 20000 + }, + { + "epoch": 44.865470852017935, + "grad_norm": 0.2417132407426834, + "learning_rate": 7.95917743667483e-05, + "loss": -0.9315, + "step": 20010 + }, + { + "epoch": 44.88789237668161, + "grad_norm": 0.29833245277404785, + "learning_rate": 7.956955664222144e-05, + "loss": -0.9298, + "step": 20020 + }, + { + "epoch": 44.91031390134529, + "grad_norm": 0.24392957985401154, + "learning_rate": 7.954732993524399e-05, + "loss": -0.9356, + "step": 20030 + }, + { + "epoch": 44.93273542600897, + "grad_norm": 0.22547650337219238, + "learning_rate": 7.952509425256786e-05, + "loss": -0.9348, + "step": 20040 + }, + { + "epoch": 44.955156950672645, + "grad_norm": 0.319812536239624, + "learning_rate": 7.950284960094767e-05, + "loss": -0.9317, + "step": 20050 + }, + { + "epoch": 44.97757847533632, + "grad_norm": 0.3204957842826843, + "learning_rate": 7.948059598714076e-05, + "loss": -0.9338, + "step": 20060 + }, + { + "epoch": 45.0, + "grad_norm": 0.3752472996711731, + "learning_rate": 7.945833341790717e-05, + "loss": -0.9281, + "step": 20070 + }, + { + "epoch": 45.02242152466368, + "grad_norm": 0.36203962564468384, + "learning_rate": 7.94360619000097e-05, + "loss": -0.9288, + "step": 20080 + }, + { + "epoch": 45.044843049327355, + "grad_norm": 0.2641749978065491, + "learning_rate": 7.941378144021381e-05, + "loss": -0.9362, + "step": 20090 + }, + { + "epoch": 45.06726457399103, + "grad_norm": 0.2354661524295807, + "learning_rate": 7.939149204528777e-05, + "loss": -0.9376, + "step": 20100 + }, + { + "epoch": 45.08968609865471, + "grad_norm": 0.3482816517353058, + "learning_rate": 7.936919372200246e-05, + "loss": -0.9303, + "step": 20110 + }, + { + "epoch": 45.11210762331839, + "grad_norm": 0.2899804413318634, + "learning_rate": 7.934688647713158e-05, + "loss": -0.9337, + "step": 20120 + }, + { + "epoch": 45.134529147982065, + "grad_norm": 0.2840135395526886, + "learning_rate": 7.932457031745143e-05, + "loss": -0.9349, + "step": 20130 + }, + { + "epoch": 45.15695067264574, + "grad_norm": 0.21659190952777863, + "learning_rate": 7.930224524974108e-05, + "loss": -0.9319, + "step": 20140 + }, + { + "epoch": 45.17937219730942, + "grad_norm": 0.27971357107162476, + "learning_rate": 7.927991128078232e-05, + "loss": -0.9346, + "step": 20150 + }, + { + "epoch": 45.2017937219731, + "grad_norm": 0.2399396151304245, + "learning_rate": 7.925756841735958e-05, + "loss": -0.9356, + "step": 20160 + }, + { + "epoch": 45.224215246636774, + "grad_norm": 0.22645621001720428, + "learning_rate": 7.923521666626008e-05, + "loss": -0.9342, + "step": 20170 + }, + { + "epoch": 45.24663677130045, + "grad_norm": 0.3463071584701538, + "learning_rate": 7.921285603427366e-05, + "loss": -0.9375, + "step": 20180 + }, + { + "epoch": 45.26905829596413, + "grad_norm": 0.26510995626449585, + "learning_rate": 7.91904865281929e-05, + "loss": -0.9345, + "step": 20190 + }, + { + "epoch": 45.2914798206278, + "grad_norm": 0.43202126026153564, + "learning_rate": 7.916810815481307e-05, + "loss": -0.9341, + "step": 20200 + }, + { + "epoch": 45.31390134529148, + "grad_norm": 0.34554484486579895, + "learning_rate": 7.914572092093211e-05, + "loss": -0.9349, + "step": 20210 + }, + { + "epoch": 45.336322869955154, + "grad_norm": 0.19640249013900757, + "learning_rate": 7.912332483335068e-05, + "loss": -0.9333, + "step": 20220 + }, + { + "epoch": 45.35874439461883, + "grad_norm": 0.23023377358913422, + "learning_rate": 7.910091989887213e-05, + "loss": -0.9344, + "step": 20230 + }, + { + "epoch": 45.38116591928251, + "grad_norm": 0.27124691009521484, + "learning_rate": 7.907850612430248e-05, + "loss": -0.9355, + "step": 20240 + }, + { + "epoch": 45.403587443946186, + "grad_norm": 0.23235094547271729, + "learning_rate": 7.905608351645044e-05, + "loss": -0.9371, + "step": 20250 + }, + { + "epoch": 45.426008968609864, + "grad_norm": 0.3085789084434509, + "learning_rate": 7.90336520821274e-05, + "loss": -0.9332, + "step": 20260 + }, + { + "epoch": 45.44843049327354, + "grad_norm": 0.2185596376657486, + "learning_rate": 7.901121182814746e-05, + "loss": -0.9393, + "step": 20270 + }, + { + "epoch": 45.47085201793722, + "grad_norm": 0.16081081330776215, + "learning_rate": 7.898876276132736e-05, + "loss": -0.9378, + "step": 20280 + }, + { + "epoch": 45.493273542600896, + "grad_norm": 0.2922610640525818, + "learning_rate": 7.896630488848654e-05, + "loss": -0.9376, + "step": 20290 + }, + { + "epoch": 45.51569506726457, + "grad_norm": 0.31918254494667053, + "learning_rate": 7.89438382164471e-05, + "loss": -0.9386, + "step": 20300 + }, + { + "epoch": 45.53811659192825, + "grad_norm": 0.32244032621383667, + "learning_rate": 7.892136275203383e-05, + "loss": -0.9393, + "step": 20310 + }, + { + "epoch": 45.56053811659193, + "grad_norm": 0.34247729182243347, + "learning_rate": 7.889887850207418e-05, + "loss": -0.9354, + "step": 20320 + }, + { + "epoch": 45.582959641255606, + "grad_norm": 0.2775270342826843, + "learning_rate": 7.887638547339827e-05, + "loss": -0.9324, + "step": 20330 + }, + { + "epoch": 45.60538116591928, + "grad_norm": 0.23122955858707428, + "learning_rate": 7.885388367283891e-05, + "loss": -0.9383, + "step": 20340 + }, + { + "epoch": 45.62780269058296, + "grad_norm": 0.2635442316532135, + "learning_rate": 7.88313731072315e-05, + "loss": -0.9378, + "step": 20350 + }, + { + "epoch": 45.65022421524664, + "grad_norm": 0.3431180417537689, + "learning_rate": 7.88088537834142e-05, + "loss": -0.9352, + "step": 20360 + }, + { + "epoch": 45.672645739910315, + "grad_norm": 0.282839298248291, + "learning_rate": 7.878632570822778e-05, + "loss": -0.9361, + "step": 20370 + }, + { + "epoch": 45.69506726457399, + "grad_norm": 0.3348165452480316, + "learning_rate": 7.876378888851567e-05, + "loss": -0.9352, + "step": 20380 + }, + { + "epoch": 45.71748878923767, + "grad_norm": 0.31639423966407776, + "learning_rate": 7.874124333112396e-05, + "loss": -0.9338, + "step": 20390 + }, + { + "epoch": 45.73991031390135, + "grad_norm": 0.2747762203216553, + "learning_rate": 7.871868904290138e-05, + "loss": -0.9374, + "step": 20400 + }, + { + "epoch": 45.762331838565025, + "grad_norm": 0.2805693447589874, + "learning_rate": 7.869612603069935e-05, + "loss": -0.9312, + "step": 20410 + }, + { + "epoch": 45.7847533632287, + "grad_norm": 0.2687333822250366, + "learning_rate": 7.867355430137192e-05, + "loss": -0.9363, + "step": 20420 + }, + { + "epoch": 45.80717488789238, + "grad_norm": 0.26327115297317505, + "learning_rate": 7.865097386177577e-05, + "loss": -0.94, + "step": 20430 + }, + { + "epoch": 45.82959641255606, + "grad_norm": 0.31567347049713135, + "learning_rate": 7.862838471877023e-05, + "loss": -0.9295, + "step": 20440 + }, + { + "epoch": 45.85201793721973, + "grad_norm": 0.27014854550361633, + "learning_rate": 7.860578687921731e-05, + "loss": -0.9295, + "step": 20450 + }, + { + "epoch": 45.874439461883405, + "grad_norm": 0.2828081548213959, + "learning_rate": 7.858318034998164e-05, + "loss": -0.9366, + "step": 20460 + }, + { + "epoch": 45.89686098654708, + "grad_norm": 0.24354179203510284, + "learning_rate": 7.856056513793046e-05, + "loss": -0.9372, + "step": 20470 + }, + { + "epoch": 45.91928251121076, + "grad_norm": 0.22929686307907104, + "learning_rate": 7.85379412499337e-05, + "loss": -0.9371, + "step": 20480 + }, + { + "epoch": 45.94170403587444, + "grad_norm": 0.21123680472373962, + "learning_rate": 7.851530869286389e-05, + "loss": -0.9333, + "step": 20490 + }, + { + "epoch": 45.964125560538115, + "grad_norm": 0.3898676335811615, + "learning_rate": 7.849266747359619e-05, + "loss": -0.9378, + "step": 20500 + }, + { + "epoch": 45.98654708520179, + "grad_norm": 0.27574437856674194, + "learning_rate": 7.847001759900843e-05, + "loss": -0.94, + "step": 20510 + }, + { + "epoch": 46.00896860986547, + "grad_norm": 0.2676864564418793, + "learning_rate": 7.844735907598102e-05, + "loss": -0.9309, + "step": 20520 + }, + { + "epoch": 46.03139013452915, + "grad_norm": 0.3357073664665222, + "learning_rate": 7.842469191139703e-05, + "loss": -0.936, + "step": 20530 + }, + { + "epoch": 46.053811659192824, + "grad_norm": 0.27891701459884644, + "learning_rate": 7.840201611214215e-05, + "loss": -0.938, + "step": 20540 + }, + { + "epoch": 46.0762331838565, + "grad_norm": 0.22234949469566345, + "learning_rate": 7.837933168510469e-05, + "loss": -0.9282, + "step": 20550 + }, + { + "epoch": 46.09865470852018, + "grad_norm": 0.3521080017089844, + "learning_rate": 7.835663863717559e-05, + "loss": -0.9298, + "step": 20560 + }, + { + "epoch": 46.12107623318386, + "grad_norm": 0.3055013418197632, + "learning_rate": 7.833393697524838e-05, + "loss": -0.9327, + "step": 20570 + }, + { + "epoch": 46.143497757847534, + "grad_norm": 0.30209484696388245, + "learning_rate": 7.831122670621922e-05, + "loss": -0.9359, + "step": 20580 + }, + { + "epoch": 46.16591928251121, + "grad_norm": 0.1721743494272232, + "learning_rate": 7.82885078369869e-05, + "loss": -0.9374, + "step": 20590 + }, + { + "epoch": 46.18834080717489, + "grad_norm": 0.2935103476047516, + "learning_rate": 7.826578037445283e-05, + "loss": -0.9416, + "step": 20600 + }, + { + "epoch": 46.210762331838566, + "grad_norm": 0.18906311690807343, + "learning_rate": 7.824304432552097e-05, + "loss": -0.9406, + "step": 20610 + }, + { + "epoch": 46.233183856502244, + "grad_norm": 0.2967517375946045, + "learning_rate": 7.822029969709798e-05, + "loss": -0.9332, + "step": 20620 + }, + { + "epoch": 46.25560538116592, + "grad_norm": 0.20445911586284637, + "learning_rate": 7.819754649609306e-05, + "loss": -0.9303, + "step": 20630 + }, + { + "epoch": 46.2780269058296, + "grad_norm": 0.25800207257270813, + "learning_rate": 7.817478472941802e-05, + "loss": -0.9356, + "step": 20640 + }, + { + "epoch": 46.300448430493276, + "grad_norm": 0.2870939075946808, + "learning_rate": 7.815201440398727e-05, + "loss": -0.9418, + "step": 20650 + }, + { + "epoch": 46.32286995515695, + "grad_norm": 0.23108044266700745, + "learning_rate": 7.812923552671789e-05, + "loss": -0.9362, + "step": 20660 + }, + { + "epoch": 46.34529147982063, + "grad_norm": 0.2828485369682312, + "learning_rate": 7.810644810452945e-05, + "loss": -0.9392, + "step": 20670 + }, + { + "epoch": 46.36771300448431, + "grad_norm": 0.25520944595336914, + "learning_rate": 7.808365214434417e-05, + "loss": -0.9384, + "step": 20680 + }, + { + "epoch": 46.390134529147986, + "grad_norm": 0.1865137368440628, + "learning_rate": 7.80608476530869e-05, + "loss": -0.9367, + "step": 20690 + }, + { + "epoch": 46.412556053811656, + "grad_norm": 0.3192405700683594, + "learning_rate": 7.8038034637685e-05, + "loss": -0.9399, + "step": 20700 + }, + { + "epoch": 46.43497757847533, + "grad_norm": 0.2697412371635437, + "learning_rate": 7.801521310506848e-05, + "loss": -0.9379, + "step": 20710 + }, + { + "epoch": 46.45739910313901, + "grad_norm": 0.3795578181743622, + "learning_rate": 7.799238306216994e-05, + "loss": -0.9429, + "step": 20720 + }, + { + "epoch": 46.47982062780269, + "grad_norm": 0.3042336106300354, + "learning_rate": 7.796954451592448e-05, + "loss": -0.9333, + "step": 20730 + }, + { + "epoch": 46.502242152466366, + "grad_norm": 0.32174721360206604, + "learning_rate": 7.794669747326992e-05, + "loss": -0.9345, + "step": 20740 + }, + { + "epoch": 46.52466367713004, + "grad_norm": 0.2633396089076996, + "learning_rate": 7.792384194114654e-05, + "loss": -0.9392, + "step": 20750 + }, + { + "epoch": 46.54708520179372, + "grad_norm": 0.38541552424430847, + "learning_rate": 7.790097792649729e-05, + "loss": -0.9338, + "step": 20760 + }, + { + "epoch": 46.5695067264574, + "grad_norm": 0.2628447115421295, + "learning_rate": 7.787810543626762e-05, + "loss": -0.9387, + "step": 20770 + }, + { + "epoch": 46.591928251121075, + "grad_norm": 0.24650421738624573, + "learning_rate": 7.785522447740558e-05, + "loss": -0.9363, + "step": 20780 + }, + { + "epoch": 46.61434977578475, + "grad_norm": 0.22980087995529175, + "learning_rate": 7.783233505686182e-05, + "loss": -0.9295, + "step": 20790 + }, + { + "epoch": 46.63677130044843, + "grad_norm": 0.2945285737514496, + "learning_rate": 7.780943718158955e-05, + "loss": -0.9395, + "step": 20800 + }, + { + "epoch": 46.65919282511211, + "grad_norm": 0.24277140200138092, + "learning_rate": 7.778653085854453e-05, + "loss": -0.9355, + "step": 20810 + }, + { + "epoch": 46.681614349775785, + "grad_norm": 0.25381195545196533, + "learning_rate": 7.77636160946851e-05, + "loss": -0.9358, + "step": 20820 + }, + { + "epoch": 46.70403587443946, + "grad_norm": 0.2702644467353821, + "learning_rate": 7.774069289697215e-05, + "loss": -0.932, + "step": 20830 + }, + { + "epoch": 46.72645739910314, + "grad_norm": 0.25781944394111633, + "learning_rate": 7.771776127236913e-05, + "loss": -0.9375, + "step": 20840 + }, + { + "epoch": 46.74887892376682, + "grad_norm": 0.2233569473028183, + "learning_rate": 7.769482122784212e-05, + "loss": -0.9387, + "step": 20850 + }, + { + "epoch": 46.771300448430495, + "grad_norm": 0.26149532198905945, + "learning_rate": 7.767187277035963e-05, + "loss": -0.9338, + "step": 20860 + }, + { + "epoch": 46.79372197309417, + "grad_norm": 0.22419068217277527, + "learning_rate": 7.764891590689285e-05, + "loss": -0.9331, + "step": 20870 + }, + { + "epoch": 46.81614349775785, + "grad_norm": 0.21310129761695862, + "learning_rate": 7.762595064441542e-05, + "loss": -0.9356, + "step": 20880 + }, + { + "epoch": 46.83856502242153, + "grad_norm": 0.22943870723247528, + "learning_rate": 7.760297698990362e-05, + "loss": -0.9376, + "step": 20890 + }, + { + "epoch": 46.860986547085204, + "grad_norm": 0.23826384544372559, + "learning_rate": 7.757999495033623e-05, + "loss": -0.9317, + "step": 20900 + }, + { + "epoch": 46.88340807174888, + "grad_norm": 0.23009692132472992, + "learning_rate": 7.755700453269456e-05, + "loss": -0.9406, + "step": 20910 + }, + { + "epoch": 46.90582959641256, + "grad_norm": 0.28373512625694275, + "learning_rate": 7.753400574396254e-05, + "loss": -0.9329, + "step": 20920 + }, + { + "epoch": 46.92825112107624, + "grad_norm": 0.2373715341091156, + "learning_rate": 7.751099859112655e-05, + "loss": -0.9322, + "step": 20930 + }, + { + "epoch": 46.95067264573991, + "grad_norm": 0.2132851630449295, + "learning_rate": 7.748798308117557e-05, + "loss": -0.9395, + "step": 20940 + }, + { + "epoch": 46.973094170403584, + "grad_norm": 0.2117188423871994, + "learning_rate": 7.746495922110112e-05, + "loss": -0.9379, + "step": 20950 + }, + { + "epoch": 46.99551569506726, + "grad_norm": 0.2731548845767975, + "learning_rate": 7.744192701789723e-05, + "loss": -0.9368, + "step": 20960 + }, + { + "epoch": 47.01793721973094, + "grad_norm": 0.3646279573440552, + "learning_rate": 7.741888647856046e-05, + "loss": -0.939, + "step": 20970 + }, + { + "epoch": 47.04035874439462, + "grad_norm": 0.21889740228652954, + "learning_rate": 7.739583761008994e-05, + "loss": -0.9373, + "step": 20980 + }, + { + "epoch": 47.062780269058294, + "grad_norm": 0.3350118398666382, + "learning_rate": 7.73727804194873e-05, + "loss": -0.9385, + "step": 20990 + }, + { + "epoch": 47.08520179372197, + "grad_norm": 0.27717018127441406, + "learning_rate": 7.734971491375671e-05, + "loss": -0.9384, + "step": 21000 + }, + { + "epoch": 47.10762331838565, + "grad_norm": 0.34541651606559753, + "learning_rate": 7.732664109990485e-05, + "loss": -0.937, + "step": 21010 + }, + { + "epoch": 47.130044843049326, + "grad_norm": 0.3073355257511139, + "learning_rate": 7.730355898494095e-05, + "loss": -0.9287, + "step": 21020 + }, + { + "epoch": 47.152466367713004, + "grad_norm": 0.22128801047801971, + "learning_rate": 7.728046857587673e-05, + "loss": -0.9286, + "step": 21030 + }, + { + "epoch": 47.17488789237668, + "grad_norm": 0.31301331520080566, + "learning_rate": 7.725736987972647e-05, + "loss": -0.9363, + "step": 21040 + }, + { + "epoch": 47.19730941704036, + "grad_norm": 0.5094095468521118, + "learning_rate": 7.723426290350691e-05, + "loss": -0.9326, + "step": 21050 + }, + { + "epoch": 47.219730941704036, + "grad_norm": 0.23590019345283508, + "learning_rate": 7.721114765423736e-05, + "loss": -0.9366, + "step": 21060 + }, + { + "epoch": 47.24215246636771, + "grad_norm": 0.17598329484462738, + "learning_rate": 7.718802413893963e-05, + "loss": -0.9363, + "step": 21070 + }, + { + "epoch": 47.26457399103139, + "grad_norm": 0.37051141262054443, + "learning_rate": 7.716489236463802e-05, + "loss": -0.9351, + "step": 21080 + }, + { + "epoch": 47.28699551569507, + "grad_norm": 0.3188111186027527, + "learning_rate": 7.714175233835936e-05, + "loss": -0.9395, + "step": 21090 + }, + { + "epoch": 47.309417040358746, + "grad_norm": 0.19301725924015045, + "learning_rate": 7.711860406713299e-05, + "loss": -0.9373, + "step": 21100 + }, + { + "epoch": 47.33183856502242, + "grad_norm": 0.30296996235847473, + "learning_rate": 7.70954475579907e-05, + "loss": -0.9371, + "step": 21110 + }, + { + "epoch": 47.3542600896861, + "grad_norm": 0.3495195209980011, + "learning_rate": 7.707228281796688e-05, + "loss": -0.9337, + "step": 21120 + }, + { + "epoch": 47.37668161434978, + "grad_norm": 0.40225982666015625, + "learning_rate": 7.704910985409833e-05, + "loss": -0.9386, + "step": 21130 + }, + { + "epoch": 47.399103139013455, + "grad_norm": 0.19836615025997162, + "learning_rate": 7.702592867342439e-05, + "loss": -0.9362, + "step": 21140 + }, + { + "epoch": 47.42152466367713, + "grad_norm": 0.32979291677474976, + "learning_rate": 7.700273928298691e-05, + "loss": -0.9376, + "step": 21150 + }, + { + "epoch": 47.44394618834081, + "grad_norm": 0.34246379137039185, + "learning_rate": 7.697954168983021e-05, + "loss": -0.9343, + "step": 21160 + }, + { + "epoch": 47.46636771300449, + "grad_norm": 0.1866230070590973, + "learning_rate": 7.695633590100109e-05, + "loss": -0.937, + "step": 21170 + }, + { + "epoch": 47.488789237668165, + "grad_norm": 0.3196810185909271, + "learning_rate": 7.693312192354886e-05, + "loss": -0.9378, + "step": 21180 + }, + { + "epoch": 47.511210762331835, + "grad_norm": 0.2648060917854309, + "learning_rate": 7.690989976452532e-05, + "loss": -0.9373, + "step": 21190 + }, + { + "epoch": 47.53363228699551, + "grad_norm": 0.32592326402664185, + "learning_rate": 7.688666943098475e-05, + "loss": -0.9364, + "step": 21200 + }, + { + "epoch": 47.55605381165919, + "grad_norm": 0.20041534304618835, + "learning_rate": 7.686343092998389e-05, + "loss": -0.9388, + "step": 21210 + }, + { + "epoch": 47.57847533632287, + "grad_norm": 0.2724946141242981, + "learning_rate": 7.684018426858202e-05, + "loss": -0.9354, + "step": 21220 + }, + { + "epoch": 47.600896860986545, + "grad_norm": 0.25608280301094055, + "learning_rate": 7.681692945384084e-05, + "loss": -0.9373, + "step": 21230 + }, + { + "epoch": 47.62331838565022, + "grad_norm": 0.2919933497905731, + "learning_rate": 7.679366649282456e-05, + "loss": -0.9365, + "step": 21240 + }, + { + "epoch": 47.6457399103139, + "grad_norm": 0.26142439246177673, + "learning_rate": 7.677039539259983e-05, + "loss": -0.9379, + "step": 21250 + }, + { + "epoch": 47.66816143497758, + "grad_norm": 0.27577638626098633, + "learning_rate": 7.674711616023581e-05, + "loss": -0.9359, + "step": 21260 + }, + { + "epoch": 47.690582959641254, + "grad_norm": 0.21723484992980957, + "learning_rate": 7.672382880280413e-05, + "loss": -0.9391, + "step": 21270 + }, + { + "epoch": 47.71300448430493, + "grad_norm": 0.1991993486881256, + "learning_rate": 7.670053332737885e-05, + "loss": -0.9392, + "step": 21280 + }, + { + "epoch": 47.73542600896861, + "grad_norm": 0.26473426818847656, + "learning_rate": 7.667722974103654e-05, + "loss": -0.9417, + "step": 21290 + }, + { + "epoch": 47.75784753363229, + "grad_norm": 0.2851455509662628, + "learning_rate": 7.66539180508562e-05, + "loss": -0.9377, + "step": 21300 + }, + { + "epoch": 47.780269058295964, + "grad_norm": 0.22871793806552887, + "learning_rate": 7.663059826391932e-05, + "loss": -0.9343, + "step": 21310 + }, + { + "epoch": 47.80269058295964, + "grad_norm": 0.17683139443397522, + "learning_rate": 7.660727038730981e-05, + "loss": -0.9406, + "step": 21320 + }, + { + "epoch": 47.82511210762332, + "grad_norm": 0.19816121459007263, + "learning_rate": 7.65839344281141e-05, + "loss": -0.9375, + "step": 21330 + }, + { + "epoch": 47.847533632286996, + "grad_norm": 0.25815919041633606, + "learning_rate": 7.656059039342101e-05, + "loss": -0.939, + "step": 21340 + }, + { + "epoch": 47.869955156950674, + "grad_norm": 0.17462258040905, + "learning_rate": 7.653723829032187e-05, + "loss": -0.9365, + "step": 21350 + }, + { + "epoch": 47.89237668161435, + "grad_norm": 0.2994996905326843, + "learning_rate": 7.65138781259104e-05, + "loss": -0.9302, + "step": 21360 + }, + { + "epoch": 47.91479820627803, + "grad_norm": 0.20940260589122772, + "learning_rate": 7.649050990728279e-05, + "loss": -0.9417, + "step": 21370 + }, + { + "epoch": 47.937219730941706, + "grad_norm": 0.35624292492866516, + "learning_rate": 7.646713364153774e-05, + "loss": -0.9364, + "step": 21380 + }, + { + "epoch": 47.95964125560538, + "grad_norm": 0.21093028783798218, + "learning_rate": 7.64437493357763e-05, + "loss": -0.9377, + "step": 21390 + }, + { + "epoch": 47.98206278026906, + "grad_norm": 0.19265852868556976, + "learning_rate": 7.642035699710202e-05, + "loss": -0.9372, + "step": 21400 + }, + { + "epoch": 48.00448430493274, + "grad_norm": 0.3199368119239807, + "learning_rate": 7.639695663262089e-05, + "loss": -0.938, + "step": 21410 + }, + { + "epoch": 48.026905829596416, + "grad_norm": 0.24463263154029846, + "learning_rate": 7.637354824944128e-05, + "loss": -0.928, + "step": 21420 + }, + { + "epoch": 48.04932735426009, + "grad_norm": 0.2680254280567169, + "learning_rate": 7.635013185467408e-05, + "loss": -0.9408, + "step": 21430 + }, + { + "epoch": 48.07174887892376, + "grad_norm": 0.22952525317668915, + "learning_rate": 7.632670745543256e-05, + "loss": -0.9409, + "step": 21440 + }, + { + "epoch": 48.09417040358744, + "grad_norm": 0.3440549671649933, + "learning_rate": 7.630327505883242e-05, + "loss": -0.9355, + "step": 21450 + }, + { + "epoch": 48.11659192825112, + "grad_norm": 0.2973431348800659, + "learning_rate": 7.627983467199182e-05, + "loss": -0.938, + "step": 21460 + }, + { + "epoch": 48.139013452914796, + "grad_norm": 0.26159799098968506, + "learning_rate": 7.625638630203132e-05, + "loss": -0.9334, + "step": 21470 + }, + { + "epoch": 48.16143497757847, + "grad_norm": 0.2207626849412918, + "learning_rate": 7.623292995607394e-05, + "loss": -0.9388, + "step": 21480 + }, + { + "epoch": 48.18385650224215, + "grad_norm": 0.25107353925704956, + "learning_rate": 7.620946564124507e-05, + "loss": -0.9374, + "step": 21490 + }, + { + "epoch": 48.20627802690583, + "grad_norm": 0.2670421302318573, + "learning_rate": 7.618599336467256e-05, + "loss": -0.939, + "step": 21500 + }, + { + "epoch": 48.228699551569505, + "grad_norm": 0.22228485345840454, + "learning_rate": 7.616251313348666e-05, + "loss": -0.9388, + "step": 21510 + }, + { + "epoch": 48.25112107623318, + "grad_norm": 0.2223612368106842, + "learning_rate": 7.613902495482005e-05, + "loss": -0.9353, + "step": 21520 + }, + { + "epoch": 48.27354260089686, + "grad_norm": 0.3199104368686676, + "learning_rate": 7.611552883580784e-05, + "loss": -0.9388, + "step": 21530 + }, + { + "epoch": 48.29596412556054, + "grad_norm": 0.24367988109588623, + "learning_rate": 7.609202478358748e-05, + "loss": -0.939, + "step": 21540 + }, + { + "epoch": 48.318385650224215, + "grad_norm": 0.31213662028312683, + "learning_rate": 7.606851280529895e-05, + "loss": -0.9391, + "step": 21550 + }, + { + "epoch": 48.34080717488789, + "grad_norm": 0.24046961963176727, + "learning_rate": 7.604499290808449e-05, + "loss": -0.9389, + "step": 21560 + }, + { + "epoch": 48.36322869955157, + "grad_norm": 0.2807801365852356, + "learning_rate": 7.602146509908888e-05, + "loss": -0.9391, + "step": 21570 + }, + { + "epoch": 48.38565022421525, + "grad_norm": 0.18220524489879608, + "learning_rate": 7.599792938545921e-05, + "loss": -0.934, + "step": 21580 + }, + { + "epoch": 48.408071748878925, + "grad_norm": 0.18905742466449738, + "learning_rate": 7.597438577434506e-05, + "loss": -0.937, + "step": 21590 + }, + { + "epoch": 48.4304932735426, + "grad_norm": 0.17849323153495789, + "learning_rate": 7.595083427289831e-05, + "loss": -0.94, + "step": 21600 + }, + { + "epoch": 48.45291479820628, + "grad_norm": 0.19818632304668427, + "learning_rate": 7.59272748882733e-05, + "loss": -0.9427, + "step": 21610 + }, + { + "epoch": 48.47533632286996, + "grad_norm": 0.22942756116390228, + "learning_rate": 7.590370762762675e-05, + "loss": -0.9364, + "step": 21620 + }, + { + "epoch": 48.497757847533634, + "grad_norm": 0.31255486607551575, + "learning_rate": 7.588013249811777e-05, + "loss": -0.9331, + "step": 21630 + }, + { + "epoch": 48.52017937219731, + "grad_norm": 0.30023205280303955, + "learning_rate": 7.585654950690786e-05, + "loss": -0.9377, + "step": 21640 + }, + { + "epoch": 48.54260089686099, + "grad_norm": 0.4237698018550873, + "learning_rate": 7.583295866116091e-05, + "loss": -0.9368, + "step": 21650 + }, + { + "epoch": 48.56502242152467, + "grad_norm": 0.19579251110553741, + "learning_rate": 7.580935996804321e-05, + "loss": -0.9361, + "step": 21660 + }, + { + "epoch": 48.587443946188344, + "grad_norm": 0.26921379566192627, + "learning_rate": 7.57857534347234e-05, + "loss": -0.9349, + "step": 21670 + }, + { + "epoch": 48.609865470852014, + "grad_norm": 0.18352797627449036, + "learning_rate": 7.576213906837254e-05, + "loss": -0.9382, + "step": 21680 + }, + { + "epoch": 48.63228699551569, + "grad_norm": 0.22352905571460724, + "learning_rate": 7.573851687616403e-05, + "loss": -0.9356, + "step": 21690 + }, + { + "epoch": 48.65470852017937, + "grad_norm": 0.17491941154003143, + "learning_rate": 7.571488686527368e-05, + "loss": -0.943, + "step": 21700 + }, + { + "epoch": 48.67713004484305, + "grad_norm": 0.2062711864709854, + "learning_rate": 7.569124904287968e-05, + "loss": -0.9402, + "step": 21710 + }, + { + "epoch": 48.699551569506724, + "grad_norm": 0.28589868545532227, + "learning_rate": 7.566760341616254e-05, + "loss": -0.9357, + "step": 21720 + }, + { + "epoch": 48.7219730941704, + "grad_norm": 0.2902723252773285, + "learning_rate": 7.564394999230519e-05, + "loss": -0.9388, + "step": 21730 + }, + { + "epoch": 48.74439461883408, + "grad_norm": 0.30008137226104736, + "learning_rate": 7.562028877849294e-05, + "loss": -0.9425, + "step": 21740 + }, + { + "epoch": 48.766816143497756, + "grad_norm": 0.26709648966789246, + "learning_rate": 7.559661978191341e-05, + "loss": -0.9384, + "step": 21750 + }, + { + "epoch": 48.789237668161434, + "grad_norm": 0.27992236614227295, + "learning_rate": 7.557294300975664e-05, + "loss": -0.9403, + "step": 21760 + }, + { + "epoch": 48.81165919282511, + "grad_norm": 0.18794263899326324, + "learning_rate": 7.554925846921499e-05, + "loss": -0.9403, + "step": 21770 + }, + { + "epoch": 48.83408071748879, + "grad_norm": 0.2853770852088928, + "learning_rate": 7.552556616748321e-05, + "loss": -0.9371, + "step": 21780 + }, + { + "epoch": 48.856502242152466, + "grad_norm": 0.32886821031570435, + "learning_rate": 7.550186611175838e-05, + "loss": -0.9326, + "step": 21790 + }, + { + "epoch": 48.87892376681614, + "grad_norm": 0.2104085385799408, + "learning_rate": 7.547815830923998e-05, + "loss": -0.9411, + "step": 21800 + }, + { + "epoch": 48.90134529147982, + "grad_norm": 0.2678171694278717, + "learning_rate": 7.54544427671298e-05, + "loss": -0.9367, + "step": 21810 + }, + { + "epoch": 48.9237668161435, + "grad_norm": 0.36303946375846863, + "learning_rate": 7.543071949263198e-05, + "loss": -0.9416, + "step": 21820 + }, + { + "epoch": 48.946188340807176, + "grad_norm": 0.21286505460739136, + "learning_rate": 7.540698849295305e-05, + "loss": -0.9399, + "step": 21830 + }, + { + "epoch": 48.96860986547085, + "grad_norm": 0.16259482502937317, + "learning_rate": 7.538324977530183e-05, + "loss": -0.9395, + "step": 21840 + }, + { + "epoch": 48.99103139013453, + "grad_norm": 0.33883392810821533, + "learning_rate": 7.535950334688955e-05, + "loss": -0.9328, + "step": 21850 + }, + { + "epoch": 49.01345291479821, + "grad_norm": 0.24114543199539185, + "learning_rate": 7.533574921492972e-05, + "loss": -0.9399, + "step": 21860 + }, + { + "epoch": 49.035874439461885, + "grad_norm": 0.2184928059577942, + "learning_rate": 7.531198738663824e-05, + "loss": -0.9426, + "step": 21870 + }, + { + "epoch": 49.05829596412556, + "grad_norm": 0.27104318141937256, + "learning_rate": 7.528821786923333e-05, + "loss": -0.9414, + "step": 21880 + }, + { + "epoch": 49.08071748878924, + "grad_norm": 0.28437983989715576, + "learning_rate": 7.52644406699355e-05, + "loss": -0.9373, + "step": 21890 + }, + { + "epoch": 49.10313901345292, + "grad_norm": 0.27833008766174316, + "learning_rate": 7.524065579596766e-05, + "loss": -0.9375, + "step": 21900 + }, + { + "epoch": 49.125560538116595, + "grad_norm": 0.25612083077430725, + "learning_rate": 7.521686325455506e-05, + "loss": -0.9417, + "step": 21910 + }, + { + "epoch": 49.14798206278027, + "grad_norm": 0.29324230551719666, + "learning_rate": 7.51930630529252e-05, + "loss": -0.9346, + "step": 21920 + }, + { + "epoch": 49.17040358744394, + "grad_norm": 0.3343493342399597, + "learning_rate": 7.516925519830797e-05, + "loss": -0.9387, + "step": 21930 + }, + { + "epoch": 49.19282511210762, + "grad_norm": 0.22642655670642853, + "learning_rate": 7.514543969793557e-05, + "loss": -0.9395, + "step": 21940 + }, + { + "epoch": 49.2152466367713, + "grad_norm": 0.2824237048625946, + "learning_rate": 7.512161655904251e-05, + "loss": -0.9377, + "step": 21950 + }, + { + "epoch": 49.237668161434975, + "grad_norm": 0.2807859480381012, + "learning_rate": 7.509778578886563e-05, + "loss": -0.936, + "step": 21960 + }, + { + "epoch": 49.26008968609865, + "grad_norm": 0.24727769196033478, + "learning_rate": 7.507394739464412e-05, + "loss": -0.9376, + "step": 21970 + }, + { + "epoch": 49.28251121076233, + "grad_norm": 0.22569967806339264, + "learning_rate": 7.50501013836194e-05, + "loss": -0.9347, + "step": 21980 + }, + { + "epoch": 49.30493273542601, + "grad_norm": 0.24196279048919678, + "learning_rate": 7.50262477630353e-05, + "loss": -0.9346, + "step": 21990 + }, + { + "epoch": 49.327354260089685, + "grad_norm": 0.20472191274166107, + "learning_rate": 7.500238654013794e-05, + "loss": -0.9342, + "step": 22000 + }, + { + "epoch": 49.34977578475336, + "grad_norm": 0.33353060483932495, + "learning_rate": 7.497851772217566e-05, + "loss": -0.9386, + "step": 22010 + }, + { + "epoch": 49.37219730941704, + "grad_norm": 0.338112473487854, + "learning_rate": 7.495464131639924e-05, + "loss": -0.9423, + "step": 22020 + }, + { + "epoch": 49.39461883408072, + "grad_norm": 0.20952339470386505, + "learning_rate": 7.493075733006166e-05, + "loss": -0.9374, + "step": 22030 + }, + { + "epoch": 49.417040358744394, + "grad_norm": 0.24557963013648987, + "learning_rate": 7.490686577041828e-05, + "loss": -0.9387, + "step": 22040 + }, + { + "epoch": 49.43946188340807, + "grad_norm": 0.2819485664367676, + "learning_rate": 7.488296664472668e-05, + "loss": -0.93, + "step": 22050 + }, + { + "epoch": 49.46188340807175, + "grad_norm": 0.22892871499061584, + "learning_rate": 7.485905996024682e-05, + "loss": -0.9402, + "step": 22060 + }, + { + "epoch": 49.48430493273543, + "grad_norm": 0.315536767244339, + "learning_rate": 7.483514572424093e-05, + "loss": -0.942, + "step": 22070 + }, + { + "epoch": 49.506726457399104, + "grad_norm": 0.22942695021629333, + "learning_rate": 7.481122394397349e-05, + "loss": -0.9401, + "step": 22080 + }, + { + "epoch": 49.52914798206278, + "grad_norm": 0.1734154373407364, + "learning_rate": 7.478729462671131e-05, + "loss": -0.94, + "step": 22090 + }, + { + "epoch": 49.55156950672646, + "grad_norm": 0.3265754282474518, + "learning_rate": 7.47633577797235e-05, + "loss": -0.9364, + "step": 22100 + }, + { + "epoch": 49.573991031390136, + "grad_norm": 0.2509628236293793, + "learning_rate": 7.473941341028144e-05, + "loss": -0.941, + "step": 22110 + }, + { + "epoch": 49.596412556053814, + "grad_norm": 0.17699964344501495, + "learning_rate": 7.471546152565879e-05, + "loss": -0.9429, + "step": 22120 + }, + { + "epoch": 49.61883408071749, + "grad_norm": 0.21196183562278748, + "learning_rate": 7.46915021331315e-05, + "loss": -0.9347, + "step": 22130 + }, + { + "epoch": 49.64125560538117, + "grad_norm": 0.28134456276893616, + "learning_rate": 7.466753523997778e-05, + "loss": -0.9386, + "step": 22140 + }, + { + "epoch": 49.663677130044846, + "grad_norm": 0.23084929585456848, + "learning_rate": 7.464356085347819e-05, + "loss": -0.9354, + "step": 22150 + }, + { + "epoch": 49.68609865470852, + "grad_norm": 0.28971296548843384, + "learning_rate": 7.461957898091548e-05, + "loss": -0.9366, + "step": 22160 + }, + { + "epoch": 49.7085201793722, + "grad_norm": 0.2556270956993103, + "learning_rate": 7.459558962957473e-05, + "loss": -0.9349, + "step": 22170 + }, + { + "epoch": 49.73094170403587, + "grad_norm": 0.27671435475349426, + "learning_rate": 7.457159280674326e-05, + "loss": -0.939, + "step": 22180 + }, + { + "epoch": 49.75336322869955, + "grad_norm": 0.25565508008003235, + "learning_rate": 7.454758851971066e-05, + "loss": -0.9389, + "step": 22190 + }, + { + "epoch": 49.775784753363226, + "grad_norm": 0.38200682401657104, + "learning_rate": 7.45235767757688e-05, + "loss": -0.9342, + "step": 22200 + }, + { + "epoch": 49.7982062780269, + "grad_norm": 0.41740572452545166, + "learning_rate": 7.449955758221183e-05, + "loss": -0.9298, + "step": 22210 + }, + { + "epoch": 49.82062780269058, + "grad_norm": 0.30580994486808777, + "learning_rate": 7.447553094633615e-05, + "loss": -0.9395, + "step": 22220 + }, + { + "epoch": 49.84304932735426, + "grad_norm": 0.30178481340408325, + "learning_rate": 7.445149687544039e-05, + "loss": -0.9363, + "step": 22230 + }, + { + "epoch": 49.865470852017935, + "grad_norm": 0.26654908061027527, + "learning_rate": 7.44274553768255e-05, + "loss": -0.9362, + "step": 22240 + }, + { + "epoch": 49.88789237668161, + "grad_norm": 0.2547902762889862, + "learning_rate": 7.440340645779464e-05, + "loss": -0.9381, + "step": 22250 + }, + { + "epoch": 49.91031390134529, + "grad_norm": 0.11225591599941254, + "learning_rate": 7.437935012565322e-05, + "loss": -0.9392, + "step": 22260 + }, + { + "epoch": 49.93273542600897, + "grad_norm": 0.1962154358625412, + "learning_rate": 7.435528638770893e-05, + "loss": -0.9371, + "step": 22270 + }, + { + "epoch": 49.955156950672645, + "grad_norm": 0.2276511937379837, + "learning_rate": 7.433121525127171e-05, + "loss": -0.9321, + "step": 22280 + }, + { + "epoch": 49.97757847533632, + "grad_norm": 0.1835835874080658, + "learning_rate": 7.430713672365371e-05, + "loss": -0.9281, + "step": 22290 + }, + { + "epoch": 50.0, + "grad_norm": 0.3388553559780121, + "learning_rate": 7.428305081216938e-05, + "loss": -0.9391, + "step": 22300 + }, + { + "epoch": 50.02242152466368, + "grad_norm": 0.2845526337623596, + "learning_rate": 7.425895752413536e-05, + "loss": -0.9405, + "step": 22310 + }, + { + "epoch": 50.044843049327355, + "grad_norm": 0.2292814552783966, + "learning_rate": 7.423485686687057e-05, + "loss": -0.9312, + "step": 22320 + }, + { + "epoch": 50.06726457399103, + "grad_norm": 0.3032786250114441, + "learning_rate": 7.421074884769616e-05, + "loss": -0.939, + "step": 22330 + }, + { + "epoch": 50.08968609865471, + "grad_norm": 0.2107398957014084, + "learning_rate": 7.418663347393548e-05, + "loss": -0.9368, + "step": 22340 + }, + { + "epoch": 50.11210762331839, + "grad_norm": 0.305743008852005, + "learning_rate": 7.416251075291418e-05, + "loss": -0.9357, + "step": 22350 + }, + { + "epoch": 50.134529147982065, + "grad_norm": 0.24027496576309204, + "learning_rate": 7.413838069196007e-05, + "loss": -0.9415, + "step": 22360 + }, + { + "epoch": 50.15695067264574, + "grad_norm": 0.1626400351524353, + "learning_rate": 7.411424329840324e-05, + "loss": -0.9381, + "step": 22370 + }, + { + "epoch": 50.17937219730942, + "grad_norm": 0.34349268674850464, + "learning_rate": 7.409009857957601e-05, + "loss": -0.9304, + "step": 22380 + }, + { + "epoch": 50.2017937219731, + "grad_norm": 0.22034063935279846, + "learning_rate": 7.40659465428129e-05, + "loss": -0.9333, + "step": 22390 + }, + { + "epoch": 50.224215246636774, + "grad_norm": 0.1796979010105133, + "learning_rate": 7.404178719545063e-05, + "loss": -0.9346, + "step": 22400 + }, + { + "epoch": 50.24663677130045, + "grad_norm": 0.3742120862007141, + "learning_rate": 7.401762054482822e-05, + "loss": -0.9317, + "step": 22410 + }, + { + "epoch": 50.26905829596413, + "grad_norm": 0.28579577803611755, + "learning_rate": 7.39934465982868e-05, + "loss": -0.9361, + "step": 22420 + }, + { + "epoch": 50.2914798206278, + "grad_norm": 0.26047495007514954, + "learning_rate": 7.396926536316984e-05, + "loss": -0.9403, + "step": 22430 + }, + { + "epoch": 50.31390134529148, + "grad_norm": 0.22648662328720093, + "learning_rate": 7.394507684682293e-05, + "loss": -0.9384, + "step": 22440 + }, + { + "epoch": 50.336322869955154, + "grad_norm": 0.2953048646450043, + "learning_rate": 7.392088105659393e-05, + "loss": -0.9397, + "step": 22450 + }, + { + "epoch": 50.35874439461883, + "grad_norm": 0.34694740176200867, + "learning_rate": 7.389667799983284e-05, + "loss": -0.9348, + "step": 22460 + }, + { + "epoch": 50.38116591928251, + "grad_norm": 0.2033974826335907, + "learning_rate": 7.387246768389193e-05, + "loss": -0.9387, + "step": 22470 + }, + { + "epoch": 50.403587443946186, + "grad_norm": 0.3725281059741974, + "learning_rate": 7.384825011612563e-05, + "loss": -0.9411, + "step": 22480 + }, + { + "epoch": 50.426008968609864, + "grad_norm": 0.3159498870372772, + "learning_rate": 7.382402530389066e-05, + "loss": -0.9306, + "step": 22490 + }, + { + "epoch": 50.44843049327354, + "grad_norm": 0.2644689083099365, + "learning_rate": 7.379979325454582e-05, + "loss": -0.938, + "step": 22500 + }, + { + "epoch": 50.47085201793722, + "grad_norm": 0.3648507595062256, + "learning_rate": 7.37755539754522e-05, + "loss": -0.939, + "step": 22510 + }, + { + "epoch": 50.493273542600896, + "grad_norm": 0.27714505791664124, + "learning_rate": 7.375130747397302e-05, + "loss": -0.938, + "step": 22520 + }, + { + "epoch": 50.51569506726457, + "grad_norm": 0.30382904410362244, + "learning_rate": 7.372705375747377e-05, + "loss": -0.9372, + "step": 22530 + }, + { + "epoch": 50.53811659192825, + "grad_norm": 0.3087071180343628, + "learning_rate": 7.370279283332205e-05, + "loss": -0.9415, + "step": 22540 + }, + { + "epoch": 50.56053811659193, + "grad_norm": 0.3192812204360962, + "learning_rate": 7.36785247088877e-05, + "loss": -0.9413, + "step": 22550 + }, + { + "epoch": 50.582959641255606, + "grad_norm": 0.19195477664470673, + "learning_rate": 7.365424939154275e-05, + "loss": -0.9389, + "step": 22560 + }, + { + "epoch": 50.60538116591928, + "grad_norm": 0.24808450043201447, + "learning_rate": 7.362996688866138e-05, + "loss": -0.9403, + "step": 22570 + }, + { + "epoch": 50.62780269058296, + "grad_norm": 0.33451154828071594, + "learning_rate": 7.360567720761999e-05, + "loss": -0.9358, + "step": 22580 + }, + { + "epoch": 50.65022421524664, + "grad_norm": 0.2713640034198761, + "learning_rate": 7.358138035579711e-05, + "loss": -0.9386, + "step": 22590 + }, + { + "epoch": 50.672645739910315, + "grad_norm": 0.33042681217193604, + "learning_rate": 7.355707634057354e-05, + "loss": -0.9385, + "step": 22600 + }, + { + "epoch": 50.69506726457399, + "grad_norm": 0.2985649108886719, + "learning_rate": 7.353276516933215e-05, + "loss": -0.9426, + "step": 22610 + }, + { + "epoch": 50.71748878923767, + "grad_norm": 0.2539604604244232, + "learning_rate": 7.350844684945806e-05, + "loss": -0.9344, + "step": 22620 + }, + { + "epoch": 50.73991031390135, + "grad_norm": 0.27866941690444946, + "learning_rate": 7.348412138833851e-05, + "loss": -0.9398, + "step": 22630 + }, + { + "epoch": 50.762331838565025, + "grad_norm": 0.32754430174827576, + "learning_rate": 7.345978879336295e-05, + "loss": -0.9365, + "step": 22640 + }, + { + "epoch": 50.7847533632287, + "grad_norm": 0.2626665532588959, + "learning_rate": 7.343544907192296e-05, + "loss": -0.934, + "step": 22650 + }, + { + "epoch": 50.80717488789238, + "grad_norm": 0.1931474506855011, + "learning_rate": 7.341110223141235e-05, + "loss": -0.9402, + "step": 22660 + }, + { + "epoch": 50.82959641255606, + "grad_norm": 0.34617024660110474, + "learning_rate": 7.3386748279227e-05, + "loss": -0.9393, + "step": 22670 + }, + { + "epoch": 50.85201793721973, + "grad_norm": 0.2632863521575928, + "learning_rate": 7.336238722276501e-05, + "loss": -0.9313, + "step": 22680 + }, + { + "epoch": 50.874439461883405, + "grad_norm": 0.36812058091163635, + "learning_rate": 7.333801906942663e-05, + "loss": -0.9393, + "step": 22690 + }, + { + "epoch": 50.89686098654708, + "grad_norm": 0.21058857440948486, + "learning_rate": 7.331364382661428e-05, + "loss": -0.9385, + "step": 22700 + }, + { + "epoch": 50.91928251121076, + "grad_norm": 0.25461113452911377, + "learning_rate": 7.328926150173248e-05, + "loss": -0.9406, + "step": 22710 + }, + { + "epoch": 50.94170403587444, + "grad_norm": 0.23968997597694397, + "learning_rate": 7.326487210218795e-05, + "loss": -0.9407, + "step": 22720 + }, + { + "epoch": 50.964125560538115, + "grad_norm": 0.26052722334861755, + "learning_rate": 7.324047563538955e-05, + "loss": -0.9336, + "step": 22730 + }, + { + "epoch": 50.98654708520179, + "grad_norm": 0.2964003086090088, + "learning_rate": 7.321607210874828e-05, + "loss": -0.9349, + "step": 22740 + }, + { + "epoch": 51.00896860986547, + "grad_norm": 0.3150850236415863, + "learning_rate": 7.31916615296773e-05, + "loss": -0.9396, + "step": 22750 + }, + { + "epoch": 51.03139013452915, + "grad_norm": 0.27446994185447693, + "learning_rate": 7.316724390559188e-05, + "loss": -0.9403, + "step": 22760 + }, + { + "epoch": 51.053811659192824, + "grad_norm": 0.254693865776062, + "learning_rate": 7.314281924390946e-05, + "loss": -0.9442, + "step": 22770 + }, + { + "epoch": 51.0762331838565, + "grad_norm": 0.36865097284317017, + "learning_rate": 7.311838755204959e-05, + "loss": -0.9402, + "step": 22780 + }, + { + "epoch": 51.09865470852018, + "grad_norm": 0.3218690752983093, + "learning_rate": 7.3093948837434e-05, + "loss": -0.9381, + "step": 22790 + }, + { + "epoch": 51.12107623318386, + "grad_norm": 0.32660672068595886, + "learning_rate": 7.306950310748651e-05, + "loss": -0.9403, + "step": 22800 + }, + { + "epoch": 51.143497757847534, + "grad_norm": 0.3397519290447235, + "learning_rate": 7.304505036963311e-05, + "loss": -0.9374, + "step": 22810 + }, + { + "epoch": 51.16591928251121, + "grad_norm": 0.27422794699668884, + "learning_rate": 7.302059063130186e-05, + "loss": -0.9392, + "step": 22820 + }, + { + "epoch": 51.18834080717489, + "grad_norm": 0.35755741596221924, + "learning_rate": 7.2996123899923e-05, + "loss": -0.9403, + "step": 22830 + }, + { + "epoch": 51.210762331838566, + "grad_norm": 0.2591880261898041, + "learning_rate": 7.297165018292886e-05, + "loss": -0.9365, + "step": 22840 + }, + { + "epoch": 51.233183856502244, + "grad_norm": 0.2606683075428009, + "learning_rate": 7.294716948775396e-05, + "loss": -0.9409, + "step": 22850 + }, + { + "epoch": 51.25560538116592, + "grad_norm": 0.23137451708316803, + "learning_rate": 7.292268182183484e-05, + "loss": -0.9379, + "step": 22860 + }, + { + "epoch": 51.2780269058296, + "grad_norm": 0.1981307566165924, + "learning_rate": 7.28981871926102e-05, + "loss": -0.9423, + "step": 22870 + }, + { + "epoch": 51.300448430493276, + "grad_norm": 0.2690190374851227, + "learning_rate": 7.28736856075209e-05, + "loss": -0.9398, + "step": 22880 + }, + { + "epoch": 51.32286995515695, + "grad_norm": 0.21838359534740448, + "learning_rate": 7.284917707400985e-05, + "loss": -0.9394, + "step": 22890 + }, + { + "epoch": 51.34529147982063, + "grad_norm": 0.1842585951089859, + "learning_rate": 7.282466159952212e-05, + "loss": -0.9347, + "step": 22900 + }, + { + "epoch": 51.36771300448431, + "grad_norm": 0.22638462483882904, + "learning_rate": 7.280013919150483e-05, + "loss": -0.9385, + "step": 22910 + }, + { + "epoch": 51.390134529147986, + "grad_norm": 0.23531654477119446, + "learning_rate": 7.277560985740728e-05, + "loss": -0.9269, + "step": 22920 + }, + { + "epoch": 51.412556053811656, + "grad_norm": 0.3061802089214325, + "learning_rate": 7.275107360468079e-05, + "loss": -0.9342, + "step": 22930 + }, + { + "epoch": 51.43497757847533, + "grad_norm": 0.273716002702713, + "learning_rate": 7.272653044077885e-05, + "loss": -0.9368, + "step": 22940 + }, + { + "epoch": 51.45739910313901, + "grad_norm": 0.2670084834098816, + "learning_rate": 7.270198037315703e-05, + "loss": -0.9339, + "step": 22950 + }, + { + "epoch": 51.47982062780269, + "grad_norm": 0.3164423108100891, + "learning_rate": 7.267742340927297e-05, + "loss": -0.9419, + "step": 22960 + }, + { + "epoch": 51.502242152466366, + "grad_norm": 0.2831017076969147, + "learning_rate": 7.265285955658645e-05, + "loss": -0.9411, + "step": 22970 + }, + { + "epoch": 51.52466367713004, + "grad_norm": 0.19710350036621094, + "learning_rate": 7.26282888225593e-05, + "loss": -0.9422, + "step": 22980 + }, + { + "epoch": 51.54708520179372, + "grad_norm": 0.22969302535057068, + "learning_rate": 7.260371121465548e-05, + "loss": -0.9386, + "step": 22990 + }, + { + "epoch": 51.5695067264574, + "grad_norm": 0.36402636766433716, + "learning_rate": 7.2579126740341e-05, + "loss": -0.9331, + "step": 23000 + }, + { + "epoch": 51.591928251121075, + "grad_norm": 0.3137633502483368, + "learning_rate": 7.2554535407084e-05, + "loss": -0.9403, + "step": 23010 + }, + { + "epoch": 51.61434977578475, + "grad_norm": 0.24020259082317352, + "learning_rate": 7.252993722235464e-05, + "loss": -0.9392, + "step": 23020 + }, + { + "epoch": 51.63677130044843, + "grad_norm": 0.25896787643432617, + "learning_rate": 7.250533219362523e-05, + "loss": -0.9424, + "step": 23030 + }, + { + "epoch": 51.65919282511211, + "grad_norm": 0.4244493544101715, + "learning_rate": 7.248072032837012e-05, + "loss": -0.9361, + "step": 23040 + }, + { + "epoch": 51.681614349775785, + "grad_norm": 0.3334680497646332, + "learning_rate": 7.245610163406575e-05, + "loss": -0.9357, + "step": 23050 + }, + { + "epoch": 51.70403587443946, + "grad_norm": 0.2934368848800659, + "learning_rate": 7.243147611819061e-05, + "loss": -0.9362, + "step": 23060 + }, + { + "epoch": 51.72645739910314, + "grad_norm": 0.21818579733371735, + "learning_rate": 7.240684378822531e-05, + "loss": -0.9399, + "step": 23070 + }, + { + "epoch": 51.74887892376682, + "grad_norm": 0.21837900578975677, + "learning_rate": 7.238220465165248e-05, + "loss": -0.9419, + "step": 23080 + }, + { + "epoch": 51.771300448430495, + "grad_norm": 0.2509973347187042, + "learning_rate": 7.235755871595684e-05, + "loss": -0.9366, + "step": 23090 + }, + { + "epoch": 51.79372197309417, + "grad_norm": 0.26488783955574036, + "learning_rate": 7.233290598862517e-05, + "loss": -0.9381, + "step": 23100 + }, + { + "epoch": 51.81614349775785, + "grad_norm": 0.26220783591270447, + "learning_rate": 7.230824647714635e-05, + "loss": -0.9408, + "step": 23110 + }, + { + "epoch": 51.83856502242153, + "grad_norm": 0.26849284768104553, + "learning_rate": 7.228358018901124e-05, + "loss": -0.9386, + "step": 23120 + }, + { + "epoch": 51.860986547085204, + "grad_norm": 0.2104901522397995, + "learning_rate": 7.225890713171286e-05, + "loss": -0.9367, + "step": 23130 + }, + { + "epoch": 51.88340807174888, + "grad_norm": 0.24872466921806335, + "learning_rate": 7.223422731274618e-05, + "loss": -0.9427, + "step": 23140 + }, + { + "epoch": 51.90582959641256, + "grad_norm": 0.20889440178871155, + "learning_rate": 7.220954073960832e-05, + "loss": -0.9421, + "step": 23150 + }, + { + "epoch": 51.92825112107624, + "grad_norm": 0.3054313361644745, + "learning_rate": 7.218484741979838e-05, + "loss": -0.9443, + "step": 23160 + }, + { + "epoch": 51.95067264573991, + "grad_norm": 0.3299896717071533, + "learning_rate": 7.216014736081756e-05, + "loss": -0.936, + "step": 23170 + }, + { + "epoch": 51.973094170403584, + "grad_norm": 0.2841031551361084, + "learning_rate": 7.213544057016906e-05, + "loss": -0.9414, + "step": 23180 + }, + { + "epoch": 51.99551569506726, + "grad_norm": 0.18526093661785126, + "learning_rate": 7.211072705535819e-05, + "loss": -0.9402, + "step": 23190 + }, + { + "epoch": 52.01793721973094, + "grad_norm": 0.18997228145599365, + "learning_rate": 7.208600682389224e-05, + "loss": -0.9425, + "step": 23200 + }, + { + "epoch": 52.04035874439462, + "grad_norm": 0.30923888087272644, + "learning_rate": 7.206127988328055e-05, + "loss": -0.9374, + "step": 23210 + }, + { + "epoch": 52.062780269058294, + "grad_norm": 0.2197026014328003, + "learning_rate": 7.203654624103453e-05, + "loss": -0.9428, + "step": 23220 + }, + { + "epoch": 52.08520179372197, + "grad_norm": 0.2936004102230072, + "learning_rate": 7.201180590466761e-05, + "loss": -0.9366, + "step": 23230 + }, + { + "epoch": 52.10762331838565, + "grad_norm": 0.23339422047138214, + "learning_rate": 7.198705888169523e-05, + "loss": -0.9328, + "step": 23240 + }, + { + "epoch": 52.130044843049326, + "grad_norm": 0.3419140875339508, + "learning_rate": 7.196230517963491e-05, + "loss": -0.9416, + "step": 23250 + }, + { + "epoch": 52.152466367713004, + "grad_norm": 0.3700021505355835, + "learning_rate": 7.193754480600615e-05, + "loss": -0.9375, + "step": 23260 + }, + { + "epoch": 52.17488789237668, + "grad_norm": 0.35951289534568787, + "learning_rate": 7.19127777683305e-05, + "loss": -0.9317, + "step": 23270 + }, + { + "epoch": 52.19730941704036, + "grad_norm": 0.3311088979244232, + "learning_rate": 7.188800407413156e-05, + "loss": -0.943, + "step": 23280 + }, + { + "epoch": 52.219730941704036, + "grad_norm": 0.2936321794986725, + "learning_rate": 7.186322373093489e-05, + "loss": -0.94, + "step": 23290 + }, + { + "epoch": 52.24215246636771, + "grad_norm": 0.2902646064758301, + "learning_rate": 7.18384367462681e-05, + "loss": -0.9408, + "step": 23300 + }, + { + "epoch": 52.26457399103139, + "grad_norm": 0.25416240096092224, + "learning_rate": 7.181364312766085e-05, + "loss": -0.9399, + "step": 23310 + }, + { + "epoch": 52.28699551569507, + "grad_norm": 0.24680878221988678, + "learning_rate": 7.178884288264477e-05, + "loss": -0.9433, + "step": 23320 + }, + { + "epoch": 52.309417040358746, + "grad_norm": 0.29229530692100525, + "learning_rate": 7.176403601875353e-05, + "loss": -0.94, + "step": 23330 + }, + { + "epoch": 52.33183856502242, + "grad_norm": 0.2361726313829422, + "learning_rate": 7.173922254352279e-05, + "loss": -0.9347, + "step": 23340 + }, + { + "epoch": 52.3542600896861, + "grad_norm": 0.2561427056789398, + "learning_rate": 7.171440246449024e-05, + "loss": -0.9418, + "step": 23350 + }, + { + "epoch": 52.37668161434978, + "grad_norm": 0.23733440041542053, + "learning_rate": 7.168957578919555e-05, + "loss": -0.9427, + "step": 23360 + }, + { + "epoch": 52.399103139013455, + "grad_norm": 0.22043685615062714, + "learning_rate": 7.16647425251804e-05, + "loss": -0.9373, + "step": 23370 + }, + { + "epoch": 52.42152466367713, + "grad_norm": 0.395440936088562, + "learning_rate": 7.163990267998852e-05, + "loss": -0.9361, + "step": 23380 + }, + { + "epoch": 52.44394618834081, + "grad_norm": 0.2629133462905884, + "learning_rate": 7.161505626116556e-05, + "loss": -0.936, + "step": 23390 + }, + { + "epoch": 52.46636771300449, + "grad_norm": 0.2912895083427429, + "learning_rate": 7.159020327625923e-05, + "loss": -0.9397, + "step": 23400 + }, + { + "epoch": 52.488789237668165, + "grad_norm": 0.27662453055381775, + "learning_rate": 7.15653437328192e-05, + "loss": -0.9284, + "step": 23410 + }, + { + "epoch": 52.511210762331835, + "grad_norm": 0.21379968523979187, + "learning_rate": 7.154047763839713e-05, + "loss": -0.9412, + "step": 23420 + }, + { + "epoch": 52.53363228699551, + "grad_norm": 0.24314993619918823, + "learning_rate": 7.15156050005467e-05, + "loss": -0.9419, + "step": 23430 + }, + { + "epoch": 52.55605381165919, + "grad_norm": 0.28680509328842163, + "learning_rate": 7.149072582682357e-05, + "loss": -0.942, + "step": 23440 + }, + { + "epoch": 52.57847533632287, + "grad_norm": 0.23325130343437195, + "learning_rate": 7.146584012478535e-05, + "loss": -0.9416, + "step": 23450 + }, + { + "epoch": 52.600896860986545, + "grad_norm": 0.1870451122522354, + "learning_rate": 7.144094790199169e-05, + "loss": -0.9394, + "step": 23460 + }, + { + "epoch": 52.62331838565022, + "grad_norm": 0.36357739567756653, + "learning_rate": 7.141604916600415e-05, + "loss": -0.9379, + "step": 23470 + }, + { + "epoch": 52.6457399103139, + "grad_norm": 0.3299023509025574, + "learning_rate": 7.139114392438635e-05, + "loss": -0.9441, + "step": 23480 + }, + { + "epoch": 52.66816143497758, + "grad_norm": 0.25581613183021545, + "learning_rate": 7.136623218470382e-05, + "loss": -0.9419, + "step": 23490 + }, + { + "epoch": 52.690582959641254, + "grad_norm": 0.22124208509922028, + "learning_rate": 7.13413139545241e-05, + "loss": -0.9414, + "step": 23500 + }, + { + "epoch": 52.71300448430493, + "grad_norm": 0.20297765731811523, + "learning_rate": 7.131638924141668e-05, + "loss": -0.9374, + "step": 23510 + }, + { + "epoch": 52.73542600896861, + "grad_norm": 0.2761153280735016, + "learning_rate": 7.129145805295304e-05, + "loss": -0.9396, + "step": 23520 + }, + { + "epoch": 52.75784753363229, + "grad_norm": 0.22916391491889954, + "learning_rate": 7.126652039670661e-05, + "loss": -0.9444, + "step": 23530 + }, + { + "epoch": 52.780269058295964, + "grad_norm": 0.2992285192012787, + "learning_rate": 7.124157628025278e-05, + "loss": -0.9421, + "step": 23540 + }, + { + "epoch": 52.80269058295964, + "grad_norm": 0.17425191402435303, + "learning_rate": 7.121662571116894e-05, + "loss": -0.9412, + "step": 23550 + }, + { + "epoch": 52.82511210762332, + "grad_norm": 0.22402215003967285, + "learning_rate": 7.119166869703441e-05, + "loss": -0.9416, + "step": 23560 + }, + { + "epoch": 52.847533632286996, + "grad_norm": 0.16866539418697357, + "learning_rate": 7.116670524543044e-05, + "loss": -0.9372, + "step": 23570 + }, + { + "epoch": 52.869955156950674, + "grad_norm": 0.3390856087207794, + "learning_rate": 7.114173536394032e-05, + "loss": -0.9405, + "step": 23580 + }, + { + "epoch": 52.89237668161435, + "grad_norm": 0.27164366841316223, + "learning_rate": 7.111675906014917e-05, + "loss": -0.9363, + "step": 23590 + }, + { + "epoch": 52.91479820627803, + "grad_norm": 0.20171144604682922, + "learning_rate": 7.109177634164421e-05, + "loss": -0.9434, + "step": 23600 + }, + { + "epoch": 52.937219730941706, + "grad_norm": 0.19986870884895325, + "learning_rate": 7.106678721601449e-05, + "loss": -0.9427, + "step": 23610 + }, + { + "epoch": 52.95964125560538, + "grad_norm": 0.25985097885131836, + "learning_rate": 7.104179169085103e-05, + "loss": -0.9373, + "step": 23620 + }, + { + "epoch": 52.98206278026906, + "grad_norm": 0.2792198657989502, + "learning_rate": 7.101678977374683e-05, + "loss": -0.9348, + "step": 23630 + }, + { + "epoch": 53.00448430493274, + "grad_norm": 0.24066123366355896, + "learning_rate": 7.099178147229685e-05, + "loss": -0.9423, + "step": 23640 + }, + { + "epoch": 53.026905829596416, + "grad_norm": 0.2332630604505539, + "learning_rate": 7.096676679409789e-05, + "loss": -0.9387, + "step": 23650 + }, + { + "epoch": 53.04932735426009, + "grad_norm": 0.2666306793689728, + "learning_rate": 7.094174574674877e-05, + "loss": -0.9427, + "step": 23660 + }, + { + "epoch": 53.07174887892376, + "grad_norm": 0.2755768895149231, + "learning_rate": 7.091671833785025e-05, + "loss": -0.9435, + "step": 23670 + }, + { + "epoch": 53.09417040358744, + "grad_norm": 0.2704540491104126, + "learning_rate": 7.089168457500493e-05, + "loss": -0.9379, + "step": 23680 + }, + { + "epoch": 53.11659192825112, + "grad_norm": 0.22896507382392883, + "learning_rate": 7.086664446581747e-05, + "loss": -0.9353, + "step": 23690 + }, + { + "epoch": 53.139013452914796, + "grad_norm": 0.21877306699752808, + "learning_rate": 7.084159801789438e-05, + "loss": -0.9398, + "step": 23700 + }, + { + "epoch": 53.16143497757847, + "grad_norm": 0.2112514078617096, + "learning_rate": 7.081654523884411e-05, + "loss": -0.9404, + "step": 23710 + }, + { + "epoch": 53.18385650224215, + "grad_norm": 0.2885020077228546, + "learning_rate": 7.0791486136277e-05, + "loss": -0.9377, + "step": 23720 + }, + { + "epoch": 53.20627802690583, + "grad_norm": 0.3112456798553467, + "learning_rate": 7.07664207178054e-05, + "loss": -0.9373, + "step": 23730 + }, + { + "epoch": 53.228699551569505, + "grad_norm": 0.21204710006713867, + "learning_rate": 7.074134899104345e-05, + "loss": -0.9371, + "step": 23740 + }, + { + "epoch": 53.25112107623318, + "grad_norm": 0.18234530091285706, + "learning_rate": 7.071627096360735e-05, + "loss": -0.942, + "step": 23750 + }, + { + "epoch": 53.27354260089686, + "grad_norm": 0.26172325015068054, + "learning_rate": 7.069118664311511e-05, + "loss": -0.9429, + "step": 23760 + }, + { + "epoch": 53.29596412556054, + "grad_norm": 0.2199196070432663, + "learning_rate": 7.06660960371867e-05, + "loss": -0.9437, + "step": 23770 + }, + { + "epoch": 53.318385650224215, + "grad_norm": 0.34887751936912537, + "learning_rate": 7.064099915344396e-05, + "loss": -0.935, + "step": 23780 + }, + { + "epoch": 53.34080717488789, + "grad_norm": 0.312092125415802, + "learning_rate": 7.061589599951066e-05, + "loss": -0.941, + "step": 23790 + }, + { + "epoch": 53.36322869955157, + "grad_norm": 0.275387704372406, + "learning_rate": 7.05907865830125e-05, + "loss": -0.9366, + "step": 23800 + }, + { + "epoch": 53.38565022421525, + "grad_norm": 0.2876167595386505, + "learning_rate": 7.056567091157703e-05, + "loss": -0.9381, + "step": 23810 + }, + { + "epoch": 53.408071748878925, + "grad_norm": 0.22902891039848328, + "learning_rate": 7.054054899283375e-05, + "loss": -0.938, + "step": 23820 + }, + { + "epoch": 53.4304932735426, + "grad_norm": 0.17359276115894318, + "learning_rate": 7.051542083441403e-05, + "loss": -0.9413, + "step": 23830 + }, + { + "epoch": 53.45291479820628, + "grad_norm": 0.2284993827342987, + "learning_rate": 7.049028644395113e-05, + "loss": -0.9401, + "step": 23840 + }, + { + "epoch": 53.47533632286996, + "grad_norm": 0.2755122184753418, + "learning_rate": 7.046514582908024e-05, + "loss": -0.9457, + "step": 23850 + }, + { + "epoch": 53.497757847533634, + "grad_norm": 0.2699902355670929, + "learning_rate": 7.043999899743838e-05, + "loss": -0.9419, + "step": 23860 + }, + { + "epoch": 53.52017937219731, + "grad_norm": 0.20375199615955353, + "learning_rate": 7.041484595666451e-05, + "loss": -0.9384, + "step": 23870 + }, + { + "epoch": 53.54260089686099, + "grad_norm": 0.27316972613334656, + "learning_rate": 7.038968671439948e-05, + "loss": -0.9409, + "step": 23880 + }, + { + "epoch": 53.56502242152467, + "grad_norm": 0.26066944003105164, + "learning_rate": 7.036452127828596e-05, + "loss": -0.9362, + "step": 23890 + }, + { + "epoch": 53.587443946188344, + "grad_norm": 0.25665920972824097, + "learning_rate": 7.033934965596859e-05, + "loss": -0.9434, + "step": 23900 + }, + { + "epoch": 53.609865470852014, + "grad_norm": 0.39881590008735657, + "learning_rate": 7.031417185509381e-05, + "loss": -0.9398, + "step": 23910 + }, + { + "epoch": 53.63228699551569, + "grad_norm": 0.25074002146720886, + "learning_rate": 7.028898788331e-05, + "loss": -0.9398, + "step": 23920 + }, + { + "epoch": 53.65470852017937, + "grad_norm": 0.24159389734268188, + "learning_rate": 7.026379774826736e-05, + "loss": -0.9337, + "step": 23930 + }, + { + "epoch": 53.67713004484305, + "grad_norm": 0.2773287296295166, + "learning_rate": 7.0238601457618e-05, + "loss": -0.9422, + "step": 23940 + }, + { + "epoch": 53.699551569506724, + "grad_norm": 0.20131231844425201, + "learning_rate": 7.02133990190159e-05, + "loss": -0.9406, + "step": 23950 + }, + { + "epoch": 53.7219730941704, + "grad_norm": 0.24764390289783478, + "learning_rate": 7.018819044011687e-05, + "loss": -0.9424, + "step": 23960 + }, + { + "epoch": 53.74439461883408, + "grad_norm": 0.17466245591640472, + "learning_rate": 7.016297572857863e-05, + "loss": -0.9443, + "step": 23970 + }, + { + "epoch": 53.766816143497756, + "grad_norm": 0.18640223145484924, + "learning_rate": 7.013775489206072e-05, + "loss": -0.941, + "step": 23980 + }, + { + "epoch": 53.789237668161434, + "grad_norm": 0.24127526581287384, + "learning_rate": 7.01125279382246e-05, + "loss": -0.9434, + "step": 23990 + }, + { + "epoch": 53.81165919282511, + "grad_norm": 0.2578391432762146, + "learning_rate": 7.008729487473351e-05, + "loss": -0.9424, + "step": 24000 + }, + { + "epoch": 53.83408071748879, + "grad_norm": 0.3436724543571472, + "learning_rate": 7.006205570925263e-05, + "loss": -0.9397, + "step": 24010 + }, + { + "epoch": 53.856502242152466, + "grad_norm": 0.25041940808296204, + "learning_rate": 7.003681044944892e-05, + "loss": -0.9374, + "step": 24020 + }, + { + "epoch": 53.87892376681614, + "grad_norm": 0.22570490837097168, + "learning_rate": 7.001155910299126e-05, + "loss": -0.9396, + "step": 24030 + }, + { + "epoch": 53.90134529147982, + "grad_norm": 0.4293327033519745, + "learning_rate": 6.99863016775503e-05, + "loss": -0.9321, + "step": 24040 + }, + { + "epoch": 53.9237668161435, + "grad_norm": 0.22845445573329926, + "learning_rate": 6.996103818079859e-05, + "loss": -0.9422, + "step": 24050 + }, + { + "epoch": 53.946188340807176, + "grad_norm": 0.23155809938907623, + "learning_rate": 6.993576862041054e-05, + "loss": -0.9406, + "step": 24060 + }, + { + "epoch": 53.96860986547085, + "grad_norm": 0.3275812864303589, + "learning_rate": 6.991049300406235e-05, + "loss": -0.9442, + "step": 24070 + }, + { + "epoch": 53.99103139013453, + "grad_norm": 0.19045555591583252, + "learning_rate": 6.988521133943209e-05, + "loss": -0.9409, + "step": 24080 + }, + { + "epoch": 54.01345291479821, + "grad_norm": 0.24064239859580994, + "learning_rate": 6.985992363419966e-05, + "loss": -0.9391, + "step": 24090 + }, + { + "epoch": 54.035874439461885, + "grad_norm": 0.19050484895706177, + "learning_rate": 6.983462989604682e-05, + "loss": -0.9417, + "step": 24100 + }, + { + "epoch": 54.05829596412556, + "grad_norm": 0.24317876994609833, + "learning_rate": 6.980933013265709e-05, + "loss": -0.9449, + "step": 24110 + }, + { + "epoch": 54.08071748878924, + "grad_norm": 0.33138224482536316, + "learning_rate": 6.978402435171592e-05, + "loss": -0.9324, + "step": 24120 + }, + { + "epoch": 54.10313901345292, + "grad_norm": 0.1763572245836258, + "learning_rate": 6.975871256091052e-05, + "loss": -0.9373, + "step": 24130 + }, + { + "epoch": 54.125560538116595, + "grad_norm": 0.23282666504383087, + "learning_rate": 6.973339476792995e-05, + "loss": -0.9409, + "step": 24140 + }, + { + "epoch": 54.14798206278027, + "grad_norm": 0.26755329966545105, + "learning_rate": 6.970807098046505e-05, + "loss": -0.9418, + "step": 24150 + }, + { + "epoch": 54.17040358744394, + "grad_norm": 0.2634772062301636, + "learning_rate": 6.968274120620858e-05, + "loss": -0.9422, + "step": 24160 + }, + { + "epoch": 54.19282511210762, + "grad_norm": 0.18372243642807007, + "learning_rate": 6.965740545285499e-05, + "loss": -0.9464, + "step": 24170 + }, + { + "epoch": 54.2152466367713, + "grad_norm": 0.20004159212112427, + "learning_rate": 6.963206372810068e-05, + "loss": -0.9426, + "step": 24180 + }, + { + "epoch": 54.237668161434975, + "grad_norm": 0.2070722132921219, + "learning_rate": 6.960671603964375e-05, + "loss": -0.9375, + "step": 24190 + }, + { + "epoch": 54.26008968609865, + "grad_norm": 0.20868562161922455, + "learning_rate": 6.958136239518418e-05, + "loss": -0.9405, + "step": 24200 + }, + { + "epoch": 54.28251121076233, + "grad_norm": 0.1983892321586609, + "learning_rate": 6.955600280242371e-05, + "loss": -0.9446, + "step": 24210 + }, + { + "epoch": 54.30493273542601, + "grad_norm": 0.16701334714889526, + "learning_rate": 6.953063726906596e-05, + "loss": -0.9428, + "step": 24220 + }, + { + "epoch": 54.327354260089685, + "grad_norm": 0.19776123762130737, + "learning_rate": 6.950526580281626e-05, + "loss": -0.9396, + "step": 24230 + }, + { + "epoch": 54.34977578475336, + "grad_norm": 0.19635571539402008, + "learning_rate": 6.947988841138184e-05, + "loss": -0.9424, + "step": 24240 + }, + { + "epoch": 54.37219730941704, + "grad_norm": 0.4008130431175232, + "learning_rate": 6.945450510247165e-05, + "loss": -0.9436, + "step": 24250 + }, + { + "epoch": 54.39461883408072, + "grad_norm": 0.25759726762771606, + "learning_rate": 6.942911588379647e-05, + "loss": -0.9421, + "step": 24260 + }, + { + "epoch": 54.417040358744394, + "grad_norm": 0.2102440744638443, + "learning_rate": 6.940372076306888e-05, + "loss": -0.9414, + "step": 24270 + }, + { + "epoch": 54.43946188340807, + "grad_norm": 0.24086827039718628, + "learning_rate": 6.937831974800326e-05, + "loss": -0.9402, + "step": 24280 + }, + { + "epoch": 54.46188340807175, + "grad_norm": 0.288068026304245, + "learning_rate": 6.935291284631574e-05, + "loss": -0.9377, + "step": 24290 + }, + { + "epoch": 54.48430493273543, + "grad_norm": 0.23726814985275269, + "learning_rate": 6.932750006572428e-05, + "loss": -0.9441, + "step": 24300 + }, + { + "epoch": 54.506726457399104, + "grad_norm": 0.27640581130981445, + "learning_rate": 6.930208141394863e-05, + "loss": -0.9241, + "step": 24310 + }, + { + "epoch": 54.52914798206278, + "grad_norm": 0.18576832115650177, + "learning_rate": 6.927665689871026e-05, + "loss": -0.9443, + "step": 24320 + }, + { + "epoch": 54.55156950672646, + "grad_norm": 0.3277636170387268, + "learning_rate": 6.925122652773253e-05, + "loss": -0.9429, + "step": 24330 + }, + { + "epoch": 54.573991031390136, + "grad_norm": 0.1810186803340912, + "learning_rate": 6.922579030874046e-05, + "loss": -0.942, + "step": 24340 + }, + { + "epoch": 54.596412556053814, + "grad_norm": 0.23538930714130402, + "learning_rate": 6.920034824946093e-05, + "loss": -0.9401, + "step": 24350 + }, + { + "epoch": 54.61883408071749, + "grad_norm": 0.2607669234275818, + "learning_rate": 6.917490035762255e-05, + "loss": -0.9372, + "step": 24360 + }, + { + "epoch": 54.64125560538117, + "grad_norm": 0.2122533917427063, + "learning_rate": 6.914944664095573e-05, + "loss": -0.9369, + "step": 24370 + }, + { + "epoch": 54.663677130044846, + "grad_norm": 0.22216391563415527, + "learning_rate": 6.912398710719264e-05, + "loss": -0.9424, + "step": 24380 + }, + { + "epoch": 54.68609865470852, + "grad_norm": 0.22435423731803894, + "learning_rate": 6.90985217640672e-05, + "loss": -0.9457, + "step": 24390 + }, + { + "epoch": 54.7085201793722, + "grad_norm": 0.21895749866962433, + "learning_rate": 6.90730506193151e-05, + "loss": -0.9414, + "step": 24400 + }, + { + "epoch": 54.73094170403587, + "grad_norm": 0.2848255932331085, + "learning_rate": 6.904757368067384e-05, + "loss": -0.9393, + "step": 24410 + }, + { + "epoch": 54.75336322869955, + "grad_norm": 0.22298532724380493, + "learning_rate": 6.90220909558826e-05, + "loss": -0.9395, + "step": 24420 + }, + { + "epoch": 54.775784753363226, + "grad_norm": 0.23099954426288605, + "learning_rate": 6.899660245268237e-05, + "loss": -0.9391, + "step": 24430 + }, + { + "epoch": 54.7982062780269, + "grad_norm": 0.249789759516716, + "learning_rate": 6.897110817881592e-05, + "loss": -0.9401, + "step": 24440 + }, + { + "epoch": 54.82062780269058, + "grad_norm": 0.28826144337654114, + "learning_rate": 6.894560814202769e-05, + "loss": -0.9405, + "step": 24450 + }, + { + "epoch": 54.84304932735426, + "grad_norm": 0.2667083740234375, + "learning_rate": 6.892010235006394e-05, + "loss": -0.9331, + "step": 24460 + }, + { + "epoch": 54.865470852017935, + "grad_norm": 0.21455711126327515, + "learning_rate": 6.889459081067264e-05, + "loss": -0.9414, + "step": 24470 + }, + { + "epoch": 54.88789237668161, + "grad_norm": 0.297272264957428, + "learning_rate": 6.886907353160356e-05, + "loss": -0.9373, + "step": 24480 + }, + { + "epoch": 54.91031390134529, + "grad_norm": 0.2285468578338623, + "learning_rate": 6.884355052060814e-05, + "loss": -0.9428, + "step": 24490 + }, + { + "epoch": 54.93273542600897, + "grad_norm": 0.2255743145942688, + "learning_rate": 6.88180217854396e-05, + "loss": -0.9401, + "step": 24500 + }, + { + "epoch": 54.955156950672645, + "grad_norm": 0.25132712721824646, + "learning_rate": 6.87924873338529e-05, + "loss": -0.9391, + "step": 24510 + }, + { + "epoch": 54.97757847533632, + "grad_norm": 0.2884373962879181, + "learning_rate": 6.876694717360475e-05, + "loss": -0.9426, + "step": 24520 + }, + { + "epoch": 55.0, + "grad_norm": 0.4170597493648529, + "learning_rate": 6.874140131245355e-05, + "loss": -0.9402, + "step": 24530 + }, + { + "epoch": 55.02242152466368, + "grad_norm": 0.30531689524650574, + "learning_rate": 6.871584975815948e-05, + "loss": -0.9396, + "step": 24540 + }, + { + "epoch": 55.044843049327355, + "grad_norm": 0.22563306987285614, + "learning_rate": 6.86902925184844e-05, + "loss": -0.94, + "step": 24550 + }, + { + "epoch": 55.06726457399103, + "grad_norm": 0.29601380228996277, + "learning_rate": 6.866472960119195e-05, + "loss": -0.9398, + "step": 24560 + }, + { + "epoch": 55.08968609865471, + "grad_norm": 0.20429055392742157, + "learning_rate": 6.863916101404748e-05, + "loss": -0.9403, + "step": 24570 + }, + { + "epoch": 55.11210762331839, + "grad_norm": 0.2195153683423996, + "learning_rate": 6.8613586764818e-05, + "loss": -0.9387, + "step": 24580 + }, + { + "epoch": 55.134529147982065, + "grad_norm": 0.30164554715156555, + "learning_rate": 6.858800686127233e-05, + "loss": -0.9412, + "step": 24590 + }, + { + "epoch": 55.15695067264574, + "grad_norm": 0.28093552589416504, + "learning_rate": 6.856242131118097e-05, + "loss": -0.943, + "step": 24600 + }, + { + "epoch": 55.17937219730942, + "grad_norm": 0.3335550129413605, + "learning_rate": 6.853683012231614e-05, + "loss": -0.9402, + "step": 24610 + }, + { + "epoch": 55.2017937219731, + "grad_norm": 0.2964143753051758, + "learning_rate": 6.851123330245173e-05, + "loss": -0.9436, + "step": 24620 + }, + { + "epoch": 55.224215246636774, + "grad_norm": 0.24938520789146423, + "learning_rate": 6.848563085936343e-05, + "loss": -0.9417, + "step": 24630 + }, + { + "epoch": 55.24663677130045, + "grad_norm": 0.264434278011322, + "learning_rate": 6.846002280082853e-05, + "loss": -0.943, + "step": 24640 + }, + { + "epoch": 55.26905829596413, + "grad_norm": 0.25896134972572327, + "learning_rate": 6.843440913462614e-05, + "loss": -0.9436, + "step": 24650 + }, + { + "epoch": 55.2914798206278, + "grad_norm": 0.24611198902130127, + "learning_rate": 6.840878986853698e-05, + "loss": -0.9424, + "step": 24660 + }, + { + "epoch": 55.31390134529148, + "grad_norm": 0.4415561258792877, + "learning_rate": 6.838316501034352e-05, + "loss": -0.9423, + "step": 24670 + }, + { + "epoch": 55.336322869955154, + "grad_norm": 0.25824469327926636, + "learning_rate": 6.83575345678299e-05, + "loss": -0.9434, + "step": 24680 + }, + { + "epoch": 55.35874439461883, + "grad_norm": 0.28978949785232544, + "learning_rate": 6.833189854878196e-05, + "loss": -0.9425, + "step": 24690 + }, + { + "epoch": 55.38116591928251, + "grad_norm": 0.2899949550628662, + "learning_rate": 6.83062569609873e-05, + "loss": -0.9398, + "step": 24700 + }, + { + "epoch": 55.403587443946186, + "grad_norm": 0.22942966222763062, + "learning_rate": 6.828060981223512e-05, + "loss": -0.9437, + "step": 24710 + }, + { + "epoch": 55.426008968609864, + "grad_norm": 0.21488964557647705, + "learning_rate": 6.825495711031634e-05, + "loss": -0.9431, + "step": 24720 + }, + { + "epoch": 55.44843049327354, + "grad_norm": 0.22674594819545746, + "learning_rate": 6.822929886302359e-05, + "loss": -0.9446, + "step": 24730 + }, + { + "epoch": 55.47085201793722, + "grad_norm": 0.1811126172542572, + "learning_rate": 6.820363507815116e-05, + "loss": -0.946, + "step": 24740 + }, + { + "epoch": 55.493273542600896, + "grad_norm": 0.24751785397529602, + "learning_rate": 6.817796576349501e-05, + "loss": -0.9399, + "step": 24750 + }, + { + "epoch": 55.51569506726457, + "grad_norm": 0.23191046714782715, + "learning_rate": 6.815229092685285e-05, + "loss": -0.9393, + "step": 24760 + }, + { + "epoch": 55.53811659192825, + "grad_norm": 0.30118319392204285, + "learning_rate": 6.812661057602399e-05, + "loss": -0.9455, + "step": 24770 + }, + { + "epoch": 55.56053811659193, + "grad_norm": 0.22873714566230774, + "learning_rate": 6.810092471880943e-05, + "loss": -0.9422, + "step": 24780 + }, + { + "epoch": 55.582959641255606, + "grad_norm": 0.32459455728530884, + "learning_rate": 6.807523336301187e-05, + "loss": -0.9408, + "step": 24790 + }, + { + "epoch": 55.60538116591928, + "grad_norm": 0.28125426173210144, + "learning_rate": 6.804953651643566e-05, + "loss": -0.9386, + "step": 24800 + }, + { + "epoch": 55.62780269058296, + "grad_norm": 0.29177752137184143, + "learning_rate": 6.802383418688685e-05, + "loss": -0.9374, + "step": 24810 + }, + { + "epoch": 55.65022421524664, + "grad_norm": 0.29111745953559875, + "learning_rate": 6.799812638217309e-05, + "loss": -0.9424, + "step": 24820 + }, + { + "epoch": 55.672645739910315, + "grad_norm": 0.3476196825504303, + "learning_rate": 6.797241311010373e-05, + "loss": -0.9402, + "step": 24830 + }, + { + "epoch": 55.69506726457399, + "grad_norm": 0.2804240584373474, + "learning_rate": 6.794669437848982e-05, + "loss": -0.9383, + "step": 24840 + }, + { + "epoch": 55.71748878923767, + "grad_norm": 0.2128501683473587, + "learning_rate": 6.792097019514402e-05, + "loss": -0.9425, + "step": 24850 + }, + { + "epoch": 55.73991031390135, + "grad_norm": 0.28785043954849243, + "learning_rate": 6.789524056788064e-05, + "loss": -0.9419, + "step": 24860 + }, + { + "epoch": 55.762331838565025, + "grad_norm": 0.1830490678548813, + "learning_rate": 6.786950550451567e-05, + "loss": -0.9452, + "step": 24870 + }, + { + "epoch": 55.7847533632287, + "grad_norm": 0.17872892320156097, + "learning_rate": 6.784376501286676e-05, + "loss": -0.9417, + "step": 24880 + }, + { + "epoch": 55.80717488789238, + "grad_norm": 0.19682088494300842, + "learning_rate": 6.781801910075316e-05, + "loss": -0.9465, + "step": 24890 + }, + { + "epoch": 55.82959641255606, + "grad_norm": 0.2730081081390381, + "learning_rate": 6.779226777599581e-05, + "loss": -0.9418, + "step": 24900 + }, + { + "epoch": 55.85201793721973, + "grad_norm": 0.20663826167583466, + "learning_rate": 6.776651104641729e-05, + "loss": -0.9376, + "step": 24910 + }, + { + "epoch": 55.874439461883405, + "grad_norm": 0.29277122020721436, + "learning_rate": 6.774074891984183e-05, + "loss": -0.941, + "step": 24920 + }, + { + "epoch": 55.89686098654708, + "grad_norm": 0.1634233146905899, + "learning_rate": 6.771498140409526e-05, + "loss": -0.9407, + "step": 24930 + }, + { + "epoch": 55.91928251121076, + "grad_norm": 0.22625796496868134, + "learning_rate": 6.768920850700506e-05, + "loss": -0.946, + "step": 24940 + }, + { + "epoch": 55.94170403587444, + "grad_norm": 0.23486068844795227, + "learning_rate": 6.766343023640039e-05, + "loss": -0.9366, + "step": 24950 + }, + { + "epoch": 55.964125560538115, + "grad_norm": 0.38255953788757324, + "learning_rate": 6.763764660011198e-05, + "loss": -0.9417, + "step": 24960 + }, + { + "epoch": 55.98654708520179, + "grad_norm": 0.2504153549671173, + "learning_rate": 6.761185760597223e-05, + "loss": -0.9436, + "step": 24970 + }, + { + "epoch": 56.00896860986547, + "grad_norm": 0.28194794058799744, + "learning_rate": 6.758606326181515e-05, + "loss": -0.9404, + "step": 24980 + }, + { + "epoch": 56.03139013452915, + "grad_norm": 0.2607189416885376, + "learning_rate": 6.75602635754764e-05, + "loss": -0.9444, + "step": 24990 + }, + { + "epoch": 56.053811659192824, + "grad_norm": 0.3015444278717041, + "learning_rate": 6.75344585547932e-05, + "loss": -0.9423, + "step": 25000 + }, + { + "epoch": 56.0762331838565, + "grad_norm": 0.2613963186740875, + "learning_rate": 6.750864820760449e-05, + "loss": -0.9456, + "step": 25010 + }, + { + "epoch": 56.09865470852018, + "grad_norm": 0.42110899090766907, + "learning_rate": 6.748283254175072e-05, + "loss": -0.9391, + "step": 25020 + }, + { + "epoch": 56.12107623318386, + "grad_norm": 0.35819876194000244, + "learning_rate": 6.745701156507404e-05, + "loss": -0.9405, + "step": 25030 + }, + { + "epoch": 56.143497757847534, + "grad_norm": 0.1476014107465744, + "learning_rate": 6.743118528541818e-05, + "loss": -0.9423, + "step": 25040 + }, + { + "epoch": 56.16591928251121, + "grad_norm": 0.3208256661891937, + "learning_rate": 6.740535371062846e-05, + "loss": -0.9435, + "step": 25050 + }, + { + "epoch": 56.18834080717489, + "grad_norm": 0.17163343727588654, + "learning_rate": 6.737951684855185e-05, + "loss": -0.9447, + "step": 25060 + }, + { + "epoch": 56.210762331838566, + "grad_norm": 0.21293878555297852, + "learning_rate": 6.735367470703691e-05, + "loss": -0.9451, + "step": 25070 + }, + { + "epoch": 56.233183856502244, + "grad_norm": 0.2333642989397049, + "learning_rate": 6.732782729393379e-05, + "loss": -0.9434, + "step": 25080 + }, + { + "epoch": 56.25560538116592, + "grad_norm": 0.22583775222301483, + "learning_rate": 6.730197461709425e-05, + "loss": -0.9434, + "step": 25090 + }, + { + "epoch": 56.2780269058296, + "grad_norm": 0.2428818941116333, + "learning_rate": 6.727611668437164e-05, + "loss": -0.9396, + "step": 25100 + }, + { + "epoch": 56.300448430493276, + "grad_norm": 0.22396428883075714, + "learning_rate": 6.725025350362094e-05, + "loss": -0.9452, + "step": 25110 + }, + { + "epoch": 56.32286995515695, + "grad_norm": 0.33248913288116455, + "learning_rate": 6.72243850826987e-05, + "loss": -0.9465, + "step": 25120 + }, + { + "epoch": 56.34529147982063, + "grad_norm": 0.2772247791290283, + "learning_rate": 6.719851142946305e-05, + "loss": -0.9434, + "step": 25130 + }, + { + "epoch": 56.36771300448431, + "grad_norm": 0.2795802354812622, + "learning_rate": 6.717263255177372e-05, + "loss": -0.9375, + "step": 25140 + }, + { + "epoch": 56.390134529147986, + "grad_norm": 0.18928584456443787, + "learning_rate": 6.714674845749205e-05, + "loss": -0.9414, + "step": 25150 + }, + { + "epoch": 56.412556053811656, + "grad_norm": 0.25264450907707214, + "learning_rate": 6.712085915448092e-05, + "loss": -0.9424, + "step": 25160 + }, + { + "epoch": 56.43497757847533, + "grad_norm": 0.3741258382797241, + "learning_rate": 6.709496465060486e-05, + "loss": -0.9402, + "step": 25170 + }, + { + "epoch": 56.45739910313901, + "grad_norm": 0.20252344012260437, + "learning_rate": 6.706906495372987e-05, + "loss": -0.9446, + "step": 25180 + }, + { + "epoch": 56.47982062780269, + "grad_norm": 0.2839559018611908, + "learning_rate": 6.704316007172365e-05, + "loss": -0.943, + "step": 25190 + }, + { + "epoch": 56.502242152466366, + "grad_norm": 0.22564294934272766, + "learning_rate": 6.701725001245539e-05, + "loss": -0.9386, + "step": 25200 + }, + { + "epoch": 56.52466367713004, + "grad_norm": 0.36423319578170776, + "learning_rate": 6.699133478379588e-05, + "loss": -0.9419, + "step": 25210 + }, + { + "epoch": 56.54708520179372, + "grad_norm": 0.16650822758674622, + "learning_rate": 6.69654143936175e-05, + "loss": -0.9433, + "step": 25220 + }, + { + "epoch": 56.5695067264574, + "grad_norm": 0.20964738726615906, + "learning_rate": 6.693948884979419e-05, + "loss": -0.9406, + "step": 25230 + }, + { + "epoch": 56.591928251121075, + "grad_norm": 0.27272793650627136, + "learning_rate": 6.691355816020142e-05, + "loss": -0.9427, + "step": 25240 + }, + { + "epoch": 56.61434977578475, + "grad_norm": 0.15454410016536713, + "learning_rate": 6.688762233271624e-05, + "loss": -0.9414, + "step": 25250 + }, + { + "epoch": 56.63677130044843, + "grad_norm": 0.22236506640911102, + "learning_rate": 6.68616813752173e-05, + "loss": -0.9408, + "step": 25260 + }, + { + "epoch": 56.65919282511211, + "grad_norm": 0.17088140547275543, + "learning_rate": 6.683573529558477e-05, + "loss": -0.9451, + "step": 25270 + }, + { + "epoch": 56.681614349775785, + "grad_norm": 0.1856728345155716, + "learning_rate": 6.680978410170037e-05, + "loss": -0.9434, + "step": 25280 + }, + { + "epoch": 56.70403587443946, + "grad_norm": 0.20471633970737457, + "learning_rate": 6.678382780144741e-05, + "loss": -0.9431, + "step": 25290 + }, + { + "epoch": 56.72645739910314, + "grad_norm": 0.17360499501228333, + "learning_rate": 6.675786640271071e-05, + "loss": -0.9453, + "step": 25300 + }, + { + "epoch": 56.74887892376682, + "grad_norm": 0.27550652623176575, + "learning_rate": 6.673189991337665e-05, + "loss": -0.9427, + "step": 25310 + }, + { + "epoch": 56.771300448430495, + "grad_norm": 0.25332579016685486, + "learning_rate": 6.670592834133317e-05, + "loss": -0.9376, + "step": 25320 + }, + { + "epoch": 56.79372197309417, + "grad_norm": 0.2902957797050476, + "learning_rate": 6.667995169446979e-05, + "loss": -0.9426, + "step": 25330 + }, + { + "epoch": 56.81614349775785, + "grad_norm": 0.26939448714256287, + "learning_rate": 6.665396998067747e-05, + "loss": -0.9368, + "step": 25340 + }, + { + "epoch": 56.83856502242153, + "grad_norm": 0.3278329074382782, + "learning_rate": 6.66279832078488e-05, + "loss": -0.943, + "step": 25350 + }, + { + "epoch": 56.860986547085204, + "grad_norm": 0.23341332376003265, + "learning_rate": 6.660199138387786e-05, + "loss": -0.944, + "step": 25360 + }, + { + "epoch": 56.88340807174888, + "grad_norm": 0.2543450593948364, + "learning_rate": 6.65759945166603e-05, + "loss": -0.9386, + "step": 25370 + }, + { + "epoch": 56.90582959641256, + "grad_norm": 0.2110522836446762, + "learning_rate": 6.654999261409326e-05, + "loss": -0.9372, + "step": 25380 + }, + { + "epoch": 56.92825112107624, + "grad_norm": 0.24779188632965088, + "learning_rate": 6.652398568407544e-05, + "loss": -0.938, + "step": 25390 + }, + { + "epoch": 56.95067264573991, + "grad_norm": 0.35930097103118896, + "learning_rate": 6.649797373450707e-05, + "loss": -0.9386, + "step": 25400 + }, + { + "epoch": 56.973094170403584, + "grad_norm": 0.15733212232589722, + "learning_rate": 6.647195677328988e-05, + "loss": -0.9453, + "step": 25410 + }, + { + "epoch": 56.99551569506726, + "grad_norm": 0.1570158153772354, + "learning_rate": 6.644593480832712e-05, + "loss": -0.9449, + "step": 25420 + }, + { + "epoch": 57.01793721973094, + "grad_norm": 0.21170541644096375, + "learning_rate": 6.641990784752363e-05, + "loss": -0.9413, + "step": 25430 + }, + { + "epoch": 57.04035874439462, + "grad_norm": 0.27404946088790894, + "learning_rate": 6.639387589878566e-05, + "loss": -0.9428, + "step": 25440 + }, + { + "epoch": 57.062780269058294, + "grad_norm": 0.22631807625293732, + "learning_rate": 6.636783897002103e-05, + "loss": -0.945, + "step": 25450 + }, + { + "epoch": 57.08520179372197, + "grad_norm": 0.2348569631576538, + "learning_rate": 6.63417970691391e-05, + "loss": -0.9427, + "step": 25460 + }, + { + "epoch": 57.10762331838565, + "grad_norm": 0.3215603232383728, + "learning_rate": 6.63157502040507e-05, + "loss": -0.9444, + "step": 25470 + }, + { + "epoch": 57.130044843049326, + "grad_norm": 0.18338148295879364, + "learning_rate": 6.628969838266819e-05, + "loss": -0.9382, + "step": 25480 + }, + { + "epoch": 57.152466367713004, + "grad_norm": 0.20984390377998352, + "learning_rate": 6.626364161290541e-05, + "loss": -0.9423, + "step": 25490 + }, + { + "epoch": 57.17488789237668, + "grad_norm": 0.3254556953907013, + "learning_rate": 6.623757990267774e-05, + "loss": -0.9431, + "step": 25500 + }, + { + "epoch": 57.19730941704036, + "grad_norm": 0.3217345178127289, + "learning_rate": 6.621151325990201e-05, + "loss": -0.9429, + "step": 25510 + }, + { + "epoch": 57.219730941704036, + "grad_norm": 0.19569392502307892, + "learning_rate": 6.618544169249657e-05, + "loss": -0.9417, + "step": 25520 + }, + { + "epoch": 57.24215246636771, + "grad_norm": 0.2402227222919464, + "learning_rate": 6.615936520838133e-05, + "loss": -0.9404, + "step": 25530 + }, + { + "epoch": 57.26457399103139, + "grad_norm": 0.3083745539188385, + "learning_rate": 6.613328381547759e-05, + "loss": -0.9439, + "step": 25540 + }, + { + "epoch": 57.28699551569507, + "grad_norm": 0.42168208956718445, + "learning_rate": 6.610719752170821e-05, + "loss": -0.9274, + "step": 25550 + }, + { + "epoch": 57.309417040358746, + "grad_norm": 0.3573894202709198, + "learning_rate": 6.60811063349975e-05, + "loss": -0.9438, + "step": 25560 + }, + { + "epoch": 57.33183856502242, + "grad_norm": 0.2902054786682129, + "learning_rate": 6.605501026327127e-05, + "loss": -0.9413, + "step": 25570 + }, + { + "epoch": 57.3542600896861, + "grad_norm": 0.16190148890018463, + "learning_rate": 6.602890931445685e-05, + "loss": -0.9365, + "step": 25580 + }, + { + "epoch": 57.37668161434978, + "grad_norm": 0.26263853907585144, + "learning_rate": 6.6002803496483e-05, + "loss": -0.9424, + "step": 25590 + }, + { + "epoch": 57.399103139013455, + "grad_norm": 0.30495789647102356, + "learning_rate": 6.597669281727997e-05, + "loss": -0.9415, + "step": 25600 + }, + { + "epoch": 57.42152466367713, + "grad_norm": 0.24241365492343903, + "learning_rate": 6.595057728477949e-05, + "loss": -0.9364, + "step": 25610 + }, + { + "epoch": 57.44394618834081, + "grad_norm": 0.3976122736930847, + "learning_rate": 6.59244569069148e-05, + "loss": -0.9394, + "step": 25620 + }, + { + "epoch": 57.46636771300449, + "grad_norm": 0.2199128121137619, + "learning_rate": 6.589833169162054e-05, + "loss": -0.9469, + "step": 25630 + }, + { + "epoch": 57.488789237668165, + "grad_norm": 0.24560093879699707, + "learning_rate": 6.587220164683291e-05, + "loss": -0.9395, + "step": 25640 + }, + { + "epoch": 57.511210762331835, + "grad_norm": 0.2532009780406952, + "learning_rate": 6.58460667804895e-05, + "loss": -0.9441, + "step": 25650 + }, + { + "epoch": 57.53363228699551, + "grad_norm": 0.23010443150997162, + "learning_rate": 6.581992710052938e-05, + "loss": -0.9402, + "step": 25660 + }, + { + "epoch": 57.55605381165919, + "grad_norm": 0.23112981021404266, + "learning_rate": 6.579378261489311e-05, + "loss": -0.9418, + "step": 25670 + }, + { + "epoch": 57.57847533632287, + "grad_norm": 0.25677961111068726, + "learning_rate": 6.576763333152268e-05, + "loss": -0.9414, + "step": 25680 + }, + { + "epoch": 57.600896860986545, + "grad_norm": 0.21636155247688293, + "learning_rate": 6.574147925836159e-05, + "loss": -0.9458, + "step": 25690 + }, + { + "epoch": 57.62331838565022, + "grad_norm": 0.19453175365924835, + "learning_rate": 6.571532040335472e-05, + "loss": -0.9425, + "step": 25700 + }, + { + "epoch": 57.6457399103139, + "grad_norm": 0.2804657518863678, + "learning_rate": 6.568915677444845e-05, + "loss": -0.9452, + "step": 25710 + }, + { + "epoch": 57.66816143497758, + "grad_norm": 0.2734091281890869, + "learning_rate": 6.56629883795906e-05, + "loss": -0.9433, + "step": 25720 + }, + { + "epoch": 57.690582959641254, + "grad_norm": 0.2639697194099426, + "learning_rate": 6.563681522673043e-05, + "loss": -0.9434, + "step": 25730 + }, + { + "epoch": 57.71300448430493, + "grad_norm": 0.23582996428012848, + "learning_rate": 6.561063732381867e-05, + "loss": -0.9398, + "step": 25740 + }, + { + "epoch": 57.73542600896861, + "grad_norm": 0.2293683886528015, + "learning_rate": 6.558445467880745e-05, + "loss": -0.9415, + "step": 25750 + }, + { + "epoch": 57.75784753363229, + "grad_norm": 0.3063901364803314, + "learning_rate": 6.55582672996504e-05, + "loss": -0.9415, + "step": 25760 + }, + { + "epoch": 57.780269058295964, + "grad_norm": 0.27467283606529236, + "learning_rate": 6.553207519430253e-05, + "loss": -0.9429, + "step": 25770 + }, + { + "epoch": 57.80269058295964, + "grad_norm": 0.16614504158496857, + "learning_rate": 6.550587837072032e-05, + "loss": -0.942, + "step": 25780 + }, + { + "epoch": 57.82511210762332, + "grad_norm": 0.21981555223464966, + "learning_rate": 6.547967683686166e-05, + "loss": -0.9429, + "step": 25790 + }, + { + "epoch": 57.847533632286996, + "grad_norm": 0.15074661374092102, + "learning_rate": 6.545347060068591e-05, + "loss": -0.9448, + "step": 25800 + }, + { + "epoch": 57.869955156950674, + "grad_norm": 0.22016564011573792, + "learning_rate": 6.542725967015382e-05, + "loss": -0.9425, + "step": 25810 + }, + { + "epoch": 57.89237668161435, + "grad_norm": 0.21666109561920166, + "learning_rate": 6.540104405322757e-05, + "loss": -0.9448, + "step": 25820 + }, + { + "epoch": 57.91479820627803, + "grad_norm": 0.20727267861366272, + "learning_rate": 6.537482375787077e-05, + "loss": -0.9412, + "step": 25830 + }, + { + "epoch": 57.937219730941706, + "grad_norm": 0.2513749301433563, + "learning_rate": 6.534859879204845e-05, + "loss": -0.9431, + "step": 25840 + }, + { + "epoch": 57.95964125560538, + "grad_norm": 0.22757767140865326, + "learning_rate": 6.532236916372709e-05, + "loss": -0.9437, + "step": 25850 + }, + { + "epoch": 57.98206278026906, + "grad_norm": 0.3122309744358063, + "learning_rate": 6.529613488087454e-05, + "loss": -0.9434, + "step": 25860 + }, + { + "epoch": 58.00448430493274, + "grad_norm": 0.27222487330436707, + "learning_rate": 6.526989595146009e-05, + "loss": -0.9468, + "step": 25870 + }, + { + "epoch": 58.026905829596416, + "grad_norm": 0.2165900319814682, + "learning_rate": 6.524365238345441e-05, + "loss": -0.9458, + "step": 25880 + }, + { + "epoch": 58.04932735426009, + "grad_norm": 0.27270281314849854, + "learning_rate": 6.521740418482964e-05, + "loss": -0.9441, + "step": 25890 + }, + { + "epoch": 58.07174887892376, + "grad_norm": 0.22298599779605865, + "learning_rate": 6.519115136355925e-05, + "loss": -0.9443, + "step": 25900 + }, + { + "epoch": 58.09417040358744, + "grad_norm": 0.28739839792251587, + "learning_rate": 6.51648939276182e-05, + "loss": -0.9414, + "step": 25910 + }, + { + "epoch": 58.11659192825112, + "grad_norm": 0.22011078894138336, + "learning_rate": 6.513863188498277e-05, + "loss": -0.9424, + "step": 25920 + }, + { + "epoch": 58.139013452914796, + "grad_norm": 0.2812582552433014, + "learning_rate": 6.511236524363068e-05, + "loss": -0.9421, + "step": 25930 + }, + { + "epoch": 58.16143497757847, + "grad_norm": 0.3403503894805908, + "learning_rate": 6.508609401154104e-05, + "loss": -0.9402, + "step": 25940 + }, + { + "epoch": 58.18385650224215, + "grad_norm": 0.2665221691131592, + "learning_rate": 6.505981819669439e-05, + "loss": -0.9389, + "step": 25950 + }, + { + "epoch": 58.20627802690583, + "grad_norm": 0.23063121736049652, + "learning_rate": 6.503353780707258e-05, + "loss": -0.9434, + "step": 25960 + }, + { + "epoch": 58.228699551569505, + "grad_norm": 0.20811472833156586, + "learning_rate": 6.500725285065895e-05, + "loss": -0.9442, + "step": 25970 + }, + { + "epoch": 58.25112107623318, + "grad_norm": 0.29132330417633057, + "learning_rate": 6.498096333543813e-05, + "loss": -0.9406, + "step": 25980 + }, + { + "epoch": 58.27354260089686, + "grad_norm": 0.22113622725009918, + "learning_rate": 6.49546692693962e-05, + "loss": -0.9451, + "step": 25990 + }, + { + "epoch": 58.29596412556054, + "grad_norm": 0.29862216114997864, + "learning_rate": 6.492837066052059e-05, + "loss": -0.9366, + "step": 26000 + }, + { + "epoch": 58.318385650224215, + "grad_norm": 0.2764894366264343, + "learning_rate": 6.490206751680014e-05, + "loss": -0.9365, + "step": 26010 + }, + { + "epoch": 58.34080717488789, + "grad_norm": 0.3367348611354828, + "learning_rate": 6.487575984622505e-05, + "loss": -0.9397, + "step": 26020 + }, + { + "epoch": 58.36322869955157, + "grad_norm": 0.23690587282180786, + "learning_rate": 6.484944765678689e-05, + "loss": -0.9414, + "step": 26030 + }, + { + "epoch": 58.38565022421525, + "grad_norm": 0.3045159578323364, + "learning_rate": 6.482313095647861e-05, + "loss": -0.9423, + "step": 26040 + }, + { + "epoch": 58.408071748878925, + "grad_norm": 0.23642782866954803, + "learning_rate": 6.479680975329451e-05, + "loss": -0.9399, + "step": 26050 + }, + { + "epoch": 58.4304932735426, + "grad_norm": 0.18362928926944733, + "learning_rate": 6.477048405523031e-05, + "loss": -0.9448, + "step": 26060 + }, + { + "epoch": 58.45291479820628, + "grad_norm": 0.23692704737186432, + "learning_rate": 6.474415387028304e-05, + "loss": -0.9423, + "step": 26070 + }, + { + "epoch": 58.47533632286996, + "grad_norm": 0.26091092824935913, + "learning_rate": 6.471781920645114e-05, + "loss": -0.947, + "step": 26080 + }, + { + "epoch": 58.497757847533634, + "grad_norm": 0.2285461127758026, + "learning_rate": 6.469148007173434e-05, + "loss": -0.9442, + "step": 26090 + }, + { + "epoch": 58.52017937219731, + "grad_norm": 0.1594657599925995, + "learning_rate": 6.466513647413381e-05, + "loss": -0.943, + "step": 26100 + }, + { + "epoch": 58.54260089686099, + "grad_norm": 0.22046306729316711, + "learning_rate": 6.463878842165203e-05, + "loss": -0.9436, + "step": 26110 + }, + { + "epoch": 58.56502242152467, + "grad_norm": 0.15607106685638428, + "learning_rate": 6.461243592229286e-05, + "loss": -0.9417, + "step": 26120 + }, + { + "epoch": 58.587443946188344, + "grad_norm": 0.17857609689235687, + "learning_rate": 6.458607898406146e-05, + "loss": -0.9471, + "step": 26130 + }, + { + "epoch": 58.609865470852014, + "grad_norm": 0.1740322858095169, + "learning_rate": 6.455971761496439e-05, + "loss": -0.9478, + "step": 26140 + }, + { + "epoch": 58.63228699551569, + "grad_norm": 0.15828000009059906, + "learning_rate": 6.453335182300953e-05, + "loss": -0.9448, + "step": 26150 + }, + { + "epoch": 58.65470852017937, + "grad_norm": 0.18905964493751526, + "learning_rate": 6.450698161620612e-05, + "loss": -0.9468, + "step": 26160 + }, + { + "epoch": 58.67713004484305, + "grad_norm": 0.26905444264411926, + "learning_rate": 6.448060700256473e-05, + "loss": -0.9385, + "step": 26170 + }, + { + "epoch": 58.699551569506724, + "grad_norm": 0.15565893054008484, + "learning_rate": 6.445422799009726e-05, + "loss": -0.9452, + "step": 26180 + }, + { + "epoch": 58.7219730941704, + "grad_norm": 0.3421958386898041, + "learning_rate": 6.442784458681699e-05, + "loss": -0.9427, + "step": 26190 + }, + { + "epoch": 58.74439461883408, + "grad_norm": 0.2739560306072235, + "learning_rate": 6.440145680073847e-05, + "loss": -0.9445, + "step": 26200 + }, + { + "epoch": 58.766816143497756, + "grad_norm": 0.19211937487125397, + "learning_rate": 6.437506463987762e-05, + "loss": -0.9416, + "step": 26210 + }, + { + "epoch": 58.789237668161434, + "grad_norm": 0.18565990030765533, + "learning_rate": 6.434866811225168e-05, + "loss": -0.9447, + "step": 26220 + }, + { + "epoch": 58.81165919282511, + "grad_norm": 0.237209290266037, + "learning_rate": 6.432226722587923e-05, + "loss": -0.9414, + "step": 26230 + }, + { + "epoch": 58.83408071748879, + "grad_norm": 0.24920223653316498, + "learning_rate": 6.429586198878015e-05, + "loss": -0.9408, + "step": 26240 + }, + { + "epoch": 58.856502242152466, + "grad_norm": 0.22274670004844666, + "learning_rate": 6.426945240897566e-05, + "loss": -0.9358, + "step": 26250 + }, + { + "epoch": 58.87892376681614, + "grad_norm": 0.17165228724479675, + "learning_rate": 6.424303849448829e-05, + "loss": -0.9427, + "step": 26260 + }, + { + "epoch": 58.90134529147982, + "grad_norm": 0.20151665806770325, + "learning_rate": 6.42166202533419e-05, + "loss": -0.943, + "step": 26270 + }, + { + "epoch": 58.9237668161435, + "grad_norm": 0.20731018483638763, + "learning_rate": 6.419019769356164e-05, + "loss": -0.9461, + "step": 26280 + }, + { + "epoch": 58.946188340807176, + "grad_norm": 0.26915237307548523, + "learning_rate": 6.416377082317398e-05, + "loss": -0.9436, + "step": 26290 + }, + { + "epoch": 58.96860986547085, + "grad_norm": 0.23497238755226135, + "learning_rate": 6.413733965020674e-05, + "loss": -0.944, + "step": 26300 + }, + { + "epoch": 58.99103139013453, + "grad_norm": 0.20766311883926392, + "learning_rate": 6.411090418268896e-05, + "loss": -0.9454, + "step": 26310 + }, + { + "epoch": 59.01345291479821, + "grad_norm": 0.15400834381580353, + "learning_rate": 6.408446442865109e-05, + "loss": -0.9482, + "step": 26320 + }, + { + "epoch": 59.035874439461885, + "grad_norm": 0.18406574428081512, + "learning_rate": 6.405802039612479e-05, + "loss": -0.9423, + "step": 26330 + }, + { + "epoch": 59.05829596412556, + "grad_norm": 0.19632188975811005, + "learning_rate": 6.403157209314308e-05, + "loss": -0.9429, + "step": 26340 + }, + { + "epoch": 59.08071748878924, + "grad_norm": 0.2023487389087677, + "learning_rate": 6.400511952774024e-05, + "loss": -0.9431, + "step": 26350 + }, + { + "epoch": 59.10313901345292, + "grad_norm": 0.12870509922504425, + "learning_rate": 6.397866270795187e-05, + "loss": -0.9451, + "step": 26360 + }, + { + "epoch": 59.125560538116595, + "grad_norm": 0.25145530700683594, + "learning_rate": 6.395220164181489e-05, + "loss": -0.9467, + "step": 26370 + }, + { + "epoch": 59.14798206278027, + "grad_norm": 0.26373109221458435, + "learning_rate": 6.39257363373674e-05, + "loss": -0.9432, + "step": 26380 + }, + { + "epoch": 59.17040358744394, + "grad_norm": 0.18872250616550446, + "learning_rate": 6.389926680264892e-05, + "loss": -0.9437, + "step": 26390 + }, + { + "epoch": 59.19282511210762, + "grad_norm": 0.18159259855747223, + "learning_rate": 6.387279304570017e-05, + "loss": -0.9373, + "step": 26400 + }, + { + "epoch": 59.2152466367713, + "grad_norm": 0.14834368228912354, + "learning_rate": 6.384631507456319e-05, + "loss": -0.9458, + "step": 26410 + }, + { + "epoch": 59.237668161434975, + "grad_norm": 0.22260083258152008, + "learning_rate": 6.381983289728126e-05, + "loss": -0.9437, + "step": 26420 + }, + { + "epoch": 59.26008968609865, + "grad_norm": 0.2882222533226013, + "learning_rate": 6.3793346521899e-05, + "loss": -0.9402, + "step": 26430 + }, + { + "epoch": 59.28251121076233, + "grad_norm": 0.19983799755573273, + "learning_rate": 6.376685595646226e-05, + "loss": -0.9407, + "step": 26440 + }, + { + "epoch": 59.30493273542601, + "grad_norm": 0.26810359954833984, + "learning_rate": 6.374036120901816e-05, + "loss": -0.9405, + "step": 26450 + }, + { + "epoch": 59.327354260089685, + "grad_norm": 0.268173485994339, + "learning_rate": 6.371386228761514e-05, + "loss": -0.9438, + "step": 26460 + }, + { + "epoch": 59.34977578475336, + "grad_norm": 0.2475842982530594, + "learning_rate": 6.368735920030283e-05, + "loss": -0.9387, + "step": 26470 + }, + { + "epoch": 59.37219730941704, + "grad_norm": 0.16123077273368835, + "learning_rate": 6.366085195513218e-05, + "loss": -0.9415, + "step": 26480 + }, + { + "epoch": 59.39461883408072, + "grad_norm": 0.2969350814819336, + "learning_rate": 6.363434056015543e-05, + "loss": -0.938, + "step": 26490 + }, + { + "epoch": 59.417040358744394, + "grad_norm": 0.1748512238264084, + "learning_rate": 6.360782502342599e-05, + "loss": -0.942, + "step": 26500 + }, + { + "epoch": 59.43946188340807, + "grad_norm": 0.23934832215309143, + "learning_rate": 6.358130535299862e-05, + "loss": -0.9376, + "step": 26510 + }, + { + "epoch": 59.46188340807175, + "grad_norm": 0.2534016966819763, + "learning_rate": 6.355478155692926e-05, + "loss": -0.9414, + "step": 26520 + }, + { + "epoch": 59.48430493273543, + "grad_norm": 0.26402291655540466, + "learning_rate": 6.352825364327517e-05, + "loss": -0.945, + "step": 26530 + }, + { + "epoch": 59.506726457399104, + "grad_norm": 0.25257956981658936, + "learning_rate": 6.350172162009482e-05, + "loss": -0.9373, + "step": 26540 + }, + { + "epoch": 59.52914798206278, + "grad_norm": 0.2471223771572113, + "learning_rate": 6.347518549544793e-05, + "loss": -0.9405, + "step": 26550 + }, + { + "epoch": 59.55156950672646, + "grad_norm": 0.3715278208255768, + "learning_rate": 6.344864527739547e-05, + "loss": -0.9439, + "step": 26560 + }, + { + "epoch": 59.573991031390136, + "grad_norm": 0.2612208425998688, + "learning_rate": 6.342210097399966e-05, + "loss": -0.9438, + "step": 26570 + }, + { + "epoch": 59.596412556053814, + "grad_norm": 0.1921583116054535, + "learning_rate": 6.339555259332398e-05, + "loss": -0.9355, + "step": 26580 + }, + { + "epoch": 59.61883408071749, + "grad_norm": 0.27548012137413025, + "learning_rate": 6.33690001434331e-05, + "loss": -0.9372, + "step": 26590 + }, + { + "epoch": 59.64125560538117, + "grad_norm": 0.31137362122535706, + "learning_rate": 6.334244363239296e-05, + "loss": -0.9424, + "step": 26600 + }, + { + "epoch": 59.663677130044846, + "grad_norm": 0.20502378046512604, + "learning_rate": 6.331588306827073e-05, + "loss": -0.9464, + "step": 26610 + }, + { + "epoch": 59.68609865470852, + "grad_norm": 0.15429842472076416, + "learning_rate": 6.328931845913483e-05, + "loss": -0.9433, + "step": 26620 + }, + { + "epoch": 59.7085201793722, + "grad_norm": 0.18441760540008545, + "learning_rate": 6.326274981305484e-05, + "loss": -0.9448, + "step": 26630 + }, + { + "epoch": 59.73094170403587, + "grad_norm": 0.23114298284053802, + "learning_rate": 6.323617713810166e-05, + "loss": -0.9421, + "step": 26640 + }, + { + "epoch": 59.75336322869955, + "grad_norm": 0.23540425300598145, + "learning_rate": 6.320960044234734e-05, + "loss": -0.9456, + "step": 26650 + }, + { + "epoch": 59.775784753363226, + "grad_norm": 0.39184409379959106, + "learning_rate": 6.318301973386518e-05, + "loss": -0.9372, + "step": 26660 + }, + { + "epoch": 59.7982062780269, + "grad_norm": 0.38092759251594543, + "learning_rate": 6.315643502072971e-05, + "loss": -0.9429, + "step": 26670 + }, + { + "epoch": 59.82062780269058, + "grad_norm": 0.1796349734067917, + "learning_rate": 6.312984631101667e-05, + "loss": -0.9448, + "step": 26680 + }, + { + "epoch": 59.84304932735426, + "grad_norm": 0.2785564959049225, + "learning_rate": 6.310325361280297e-05, + "loss": -0.946, + "step": 26690 + }, + { + "epoch": 59.865470852017935, + "grad_norm": 0.2900034487247467, + "learning_rate": 6.30766569341668e-05, + "loss": -0.9446, + "step": 26700 + }, + { + "epoch": 59.88789237668161, + "grad_norm": 0.2496289312839508, + "learning_rate": 6.305005628318753e-05, + "loss": -0.9389, + "step": 26710 + }, + { + "epoch": 59.91031390134529, + "grad_norm": 0.19655939936637878, + "learning_rate": 6.302345166794572e-05, + "loss": -0.9459, + "step": 26720 + }, + { + "epoch": 59.93273542600897, + "grad_norm": 0.18032661080360413, + "learning_rate": 6.299684309652316e-05, + "loss": -0.945, + "step": 26730 + }, + { + "epoch": 59.955156950672645, + "grad_norm": 0.3320923149585724, + "learning_rate": 6.297023057700283e-05, + "loss": -0.9459, + "step": 26740 + }, + { + "epoch": 59.97757847533632, + "grad_norm": 0.17951610684394836, + "learning_rate": 6.294361411746891e-05, + "loss": -0.9464, + "step": 26750 + }, + { + "epoch": 60.0, + "grad_norm": 0.3200441002845764, + "learning_rate": 6.291699372600677e-05, + "loss": -0.9412, + "step": 26760 + }, + { + "epoch": 60.02242152466368, + "grad_norm": 0.26938146352767944, + "learning_rate": 6.2890369410703e-05, + "loss": -0.9418, + "step": 26770 + }, + { + "epoch": 60.044843049327355, + "grad_norm": 0.18340514600276947, + "learning_rate": 6.286374117964534e-05, + "loss": -0.9445, + "step": 26780 + }, + { + "epoch": 60.06726457399103, + "grad_norm": 0.24635519087314606, + "learning_rate": 6.283710904092277e-05, + "loss": -0.9463, + "step": 26790 + }, + { + "epoch": 60.08968609865471, + "grad_norm": 0.23790402710437775, + "learning_rate": 6.281047300262542e-05, + "loss": -0.944, + "step": 26800 + }, + { + "epoch": 60.11210762331839, + "grad_norm": 0.19675366580486298, + "learning_rate": 6.278383307284461e-05, + "loss": -0.9416, + "step": 26810 + }, + { + "epoch": 60.134529147982065, + "grad_norm": 0.25706636905670166, + "learning_rate": 6.275718925967284e-05, + "loss": -0.9435, + "step": 26820 + }, + { + "epoch": 60.15695067264574, + "grad_norm": 0.1879802793264389, + "learning_rate": 6.273054157120382e-05, + "loss": -0.9447, + "step": 26830 + }, + { + "epoch": 60.17937219730942, + "grad_norm": 0.2162971794605255, + "learning_rate": 6.270389001553238e-05, + "loss": -0.9459, + "step": 26840 + }, + { + "epoch": 60.2017937219731, + "grad_norm": 0.3572078347206116, + "learning_rate": 6.26772346007546e-05, + "loss": -0.9412, + "step": 26850 + }, + { + "epoch": 60.224215246636774, + "grad_norm": 0.17612186074256897, + "learning_rate": 6.265057533496767e-05, + "loss": -0.9472, + "step": 26860 + }, + { + "epoch": 60.24663677130045, + "grad_norm": 0.24678580462932587, + "learning_rate": 6.262391222626997e-05, + "loss": -0.9409, + "step": 26870 + }, + { + "epoch": 60.26905829596413, + "grad_norm": 0.2750299572944641, + "learning_rate": 6.259724528276106e-05, + "loss": -0.9373, + "step": 26880 + }, + { + "epoch": 60.2914798206278, + "grad_norm": 0.38012710213661194, + "learning_rate": 6.257057451254162e-05, + "loss": -0.9439, + "step": 26890 + }, + { + "epoch": 60.31390134529148, + "grad_norm": 0.258431077003479, + "learning_rate": 6.254389992371357e-05, + "loss": -0.9439, + "step": 26900 + }, + { + "epoch": 60.336322869955154, + "grad_norm": 0.24653112888336182, + "learning_rate": 6.25172215243799e-05, + "loss": -0.9401, + "step": 26910 + }, + { + "epoch": 60.35874439461883, + "grad_norm": 0.33267319202423096, + "learning_rate": 6.249053932264486e-05, + "loss": -0.9442, + "step": 26920 + }, + { + "epoch": 60.38116591928251, + "grad_norm": 0.332468718290329, + "learning_rate": 6.246385332661376e-05, + "loss": -0.9452, + "step": 26930 + }, + { + "epoch": 60.403587443946186, + "grad_norm": 0.2377975881099701, + "learning_rate": 6.24371635443931e-05, + "loss": -0.9395, + "step": 26940 + }, + { + "epoch": 60.426008968609864, + "grad_norm": 0.24232210218906403, + "learning_rate": 6.241046998409054e-05, + "loss": -0.9466, + "step": 26950 + }, + { + "epoch": 60.44843049327354, + "grad_norm": 0.265167772769928, + "learning_rate": 6.238377265381489e-05, + "loss": -0.9445, + "step": 26960 + }, + { + "epoch": 60.47085201793722, + "grad_norm": 0.3776964843273163, + "learning_rate": 6.235707156167607e-05, + "loss": -0.9369, + "step": 26970 + }, + { + "epoch": 60.493273542600896, + "grad_norm": 0.34274303913116455, + "learning_rate": 6.233036671578519e-05, + "loss": -0.939, + "step": 26980 + }, + { + "epoch": 60.51569506726457, + "grad_norm": 0.33149978518486023, + "learning_rate": 6.230365812425445e-05, + "loss": -0.9421, + "step": 26990 + }, + { + "epoch": 60.53811659192825, + "grad_norm": 0.19983604550361633, + "learning_rate": 6.227694579519724e-05, + "loss": -0.9444, + "step": 27000 + }, + { + "epoch": 60.56053811659193, + "grad_norm": 0.13978701829910278, + "learning_rate": 6.225022973672805e-05, + "loss": -0.9447, + "step": 27010 + }, + { + "epoch": 60.582959641255606, + "grad_norm": 0.24551254510879517, + "learning_rate": 6.222350995696253e-05, + "loss": -0.9344, + "step": 27020 + }, + { + "epoch": 60.60538116591928, + "grad_norm": 0.22124244272708893, + "learning_rate": 6.21967864640174e-05, + "loss": -0.9454, + "step": 27030 + }, + { + "epoch": 60.62780269058296, + "grad_norm": 0.27249959111213684, + "learning_rate": 6.217005926601059e-05, + "loss": -0.947, + "step": 27040 + }, + { + "epoch": 60.65022421524664, + "grad_norm": 0.22267138957977295, + "learning_rate": 6.214332837106111e-05, + "loss": -0.9462, + "step": 27050 + }, + { + "epoch": 60.672645739910315, + "grad_norm": 0.2381371259689331, + "learning_rate": 6.21165937872891e-05, + "loss": -0.9443, + "step": 27060 + }, + { + "epoch": 60.69506726457399, + "grad_norm": 0.3290307819843292, + "learning_rate": 6.208985552281582e-05, + "loss": -0.9428, + "step": 27070 + }, + { + "epoch": 60.71748878923767, + "grad_norm": 0.21952027082443237, + "learning_rate": 6.206311358576364e-05, + "loss": -0.9383, + "step": 27080 + }, + { + "epoch": 60.73991031390135, + "grad_norm": 0.16663376986980438, + "learning_rate": 6.203636798425608e-05, + "loss": -0.9457, + "step": 27090 + }, + { + "epoch": 60.762331838565025, + "grad_norm": 0.354627788066864, + "learning_rate": 6.20096187264177e-05, + "loss": -0.9405, + "step": 27100 + }, + { + "epoch": 60.7847533632287, + "grad_norm": 0.2451326996088028, + "learning_rate": 6.198286582037425e-05, + "loss": -0.9432, + "step": 27110 + }, + { + "epoch": 60.80717488789238, + "grad_norm": 0.2198084592819214, + "learning_rate": 6.195610927425256e-05, + "loss": -0.9409, + "step": 27120 + }, + { + "epoch": 60.82959641255606, + "grad_norm": 0.2344246357679367, + "learning_rate": 6.192934909618056e-05, + "loss": -0.9469, + "step": 27130 + }, + { + "epoch": 60.85201793721973, + "grad_norm": 0.18030838668346405, + "learning_rate": 6.190258529428728e-05, + "loss": -0.9432, + "step": 27140 + }, + { + "epoch": 60.874439461883405, + "grad_norm": 0.30648961663246155, + "learning_rate": 6.187581787670285e-05, + "loss": -0.938, + "step": 27150 + }, + { + "epoch": 60.89686098654708, + "grad_norm": 0.19582685828208923, + "learning_rate": 6.184904685155852e-05, + "loss": -0.9434, + "step": 27160 + }, + { + "epoch": 60.91928251121076, + "grad_norm": 0.27440890669822693, + "learning_rate": 6.18222722269866e-05, + "loss": -0.9425, + "step": 27170 + }, + { + "epoch": 60.94170403587444, + "grad_norm": 0.14519137144088745, + "learning_rate": 6.179549401112053e-05, + "loss": -0.9475, + "step": 27180 + }, + { + "epoch": 60.964125560538115, + "grad_norm": 0.21946661174297333, + "learning_rate": 6.176871221209482e-05, + "loss": -0.9427, + "step": 27190 + }, + { + "epoch": 60.98654708520179, + "grad_norm": 0.21197371184825897, + "learning_rate": 6.174192683804508e-05, + "loss": -0.9428, + "step": 27200 + }, + { + "epoch": 61.00896860986547, + "grad_norm": 0.21457985043525696, + "learning_rate": 6.1715137897108e-05, + "loss": -0.9475, + "step": 27210 + }, + { + "epoch": 61.03139013452915, + "grad_norm": 0.1981126368045807, + "learning_rate": 6.168834539742134e-05, + "loss": -0.9416, + "step": 27220 + }, + { + "epoch": 61.053811659192824, + "grad_norm": 0.34243854880332947, + "learning_rate": 6.166154934712397e-05, + "loss": -0.9428, + "step": 27230 + }, + { + "epoch": 61.0762331838565, + "grad_norm": 0.27049997448921204, + "learning_rate": 6.163474975435581e-05, + "loss": -0.9479, + "step": 27240 + }, + { + "epoch": 61.09865470852018, + "grad_norm": 0.1959293782711029, + "learning_rate": 6.160794662725787e-05, + "loss": -0.9406, + "step": 27250 + }, + { + "epoch": 61.12107623318386, + "grad_norm": 0.25946176052093506, + "learning_rate": 6.158113997397222e-05, + "loss": -0.9433, + "step": 27260 + }, + { + "epoch": 61.143497757847534, + "grad_norm": 0.26069533824920654, + "learning_rate": 6.155432980264205e-05, + "loss": -0.9435, + "step": 27270 + }, + { + "epoch": 61.16591928251121, + "grad_norm": 0.2625037729740143, + "learning_rate": 6.152751612141156e-05, + "loss": -0.9395, + "step": 27280 + }, + { + "epoch": 61.18834080717489, + "grad_norm": 0.16361351311206818, + "learning_rate": 6.150069893842602e-05, + "loss": -0.9446, + "step": 27290 + }, + { + "epoch": 61.210762331838566, + "grad_norm": 0.3863760530948639, + "learning_rate": 6.147387826183182e-05, + "loss": -0.944, + "step": 27300 + }, + { + "epoch": 61.233183856502244, + "grad_norm": 0.27107763290405273, + "learning_rate": 6.144705409977635e-05, + "loss": -0.9417, + "step": 27310 + }, + { + "epoch": 61.25560538116592, + "grad_norm": 0.18973882496356964, + "learning_rate": 6.142022646040808e-05, + "loss": -0.9474, + "step": 27320 + }, + { + "epoch": 61.2780269058296, + "grad_norm": 0.32223042845726013, + "learning_rate": 6.139339535187653e-05, + "loss": -0.9447, + "step": 27330 + }, + { + "epoch": 61.300448430493276, + "grad_norm": 0.1552860587835312, + "learning_rate": 6.136656078233232e-05, + "loss": -0.9491, + "step": 27340 + }, + { + "epoch": 61.32286995515695, + "grad_norm": 0.21542982757091522, + "learning_rate": 6.133972275992707e-05, + "loss": -0.941, + "step": 27350 + }, + { + "epoch": 61.34529147982063, + "grad_norm": 0.2458633929491043, + "learning_rate": 6.131288129281342e-05, + "loss": -0.9423, + "step": 27360 + }, + { + "epoch": 61.36771300448431, + "grad_norm": 0.23407892882823944, + "learning_rate": 6.128603638914516e-05, + "loss": -0.9458, + "step": 27370 + }, + { + "epoch": 61.390134529147986, + "grad_norm": 0.20517009496688843, + "learning_rate": 6.125918805707704e-05, + "loss": -0.9444, + "step": 27380 + }, + { + "epoch": 61.412556053811656, + "grad_norm": 0.16321755945682526, + "learning_rate": 6.123233630476485e-05, + "loss": -0.9416, + "step": 27390 + }, + { + "epoch": 61.43497757847533, + "grad_norm": 0.26203200221061707, + "learning_rate": 6.120548114036547e-05, + "loss": -0.9445, + "step": 27400 + }, + { + "epoch": 61.45739910313901, + "grad_norm": 0.36108097434043884, + "learning_rate": 6.117862257203679e-05, + "loss": -0.9434, + "step": 27410 + }, + { + "epoch": 61.47982062780269, + "grad_norm": 0.27159929275512695, + "learning_rate": 6.115176060793771e-05, + "loss": -0.939, + "step": 27420 + }, + { + "epoch": 61.502242152466366, + "grad_norm": 0.1972680538892746, + "learning_rate": 6.112489525622822e-05, + "loss": -0.9476, + "step": 27430 + }, + { + "epoch": 61.52466367713004, + "grad_norm": 0.28528571128845215, + "learning_rate": 6.109802652506928e-05, + "loss": -0.9469, + "step": 27440 + }, + { + "epoch": 61.54708520179372, + "grad_norm": 0.2790861129760742, + "learning_rate": 6.107115442262291e-05, + "loss": -0.9448, + "step": 27450 + }, + { + "epoch": 61.5695067264574, + "grad_norm": 0.1747312843799591, + "learning_rate": 6.104427895705214e-05, + "loss": -0.9443, + "step": 27460 + }, + { + "epoch": 61.591928251121075, + "grad_norm": 0.2388865351676941, + "learning_rate": 6.101740013652103e-05, + "loss": -0.9468, + "step": 27470 + }, + { + "epoch": 61.61434977578475, + "grad_norm": 0.2107379138469696, + "learning_rate": 6.099051796919465e-05, + "loss": -0.9426, + "step": 27480 + }, + { + "epoch": 61.63677130044843, + "grad_norm": 0.2942377030849457, + "learning_rate": 6.096363246323911e-05, + "loss": -0.9399, + "step": 27490 + }, + { + "epoch": 61.65919282511211, + "grad_norm": 0.23226398229599, + "learning_rate": 6.0936743626821504e-05, + "loss": -0.9432, + "step": 27500 + }, + { + "epoch": 61.681614349775785, + "grad_norm": 0.2641476094722748, + "learning_rate": 6.090985146810996e-05, + "loss": -0.9437, + "step": 27510 + }, + { + "epoch": 61.70403587443946, + "grad_norm": 0.4040982723236084, + "learning_rate": 6.088295599527357e-05, + "loss": -0.9431, + "step": 27520 + }, + { + "epoch": 61.72645739910314, + "grad_norm": 0.23075750470161438, + "learning_rate": 6.085605721648252e-05, + "loss": -0.9461, + "step": 27530 + }, + { + "epoch": 61.74887892376682, + "grad_norm": 0.2845956087112427, + "learning_rate": 6.082915513990792e-05, + "loss": -0.9446, + "step": 27540 + }, + { + "epoch": 61.771300448430495, + "grad_norm": 0.1653466373682022, + "learning_rate": 6.080224977372192e-05, + "loss": -0.9457, + "step": 27550 + }, + { + "epoch": 61.79372197309417, + "grad_norm": 0.236322820186615, + "learning_rate": 6.0775341126097666e-05, + "loss": -0.9447, + "step": 27560 + }, + { + "epoch": 61.81614349775785, + "grad_norm": 0.2139267474412918, + "learning_rate": 6.074842920520926e-05, + "loss": -0.9407, + "step": 27570 + }, + { + "epoch": 61.83856502242153, + "grad_norm": 0.19063310325145721, + "learning_rate": 6.072151401923186e-05, + "loss": -0.9469, + "step": 27580 + }, + { + "epoch": 61.860986547085204, + "grad_norm": 0.19090594351291656, + "learning_rate": 6.069459557634159e-05, + "loss": -0.942, + "step": 27590 + }, + { + "epoch": 61.88340807174888, + "grad_norm": 0.4406696557998657, + "learning_rate": 6.066767388471557e-05, + "loss": -0.9408, + "step": 27600 + }, + { + "epoch": 61.90582959641256, + "grad_norm": 0.27302199602127075, + "learning_rate": 6.064074895253188e-05, + "loss": -0.9424, + "step": 27610 + }, + { + "epoch": 61.92825112107624, + "grad_norm": 0.182488813996315, + "learning_rate": 6.061382078796961e-05, + "loss": -0.9414, + "step": 27620 + }, + { + "epoch": 61.95067264573991, + "grad_norm": 0.2556951940059662, + "learning_rate": 6.0586889399208814e-05, + "loss": -0.9455, + "step": 27630 + }, + { + "epoch": 61.973094170403584, + "grad_norm": 0.1767144501209259, + "learning_rate": 6.0559954794430565e-05, + "loss": -0.9426, + "step": 27640 + }, + { + "epoch": 61.99551569506726, + "grad_norm": 0.23493285477161407, + "learning_rate": 6.053301698181687e-05, + "loss": -0.9418, + "step": 27650 + }, + { + "epoch": 62.01793721973094, + "grad_norm": 0.23456355929374695, + "learning_rate": 6.0506075969550725e-05, + "loss": -0.9442, + "step": 27660 + }, + { + "epoch": 62.04035874439462, + "grad_norm": 0.21470524370670319, + "learning_rate": 6.047913176581609e-05, + "loss": -0.9435, + "step": 27670 + }, + { + "epoch": 62.062780269058294, + "grad_norm": 0.25782743096351624, + "learning_rate": 6.0452184378797904e-05, + "loss": -0.9469, + "step": 27680 + }, + { + "epoch": 62.08520179372197, + "grad_norm": 0.218076691031456, + "learning_rate": 6.042523381668209e-05, + "loss": -0.945, + "step": 27690 + }, + { + "epoch": 62.10762331838565, + "grad_norm": 0.258129358291626, + "learning_rate": 6.03982800876555e-05, + "loss": -0.945, + "step": 27700 + }, + { + "epoch": 62.130044843049326, + "grad_norm": 0.22798790037631989, + "learning_rate": 6.0371323199905975e-05, + "loss": -0.9385, + "step": 27710 + }, + { + "epoch": 62.152466367713004, + "grad_norm": 0.13280931115150452, + "learning_rate": 6.03443631616223e-05, + "loss": -0.9449, + "step": 27720 + }, + { + "epoch": 62.17488789237668, + "grad_norm": 0.2464611530303955, + "learning_rate": 6.031739998099421e-05, + "loss": -0.9458, + "step": 27730 + }, + { + "epoch": 62.19730941704036, + "grad_norm": 0.2543491721153259, + "learning_rate": 6.029043366621243e-05, + "loss": -0.947, + "step": 27740 + }, + { + "epoch": 62.219730941704036, + "grad_norm": 0.22881293296813965, + "learning_rate": 6.0263464225468615e-05, + "loss": -0.9432, + "step": 27750 + }, + { + "epoch": 62.24215246636771, + "grad_norm": 0.2815555930137634, + "learning_rate": 6.023649166695534e-05, + "loss": -0.9468, + "step": 27760 + }, + { + "epoch": 62.26457399103139, + "grad_norm": 0.19241304695606232, + "learning_rate": 6.0209515998866186e-05, + "loss": -0.9428, + "step": 27770 + }, + { + "epoch": 62.28699551569507, + "grad_norm": 0.14683036506175995, + "learning_rate": 6.018253722939563e-05, + "loss": -0.9461, + "step": 27780 + }, + { + "epoch": 62.309417040358746, + "grad_norm": 0.23198285698890686, + "learning_rate": 6.015555536673914e-05, + "loss": -0.9458, + "step": 27790 + }, + { + "epoch": 62.33183856502242, + "grad_norm": 0.2546038329601288, + "learning_rate": 6.0128570419093054e-05, + "loss": -0.9425, + "step": 27800 + }, + { + "epoch": 62.3542600896861, + "grad_norm": 0.25474122166633606, + "learning_rate": 6.010158239465471e-05, + "loss": -0.9462, + "step": 27810 + }, + { + "epoch": 62.37668161434978, + "grad_norm": 0.3362525701522827, + "learning_rate": 6.007459130162235e-05, + "loss": -0.9432, + "step": 27820 + }, + { + "epoch": 62.399103139013455, + "grad_norm": 0.2145395427942276, + "learning_rate": 6.004759714819516e-05, + "loss": -0.9449, + "step": 27830 + }, + { + "epoch": 62.42152466367713, + "grad_norm": 0.21623988449573517, + "learning_rate": 6.002059994257323e-05, + "loss": -0.9423, + "step": 27840 + }, + { + "epoch": 62.44394618834081, + "grad_norm": 0.3345890939235687, + "learning_rate": 5.999359969295764e-05, + "loss": -0.9406, + "step": 27850 + }, + { + "epoch": 62.46636771300449, + "grad_norm": 0.28245168924331665, + "learning_rate": 5.9966596407550314e-05, + "loss": -0.9479, + "step": 27860 + }, + { + "epoch": 62.488789237668165, + "grad_norm": 0.178461953997612, + "learning_rate": 5.993959009455416e-05, + "loss": -0.9413, + "step": 27870 + }, + { + "epoch": 62.511210762331835, + "grad_norm": 0.1945464015007019, + "learning_rate": 5.991258076217298e-05, + "loss": -0.9474, + "step": 27880 + }, + { + "epoch": 62.53363228699551, + "grad_norm": 0.2143382430076599, + "learning_rate": 5.988556841861147e-05, + "loss": -0.9439, + "step": 27890 + }, + { + "epoch": 62.55605381165919, + "grad_norm": 0.28139248490333557, + "learning_rate": 5.985855307207531e-05, + "loss": -0.9419, + "step": 27900 + }, + { + "epoch": 62.57847533632287, + "grad_norm": 0.2564072608947754, + "learning_rate": 5.9831534730771e-05, + "loss": -0.9458, + "step": 27910 + }, + { + "epoch": 62.600896860986545, + "grad_norm": 0.32583585381507874, + "learning_rate": 5.980451340290605e-05, + "loss": -0.9435, + "step": 27920 + }, + { + "epoch": 62.62331838565022, + "grad_norm": 0.18871305882930756, + "learning_rate": 5.97774890966888e-05, + "loss": -0.9477, + "step": 27930 + }, + { + "epoch": 62.6457399103139, + "grad_norm": 0.22018088400363922, + "learning_rate": 5.975046182032851e-05, + "loss": -0.9474, + "step": 27940 + }, + { + "epoch": 62.66816143497758, + "grad_norm": 0.2509959638118744, + "learning_rate": 5.972343158203537e-05, + "loss": -0.9452, + "step": 27950 + }, + { + "epoch": 62.690582959641254, + "grad_norm": 0.2500210106372833, + "learning_rate": 5.969639839002045e-05, + "loss": -0.9466, + "step": 27960 + }, + { + "epoch": 62.71300448430493, + "grad_norm": 0.20265232026576996, + "learning_rate": 5.966936225249572e-05, + "loss": -0.9459, + "step": 27970 + }, + { + "epoch": 62.73542600896861, + "grad_norm": 0.2491590827703476, + "learning_rate": 5.9642323177674044e-05, + "loss": -0.9454, + "step": 27980 + }, + { + "epoch": 62.75784753363229, + "grad_norm": 0.28101006150245667, + "learning_rate": 5.9615281173769154e-05, + "loss": -0.9462, + "step": 27990 + }, + { + "epoch": 62.780269058295964, + "grad_norm": 0.15068548917770386, + "learning_rate": 5.958823624899574e-05, + "loss": -0.9485, + "step": 28000 + }, + { + "epoch": 62.80269058295964, + "grad_norm": 0.33845022320747375, + "learning_rate": 5.956118841156933e-05, + "loss": -0.9469, + "step": 28010 + }, + { + "epoch": 62.82511210762332, + "grad_norm": 0.11979185044765472, + "learning_rate": 5.953413766970631e-05, + "loss": -0.9443, + "step": 28020 + }, + { + "epoch": 62.847533632286996, + "grad_norm": 0.284469872713089, + "learning_rate": 5.9507084031624e-05, + "loss": -0.9445, + "step": 28030 + }, + { + "epoch": 62.869955156950674, + "grad_norm": 0.281082421541214, + "learning_rate": 5.948002750554058e-05, + "loss": -0.9417, + "step": 28040 + }, + { + "epoch": 62.89237668161435, + "grad_norm": 0.2745921015739441, + "learning_rate": 5.9452968099675124e-05, + "loss": -0.9461, + "step": 28050 + }, + { + "epoch": 62.91479820627803, + "grad_norm": 0.19780275225639343, + "learning_rate": 5.9425905822247527e-05, + "loss": -0.9458, + "step": 28060 + }, + { + "epoch": 62.937219730941706, + "grad_norm": 0.22890524566173553, + "learning_rate": 5.939884068147864e-05, + "loss": -0.9462, + "step": 28070 + }, + { + "epoch": 62.95964125560538, + "grad_norm": 0.20963731408119202, + "learning_rate": 5.937177268559011e-05, + "loss": -0.947, + "step": 28080 + }, + { + "epoch": 62.98206278026906, + "grad_norm": 0.2695780098438263, + "learning_rate": 5.934470184280448e-05, + "loss": -0.9472, + "step": 28090 + }, + { + "epoch": 63.00448430493274, + "grad_norm": 0.2177513837814331, + "learning_rate": 5.931762816134516e-05, + "loss": -0.9457, + "step": 28100 + }, + { + "epoch": 63.026905829596416, + "grad_norm": 0.35290661454200745, + "learning_rate": 5.9290551649436434e-05, + "loss": -0.9462, + "step": 28110 + }, + { + "epoch": 63.04932735426009, + "grad_norm": 0.2525990307331085, + "learning_rate": 5.9263472315303416e-05, + "loss": -0.945, + "step": 28120 + }, + { + "epoch": 63.07174887892376, + "grad_norm": 0.33376526832580566, + "learning_rate": 5.9236390167172096e-05, + "loss": -0.9451, + "step": 28130 + }, + { + "epoch": 63.09417040358744, + "grad_norm": 0.2902498245239258, + "learning_rate": 5.920930521326932e-05, + "loss": -0.9428, + "step": 28140 + }, + { + "epoch": 63.11659192825112, + "grad_norm": 0.3443736135959625, + "learning_rate": 5.918221746182276e-05, + "loss": -0.9453, + "step": 28150 + }, + { + "epoch": 63.139013452914796, + "grad_norm": 0.1490629017353058, + "learning_rate": 5.9155126921061e-05, + "loss": -0.9469, + "step": 28160 + }, + { + "epoch": 63.16143497757847, + "grad_norm": 0.21432268619537354, + "learning_rate": 5.91280335992134e-05, + "loss": -0.9496, + "step": 28170 + }, + { + "epoch": 63.18385650224215, + "grad_norm": 0.19336065649986267, + "learning_rate": 5.91009375045102e-05, + "loss": -0.9469, + "step": 28180 + }, + { + "epoch": 63.20627802690583, + "grad_norm": 0.22291114926338196, + "learning_rate": 5.9073838645182476e-05, + "loss": -0.9484, + "step": 28190 + }, + { + "epoch": 63.228699551569505, + "grad_norm": 0.28069746494293213, + "learning_rate": 5.904673702946217e-05, + "loss": -0.9418, + "step": 28200 + }, + { + "epoch": 63.25112107623318, + "grad_norm": 0.233761727809906, + "learning_rate": 5.9019632665582004e-05, + "loss": -0.948, + "step": 28210 + }, + { + "epoch": 63.27354260089686, + "grad_norm": 0.304340124130249, + "learning_rate": 5.899252556177559e-05, + "loss": -0.9468, + "step": 28220 + }, + { + "epoch": 63.29596412556054, + "grad_norm": 0.1860508918762207, + "learning_rate": 5.896541572627735e-05, + "loss": -0.946, + "step": 28230 + }, + { + "epoch": 63.318385650224215, + "grad_norm": 0.23897132277488708, + "learning_rate": 5.893830316732253e-05, + "loss": -0.9465, + "step": 28240 + }, + { + "epoch": 63.34080717488789, + "grad_norm": 0.2723633050918579, + "learning_rate": 5.8911187893147214e-05, + "loss": -0.9423, + "step": 28250 + }, + { + "epoch": 63.36322869955157, + "grad_norm": 0.2524152100086212, + "learning_rate": 5.888406991198828e-05, + "loss": -0.9463, + "step": 28260 + }, + { + "epoch": 63.38565022421525, + "grad_norm": 0.28987517952919006, + "learning_rate": 5.885694923208349e-05, + "loss": -0.9404, + "step": 28270 + }, + { + "epoch": 63.408071748878925, + "grad_norm": 0.1985883265733719, + "learning_rate": 5.882982586167138e-05, + "loss": -0.946, + "step": 28280 + }, + { + "epoch": 63.4304932735426, + "grad_norm": 0.19945982098579407, + "learning_rate": 5.880269980899131e-05, + "loss": -0.9452, + "step": 28290 + }, + { + "epoch": 63.45291479820628, + "grad_norm": 0.2519518733024597, + "learning_rate": 5.8775571082283465e-05, + "loss": -0.9466, + "step": 28300 + }, + { + "epoch": 63.47533632286996, + "grad_norm": 0.1524859368801117, + "learning_rate": 5.8748439689788824e-05, + "loss": -0.9456, + "step": 28310 + }, + { + "epoch": 63.497757847533634, + "grad_norm": 0.1927192658185959, + "learning_rate": 5.87213056397492e-05, + "loss": -0.9462, + "step": 28320 + }, + { + "epoch": 63.52017937219731, + "grad_norm": 0.13800381124019623, + "learning_rate": 5.869416894040719e-05, + "loss": -0.9407, + "step": 28330 + }, + { + "epoch": 63.54260089686099, + "grad_norm": 0.18785759806632996, + "learning_rate": 5.866702960000621e-05, + "loss": -0.9469, + "step": 28340 + }, + { + "epoch": 63.56502242152467, + "grad_norm": 0.1873423457145691, + "learning_rate": 5.863988762679048e-05, + "loss": -0.9447, + "step": 28350 + }, + { + "epoch": 63.587443946188344, + "grad_norm": 0.17903590202331543, + "learning_rate": 5.8612743029005e-05, + "loss": -0.946, + "step": 28360 + }, + { + "epoch": 63.609865470852014, + "grad_norm": 0.21734580397605896, + "learning_rate": 5.858559581489561e-05, + "loss": -0.9478, + "step": 28370 + }, + { + "epoch": 63.63228699551569, + "grad_norm": 0.20040547847747803, + "learning_rate": 5.85584459927089e-05, + "loss": -0.9453, + "step": 28380 + }, + { + "epoch": 63.65470852017937, + "grad_norm": 0.14153578877449036, + "learning_rate": 5.853129357069227e-05, + "loss": -0.9475, + "step": 28390 + }, + { + "epoch": 63.67713004484305, + "grad_norm": 0.23065148293972015, + "learning_rate": 5.8504138557093913e-05, + "loss": -0.9453, + "step": 28400 + }, + { + "epoch": 63.699551569506724, + "grad_norm": 0.2536846101284027, + "learning_rate": 5.8476980960162784e-05, + "loss": -0.9453, + "step": 28410 + }, + { + "epoch": 63.7219730941704, + "grad_norm": 0.23224374651908875, + "learning_rate": 5.844982078814868e-05, + "loss": -0.9446, + "step": 28420 + }, + { + "epoch": 63.74439461883408, + "grad_norm": 0.24533696472644806, + "learning_rate": 5.842265804930211e-05, + "loss": -0.9411, + "step": 28430 + }, + { + "epoch": 63.766816143497756, + "grad_norm": 0.2526395320892334, + "learning_rate": 5.839549275187444e-05, + "loss": -0.9468, + "step": 28440 + }, + { + "epoch": 63.789237668161434, + "grad_norm": 0.2674257755279541, + "learning_rate": 5.836832490411771e-05, + "loss": -0.9463, + "step": 28450 + }, + { + "epoch": 63.81165919282511, + "grad_norm": 0.16586579382419586, + "learning_rate": 5.834115451428485e-05, + "loss": -0.9446, + "step": 28460 + }, + { + "epoch": 63.83408071748879, + "grad_norm": 0.24220693111419678, + "learning_rate": 5.831398159062946e-05, + "loss": -0.9457, + "step": 28470 + }, + { + "epoch": 63.856502242152466, + "grad_norm": 0.24030104279518127, + "learning_rate": 5.828680614140599e-05, + "loss": -0.9469, + "step": 28480 + }, + { + "epoch": 63.87892376681614, + "grad_norm": 0.31478726863861084, + "learning_rate": 5.825962817486962e-05, + "loss": -0.9445, + "step": 28490 + }, + { + "epoch": 63.90134529147982, + "grad_norm": 0.19045192003250122, + "learning_rate": 5.823244769927629e-05, + "loss": -0.9428, + "step": 28500 + }, + { + "epoch": 63.9237668161435, + "grad_norm": 0.19991032779216766, + "learning_rate": 5.8205264722882716e-05, + "loss": -0.9409, + "step": 28510 + }, + { + "epoch": 63.946188340807176, + "grad_norm": 0.25910407304763794, + "learning_rate": 5.817807925394636e-05, + "loss": -0.9475, + "step": 28520 + }, + { + "epoch": 63.96860986547085, + "grad_norm": 0.12414736300706863, + "learning_rate": 5.815089130072546e-05, + "loss": -0.9456, + "step": 28530 + }, + { + "epoch": 63.99103139013453, + "grad_norm": 0.17597714066505432, + "learning_rate": 5.8123700871479e-05, + "loss": -0.9486, + "step": 28540 + }, + { + "epoch": 64.01345291479821, + "grad_norm": 0.2533094882965088, + "learning_rate": 5.809650797446671e-05, + "loss": -0.9493, + "step": 28550 + }, + { + "epoch": 64.03587443946188, + "grad_norm": 0.21551169455051422, + "learning_rate": 5.806931261794907e-05, + "loss": -0.9455, + "step": 28560 + }, + { + "epoch": 64.05829596412556, + "grad_norm": 0.22908146679401398, + "learning_rate": 5.804211481018731e-05, + "loss": -0.9495, + "step": 28570 + }, + { + "epoch": 64.08071748878923, + "grad_norm": 0.1886877417564392, + "learning_rate": 5.801491455944341e-05, + "loss": -0.9496, + "step": 28580 + }, + { + "epoch": 64.10313901345292, + "grad_norm": 0.2047959566116333, + "learning_rate": 5.79877118739801e-05, + "loss": -0.9456, + "step": 28590 + }, + { + "epoch": 64.12556053811659, + "grad_norm": 0.15567545592784882, + "learning_rate": 5.7960506762060816e-05, + "loss": -0.9441, + "step": 28600 + }, + { + "epoch": 64.14798206278027, + "grad_norm": 0.18174372613430023, + "learning_rate": 5.793329923194977e-05, + "loss": -0.9474, + "step": 28610 + }, + { + "epoch": 64.17040358744394, + "grad_norm": 0.19707222282886505, + "learning_rate": 5.790608929191187e-05, + "loss": -0.9489, + "step": 28620 + }, + { + "epoch": 64.19282511210763, + "grad_norm": 0.16544093191623688, + "learning_rate": 5.78788769502128e-05, + "loss": -0.9419, + "step": 28630 + }, + { + "epoch": 64.2152466367713, + "grad_norm": 0.17979463934898376, + "learning_rate": 5.785166221511894e-05, + "loss": -0.9458, + "step": 28640 + }, + { + "epoch": 64.23766816143498, + "grad_norm": 0.201614648103714, + "learning_rate": 5.7824445094897415e-05, + "loss": -0.9416, + "step": 28650 + }, + { + "epoch": 64.26008968609865, + "grad_norm": 0.2759535610675812, + "learning_rate": 5.7797225597816065e-05, + "loss": -0.9464, + "step": 28660 + }, + { + "epoch": 64.28251121076234, + "grad_norm": 0.21454636752605438, + "learning_rate": 5.777000373214345e-05, + "loss": -0.9482, + "step": 28670 + }, + { + "epoch": 64.30493273542601, + "grad_norm": 0.3098246157169342, + "learning_rate": 5.774277950614885e-05, + "loss": -0.9424, + "step": 28680 + }, + { + "epoch": 64.32735426008969, + "grad_norm": 0.3453390300273895, + "learning_rate": 5.771555292810227e-05, + "loss": -0.9469, + "step": 28690 + }, + { + "epoch": 64.34977578475336, + "grad_norm": 0.2678505778312683, + "learning_rate": 5.768832400627444e-05, + "loss": -0.9442, + "step": 28700 + }, + { + "epoch": 64.37219730941705, + "grad_norm": 0.21370592713356018, + "learning_rate": 5.7661092748936775e-05, + "loss": -0.944, + "step": 28710 + }, + { + "epoch": 64.39461883408072, + "grad_norm": 0.23120124638080597, + "learning_rate": 5.76338591643614e-05, + "loss": -0.9422, + "step": 28720 + }, + { + "epoch": 64.4170403587444, + "grad_norm": 0.20705805718898773, + "learning_rate": 5.760662326082118e-05, + "loss": -0.9415, + "step": 28730 + }, + { + "epoch": 64.43946188340807, + "grad_norm": 0.21844908595085144, + "learning_rate": 5.757938504658965e-05, + "loss": -0.946, + "step": 28740 + }, + { + "epoch": 64.46188340807174, + "grad_norm": 0.19843974709510803, + "learning_rate": 5.755214452994107e-05, + "loss": -0.9455, + "step": 28750 + }, + { + "epoch": 64.48430493273543, + "grad_norm": 0.1619061678647995, + "learning_rate": 5.752490171915039e-05, + "loss": -0.9427, + "step": 28760 + }, + { + "epoch": 64.5067264573991, + "grad_norm": 0.31097838282585144, + "learning_rate": 5.749765662249324e-05, + "loss": -0.9425, + "step": 28770 + }, + { + "epoch": 64.52914798206278, + "grad_norm": 0.23309332132339478, + "learning_rate": 5.747040924824596e-05, + "loss": -0.9493, + "step": 28780 + }, + { + "epoch": 64.55156950672645, + "grad_norm": 0.22676602005958557, + "learning_rate": 5.7443159604685613e-05, + "loss": -0.9473, + "step": 28790 + }, + { + "epoch": 64.57399103139014, + "grad_norm": 0.20650210976600647, + "learning_rate": 5.74159077000899e-05, + "loss": -0.945, + "step": 28800 + }, + { + "epoch": 64.5964125560538, + "grad_norm": 0.21641050279140472, + "learning_rate": 5.7388653542737235e-05, + "loss": -0.9443, + "step": 28810 + }, + { + "epoch": 64.61883408071749, + "grad_norm": 0.19724029302597046, + "learning_rate": 5.736139714090672e-05, + "loss": -0.9437, + "step": 28820 + }, + { + "epoch": 64.64125560538116, + "grad_norm": 0.3902049958705902, + "learning_rate": 5.73341385028781e-05, + "loss": -0.9452, + "step": 28830 + }, + { + "epoch": 64.66367713004485, + "grad_norm": 0.23829293251037598, + "learning_rate": 5.7306877636931855e-05, + "loss": -0.9482, + "step": 28840 + }, + { + "epoch": 64.68609865470852, + "grad_norm": 0.2347460836172104, + "learning_rate": 5.7279614551349125e-05, + "loss": -0.9482, + "step": 28850 + }, + { + "epoch": 64.7085201793722, + "grad_norm": 0.18588988482952118, + "learning_rate": 5.725234925441169e-05, + "loss": -0.9503, + "step": 28860 + }, + { + "epoch": 64.73094170403587, + "grad_norm": 0.21722666919231415, + "learning_rate": 5.7225081754402044e-05, + "loss": -0.949, + "step": 28870 + }, + { + "epoch": 64.75336322869956, + "grad_norm": 0.17424745857715607, + "learning_rate": 5.7197812059603326e-05, + "loss": -0.9477, + "step": 28880 + }, + { + "epoch": 64.77578475336323, + "grad_norm": 0.19522179663181305, + "learning_rate": 5.717054017829934e-05, + "loss": -0.9432, + "step": 28890 + }, + { + "epoch": 64.79820627802691, + "grad_norm": 0.3058919310569763, + "learning_rate": 5.7143266118774584e-05, + "loss": -0.937, + "step": 28900 + }, + { + "epoch": 64.82062780269058, + "grad_norm": 0.23829980194568634, + "learning_rate": 5.711598988931418e-05, + "loss": -0.9473, + "step": 28910 + }, + { + "epoch": 64.84304932735427, + "grad_norm": 0.22455766797065735, + "learning_rate": 5.7088711498203954e-05, + "loss": -0.9456, + "step": 28920 + }, + { + "epoch": 64.86547085201794, + "grad_norm": 0.2515997588634491, + "learning_rate": 5.706143095373033e-05, + "loss": -0.9366, + "step": 28930 + }, + { + "epoch": 64.88789237668162, + "grad_norm": 0.28931719064712524, + "learning_rate": 5.703414826418042e-05, + "loss": -0.9447, + "step": 28940 + }, + { + "epoch": 64.91031390134529, + "grad_norm": 0.17977948486804962, + "learning_rate": 5.7006863437842007e-05, + "loss": -0.9426, + "step": 28950 + }, + { + "epoch": 64.93273542600897, + "grad_norm": 0.17805036902427673, + "learning_rate": 5.697957648300348e-05, + "loss": -0.9418, + "step": 28960 + }, + { + "epoch": 64.95515695067265, + "grad_norm": 0.20140139758586884, + "learning_rate": 5.695228740795391e-05, + "loss": -0.9463, + "step": 28970 + }, + { + "epoch": 64.97757847533633, + "grad_norm": 0.3033163547515869, + "learning_rate": 5.6924996220982985e-05, + "loss": -0.9459, + "step": 28980 + }, + { + "epoch": 65.0, + "grad_norm": 0.26405927538871765, + "learning_rate": 5.6897702930381045e-05, + "loss": -0.9477, + "step": 28990 + }, + { + "epoch": 65.02242152466367, + "grad_norm": 0.23177985846996307, + "learning_rate": 5.687040754443908e-05, + "loss": -0.9501, + "step": 29000 + }, + { + "epoch": 65.04484304932735, + "grad_norm": 0.24696317315101624, + "learning_rate": 5.6843110071448725e-05, + "loss": -0.9456, + "step": 29010 + }, + { + "epoch": 65.06726457399103, + "grad_norm": 0.3011072278022766, + "learning_rate": 5.6815810519702194e-05, + "loss": -0.9471, + "step": 29020 + }, + { + "epoch": 65.08968609865471, + "grad_norm": 0.1703973114490509, + "learning_rate": 5.6788508897492396e-05, + "loss": -0.9481, + "step": 29030 + }, + { + "epoch": 65.11210762331838, + "grad_norm": 0.31566354632377625, + "learning_rate": 5.676120521311282e-05, + "loss": -0.9447, + "step": 29040 + }, + { + "epoch": 65.13452914798206, + "grad_norm": 0.1993447095155716, + "learning_rate": 5.6733899474857634e-05, + "loss": -0.942, + "step": 29050 + }, + { + "epoch": 65.15695067264573, + "grad_norm": 0.1601291447877884, + "learning_rate": 5.670659169102157e-05, + "loss": -0.9488, + "step": 29060 + }, + { + "epoch": 65.17937219730942, + "grad_norm": 0.3343351185321808, + "learning_rate": 5.6679281869900044e-05, + "loss": -0.9462, + "step": 29070 + }, + { + "epoch": 65.20179372197309, + "grad_norm": 0.26863202452659607, + "learning_rate": 5.6651970019789045e-05, + "loss": -0.947, + "step": 29080 + }, + { + "epoch": 65.22421524663677, + "grad_norm": 0.17272868752479553, + "learning_rate": 5.662465614898519e-05, + "loss": -0.9486, + "step": 29090 + }, + { + "epoch": 65.24663677130044, + "grad_norm": 0.41223156452178955, + "learning_rate": 5.6597340265785695e-05, + "loss": -0.9394, + "step": 29100 + }, + { + "epoch": 65.26905829596413, + "grad_norm": 0.24034324288368225, + "learning_rate": 5.657002237848843e-05, + "loss": -0.945, + "step": 29110 + }, + { + "epoch": 65.2914798206278, + "grad_norm": 0.18001845479011536, + "learning_rate": 5.654270249539183e-05, + "loss": -0.9469, + "step": 29120 + }, + { + "epoch": 65.31390134529148, + "grad_norm": 0.20752061903476715, + "learning_rate": 5.651538062479498e-05, + "loss": -0.9452, + "step": 29130 + }, + { + "epoch": 65.33632286995515, + "grad_norm": 0.2797408699989319, + "learning_rate": 5.648805677499751e-05, + "loss": -0.9448, + "step": 29140 + }, + { + "epoch": 65.35874439461884, + "grad_norm": 0.17199255526065826, + "learning_rate": 5.646073095429969e-05, + "loss": -0.9427, + "step": 29150 + }, + { + "epoch": 65.38116591928251, + "grad_norm": 0.16132815182209015, + "learning_rate": 5.643340317100241e-05, + "loss": -0.9477, + "step": 29160 + }, + { + "epoch": 65.4035874439462, + "grad_norm": 0.27303436398506165, + "learning_rate": 5.64060734334071e-05, + "loss": -0.9438, + "step": 29170 + }, + { + "epoch": 65.42600896860986, + "grad_norm": 0.26461198925971985, + "learning_rate": 5.637874174981583e-05, + "loss": -0.9475, + "step": 29180 + }, + { + "epoch": 65.44843049327355, + "grad_norm": 0.1804748773574829, + "learning_rate": 5.635140812853124e-05, + "loss": -0.9452, + "step": 29190 + }, + { + "epoch": 65.47085201793722, + "grad_norm": 0.19120155274868011, + "learning_rate": 5.6324072577856544e-05, + "loss": -0.9481, + "step": 29200 + }, + { + "epoch": 65.4932735426009, + "grad_norm": 0.2836911678314209, + "learning_rate": 5.629673510609559e-05, + "loss": -0.9476, + "step": 29210 + }, + { + "epoch": 65.51569506726457, + "grad_norm": 0.19129332900047302, + "learning_rate": 5.626939572155276e-05, + "loss": -0.9431, + "step": 29220 + }, + { + "epoch": 65.53811659192826, + "grad_norm": 0.15232113003730774, + "learning_rate": 5.6242054432533054e-05, + "loss": -0.9481, + "step": 29230 + }, + { + "epoch": 65.56053811659193, + "grad_norm": 0.22004075348377228, + "learning_rate": 5.621471124734201e-05, + "loss": -0.9471, + "step": 29240 + }, + { + "epoch": 65.5829596412556, + "grad_norm": 0.2729659676551819, + "learning_rate": 5.6187366174285794e-05, + "loss": -0.9354, + "step": 29250 + }, + { + "epoch": 65.60538116591928, + "grad_norm": 0.2468756139278412, + "learning_rate": 5.616001922167109e-05, + "loss": -0.946, + "step": 29260 + }, + { + "epoch": 65.62780269058295, + "grad_norm": 0.2546185553073883, + "learning_rate": 5.61326703978052e-05, + "loss": -0.9443, + "step": 29270 + }, + { + "epoch": 65.65022421524664, + "grad_norm": 0.317744642496109, + "learning_rate": 5.6105319710995964e-05, + "loss": -0.9465, + "step": 29280 + }, + { + "epoch": 65.67264573991031, + "grad_norm": 0.3212983012199402, + "learning_rate": 5.60779671695518e-05, + "loss": -0.944, + "step": 29290 + }, + { + "epoch": 65.69506726457399, + "grad_norm": 0.25896260142326355, + "learning_rate": 5.6050612781781684e-05, + "loss": -0.9485, + "step": 29300 + }, + { + "epoch": 65.71748878923766, + "grad_norm": 0.2540436089038849, + "learning_rate": 5.602325655599516e-05, + "loss": -0.9474, + "step": 29310 + }, + { + "epoch": 65.73991031390135, + "grad_norm": 0.2239045798778534, + "learning_rate": 5.599589850050234e-05, + "loss": -0.9486, + "step": 29320 + }, + { + "epoch": 65.76233183856502, + "grad_norm": 0.17304404079914093, + "learning_rate": 5.5968538623613874e-05, + "loss": -0.9492, + "step": 29330 + }, + { + "epoch": 65.7847533632287, + "grad_norm": 0.19479063153266907, + "learning_rate": 5.594117693364095e-05, + "loss": -0.9492, + "step": 29340 + }, + { + "epoch": 65.80717488789237, + "grad_norm": 0.21066783368587494, + "learning_rate": 5.591381343889535e-05, + "loss": -0.9475, + "step": 29350 + }, + { + "epoch": 65.82959641255606, + "grad_norm": 0.2936171889305115, + "learning_rate": 5.5886448147689355e-05, + "loss": -0.9433, + "step": 29360 + }, + { + "epoch": 65.85201793721973, + "grad_norm": 0.19208423793315887, + "learning_rate": 5.585908106833585e-05, + "loss": -0.948, + "step": 29370 + }, + { + "epoch": 65.87443946188341, + "grad_norm": 0.15056174993515015, + "learning_rate": 5.5831712209148226e-05, + "loss": -0.9477, + "step": 29380 + }, + { + "epoch": 65.89686098654708, + "grad_norm": 0.16401173174381256, + "learning_rate": 5.58043415784404e-05, + "loss": -0.9441, + "step": 29390 + }, + { + "epoch": 65.91928251121077, + "grad_norm": 0.24881847202777863, + "learning_rate": 5.577696918452686e-05, + "loss": -0.9484, + "step": 29400 + }, + { + "epoch": 65.94170403587444, + "grad_norm": 0.1611267328262329, + "learning_rate": 5.5749595035722604e-05, + "loss": -0.9286, + "step": 29410 + }, + { + "epoch": 65.96412556053812, + "grad_norm": 0.1840929538011551, + "learning_rate": 5.5722219140343193e-05, + "loss": -0.9424, + "step": 29420 + }, + { + "epoch": 65.98654708520179, + "grad_norm": 0.22235694527626038, + "learning_rate": 5.56948415067047e-05, + "loss": -0.9469, + "step": 29430 + }, + { + "epoch": 66.00896860986548, + "grad_norm": 0.18485760688781738, + "learning_rate": 5.5667462143123704e-05, + "loss": -0.9436, + "step": 29440 + }, + { + "epoch": 66.03139013452915, + "grad_norm": 0.17439641058444977, + "learning_rate": 5.564008105791737e-05, + "loss": -0.9501, + "step": 29450 + }, + { + "epoch": 66.05381165919283, + "grad_norm": 0.2850072681903839, + "learning_rate": 5.5612698259403316e-05, + "loss": -0.9421, + "step": 29460 + }, + { + "epoch": 66.0762331838565, + "grad_norm": 0.1816813349723816, + "learning_rate": 5.5585313755899724e-05, + "loss": -0.9441, + "step": 29470 + }, + { + "epoch": 66.09865470852019, + "grad_norm": 0.21424666047096252, + "learning_rate": 5.5557927555725285e-05, + "loss": -0.9381, + "step": 29480 + }, + { + "epoch": 66.12107623318386, + "grad_norm": 0.2332298308610916, + "learning_rate": 5.55305396671992e-05, + "loss": -0.9455, + "step": 29490 + }, + { + "epoch": 66.14349775784753, + "grad_norm": 0.2548530101776123, + "learning_rate": 5.55031500986412e-05, + "loss": -0.9399, + "step": 29500 + }, + { + "epoch": 66.16591928251121, + "grad_norm": 0.32325467467308044, + "learning_rate": 5.547575885837149e-05, + "loss": -0.9462, + "step": 29510 + }, + { + "epoch": 66.18834080717488, + "grad_norm": 0.3054800033569336, + "learning_rate": 5.5448365954710825e-05, + "loss": -0.9471, + "step": 29520 + }, + { + "epoch": 66.21076233183857, + "grad_norm": 0.19075936079025269, + "learning_rate": 5.5420971395980446e-05, + "loss": -0.9434, + "step": 29530 + }, + { + "epoch": 66.23318385650224, + "grad_norm": 0.27364787459373474, + "learning_rate": 5.539357519050209e-05, + "loss": -0.9428, + "step": 29540 + }, + { + "epoch": 66.25560538116592, + "grad_norm": 0.1954621523618698, + "learning_rate": 5.536617734659799e-05, + "loss": -0.9439, + "step": 29550 + }, + { + "epoch": 66.27802690582959, + "grad_norm": 0.1782686412334442, + "learning_rate": 5.533877787259091e-05, + "loss": -0.9474, + "step": 29560 + }, + { + "epoch": 66.30044843049328, + "grad_norm": 0.1634395718574524, + "learning_rate": 5.5311376776804044e-05, + "loss": -0.9461, + "step": 29570 + }, + { + "epoch": 66.32286995515695, + "grad_norm": 0.22039519250392914, + "learning_rate": 5.528397406756118e-05, + "loss": -0.9439, + "step": 29580 + }, + { + "epoch": 66.34529147982063, + "grad_norm": 0.2505650222301483, + "learning_rate": 5.525656975318652e-05, + "loss": -0.9471, + "step": 29590 + }, + { + "epoch": 66.3677130044843, + "grad_norm": 0.24391187727451324, + "learning_rate": 5.522916384200474e-05, + "loss": -0.9515, + "step": 29600 + }, + { + "epoch": 66.39013452914799, + "grad_norm": 0.1621241569519043, + "learning_rate": 5.520175634234106e-05, + "loss": -0.9473, + "step": 29610 + }, + { + "epoch": 66.41255605381166, + "grad_norm": 0.29726627469062805, + "learning_rate": 5.517434726252113e-05, + "loss": -0.9456, + "step": 29620 + }, + { + "epoch": 66.43497757847534, + "grad_norm": 0.2177072912454605, + "learning_rate": 5.514693661087113e-05, + "loss": -0.9469, + "step": 29630 + }, + { + "epoch": 66.45739910313901, + "grad_norm": 0.3543635606765747, + "learning_rate": 5.511952439571769e-05, + "loss": -0.9471, + "step": 29640 + }, + { + "epoch": 66.4798206278027, + "grad_norm": 0.17889504134655, + "learning_rate": 5.509211062538791e-05, + "loss": -0.9487, + "step": 29650 + }, + { + "epoch": 66.50224215246637, + "grad_norm": 0.28312554955482483, + "learning_rate": 5.506469530820939e-05, + "loss": -0.9451, + "step": 29660 + }, + { + "epoch": 66.52466367713005, + "grad_norm": 0.4608617424964905, + "learning_rate": 5.503727845251014e-05, + "loss": -0.9501, + "step": 29670 + }, + { + "epoch": 66.54708520179372, + "grad_norm": 0.1748092621564865, + "learning_rate": 5.50098600666187e-05, + "loss": -0.9488, + "step": 29680 + }, + { + "epoch": 66.5695067264574, + "grad_norm": 0.18028169870376587, + "learning_rate": 5.498244015886406e-05, + "loss": -0.9475, + "step": 29690 + }, + { + "epoch": 66.59192825112108, + "grad_norm": 0.18763531744480133, + "learning_rate": 5.495501873757565e-05, + "loss": -0.9466, + "step": 29700 + }, + { + "epoch": 66.61434977578476, + "grad_norm": 0.23497653007507324, + "learning_rate": 5.492759581108336e-05, + "loss": -0.9466, + "step": 29710 + }, + { + "epoch": 66.63677130044843, + "grad_norm": 0.24023810029029846, + "learning_rate": 5.490017138771759e-05, + "loss": -0.9462, + "step": 29720 + }, + { + "epoch": 66.65919282511211, + "grad_norm": 0.20955735445022583, + "learning_rate": 5.487274547580912e-05, + "loss": -0.9474, + "step": 29730 + }, + { + "epoch": 66.68161434977578, + "grad_norm": 0.41134583950042725, + "learning_rate": 5.484531808368923e-05, + "loss": -0.9466, + "step": 29740 + }, + { + "epoch": 66.70403587443946, + "grad_norm": 0.2590046226978302, + "learning_rate": 5.4817889219689656e-05, + "loss": -0.9448, + "step": 29750 + }, + { + "epoch": 66.72645739910314, + "grad_norm": 0.4190821647644043, + "learning_rate": 5.4790458892142536e-05, + "loss": -0.9488, + "step": 29760 + }, + { + "epoch": 66.74887892376681, + "grad_norm": 0.14163252711296082, + "learning_rate": 5.476302710938048e-05, + "loss": -0.9493, + "step": 29770 + }, + { + "epoch": 66.7713004484305, + "grad_norm": 0.2032262235879898, + "learning_rate": 5.473559387973657e-05, + "loss": -0.9448, + "step": 29780 + }, + { + "epoch": 66.79372197309416, + "grad_norm": 0.2001803070306778, + "learning_rate": 5.470815921154425e-05, + "loss": -0.9453, + "step": 29790 + }, + { + "epoch": 66.81614349775785, + "grad_norm": 0.17449571192264557, + "learning_rate": 5.468072311313749e-05, + "loss": -0.9491, + "step": 29800 + }, + { + "epoch": 66.83856502242152, + "grad_norm": 0.27023905515670776, + "learning_rate": 5.465328559285063e-05, + "loss": -0.9448, + "step": 29810 + }, + { + "epoch": 66.8609865470852, + "grad_norm": 0.2230394184589386, + "learning_rate": 5.462584665901849e-05, + "loss": -0.9487, + "step": 29820 + }, + { + "epoch": 66.88340807174887, + "grad_norm": 0.3106291592121124, + "learning_rate": 5.4598406319976235e-05, + "loss": -0.9421, + "step": 29830 + }, + { + "epoch": 66.90582959641256, + "grad_norm": 0.29631736874580383, + "learning_rate": 5.457096458405958e-05, + "loss": -0.9381, + "step": 29840 + }, + { + "epoch": 66.92825112107623, + "grad_norm": 0.30028271675109863, + "learning_rate": 5.454352145960457e-05, + "loss": -0.9453, + "step": 29850 + }, + { + "epoch": 66.95067264573991, + "grad_norm": 0.2589881420135498, + "learning_rate": 5.4516076954947715e-05, + "loss": -0.9485, + "step": 29860 + }, + { + "epoch": 66.97309417040358, + "grad_norm": 0.2552477717399597, + "learning_rate": 5.448863107842591e-05, + "loss": -0.945, + "step": 29870 + }, + { + "epoch": 66.99551569506727, + "grad_norm": 0.26274389028549194, + "learning_rate": 5.446118383837651e-05, + "loss": -0.9459, + "step": 29880 + }, + { + "epoch": 67.01793721973094, + "grad_norm": 0.29429513216018677, + "learning_rate": 5.443373524313722e-05, + "loss": -0.9439, + "step": 29890 + }, + { + "epoch": 67.04035874439462, + "grad_norm": 0.26817652583122253, + "learning_rate": 5.440628530104626e-05, + "loss": -0.9495, + "step": 29900 + }, + { + "epoch": 67.0627802690583, + "grad_norm": 0.17662078142166138, + "learning_rate": 5.4378834020442146e-05, + "loss": -0.9514, + "step": 29910 + }, + { + "epoch": 67.08520179372198, + "grad_norm": 0.20232775807380676, + "learning_rate": 5.4351381409663884e-05, + "loss": -0.949, + "step": 29920 + }, + { + "epoch": 67.10762331838565, + "grad_norm": 0.14753393828868866, + "learning_rate": 5.432392747705084e-05, + "loss": -0.9489, + "step": 29930 + }, + { + "epoch": 67.13004484304933, + "grad_norm": 0.17900040745735168, + "learning_rate": 5.429647223094278e-05, + "loss": -0.9495, + "step": 29940 + }, + { + "epoch": 67.152466367713, + "grad_norm": 0.17455554008483887, + "learning_rate": 5.4269015679679924e-05, + "loss": -0.9457, + "step": 29950 + }, + { + "epoch": 67.17488789237669, + "grad_norm": 0.2899738848209381, + "learning_rate": 5.424155783160281e-05, + "loss": -0.9511, + "step": 29960 + }, + { + "epoch": 67.19730941704036, + "grad_norm": 0.2853543758392334, + "learning_rate": 5.4214098695052415e-05, + "loss": -0.9462, + "step": 29970 + }, + { + "epoch": 67.21973094170404, + "grad_norm": 0.2992771565914154, + "learning_rate": 5.418663827837012e-05, + "loss": -0.945, + "step": 29980 + }, + { + "epoch": 67.24215246636771, + "grad_norm": 0.3808242082595825, + "learning_rate": 5.415917658989763e-05, + "loss": -0.9462, + "step": 29990 + }, + { + "epoch": 67.26457399103138, + "grad_norm": 0.23671209812164307, + "learning_rate": 5.413171363797713e-05, + "loss": -0.9443, + "step": 30000 + } + ], + "logging_steps": 10, + "max_steps": 60000, + "num_input_tokens_seen": 0, + "num_train_epochs": 135, + "save_steps": 10000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}