| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 76.92307692307692, |
| "eval_steps": 500, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.038461538461538464, |
| "grad_norm": 0.8465695381164551, |
| "learning_rate": 2.7e-07, |
| "loss": 1.3641, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.07692307692307693, |
| "grad_norm": 0.6664445400238037, |
| "learning_rate": 5.7e-07, |
| "loss": 1.3695, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.11538461538461539, |
| "grad_norm": 0.7535883784294128, |
| "learning_rate": 8.7e-07, |
| "loss": 1.3618, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 0.6306048631668091, |
| "learning_rate": 1.17e-06, |
| "loss": 1.3613, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.19230769230769232, |
| "grad_norm": 0.778509795665741, |
| "learning_rate": 1.4700000000000001e-06, |
| "loss": 1.3601, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 0.8616118431091309, |
| "learning_rate": 1.77e-06, |
| "loss": 1.3455, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2692307692307692, |
| "grad_norm": 0.9706582427024841, |
| "learning_rate": 2.07e-06, |
| "loss": 1.3393, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 1.0790719985961914, |
| "learning_rate": 2.37e-06, |
| "loss": 1.338, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.34615384615384615, |
| "grad_norm": 1.1439405679702759, |
| "learning_rate": 2.67e-06, |
| "loss": 1.3231, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 1.5780725479125977, |
| "learning_rate": 2.9700000000000004e-06, |
| "loss": 1.3026, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4230769230769231, |
| "grad_norm": 1.6573809385299683, |
| "learning_rate": 3.27e-06, |
| "loss": 1.2959, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 1.5089571475982666, |
| "learning_rate": 3.57e-06, |
| "loss": 1.2855, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.3819092512130737, |
| "learning_rate": 3.87e-06, |
| "loss": 1.2571, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5384615384615384, |
| "grad_norm": 1.3043365478515625, |
| "learning_rate": 4.170000000000001e-06, |
| "loss": 1.2411, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5769230769230769, |
| "grad_norm": 1.0912373065948486, |
| "learning_rate": 4.4699999999999996e-06, |
| "loss": 1.224, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 1.026869535446167, |
| "learning_rate": 4.77e-06, |
| "loss": 1.1946, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6538461538461539, |
| "grad_norm": 0.9299987554550171, |
| "learning_rate": 5.070000000000001e-06, |
| "loss": 1.1772, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 0.9173968434333801, |
| "learning_rate": 5.37e-06, |
| "loss": 1.1611, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7307692307692307, |
| "grad_norm": 0.9373317360877991, |
| "learning_rate": 5.67e-06, |
| "loss": 1.1386, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.8152101635932922, |
| "learning_rate": 5.9700000000000004e-06, |
| "loss": 1.1378, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8076923076923077, |
| "grad_norm": 1.0374643802642822, |
| "learning_rate": 6.27e-06, |
| "loss": 1.1314, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.8461538461538461, |
| "grad_norm": 0.9438158869743347, |
| "learning_rate": 6.57e-06, |
| "loss": 1.1076, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8846153846153846, |
| "grad_norm": 0.9103168845176697, |
| "learning_rate": 6.87e-06, |
| "loss": 1.0955, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.743010401725769, |
| "learning_rate": 7.17e-06, |
| "loss": 1.093, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.9615384615384616, |
| "grad_norm": 0.8534924983978271, |
| "learning_rate": 7.4700000000000005e-06, |
| "loss": 1.0831, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.0246775150299072, |
| "learning_rate": 7.77e-06, |
| "loss": 1.0725, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.0384615384615385, |
| "grad_norm": 0.9404919743537903, |
| "learning_rate": 8.07e-06, |
| "loss": 1.0711, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.0769230769230769, |
| "grad_norm": 1.093787431716919, |
| "learning_rate": 8.370000000000001e-06, |
| "loss": 1.0709, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.1153846153846154, |
| "grad_norm": 0.870293378829956, |
| "learning_rate": 8.67e-06, |
| "loss": 1.0667, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.1538461538461537, |
| "grad_norm": 0.9972339272499084, |
| "learning_rate": 8.97e-06, |
| "loss": 1.0592, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1923076923076923, |
| "grad_norm": 1.1312875747680664, |
| "learning_rate": 9.27e-06, |
| "loss": 1.0592, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.2307692307692308, |
| "grad_norm": 1.0009738206863403, |
| "learning_rate": 9.57e-06, |
| "loss": 1.0524, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.2692307692307692, |
| "grad_norm": 0.9962753057479858, |
| "learning_rate": 9.87e-06, |
| "loss": 1.0491, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.3076923076923077, |
| "grad_norm": 1.0982589721679688, |
| "learning_rate": 1.0170000000000001e-05, |
| "loss": 1.051, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.3461538461538463, |
| "grad_norm": 1.1114718914031982, |
| "learning_rate": 1.047e-05, |
| "loss": 1.0461, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.3846153846153846, |
| "grad_norm": 1.2868746519088745, |
| "learning_rate": 1.077e-05, |
| "loss": 1.0319, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.4230769230769231, |
| "grad_norm": 1.2511143684387207, |
| "learning_rate": 1.107e-05, |
| "loss": 1.0278, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.4615384615384617, |
| "grad_norm": 1.0229848623275757, |
| "learning_rate": 1.137e-05, |
| "loss": 1.0187, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.2445800304412842, |
| "learning_rate": 1.167e-05, |
| "loss": 1.0057, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 1.2295691967010498, |
| "learning_rate": 1.197e-05, |
| "loss": 0.9782, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.5769230769230769, |
| "grad_norm": 1.7420762777328491, |
| "learning_rate": 1.227e-05, |
| "loss": 0.9389, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 1.8028358221054077, |
| "learning_rate": 1.257e-05, |
| "loss": 0.8751, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.6538461538461537, |
| "grad_norm": 2.2324907779693604, |
| "learning_rate": 1.287e-05, |
| "loss": 0.828, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.6923076923076923, |
| "grad_norm": 2.3086061477661133, |
| "learning_rate": 1.3170000000000001e-05, |
| "loss": 0.7754, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.7307692307692308, |
| "grad_norm": 2.051551342010498, |
| "learning_rate": 1.3470000000000001e-05, |
| "loss": 0.7396, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.7692307692307692, |
| "grad_norm": 2.074948787689209, |
| "learning_rate": 1.377e-05, |
| "loss": 0.7048, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.8076923076923077, |
| "grad_norm": 2.608893871307373, |
| "learning_rate": 1.4069999999999999e-05, |
| "loss": 0.684, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.8461538461538463, |
| "grad_norm": 2.885021209716797, |
| "learning_rate": 1.437e-05, |
| "loss": 0.6542, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.8846153846153846, |
| "grad_norm": 2.5614078044891357, |
| "learning_rate": 1.467e-05, |
| "loss": 0.6306, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 2.7305147647857666, |
| "learning_rate": 1.497e-05, |
| "loss": 0.6043, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.9615384615384617, |
| "grad_norm": 3.067171812057495, |
| "learning_rate": 1.527e-05, |
| "loss": 0.5919, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.1146395206451416, |
| "learning_rate": 1.5570000000000002e-05, |
| "loss": 0.5696, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.0384615384615383, |
| "grad_norm": 3.35973858833313, |
| "learning_rate": 1.5870000000000002e-05, |
| "loss": 0.5662, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.076923076923077, |
| "grad_norm": 3.7923669815063477, |
| "learning_rate": 1.6170000000000003e-05, |
| "loss": 0.5478, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.1153846153846154, |
| "grad_norm": 3.8711159229278564, |
| "learning_rate": 1.6470000000000003e-05, |
| "loss": 0.5375, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.1538461538461537, |
| "grad_norm": 3.2727854251861572, |
| "learning_rate": 1.677e-05, |
| "loss": 0.522, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.1923076923076925, |
| "grad_norm": 3.385040521621704, |
| "learning_rate": 1.7069999999999998e-05, |
| "loss": 0.5089, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.230769230769231, |
| "grad_norm": 3.5132815837860107, |
| "learning_rate": 1.7369999999999998e-05, |
| "loss": 0.5031, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.269230769230769, |
| "grad_norm": 3.465324878692627, |
| "learning_rate": 1.767e-05, |
| "loss": 0.5019, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 3.7787463665008545, |
| "learning_rate": 1.797e-05, |
| "loss": 0.502, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.3461538461538463, |
| "grad_norm": 3.8140358924865723, |
| "learning_rate": 1.827e-05, |
| "loss": 0.4996, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.3846153846153846, |
| "grad_norm": 3.4496121406555176, |
| "learning_rate": 1.857e-05, |
| "loss": 0.4875, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.423076923076923, |
| "grad_norm": 3.9799156188964844, |
| "learning_rate": 1.887e-05, |
| "loss": 0.4834, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.4615384615384617, |
| "grad_norm": 3.7426834106445312, |
| "learning_rate": 1.917e-05, |
| "loss": 0.4802, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 3.552870988845825, |
| "learning_rate": 1.947e-05, |
| "loss": 0.4839, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.5384615384615383, |
| "grad_norm": 3.0882790088653564, |
| "learning_rate": 1.9770000000000002e-05, |
| "loss": 0.4777, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.5769230769230766, |
| "grad_norm": 3.536569595336914, |
| "learning_rate": 2.0070000000000003e-05, |
| "loss": 0.4892, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.6153846153846154, |
| "grad_norm": 3.4024746417999268, |
| "learning_rate": 2.0370000000000003e-05, |
| "loss": 0.4761, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.6538461538461537, |
| "grad_norm": 3.4993839263916016, |
| "learning_rate": 2.067e-05, |
| "loss": 0.4699, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.6923076923076925, |
| "grad_norm": 4.353096961975098, |
| "learning_rate": 2.097e-05, |
| "loss": 0.4741, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.730769230769231, |
| "grad_norm": 4.202399730682373, |
| "learning_rate": 2.1269999999999998e-05, |
| "loss": 0.4794, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.769230769230769, |
| "grad_norm": 4.3636980056762695, |
| "learning_rate": 2.157e-05, |
| "loss": 0.4712, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.8076923076923075, |
| "grad_norm": 4.043900489807129, |
| "learning_rate": 2.187e-05, |
| "loss": 0.4676, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.8461538461538463, |
| "grad_norm": 3.271538496017456, |
| "learning_rate": 2.217e-05, |
| "loss": 0.459, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.8846153846153846, |
| "grad_norm": 3.215296506881714, |
| "learning_rate": 2.247e-05, |
| "loss": 0.4648, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.9230769230769234, |
| "grad_norm": 3.489201784133911, |
| "learning_rate": 2.277e-05, |
| "loss": 0.4556, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.9615384615384617, |
| "grad_norm": 3.2616641521453857, |
| "learning_rate": 2.307e-05, |
| "loss": 0.4613, |
| "step": 770 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 3.8984408378601074, |
| "learning_rate": 2.337e-05, |
| "loss": 0.4573, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.0384615384615383, |
| "grad_norm": 4.524011135101318, |
| "learning_rate": 2.3670000000000002e-05, |
| "loss": 0.4587, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.076923076923077, |
| "grad_norm": 3.7047295570373535, |
| "learning_rate": 2.3970000000000003e-05, |
| "loss": 0.4582, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.1153846153846154, |
| "grad_norm": 4.147561073303223, |
| "learning_rate": 2.4270000000000003e-05, |
| "loss": 0.4652, |
| "step": 810 |
| }, |
| { |
| "epoch": 3.1538461538461537, |
| "grad_norm": 3.836184024810791, |
| "learning_rate": 2.457e-05, |
| "loss": 0.4549, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.1923076923076925, |
| "grad_norm": 4.417998313903809, |
| "learning_rate": 2.487e-05, |
| "loss": 0.4531, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.230769230769231, |
| "grad_norm": 4.859844207763672, |
| "learning_rate": 2.517e-05, |
| "loss": 0.4527, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.269230769230769, |
| "grad_norm": 3.956090211868286, |
| "learning_rate": 2.547e-05, |
| "loss": 0.4513, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.3076923076923075, |
| "grad_norm": 3.7535667419433594, |
| "learning_rate": 2.577e-05, |
| "loss": 0.4543, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.3461538461538463, |
| "grad_norm": 3.8291614055633545, |
| "learning_rate": 2.607e-05, |
| "loss": 0.448, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.3846153846153846, |
| "grad_norm": 3.6051249504089355, |
| "learning_rate": 2.637e-05, |
| "loss": 0.4425, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.423076923076923, |
| "grad_norm": 4.240597248077393, |
| "learning_rate": 2.667e-05, |
| "loss": 0.4432, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.4615384615384617, |
| "grad_norm": 4.126594543457031, |
| "learning_rate": 2.697e-05, |
| "loss": 0.4481, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 3.919288396835327, |
| "learning_rate": 2.727e-05, |
| "loss": 0.4392, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.5384615384615383, |
| "grad_norm": 3.442884683609009, |
| "learning_rate": 2.7570000000000002e-05, |
| "loss": 0.4453, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.5769230769230766, |
| "grad_norm": 4.0917067527771, |
| "learning_rate": 2.7870000000000003e-05, |
| "loss": 0.4407, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.6153846153846154, |
| "grad_norm": 4.487368106842041, |
| "learning_rate": 2.817e-05, |
| "loss": 0.4435, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.6538461538461537, |
| "grad_norm": 3.6351025104522705, |
| "learning_rate": 2.847e-05, |
| "loss": 0.4509, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.6923076923076925, |
| "grad_norm": 4.146681308746338, |
| "learning_rate": 2.877e-05, |
| "loss": 0.4346, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.730769230769231, |
| "grad_norm": 4.30366849899292, |
| "learning_rate": 2.907e-05, |
| "loss": 0.4398, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.769230769230769, |
| "grad_norm": 2.9849531650543213, |
| "learning_rate": 2.9370000000000002e-05, |
| "loss": 0.4414, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.8076923076923075, |
| "grad_norm": 3.1285574436187744, |
| "learning_rate": 2.967e-05, |
| "loss": 0.4431, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 3.2634713649749756, |
| "learning_rate": 2.997e-05, |
| "loss": 0.443, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.8846153846153846, |
| "grad_norm": 3.845669746398926, |
| "learning_rate": 2.9999983391181253e-05, |
| "loss": 0.4364, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.9230769230769234, |
| "grad_norm": 3.47135853767395, |
| "learning_rate": 2.9999925978027876e-05, |
| "loss": 0.4414, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.9615384615384617, |
| "grad_norm": 3.6643917560577393, |
| "learning_rate": 2.9999827555649637e-05, |
| "loss": 0.4331, |
| "step": 1030 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 3.6928398609161377, |
| "learning_rate": 2.999968812431563e-05, |
| "loss": 0.4358, |
| "step": 1040 |
| }, |
| { |
| "epoch": 4.038461538461538, |
| "grad_norm": 3.31592059135437, |
| "learning_rate": 2.999950768440706e-05, |
| "loss": 0.4304, |
| "step": 1050 |
| }, |
| { |
| "epoch": 4.076923076923077, |
| "grad_norm": 3.2825427055358887, |
| "learning_rate": 2.999928623641723e-05, |
| "loss": 0.4372, |
| "step": 1060 |
| }, |
| { |
| "epoch": 4.115384615384615, |
| "grad_norm": 3.60921311378479, |
| "learning_rate": 2.9999023780951575e-05, |
| "loss": 0.4321, |
| "step": 1070 |
| }, |
| { |
| "epoch": 4.153846153846154, |
| "grad_norm": 3.924734354019165, |
| "learning_rate": 2.999872031872764e-05, |
| "loss": 0.4291, |
| "step": 1080 |
| }, |
| { |
| "epoch": 4.1923076923076925, |
| "grad_norm": 3.746192693710327, |
| "learning_rate": 2.999837585057508e-05, |
| "loss": 0.4212, |
| "step": 1090 |
| }, |
| { |
| "epoch": 4.230769230769231, |
| "grad_norm": 3.685960531234741, |
| "learning_rate": 2.999799037743565e-05, |
| "loss": 0.4186, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.269230769230769, |
| "grad_norm": 3.234938621520996, |
| "learning_rate": 2.999756390036323e-05, |
| "loss": 0.4166, |
| "step": 1110 |
| }, |
| { |
| "epoch": 4.3076923076923075, |
| "grad_norm": 4.075984001159668, |
| "learning_rate": 2.9997096420523788e-05, |
| "loss": 0.4191, |
| "step": 1120 |
| }, |
| { |
| "epoch": 4.346153846153846, |
| "grad_norm": 3.920379161834717, |
| "learning_rate": 2.9996587939195395e-05, |
| "loss": 0.4224, |
| "step": 1130 |
| }, |
| { |
| "epoch": 4.384615384615385, |
| "grad_norm": 3.652268171310425, |
| "learning_rate": 2.999603845776822e-05, |
| "loss": 0.4188, |
| "step": 1140 |
| }, |
| { |
| "epoch": 4.423076923076923, |
| "grad_norm": 4.860605716705322, |
| "learning_rate": 2.999544797774452e-05, |
| "loss": 0.4217, |
| "step": 1150 |
| }, |
| { |
| "epoch": 4.461538461538462, |
| "grad_norm": 4.3559088706970215, |
| "learning_rate": 2.9994816500738648e-05, |
| "loss": 0.4161, |
| "step": 1160 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 3.8840413093566895, |
| "learning_rate": 2.999414402847704e-05, |
| "loss": 0.4172, |
| "step": 1170 |
| }, |
| { |
| "epoch": 4.538461538461538, |
| "grad_norm": 3.8383638858795166, |
| "learning_rate": 2.999343056279821e-05, |
| "loss": 0.4133, |
| "step": 1180 |
| }, |
| { |
| "epoch": 4.576923076923077, |
| "grad_norm": 3.845787286758423, |
| "learning_rate": 2.9992676105652746e-05, |
| "loss": 0.4143, |
| "step": 1190 |
| }, |
| { |
| "epoch": 4.615384615384615, |
| "grad_norm": 3.575833320617676, |
| "learning_rate": 2.9991880659103298e-05, |
| "loss": 0.4152, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.653846153846154, |
| "grad_norm": 3.3321430683135986, |
| "learning_rate": 2.9991044225324593e-05, |
| "loss": 0.4121, |
| "step": 1210 |
| }, |
| { |
| "epoch": 4.6923076923076925, |
| "grad_norm": 3.8986430168151855, |
| "learning_rate": 2.9990166806603407e-05, |
| "loss": 0.4084, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.730769230769231, |
| "grad_norm": 3.6424148082733154, |
| "learning_rate": 2.9989248405338573e-05, |
| "loss": 0.414, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.769230769230769, |
| "grad_norm": 4.0105671882629395, |
| "learning_rate": 2.9988289024040962e-05, |
| "loss": 0.4047, |
| "step": 1240 |
| }, |
| { |
| "epoch": 4.8076923076923075, |
| "grad_norm": 3.252894401550293, |
| "learning_rate": 2.998728866533348e-05, |
| "loss": 0.4123, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.846153846153846, |
| "grad_norm": 3.5156307220458984, |
| "learning_rate": 2.9986247331951083e-05, |
| "loss": 0.4068, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.884615384615385, |
| "grad_norm": 2.7843873500823975, |
| "learning_rate": 2.998516502674072e-05, |
| "loss": 0.4116, |
| "step": 1270 |
| }, |
| { |
| "epoch": 4.923076923076923, |
| "grad_norm": 3.0897672176361084, |
| "learning_rate": 2.9984041752661386e-05, |
| "loss": 0.4074, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.961538461538462, |
| "grad_norm": 3.780107021331787, |
| "learning_rate": 2.9982877512784067e-05, |
| "loss": 0.4063, |
| "step": 1290 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 4.262648582458496, |
| "learning_rate": 2.998167231029174e-05, |
| "loss": 0.4051, |
| "step": 1300 |
| }, |
| { |
| "epoch": 5.038461538461538, |
| "grad_norm": 3.8252484798431396, |
| "learning_rate": 2.99804261484794e-05, |
| "loss": 0.4053, |
| "step": 1310 |
| }, |
| { |
| "epoch": 5.076923076923077, |
| "grad_norm": 3.634221076965332, |
| "learning_rate": 2.997913903075399e-05, |
| "loss": 0.4077, |
| "step": 1320 |
| }, |
| { |
| "epoch": 5.115384615384615, |
| "grad_norm": 2.8847663402557373, |
| "learning_rate": 2.997781096063445e-05, |
| "loss": 0.4022, |
| "step": 1330 |
| }, |
| { |
| "epoch": 5.153846153846154, |
| "grad_norm": 3.5783627033233643, |
| "learning_rate": 2.9976441941751663e-05, |
| "loss": 0.401, |
| "step": 1340 |
| }, |
| { |
| "epoch": 5.1923076923076925, |
| "grad_norm": 3.9379782676696777, |
| "learning_rate": 2.997503197784849e-05, |
| "loss": 0.3991, |
| "step": 1350 |
| }, |
| { |
| "epoch": 5.230769230769231, |
| "grad_norm": 3.22617506980896, |
| "learning_rate": 2.9973581072779702e-05, |
| "loss": 0.399, |
| "step": 1360 |
| }, |
| { |
| "epoch": 5.269230769230769, |
| "grad_norm": 3.1377646923065186, |
| "learning_rate": 2.9972089230512035e-05, |
| "loss": 0.3961, |
| "step": 1370 |
| }, |
| { |
| "epoch": 5.3076923076923075, |
| "grad_norm": 3.380993127822876, |
| "learning_rate": 2.997055645512411e-05, |
| "loss": 0.402, |
| "step": 1380 |
| }, |
| { |
| "epoch": 5.346153846153846, |
| "grad_norm": 3.271967649459839, |
| "learning_rate": 2.9968982750806492e-05, |
| "loss": 0.3929, |
| "step": 1390 |
| }, |
| { |
| "epoch": 5.384615384615385, |
| "grad_norm": 3.3165857791900635, |
| "learning_rate": 2.9967368121861623e-05, |
| "loss": 0.3996, |
| "step": 1400 |
| }, |
| { |
| "epoch": 5.423076923076923, |
| "grad_norm": 3.385725975036621, |
| "learning_rate": 2.9965712572703834e-05, |
| "loss": 0.3936, |
| "step": 1410 |
| }, |
| { |
| "epoch": 5.461538461538462, |
| "grad_norm": 3.0297584533691406, |
| "learning_rate": 2.996401610785934e-05, |
| "loss": 0.3991, |
| "step": 1420 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 3.3578076362609863, |
| "learning_rate": 2.99622787319662e-05, |
| "loss": 0.3939, |
| "step": 1430 |
| }, |
| { |
| "epoch": 5.538461538461538, |
| "grad_norm": 3.9298784732818604, |
| "learning_rate": 2.9960500449774338e-05, |
| "loss": 0.3997, |
| "step": 1440 |
| }, |
| { |
| "epoch": 5.576923076923077, |
| "grad_norm": 3.5111310482025146, |
| "learning_rate": 2.9958681266145517e-05, |
| "loss": 0.3994, |
| "step": 1450 |
| }, |
| { |
| "epoch": 5.615384615384615, |
| "grad_norm": 3.912317991256714, |
| "learning_rate": 2.995682118605331e-05, |
| "loss": 0.392, |
| "step": 1460 |
| }, |
| { |
| "epoch": 5.653846153846154, |
| "grad_norm": 3.1432626247406006, |
| "learning_rate": 2.9954920214583107e-05, |
| "loss": 0.3951, |
| "step": 1470 |
| }, |
| { |
| "epoch": 5.6923076923076925, |
| "grad_norm": 3.084312915802002, |
| "learning_rate": 2.9952978356932084e-05, |
| "loss": 0.3941, |
| "step": 1480 |
| }, |
| { |
| "epoch": 5.730769230769231, |
| "grad_norm": 3.1436219215393066, |
| "learning_rate": 2.9950995618409215e-05, |
| "loss": 0.3924, |
| "step": 1490 |
| }, |
| { |
| "epoch": 5.769230769230769, |
| "grad_norm": 3.0541083812713623, |
| "learning_rate": 2.9948972004435228e-05, |
| "loss": 0.4004, |
| "step": 1500 |
| }, |
| { |
| "epoch": 5.8076923076923075, |
| "grad_norm": 3.0577759742736816, |
| "learning_rate": 2.9946907520542602e-05, |
| "loss": 0.3949, |
| "step": 1510 |
| }, |
| { |
| "epoch": 5.846153846153846, |
| "grad_norm": 3.1131491661071777, |
| "learning_rate": 2.9944802172375566e-05, |
| "loss": 0.3936, |
| "step": 1520 |
| }, |
| { |
| "epoch": 5.884615384615385, |
| "grad_norm": 2.9365406036376953, |
| "learning_rate": 2.9942655965690053e-05, |
| "loss": 0.3904, |
| "step": 1530 |
| }, |
| { |
| "epoch": 5.923076923076923, |
| "grad_norm": 3.253673791885376, |
| "learning_rate": 2.9940468906353712e-05, |
| "loss": 0.3916, |
| "step": 1540 |
| }, |
| { |
| "epoch": 5.961538461538462, |
| "grad_norm": 3.9712073802948, |
| "learning_rate": 2.9938241000345887e-05, |
| "loss": 0.3864, |
| "step": 1550 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 3.6548357009887695, |
| "learning_rate": 2.993597225375758e-05, |
| "loss": 0.3842, |
| "step": 1560 |
| }, |
| { |
| "epoch": 6.038461538461538, |
| "grad_norm": 4.270135402679443, |
| "learning_rate": 2.993366267279146e-05, |
| "loss": 0.385, |
| "step": 1570 |
| }, |
| { |
| "epoch": 6.076923076923077, |
| "grad_norm": 4.103717803955078, |
| "learning_rate": 2.993131226376183e-05, |
| "loss": 0.382, |
| "step": 1580 |
| }, |
| { |
| "epoch": 6.115384615384615, |
| "grad_norm": 3.5214860439300537, |
| "learning_rate": 2.9928921033094626e-05, |
| "loss": 0.3818, |
| "step": 1590 |
| }, |
| { |
| "epoch": 6.153846153846154, |
| "grad_norm": 3.6836981773376465, |
| "learning_rate": 2.9926488987327376e-05, |
| "loss": 0.377, |
| "step": 1600 |
| }, |
| { |
| "epoch": 6.1923076923076925, |
| "grad_norm": 3.7300078868865967, |
| "learning_rate": 2.99240161331092e-05, |
| "loss": 0.3705, |
| "step": 1610 |
| }, |
| { |
| "epoch": 6.230769230769231, |
| "grad_norm": 4.4989118576049805, |
| "learning_rate": 2.992150247720079e-05, |
| "loss": 0.3683, |
| "step": 1620 |
| }, |
| { |
| "epoch": 6.269230769230769, |
| "grad_norm": 3.8246257305145264, |
| "learning_rate": 2.991894802647438e-05, |
| "loss": 0.3666, |
| "step": 1630 |
| }, |
| { |
| "epoch": 6.3076923076923075, |
| "grad_norm": 3.7448792457580566, |
| "learning_rate": 2.9916352787913746e-05, |
| "loss": 0.3657, |
| "step": 1640 |
| }, |
| { |
| "epoch": 6.346153846153846, |
| "grad_norm": 5.380867004394531, |
| "learning_rate": 2.991371676861417e-05, |
| "loss": 0.3704, |
| "step": 1650 |
| }, |
| { |
| "epoch": 6.384615384615385, |
| "grad_norm": 5.416983127593994, |
| "learning_rate": 2.991103997578243e-05, |
| "loss": 0.36, |
| "step": 1660 |
| }, |
| { |
| "epoch": 6.423076923076923, |
| "grad_norm": 4.012523174285889, |
| "learning_rate": 2.9908322416736767e-05, |
| "loss": 0.3645, |
| "step": 1670 |
| }, |
| { |
| "epoch": 6.461538461538462, |
| "grad_norm": 3.2570512294769287, |
| "learning_rate": 2.990556409890689e-05, |
| "loss": 0.3583, |
| "step": 1680 |
| }, |
| { |
| "epoch": 6.5, |
| "grad_norm": 3.389817714691162, |
| "learning_rate": 2.990276502983394e-05, |
| "loss": 0.3583, |
| "step": 1690 |
| }, |
| { |
| "epoch": 6.538461538461538, |
| "grad_norm": 4.151524066925049, |
| "learning_rate": 2.9899925217170455e-05, |
| "loss": 0.3525, |
| "step": 1700 |
| }, |
| { |
| "epoch": 6.576923076923077, |
| "grad_norm": 4.0402140617370605, |
| "learning_rate": 2.989704466868038e-05, |
| "loss": 0.3543, |
| "step": 1710 |
| }, |
| { |
| "epoch": 6.615384615384615, |
| "grad_norm": 3.9035301208496094, |
| "learning_rate": 2.9894123392239018e-05, |
| "loss": 0.3524, |
| "step": 1720 |
| }, |
| { |
| "epoch": 6.653846153846154, |
| "grad_norm": 4.771100044250488, |
| "learning_rate": 2.9891161395833037e-05, |
| "loss": 0.346, |
| "step": 1730 |
| }, |
| { |
| "epoch": 6.6923076923076925, |
| "grad_norm": 4.07216739654541, |
| "learning_rate": 2.988815868756042e-05, |
| "loss": 0.3412, |
| "step": 1740 |
| }, |
| { |
| "epoch": 6.730769230769231, |
| "grad_norm": 3.648852586746216, |
| "learning_rate": 2.9885115275630447e-05, |
| "loss": 0.3364, |
| "step": 1750 |
| }, |
| { |
| "epoch": 6.769230769230769, |
| "grad_norm": 4.100618362426758, |
| "learning_rate": 2.9882031168363703e-05, |
| "loss": 0.3314, |
| "step": 1760 |
| }, |
| { |
| "epoch": 6.8076923076923075, |
| "grad_norm": 4.023171901702881, |
| "learning_rate": 2.9878906374192013e-05, |
| "loss": 0.3298, |
| "step": 1770 |
| }, |
| { |
| "epoch": 6.846153846153846, |
| "grad_norm": 3.8320722579956055, |
| "learning_rate": 2.9875740901658446e-05, |
| "loss": 0.3208, |
| "step": 1780 |
| }, |
| { |
| "epoch": 6.884615384615385, |
| "grad_norm": 3.7699222564697266, |
| "learning_rate": 2.987253475941728e-05, |
| "loss": 0.3203, |
| "step": 1790 |
| }, |
| { |
| "epoch": 6.923076923076923, |
| "grad_norm": 4.349817752838135, |
| "learning_rate": 2.9869287956233986e-05, |
| "loss": 0.3092, |
| "step": 1800 |
| }, |
| { |
| "epoch": 6.961538461538462, |
| "grad_norm": 3.1222894191741943, |
| "learning_rate": 2.9866000500985207e-05, |
| "loss": 0.3076, |
| "step": 1810 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 5.115797519683838, |
| "learning_rate": 2.9862672402658712e-05, |
| "loss": 0.3011, |
| "step": 1820 |
| }, |
| { |
| "epoch": 7.038461538461538, |
| "grad_norm": 4.312737941741943, |
| "learning_rate": 2.98593036703534e-05, |
| "loss": 0.3016, |
| "step": 1830 |
| }, |
| { |
| "epoch": 7.076923076923077, |
| "grad_norm": 5.156615734100342, |
| "learning_rate": 2.9855894313279256e-05, |
| "loss": 0.2959, |
| "step": 1840 |
| }, |
| { |
| "epoch": 7.115384615384615, |
| "grad_norm": 4.620377540588379, |
| "learning_rate": 2.9852444340757326e-05, |
| "loss": 0.2916, |
| "step": 1850 |
| }, |
| { |
| "epoch": 7.153846153846154, |
| "grad_norm": 4.231908321380615, |
| "learning_rate": 2.9848953762219707e-05, |
| "loss": 0.2947, |
| "step": 1860 |
| }, |
| { |
| "epoch": 7.1923076923076925, |
| "grad_norm": 4.510662078857422, |
| "learning_rate": 2.984542258720951e-05, |
| "loss": 0.2846, |
| "step": 1870 |
| }, |
| { |
| "epoch": 7.230769230769231, |
| "grad_norm": 5.093549728393555, |
| "learning_rate": 2.984185082538083e-05, |
| "loss": 0.2707, |
| "step": 1880 |
| }, |
| { |
| "epoch": 7.269230769230769, |
| "grad_norm": 5.482805252075195, |
| "learning_rate": 2.983823848649873e-05, |
| "loss": 0.2624, |
| "step": 1890 |
| }, |
| { |
| "epoch": 7.3076923076923075, |
| "grad_norm": 3.6456329822540283, |
| "learning_rate": 2.9834585580439203e-05, |
| "loss": 0.2769, |
| "step": 1900 |
| }, |
| { |
| "epoch": 7.346153846153846, |
| "grad_norm": 4.198169708251953, |
| "learning_rate": 2.9830892117189157e-05, |
| "loss": 0.2659, |
| "step": 1910 |
| }, |
| { |
| "epoch": 7.384615384615385, |
| "grad_norm": 4.8469929695129395, |
| "learning_rate": 2.982715810684638e-05, |
| "loss": 0.2612, |
| "step": 1920 |
| }, |
| { |
| "epoch": 7.423076923076923, |
| "grad_norm": 5.01643705368042, |
| "learning_rate": 2.982338355961951e-05, |
| "loss": 0.2562, |
| "step": 1930 |
| }, |
| { |
| "epoch": 7.461538461538462, |
| "grad_norm": 5.112571716308594, |
| "learning_rate": 2.981956848582802e-05, |
| "loss": 0.2523, |
| "step": 1940 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 3.7991206645965576, |
| "learning_rate": 2.981571289590217e-05, |
| "loss": 0.2475, |
| "step": 1950 |
| }, |
| { |
| "epoch": 7.538461538461538, |
| "grad_norm": 4.921756267547607, |
| "learning_rate": 2.9811816800383003e-05, |
| "loss": 0.237, |
| "step": 1960 |
| }, |
| { |
| "epoch": 7.576923076923077, |
| "grad_norm": 4.657440662384033, |
| "learning_rate": 2.9807880209922288e-05, |
| "loss": 0.2464, |
| "step": 1970 |
| }, |
| { |
| "epoch": 7.615384615384615, |
| "grad_norm": 4.40252685546875, |
| "learning_rate": 2.9803903135282518e-05, |
| "loss": 0.245, |
| "step": 1980 |
| }, |
| { |
| "epoch": 7.653846153846154, |
| "grad_norm": 4.782217025756836, |
| "learning_rate": 2.9799885587336862e-05, |
| "loss": 0.2325, |
| "step": 1990 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 5.208028316497803, |
| "learning_rate": 2.9795827577069145e-05, |
| "loss": 0.2242, |
| "step": 2000 |
| }, |
| { |
| "epoch": 7.730769230769231, |
| "grad_norm": 4.525110244750977, |
| "learning_rate": 2.9791729115573808e-05, |
| "loss": 0.2211, |
| "step": 2010 |
| }, |
| { |
| "epoch": 7.769230769230769, |
| "grad_norm": 5.336142063140869, |
| "learning_rate": 2.9787590214055887e-05, |
| "loss": 0.2227, |
| "step": 2020 |
| }, |
| { |
| "epoch": 7.8076923076923075, |
| "grad_norm": 5.140171527862549, |
| "learning_rate": 2.9783410883830983e-05, |
| "loss": 0.2235, |
| "step": 2030 |
| }, |
| { |
| "epoch": 7.846153846153846, |
| "grad_norm": 5.8251142501831055, |
| "learning_rate": 2.9779191136325233e-05, |
| "loss": 0.2256, |
| "step": 2040 |
| }, |
| { |
| "epoch": 7.884615384615385, |
| "grad_norm": 4.06174373626709, |
| "learning_rate": 2.977493098307525e-05, |
| "loss": 0.2101, |
| "step": 2050 |
| }, |
| { |
| "epoch": 7.923076923076923, |
| "grad_norm": 5.9502739906311035, |
| "learning_rate": 2.9770630435728142e-05, |
| "loss": 0.1998, |
| "step": 2060 |
| }, |
| { |
| "epoch": 7.961538461538462, |
| "grad_norm": 5.872840881347656, |
| "learning_rate": 2.976628950604144e-05, |
| "loss": 0.2011, |
| "step": 2070 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 5.389374256134033, |
| "learning_rate": 2.9761908205883073e-05, |
| "loss": 0.1917, |
| "step": 2080 |
| }, |
| { |
| "epoch": 8.038461538461538, |
| "grad_norm": 5.9729905128479, |
| "learning_rate": 2.9757486547231357e-05, |
| "loss": 0.1891, |
| "step": 2090 |
| }, |
| { |
| "epoch": 8.076923076923077, |
| "grad_norm": 4.516313076019287, |
| "learning_rate": 2.9753024542174934e-05, |
| "loss": 0.1779, |
| "step": 2100 |
| }, |
| { |
| "epoch": 8.115384615384615, |
| "grad_norm": 5.897557735443115, |
| "learning_rate": 2.9748522202912755e-05, |
| "loss": 0.1768, |
| "step": 2110 |
| }, |
| { |
| "epoch": 8.153846153846153, |
| "grad_norm": 5.554835796356201, |
| "learning_rate": 2.974397954175404e-05, |
| "loss": 0.1748, |
| "step": 2120 |
| }, |
| { |
| "epoch": 8.192307692307692, |
| "grad_norm": 4.796779632568359, |
| "learning_rate": 2.973939657111826e-05, |
| "loss": 0.1797, |
| "step": 2130 |
| }, |
| { |
| "epoch": 8.23076923076923, |
| "grad_norm": 4.482677936553955, |
| "learning_rate": 2.9734773303535078e-05, |
| "loss": 0.1756, |
| "step": 2140 |
| }, |
| { |
| "epoch": 8.26923076923077, |
| "grad_norm": 5.248786449432373, |
| "learning_rate": 2.9730109751644325e-05, |
| "loss": 0.1696, |
| "step": 2150 |
| }, |
| { |
| "epoch": 8.307692307692308, |
| "grad_norm": 6.799733638763428, |
| "learning_rate": 2.9725405928195985e-05, |
| "loss": 0.1645, |
| "step": 2160 |
| }, |
| { |
| "epoch": 8.346153846153847, |
| "grad_norm": 7.030920028686523, |
| "learning_rate": 2.9720661846050123e-05, |
| "loss": 0.1504, |
| "step": 2170 |
| }, |
| { |
| "epoch": 8.384615384615385, |
| "grad_norm": 4.81633996963501, |
| "learning_rate": 2.971587751817688e-05, |
| "loss": 0.1605, |
| "step": 2180 |
| }, |
| { |
| "epoch": 8.423076923076923, |
| "grad_norm": 4.238231658935547, |
| "learning_rate": 2.9711052957656425e-05, |
| "loss": 0.1455, |
| "step": 2190 |
| }, |
| { |
| "epoch": 8.461538461538462, |
| "grad_norm": 4.765488147735596, |
| "learning_rate": 2.9706188177678924e-05, |
| "loss": 0.141, |
| "step": 2200 |
| }, |
| { |
| "epoch": 8.5, |
| "grad_norm": 5.133832931518555, |
| "learning_rate": 2.97012831915445e-05, |
| "loss": 0.1367, |
| "step": 2210 |
| }, |
| { |
| "epoch": 8.538461538461538, |
| "grad_norm": 6.479177951812744, |
| "learning_rate": 2.96963380126632e-05, |
| "loss": 0.1356, |
| "step": 2220 |
| }, |
| { |
| "epoch": 8.576923076923077, |
| "grad_norm": 5.76138162612915, |
| "learning_rate": 2.9691352654554953e-05, |
| "loss": 0.1367, |
| "step": 2230 |
| }, |
| { |
| "epoch": 8.615384615384615, |
| "grad_norm": 4.94566011428833, |
| "learning_rate": 2.9686327130849536e-05, |
| "loss": 0.127, |
| "step": 2240 |
| }, |
| { |
| "epoch": 8.653846153846153, |
| "grad_norm": 4.86747407913208, |
| "learning_rate": 2.9681261455286538e-05, |
| "loss": 0.134, |
| "step": 2250 |
| }, |
| { |
| "epoch": 8.692307692307692, |
| "grad_norm": 4.12553071975708, |
| "learning_rate": 2.9676155641715318e-05, |
| "loss": 0.1293, |
| "step": 2260 |
| }, |
| { |
| "epoch": 8.73076923076923, |
| "grad_norm": 5.830256462097168, |
| "learning_rate": 2.9671009704094988e-05, |
| "loss": 0.1184, |
| "step": 2270 |
| }, |
| { |
| "epoch": 8.76923076923077, |
| "grad_norm": 4.664676666259766, |
| "learning_rate": 2.9665823656494335e-05, |
| "loss": 0.1248, |
| "step": 2280 |
| }, |
| { |
| "epoch": 8.807692307692308, |
| "grad_norm": 3.934089183807373, |
| "learning_rate": 2.9660597513091824e-05, |
| "loss": 0.1141, |
| "step": 2290 |
| }, |
| { |
| "epoch": 8.846153846153847, |
| "grad_norm": 3.1599950790405273, |
| "learning_rate": 2.965533128817552e-05, |
| "loss": 0.1055, |
| "step": 2300 |
| }, |
| { |
| "epoch": 8.884615384615385, |
| "grad_norm": 4.693429470062256, |
| "learning_rate": 2.9650024996143084e-05, |
| "loss": 0.1189, |
| "step": 2310 |
| }, |
| { |
| "epoch": 8.923076923076923, |
| "grad_norm": 4.345015048980713, |
| "learning_rate": 2.964467865150172e-05, |
| "loss": 0.1011, |
| "step": 2320 |
| }, |
| { |
| "epoch": 8.961538461538462, |
| "grad_norm": 4.986255645751953, |
| "learning_rate": 2.9639292268868133e-05, |
| "loss": 0.1146, |
| "step": 2330 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 4.554662227630615, |
| "learning_rate": 2.9633865862968478e-05, |
| "loss": 0.1019, |
| "step": 2340 |
| }, |
| { |
| "epoch": 9.038461538461538, |
| "grad_norm": 4.482904434204102, |
| "learning_rate": 2.9628399448638352e-05, |
| "loss": 0.1091, |
| "step": 2350 |
| }, |
| { |
| "epoch": 9.076923076923077, |
| "grad_norm": 5.466559410095215, |
| "learning_rate": 2.9622893040822714e-05, |
| "loss": 0.0983, |
| "step": 2360 |
| }, |
| { |
| "epoch": 9.115384615384615, |
| "grad_norm": 5.750704765319824, |
| "learning_rate": 2.9617346654575875e-05, |
| "loss": 0.0986, |
| "step": 2370 |
| }, |
| { |
| "epoch": 9.153846153846153, |
| "grad_norm": 4.733994483947754, |
| "learning_rate": 2.9611760305061447e-05, |
| "loss": 0.0926, |
| "step": 2380 |
| }, |
| { |
| "epoch": 9.192307692307692, |
| "grad_norm": 4.952576637268066, |
| "learning_rate": 2.9606134007552292e-05, |
| "loss": 0.1036, |
| "step": 2390 |
| }, |
| { |
| "epoch": 9.23076923076923, |
| "grad_norm": 5.682345390319824, |
| "learning_rate": 2.9600467777430497e-05, |
| "loss": 0.0992, |
| "step": 2400 |
| }, |
| { |
| "epoch": 9.26923076923077, |
| "grad_norm": 4.997755527496338, |
| "learning_rate": 2.9594761630187312e-05, |
| "loss": 0.0886, |
| "step": 2410 |
| }, |
| { |
| "epoch": 9.307692307692308, |
| "grad_norm": 4.387738227844238, |
| "learning_rate": 2.9589015581423132e-05, |
| "loss": 0.0907, |
| "step": 2420 |
| }, |
| { |
| "epoch": 9.346153846153847, |
| "grad_norm": 4.436047077178955, |
| "learning_rate": 2.958322964684743e-05, |
| "loss": 0.0875, |
| "step": 2430 |
| }, |
| { |
| "epoch": 9.384615384615385, |
| "grad_norm": 4.745996952056885, |
| "learning_rate": 2.9577403842278735e-05, |
| "loss": 0.0949, |
| "step": 2440 |
| }, |
| { |
| "epoch": 9.423076923076923, |
| "grad_norm": 5.043275356292725, |
| "learning_rate": 2.957153818364457e-05, |
| "loss": 0.0904, |
| "step": 2450 |
| }, |
| { |
| "epoch": 9.461538461538462, |
| "grad_norm": 4.609716415405273, |
| "learning_rate": 2.9565632686981428e-05, |
| "loss": 0.0989, |
| "step": 2460 |
| }, |
| { |
| "epoch": 9.5, |
| "grad_norm": 4.066394329071045, |
| "learning_rate": 2.9559687368434702e-05, |
| "loss": 0.0872, |
| "step": 2470 |
| }, |
| { |
| "epoch": 9.538461538461538, |
| "grad_norm": 4.978208541870117, |
| "learning_rate": 2.9553702244258674e-05, |
| "loss": 0.0874, |
| "step": 2480 |
| }, |
| { |
| "epoch": 9.576923076923077, |
| "grad_norm": 4.116719722747803, |
| "learning_rate": 2.954767733081644e-05, |
| "loss": 0.0802, |
| "step": 2490 |
| }, |
| { |
| "epoch": 9.615384615384615, |
| "grad_norm": 4.07297420501709, |
| "learning_rate": 2.9541612644579887e-05, |
| "loss": 0.0899, |
| "step": 2500 |
| }, |
| { |
| "epoch": 9.653846153846153, |
| "grad_norm": 3.5808463096618652, |
| "learning_rate": 2.9535508202129634e-05, |
| "loss": 0.0858, |
| "step": 2510 |
| }, |
| { |
| "epoch": 9.692307692307692, |
| "grad_norm": 5.122105598449707, |
| "learning_rate": 2.9529364020154994e-05, |
| "loss": 0.08, |
| "step": 2520 |
| }, |
| { |
| "epoch": 9.73076923076923, |
| "grad_norm": 3.463164806365967, |
| "learning_rate": 2.9523180115453922e-05, |
| "loss": 0.0788, |
| "step": 2530 |
| }, |
| { |
| "epoch": 9.76923076923077, |
| "grad_norm": 4.045137405395508, |
| "learning_rate": 2.9516956504932984e-05, |
| "loss": 0.0883, |
| "step": 2540 |
| }, |
| { |
| "epoch": 9.807692307692308, |
| "grad_norm": 3.9384825229644775, |
| "learning_rate": 2.9510693205607286e-05, |
| "loss": 0.0952, |
| "step": 2550 |
| }, |
| { |
| "epoch": 9.846153846153847, |
| "grad_norm": 3.5259459018707275, |
| "learning_rate": 2.9504390234600456e-05, |
| "loss": 0.0862, |
| "step": 2560 |
| }, |
| { |
| "epoch": 9.884615384615385, |
| "grad_norm": 3.8916735649108887, |
| "learning_rate": 2.9498047609144577e-05, |
| "loss": 0.082, |
| "step": 2570 |
| }, |
| { |
| "epoch": 9.923076923076923, |
| "grad_norm": 4.234613418579102, |
| "learning_rate": 2.9491665346580134e-05, |
| "loss": 0.0844, |
| "step": 2580 |
| }, |
| { |
| "epoch": 9.961538461538462, |
| "grad_norm": 3.3096225261688232, |
| "learning_rate": 2.9485243464356e-05, |
| "loss": 0.0835, |
| "step": 2590 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 3.675506353378296, |
| "learning_rate": 2.9478781980029352e-05, |
| "loss": 0.0798, |
| "step": 2600 |
| }, |
| { |
| "epoch": 10.038461538461538, |
| "grad_norm": 3.9946699142456055, |
| "learning_rate": 2.9472280911265642e-05, |
| "loss": 0.0764, |
| "step": 2610 |
| }, |
| { |
| "epoch": 10.076923076923077, |
| "grad_norm": 3.801903247833252, |
| "learning_rate": 2.9465740275838543e-05, |
| "loss": 0.0794, |
| "step": 2620 |
| }, |
| { |
| "epoch": 10.115384615384615, |
| "grad_norm": 4.382718086242676, |
| "learning_rate": 2.94591600916299e-05, |
| "loss": 0.0768, |
| "step": 2630 |
| }, |
| { |
| "epoch": 10.153846153846153, |
| "grad_norm": 4.071086406707764, |
| "learning_rate": 2.9452540376629692e-05, |
| "loss": 0.0722, |
| "step": 2640 |
| }, |
| { |
| "epoch": 10.192307692307692, |
| "grad_norm": 3.6516573429107666, |
| "learning_rate": 2.944588114893596e-05, |
| "loss": 0.0833, |
| "step": 2650 |
| }, |
| { |
| "epoch": 10.23076923076923, |
| "grad_norm": 3.51243257522583, |
| "learning_rate": 2.9439182426754784e-05, |
| "loss": 0.0891, |
| "step": 2660 |
| }, |
| { |
| "epoch": 10.26923076923077, |
| "grad_norm": 4.3780741691589355, |
| "learning_rate": 2.9432444228400208e-05, |
| "loss": 0.0781, |
| "step": 2670 |
| }, |
| { |
| "epoch": 10.307692307692308, |
| "grad_norm": 3.2519776821136475, |
| "learning_rate": 2.9425666572294218e-05, |
| "loss": 0.0808, |
| "step": 2680 |
| }, |
| { |
| "epoch": 10.346153846153847, |
| "grad_norm": 4.084770679473877, |
| "learning_rate": 2.941884947696666e-05, |
| "loss": 0.0759, |
| "step": 2690 |
| }, |
| { |
| "epoch": 10.384615384615385, |
| "grad_norm": 3.3953628540039062, |
| "learning_rate": 2.9411992961055214e-05, |
| "loss": 0.084, |
| "step": 2700 |
| }, |
| { |
| "epoch": 10.423076923076923, |
| "grad_norm": 3.6471056938171387, |
| "learning_rate": 2.9405097043305334e-05, |
| "loss": 0.0793, |
| "step": 2710 |
| }, |
| { |
| "epoch": 10.461538461538462, |
| "grad_norm": 3.899949789047241, |
| "learning_rate": 2.9398161742570196e-05, |
| "loss": 0.0774, |
| "step": 2720 |
| }, |
| { |
| "epoch": 10.5, |
| "grad_norm": 3.219599723815918, |
| "learning_rate": 2.9391187077810644e-05, |
| "loss": 0.081, |
| "step": 2730 |
| }, |
| { |
| "epoch": 10.538461538461538, |
| "grad_norm": 3.684591293334961, |
| "learning_rate": 2.9384173068095145e-05, |
| "loss": 0.0774, |
| "step": 2740 |
| }, |
| { |
| "epoch": 10.576923076923077, |
| "grad_norm": 3.664158582687378, |
| "learning_rate": 2.937711973259974e-05, |
| "loss": 0.0687, |
| "step": 2750 |
| }, |
| { |
| "epoch": 10.615384615384615, |
| "grad_norm": 3.140185832977295, |
| "learning_rate": 2.9370027090607974e-05, |
| "loss": 0.0741, |
| "step": 2760 |
| }, |
| { |
| "epoch": 10.653846153846153, |
| "grad_norm": 3.908612012863159, |
| "learning_rate": 2.936289516151086e-05, |
| "loss": 0.0716, |
| "step": 2770 |
| }, |
| { |
| "epoch": 10.692307692307692, |
| "grad_norm": 3.865504503250122, |
| "learning_rate": 2.935572396480682e-05, |
| "loss": 0.0745, |
| "step": 2780 |
| }, |
| { |
| "epoch": 10.73076923076923, |
| "grad_norm": 3.879666566848755, |
| "learning_rate": 2.9348513520101636e-05, |
| "loss": 0.0694, |
| "step": 2790 |
| }, |
| { |
| "epoch": 10.76923076923077, |
| "grad_norm": 3.2028911113739014, |
| "learning_rate": 2.9341263847108383e-05, |
| "loss": 0.0739, |
| "step": 2800 |
| }, |
| { |
| "epoch": 10.807692307692308, |
| "grad_norm": 3.3011929988861084, |
| "learning_rate": 2.933397496564739e-05, |
| "loss": 0.0743, |
| "step": 2810 |
| }, |
| { |
| "epoch": 10.846153846153847, |
| "grad_norm": 3.6709179878234863, |
| "learning_rate": 2.9326646895646178e-05, |
| "loss": 0.0675, |
| "step": 2820 |
| }, |
| { |
| "epoch": 10.884615384615385, |
| "grad_norm": 2.8137264251708984, |
| "learning_rate": 2.931927965713942e-05, |
| "loss": 0.0729, |
| "step": 2830 |
| }, |
| { |
| "epoch": 10.923076923076923, |
| "grad_norm": 4.2008819580078125, |
| "learning_rate": 2.931187327026886e-05, |
| "loss": 0.0702, |
| "step": 2840 |
| }, |
| { |
| "epoch": 10.961538461538462, |
| "grad_norm": 3.371169090270996, |
| "learning_rate": 2.9304427755283278e-05, |
| "loss": 0.0728, |
| "step": 2850 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 3.6277854442596436, |
| "learning_rate": 2.9296943132538425e-05, |
| "loss": 0.0681, |
| "step": 2860 |
| }, |
| { |
| "epoch": 11.038461538461538, |
| "grad_norm": 3.383019208908081, |
| "learning_rate": 2.928941942249697e-05, |
| "loss": 0.0692, |
| "step": 2870 |
| }, |
| { |
| "epoch": 11.076923076923077, |
| "grad_norm": 4.068559169769287, |
| "learning_rate": 2.928185664572846e-05, |
| "loss": 0.07, |
| "step": 2880 |
| }, |
| { |
| "epoch": 11.115384615384615, |
| "grad_norm": 3.2356979846954346, |
| "learning_rate": 2.927425482290923e-05, |
| "loss": 0.072, |
| "step": 2890 |
| }, |
| { |
| "epoch": 11.153846153846153, |
| "grad_norm": 3.1939711570739746, |
| "learning_rate": 2.926661397482238e-05, |
| "loss": 0.0739, |
| "step": 2900 |
| }, |
| { |
| "epoch": 11.192307692307692, |
| "grad_norm": 3.55348539352417, |
| "learning_rate": 2.9258934122357685e-05, |
| "loss": 0.091, |
| "step": 2910 |
| }, |
| { |
| "epoch": 11.23076923076923, |
| "grad_norm": 3.496305227279663, |
| "learning_rate": 2.9251215286511574e-05, |
| "loss": 0.0699, |
| "step": 2920 |
| }, |
| { |
| "epoch": 11.26923076923077, |
| "grad_norm": 3.339102268218994, |
| "learning_rate": 2.924345748838706e-05, |
| "loss": 0.0688, |
| "step": 2930 |
| }, |
| { |
| "epoch": 11.307692307692308, |
| "grad_norm": 3.273207426071167, |
| "learning_rate": 2.923566074919365e-05, |
| "loss": 0.0687, |
| "step": 2940 |
| }, |
| { |
| "epoch": 11.346153846153847, |
| "grad_norm": 3.1789557933807373, |
| "learning_rate": 2.9227825090247346e-05, |
| "loss": 0.0659, |
| "step": 2950 |
| }, |
| { |
| "epoch": 11.384615384615385, |
| "grad_norm": 3.100209951400757, |
| "learning_rate": 2.9219950532970526e-05, |
| "loss": 0.0783, |
| "step": 2960 |
| }, |
| { |
| "epoch": 11.423076923076923, |
| "grad_norm": 3.196394920349121, |
| "learning_rate": 2.921203709889194e-05, |
| "loss": 0.0751, |
| "step": 2970 |
| }, |
| { |
| "epoch": 11.461538461538462, |
| "grad_norm": 3.14573073387146, |
| "learning_rate": 2.9204084809646607e-05, |
| "loss": 0.0663, |
| "step": 2980 |
| }, |
| { |
| "epoch": 11.5, |
| "grad_norm": 3.1672585010528564, |
| "learning_rate": 2.9196093686975793e-05, |
| "loss": 0.0644, |
| "step": 2990 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "grad_norm": 3.119452953338623, |
| "learning_rate": 2.918806375272691e-05, |
| "loss": 0.0752, |
| "step": 3000 |
| }, |
| { |
| "epoch": 11.576923076923077, |
| "grad_norm": 3.139831781387329, |
| "learning_rate": 2.9179995028853498e-05, |
| "loss": 0.0683, |
| "step": 3010 |
| }, |
| { |
| "epoch": 11.615384615384615, |
| "grad_norm": 3.1740639209747314, |
| "learning_rate": 2.917188753741514e-05, |
| "loss": 0.0772, |
| "step": 3020 |
| }, |
| { |
| "epoch": 11.653846153846153, |
| "grad_norm": 3.1087207794189453, |
| "learning_rate": 2.916374130057741e-05, |
| "loss": 0.0678, |
| "step": 3030 |
| }, |
| { |
| "epoch": 11.692307692307692, |
| "grad_norm": 3.149672269821167, |
| "learning_rate": 2.91555563406118e-05, |
| "loss": 0.0621, |
| "step": 3040 |
| }, |
| { |
| "epoch": 11.73076923076923, |
| "grad_norm": 3.219829559326172, |
| "learning_rate": 2.9147332679895683e-05, |
| "loss": 0.0717, |
| "step": 3050 |
| }, |
| { |
| "epoch": 11.76923076923077, |
| "grad_norm": 3.3537230491638184, |
| "learning_rate": 2.9139070340912236e-05, |
| "loss": 0.0705, |
| "step": 3060 |
| }, |
| { |
| "epoch": 11.807692307692308, |
| "grad_norm": 2.787032127380371, |
| "learning_rate": 2.9130769346250376e-05, |
| "loss": 0.0776, |
| "step": 3070 |
| }, |
| { |
| "epoch": 11.846153846153847, |
| "grad_norm": 3.3528003692626953, |
| "learning_rate": 2.9122429718604704e-05, |
| "loss": 0.07, |
| "step": 3080 |
| }, |
| { |
| "epoch": 11.884615384615385, |
| "grad_norm": 3.332606792449951, |
| "learning_rate": 2.911405148077545e-05, |
| "loss": 0.0687, |
| "step": 3090 |
| }, |
| { |
| "epoch": 11.923076923076923, |
| "grad_norm": 2.7732484340667725, |
| "learning_rate": 2.9105634655668385e-05, |
| "loss": 0.0714, |
| "step": 3100 |
| }, |
| { |
| "epoch": 11.961538461538462, |
| "grad_norm": 4.068169593811035, |
| "learning_rate": 2.9097179266294794e-05, |
| "loss": 0.0766, |
| "step": 3110 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 3.828740358352661, |
| "learning_rate": 2.9088685335771396e-05, |
| "loss": 0.0625, |
| "step": 3120 |
| }, |
| { |
| "epoch": 12.038461538461538, |
| "grad_norm": 3.0695571899414062, |
| "learning_rate": 2.9080152887320255e-05, |
| "loss": 0.0716, |
| "step": 3130 |
| }, |
| { |
| "epoch": 12.076923076923077, |
| "grad_norm": 3.0535635948181152, |
| "learning_rate": 2.9071581944268778e-05, |
| "loss": 0.0696, |
| "step": 3140 |
| }, |
| { |
| "epoch": 12.115384615384615, |
| "grad_norm": 2.860771894454956, |
| "learning_rate": 2.906297253004958e-05, |
| "loss": 0.0691, |
| "step": 3150 |
| }, |
| { |
| "epoch": 12.153846153846153, |
| "grad_norm": 2.567913293838501, |
| "learning_rate": 2.9054324668200483e-05, |
| "loss": 0.0666, |
| "step": 3160 |
| }, |
| { |
| "epoch": 12.192307692307692, |
| "grad_norm": 3.2005672454833984, |
| "learning_rate": 2.9045638382364404e-05, |
| "loss": 0.0699, |
| "step": 3170 |
| }, |
| { |
| "epoch": 12.23076923076923, |
| "grad_norm": 3.3413379192352295, |
| "learning_rate": 2.9036913696289318e-05, |
| "loss": 0.0658, |
| "step": 3180 |
| }, |
| { |
| "epoch": 12.26923076923077, |
| "grad_norm": 3.1574761867523193, |
| "learning_rate": 2.9028150633828186e-05, |
| "loss": 0.0673, |
| "step": 3190 |
| }, |
| { |
| "epoch": 12.307692307692308, |
| "grad_norm": 3.2442660331726074, |
| "learning_rate": 2.9019349218938887e-05, |
| "loss": 0.0769, |
| "step": 3200 |
| }, |
| { |
| "epoch": 12.346153846153847, |
| "grad_norm": 2.781846046447754, |
| "learning_rate": 2.9010509475684146e-05, |
| "loss": 0.068, |
| "step": 3210 |
| }, |
| { |
| "epoch": 12.384615384615385, |
| "grad_norm": 3.350027084350586, |
| "learning_rate": 2.900163142823149e-05, |
| "loss": 0.0619, |
| "step": 3220 |
| }, |
| { |
| "epoch": 12.423076923076923, |
| "grad_norm": 3.0505483150482178, |
| "learning_rate": 2.8992715100853166e-05, |
| "loss": 0.0669, |
| "step": 3230 |
| }, |
| { |
| "epoch": 12.461538461538462, |
| "grad_norm": 3.5292305946350098, |
| "learning_rate": 2.898376051792606e-05, |
| "loss": 0.0591, |
| "step": 3240 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 3.194944143295288, |
| "learning_rate": 2.897476770393167e-05, |
| "loss": 0.0574, |
| "step": 3250 |
| }, |
| { |
| "epoch": 12.538461538461538, |
| "grad_norm": 3.1942076683044434, |
| "learning_rate": 2.8965736683456e-05, |
| "loss": 0.0752, |
| "step": 3260 |
| }, |
| { |
| "epoch": 12.576923076923077, |
| "grad_norm": 2.857682943344116, |
| "learning_rate": 2.895666748118952e-05, |
| "loss": 0.0666, |
| "step": 3270 |
| }, |
| { |
| "epoch": 12.615384615384615, |
| "grad_norm": 3.969834566116333, |
| "learning_rate": 2.8947560121927077e-05, |
| "loss": 0.0607, |
| "step": 3280 |
| }, |
| { |
| "epoch": 12.653846153846153, |
| "grad_norm": 3.6644859313964844, |
| "learning_rate": 2.8938414630567852e-05, |
| "loss": 0.066, |
| "step": 3290 |
| }, |
| { |
| "epoch": 12.692307692307692, |
| "grad_norm": 2.7282423973083496, |
| "learning_rate": 2.892923103211526e-05, |
| "loss": 0.0596, |
| "step": 3300 |
| }, |
| { |
| "epoch": 12.73076923076923, |
| "grad_norm": 3.0733189582824707, |
| "learning_rate": 2.892000935167691e-05, |
| "loss": 0.0583, |
| "step": 3310 |
| }, |
| { |
| "epoch": 12.76923076923077, |
| "grad_norm": 3.726287364959717, |
| "learning_rate": 2.8910749614464536e-05, |
| "loss": 0.0594, |
| "step": 3320 |
| }, |
| { |
| "epoch": 12.807692307692308, |
| "grad_norm": 3.276707887649536, |
| "learning_rate": 2.890145184579389e-05, |
| "loss": 0.0665, |
| "step": 3330 |
| }, |
| { |
| "epoch": 12.846153846153847, |
| "grad_norm": 3.513742208480835, |
| "learning_rate": 2.8892116071084727e-05, |
| "loss": 0.0635, |
| "step": 3340 |
| }, |
| { |
| "epoch": 12.884615384615385, |
| "grad_norm": 3.981471300125122, |
| "learning_rate": 2.8882742315860692e-05, |
| "loss": 0.0683, |
| "step": 3350 |
| }, |
| { |
| "epoch": 12.923076923076923, |
| "grad_norm": 3.698300838470459, |
| "learning_rate": 2.8873330605749275e-05, |
| "loss": 0.0624, |
| "step": 3360 |
| }, |
| { |
| "epoch": 12.961538461538462, |
| "grad_norm": 2.87587571144104, |
| "learning_rate": 2.886388096648174e-05, |
| "loss": 0.0752, |
| "step": 3370 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 3.0200350284576416, |
| "learning_rate": 2.8854393423893024e-05, |
| "loss": 0.0653, |
| "step": 3380 |
| }, |
| { |
| "epoch": 13.038461538461538, |
| "grad_norm": 2.7889177799224854, |
| "learning_rate": 2.8844868003921723e-05, |
| "loss": 0.0678, |
| "step": 3390 |
| }, |
| { |
| "epoch": 13.076923076923077, |
| "grad_norm": 3.0269007682800293, |
| "learning_rate": 2.8835304732609962e-05, |
| "loss": 0.0713, |
| "step": 3400 |
| }, |
| { |
| "epoch": 13.115384615384615, |
| "grad_norm": 2.714719295501709, |
| "learning_rate": 2.882570363610336e-05, |
| "loss": 0.0641, |
| "step": 3410 |
| }, |
| { |
| "epoch": 13.153846153846153, |
| "grad_norm": 3.500612497329712, |
| "learning_rate": 2.8816064740650954e-05, |
| "loss": 0.0676, |
| "step": 3420 |
| }, |
| { |
| "epoch": 13.192307692307692, |
| "grad_norm": 3.845914840698242, |
| "learning_rate": 2.880638807260511e-05, |
| "loss": 0.0609, |
| "step": 3430 |
| }, |
| { |
| "epoch": 13.23076923076923, |
| "grad_norm": 2.972393274307251, |
| "learning_rate": 2.8796673658421472e-05, |
| "loss": 0.0659, |
| "step": 3440 |
| }, |
| { |
| "epoch": 13.26923076923077, |
| "grad_norm": 2.625223159790039, |
| "learning_rate": 2.8786921524658877e-05, |
| "loss": 0.0573, |
| "step": 3450 |
| }, |
| { |
| "epoch": 13.307692307692308, |
| "grad_norm": 3.5500714778900146, |
| "learning_rate": 2.8777131697979283e-05, |
| "loss": 0.0694, |
| "step": 3460 |
| }, |
| { |
| "epoch": 13.346153846153847, |
| "grad_norm": 2.848158121109009, |
| "learning_rate": 2.876730420514771e-05, |
| "loss": 0.0574, |
| "step": 3470 |
| }, |
| { |
| "epoch": 13.384615384615385, |
| "grad_norm": 3.2372846603393555, |
| "learning_rate": 2.8757439073032136e-05, |
| "loss": 0.0638, |
| "step": 3480 |
| }, |
| { |
| "epoch": 13.423076923076923, |
| "grad_norm": 2.9966297149658203, |
| "learning_rate": 2.874753632860347e-05, |
| "loss": 0.0636, |
| "step": 3490 |
| }, |
| { |
| "epoch": 13.461538461538462, |
| "grad_norm": 2.996350049972534, |
| "learning_rate": 2.873759599893543e-05, |
| "loss": 0.0721, |
| "step": 3500 |
| }, |
| { |
| "epoch": 13.5, |
| "grad_norm": 2.873504400253296, |
| "learning_rate": 2.8727618111204494e-05, |
| "loss": 0.0656, |
| "step": 3510 |
| }, |
| { |
| "epoch": 13.538461538461538, |
| "grad_norm": 2.916673183441162, |
| "learning_rate": 2.871760269268983e-05, |
| "loss": 0.0594, |
| "step": 3520 |
| }, |
| { |
| "epoch": 13.576923076923077, |
| "grad_norm": 2.4159295558929443, |
| "learning_rate": 2.870754977077321e-05, |
| "loss": 0.061, |
| "step": 3530 |
| }, |
| { |
| "epoch": 13.615384615384615, |
| "grad_norm": 2.685166597366333, |
| "learning_rate": 2.869745937293894e-05, |
| "loss": 0.0671, |
| "step": 3540 |
| }, |
| { |
| "epoch": 13.653846153846153, |
| "grad_norm": 2.81882905960083, |
| "learning_rate": 2.8687331526773775e-05, |
| "loss": 0.0645, |
| "step": 3550 |
| }, |
| { |
| "epoch": 13.692307692307692, |
| "grad_norm": 2.4796597957611084, |
| "learning_rate": 2.867716625996687e-05, |
| "loss": 0.0647, |
| "step": 3560 |
| }, |
| { |
| "epoch": 13.73076923076923, |
| "grad_norm": 2.852337598800659, |
| "learning_rate": 2.8666963600309672e-05, |
| "loss": 0.0701, |
| "step": 3570 |
| }, |
| { |
| "epoch": 13.76923076923077, |
| "grad_norm": 2.938345432281494, |
| "learning_rate": 2.8656723575695862e-05, |
| "loss": 0.0616, |
| "step": 3580 |
| }, |
| { |
| "epoch": 13.807692307692308, |
| "grad_norm": 2.8633902072906494, |
| "learning_rate": 2.8646446214121276e-05, |
| "loss": 0.0586, |
| "step": 3590 |
| }, |
| { |
| "epoch": 13.846153846153847, |
| "grad_norm": 2.704404830932617, |
| "learning_rate": 2.8636131543683828e-05, |
| "loss": 0.0654, |
| "step": 3600 |
| }, |
| { |
| "epoch": 13.884615384615385, |
| "grad_norm": 2.430389642715454, |
| "learning_rate": 2.8625779592583436e-05, |
| "loss": 0.0563, |
| "step": 3610 |
| }, |
| { |
| "epoch": 13.923076923076923, |
| "grad_norm": 2.908712387084961, |
| "learning_rate": 2.861539038912193e-05, |
| "loss": 0.061, |
| "step": 3620 |
| }, |
| { |
| "epoch": 13.961538461538462, |
| "grad_norm": 2.6836538314819336, |
| "learning_rate": 2.860496396170301e-05, |
| "loss": 0.0608, |
| "step": 3630 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 2.7360219955444336, |
| "learning_rate": 2.859450033883212e-05, |
| "loss": 0.0627, |
| "step": 3640 |
| }, |
| { |
| "epoch": 14.038461538461538, |
| "grad_norm": 3.1698520183563232, |
| "learning_rate": 2.8583999549116413e-05, |
| "loss": 0.0627, |
| "step": 3650 |
| }, |
| { |
| "epoch": 14.076923076923077, |
| "grad_norm": 2.8817367553710938, |
| "learning_rate": 2.857346162126464e-05, |
| "loss": 0.053, |
| "step": 3660 |
| }, |
| { |
| "epoch": 14.115384615384615, |
| "grad_norm": 3.117218017578125, |
| "learning_rate": 2.8562886584087092e-05, |
| "loss": 0.0634, |
| "step": 3670 |
| }, |
| { |
| "epoch": 14.153846153846153, |
| "grad_norm": 3.047295331954956, |
| "learning_rate": 2.8552274466495525e-05, |
| "loss": 0.0654, |
| "step": 3680 |
| }, |
| { |
| "epoch": 14.192307692307692, |
| "grad_norm": 2.8166685104370117, |
| "learning_rate": 2.8541625297503056e-05, |
| "loss": 0.0647, |
| "step": 3690 |
| }, |
| { |
| "epoch": 14.23076923076923, |
| "grad_norm": 2.7225589752197266, |
| "learning_rate": 2.8530939106224106e-05, |
| "loss": 0.0592, |
| "step": 3700 |
| }, |
| { |
| "epoch": 14.26923076923077, |
| "grad_norm": 3.0143346786499023, |
| "learning_rate": 2.8520215921874325e-05, |
| "loss": 0.0619, |
| "step": 3710 |
| }, |
| { |
| "epoch": 14.307692307692308, |
| "grad_norm": 3.057224988937378, |
| "learning_rate": 2.850945577377048e-05, |
| "loss": 0.0636, |
| "step": 3720 |
| }, |
| { |
| "epoch": 14.346153846153847, |
| "grad_norm": 2.7452213764190674, |
| "learning_rate": 2.8498658691330406e-05, |
| "loss": 0.0544, |
| "step": 3730 |
| }, |
| { |
| "epoch": 14.384615384615385, |
| "grad_norm": 2.056774377822876, |
| "learning_rate": 2.8487824704072913e-05, |
| "loss": 0.062, |
| "step": 3740 |
| }, |
| { |
| "epoch": 14.423076923076923, |
| "grad_norm": 3.2659449577331543, |
| "learning_rate": 2.8476953841617713e-05, |
| "loss": 0.0614, |
| "step": 3750 |
| }, |
| { |
| "epoch": 14.461538461538462, |
| "grad_norm": 2.6030311584472656, |
| "learning_rate": 2.846604613368532e-05, |
| "loss": 0.056, |
| "step": 3760 |
| }, |
| { |
| "epoch": 14.5, |
| "grad_norm": 2.8850746154785156, |
| "learning_rate": 2.8455101610097002e-05, |
| "loss": 0.0681, |
| "step": 3770 |
| }, |
| { |
| "epoch": 14.538461538461538, |
| "grad_norm": 3.431685209274292, |
| "learning_rate": 2.8444120300774666e-05, |
| "loss": 0.0614, |
| "step": 3780 |
| }, |
| { |
| "epoch": 14.576923076923077, |
| "grad_norm": 3.1018617153167725, |
| "learning_rate": 2.8433102235740788e-05, |
| "loss": 0.057, |
| "step": 3790 |
| }, |
| { |
| "epoch": 14.615384615384615, |
| "grad_norm": 3.772675037384033, |
| "learning_rate": 2.842204744511834e-05, |
| "loss": 0.0591, |
| "step": 3800 |
| }, |
| { |
| "epoch": 14.653846153846153, |
| "grad_norm": 2.6003100872039795, |
| "learning_rate": 2.8410955959130693e-05, |
| "loss": 0.0536, |
| "step": 3810 |
| }, |
| { |
| "epoch": 14.692307692307692, |
| "grad_norm": 2.9984867572784424, |
| "learning_rate": 2.8399827808101554e-05, |
| "loss": 0.0635, |
| "step": 3820 |
| }, |
| { |
| "epoch": 14.73076923076923, |
| "grad_norm": 2.4007630348205566, |
| "learning_rate": 2.8388663022454857e-05, |
| "loss": 0.0612, |
| "step": 3830 |
| }, |
| { |
| "epoch": 14.76923076923077, |
| "grad_norm": 2.7119133472442627, |
| "learning_rate": 2.83774616327147e-05, |
| "loss": 0.0575, |
| "step": 3840 |
| }, |
| { |
| "epoch": 14.807692307692308, |
| "grad_norm": 3.0233607292175293, |
| "learning_rate": 2.836622366950526e-05, |
| "loss": 0.0586, |
| "step": 3850 |
| }, |
| { |
| "epoch": 14.846153846153847, |
| "grad_norm": 2.880004405975342, |
| "learning_rate": 2.835494916355069e-05, |
| "loss": 0.0647, |
| "step": 3860 |
| }, |
| { |
| "epoch": 14.884615384615385, |
| "grad_norm": 3.2674670219421387, |
| "learning_rate": 2.8343638145675072e-05, |
| "loss": 0.0619, |
| "step": 3870 |
| }, |
| { |
| "epoch": 14.923076923076923, |
| "grad_norm": 2.571911573410034, |
| "learning_rate": 2.8332290646802282e-05, |
| "loss": 0.0588, |
| "step": 3880 |
| }, |
| { |
| "epoch": 14.961538461538462, |
| "grad_norm": 2.711045026779175, |
| "learning_rate": 2.8320906697955963e-05, |
| "loss": 0.0547, |
| "step": 3890 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 2.6947715282440186, |
| "learning_rate": 2.8309486330259385e-05, |
| "loss": 0.0614, |
| "step": 3900 |
| }, |
| { |
| "epoch": 15.038461538461538, |
| "grad_norm": 3.1563644409179688, |
| "learning_rate": 2.82980295749354e-05, |
| "loss": 0.0619, |
| "step": 3910 |
| }, |
| { |
| "epoch": 15.076923076923077, |
| "grad_norm": 2.481595993041992, |
| "learning_rate": 2.828653646330634e-05, |
| "loss": 0.0603, |
| "step": 3920 |
| }, |
| { |
| "epoch": 15.115384615384615, |
| "grad_norm": 3.0050806999206543, |
| "learning_rate": 2.8275007026793938e-05, |
| "loss": 0.0594, |
| "step": 3930 |
| }, |
| { |
| "epoch": 15.153846153846153, |
| "grad_norm": 3.2291088104248047, |
| "learning_rate": 2.826344129691923e-05, |
| "loss": 0.0632, |
| "step": 3940 |
| }, |
| { |
| "epoch": 15.192307692307692, |
| "grad_norm": 3.0251619815826416, |
| "learning_rate": 2.8251839305302478e-05, |
| "loss": 0.063, |
| "step": 3950 |
| }, |
| { |
| "epoch": 15.23076923076923, |
| "grad_norm": 2.865499258041382, |
| "learning_rate": 2.8240201083663088e-05, |
| "loss": 0.0574, |
| "step": 3960 |
| }, |
| { |
| "epoch": 15.26923076923077, |
| "grad_norm": 2.642956018447876, |
| "learning_rate": 2.8228526663819504e-05, |
| "loss": 0.06, |
| "step": 3970 |
| }, |
| { |
| "epoch": 15.307692307692308, |
| "grad_norm": 2.802671194076538, |
| "learning_rate": 2.8216816077689158e-05, |
| "loss": 0.0568, |
| "step": 3980 |
| }, |
| { |
| "epoch": 15.346153846153847, |
| "grad_norm": 2.3889827728271484, |
| "learning_rate": 2.8205069357288337e-05, |
| "loss": 0.0553, |
| "step": 3990 |
| }, |
| { |
| "epoch": 15.384615384615385, |
| "grad_norm": 2.5534961223602295, |
| "learning_rate": 2.8193286534732128e-05, |
| "loss": 0.0609, |
| "step": 4000 |
| }, |
| { |
| "epoch": 15.423076923076923, |
| "grad_norm": 2.4432270526885986, |
| "learning_rate": 2.8181467642234317e-05, |
| "loss": 0.0571, |
| "step": 4010 |
| }, |
| { |
| "epoch": 15.461538461538462, |
| "grad_norm": 2.605041980743408, |
| "learning_rate": 2.8169612712107306e-05, |
| "loss": 0.0554, |
| "step": 4020 |
| }, |
| { |
| "epoch": 15.5, |
| "grad_norm": 2.6760783195495605, |
| "learning_rate": 2.8157721776762017e-05, |
| "loss": 0.0631, |
| "step": 4030 |
| }, |
| { |
| "epoch": 15.538461538461538, |
| "grad_norm": 3.092106819152832, |
| "learning_rate": 2.814579486870782e-05, |
| "loss": 0.0608, |
| "step": 4040 |
| }, |
| { |
| "epoch": 15.576923076923077, |
| "grad_norm": 2.732588052749634, |
| "learning_rate": 2.813383202055242e-05, |
| "loss": 0.0552, |
| "step": 4050 |
| }, |
| { |
| "epoch": 15.615384615384615, |
| "grad_norm": 2.6859591007232666, |
| "learning_rate": 2.8121833265001792e-05, |
| "loss": 0.0654, |
| "step": 4060 |
| }, |
| { |
| "epoch": 15.653846153846153, |
| "grad_norm": 1.9384580850601196, |
| "learning_rate": 2.8109798634860072e-05, |
| "loss": 0.059, |
| "step": 4070 |
| }, |
| { |
| "epoch": 15.692307692307692, |
| "grad_norm": 2.3426663875579834, |
| "learning_rate": 2.8097728163029482e-05, |
| "loss": 0.0527, |
| "step": 4080 |
| }, |
| { |
| "epoch": 15.73076923076923, |
| "grad_norm": 2.785004138946533, |
| "learning_rate": 2.8085621882510233e-05, |
| "loss": 0.0568, |
| "step": 4090 |
| }, |
| { |
| "epoch": 15.76923076923077, |
| "grad_norm": 3.2040162086486816, |
| "learning_rate": 2.8073479826400425e-05, |
| "loss": 0.0622, |
| "step": 4100 |
| }, |
| { |
| "epoch": 15.807692307692308, |
| "grad_norm": 2.6994717121124268, |
| "learning_rate": 2.806130202789598e-05, |
| "loss": 0.0596, |
| "step": 4110 |
| }, |
| { |
| "epoch": 15.846153846153847, |
| "grad_norm": 2.955667018890381, |
| "learning_rate": 2.804908852029054e-05, |
| "loss": 0.0576, |
| "step": 4120 |
| }, |
| { |
| "epoch": 15.884615384615385, |
| "grad_norm": 2.229564905166626, |
| "learning_rate": 2.8036839336975367e-05, |
| "loss": 0.0567, |
| "step": 4130 |
| }, |
| { |
| "epoch": 15.923076923076923, |
| "grad_norm": 2.8583757877349854, |
| "learning_rate": 2.8024554511439253e-05, |
| "loss": 0.0588, |
| "step": 4140 |
| }, |
| { |
| "epoch": 15.961538461538462, |
| "grad_norm": 3.233686685562134, |
| "learning_rate": 2.801223407726844e-05, |
| "loss": 0.055, |
| "step": 4150 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 3.210627794265747, |
| "learning_rate": 2.7999878068146537e-05, |
| "loss": 0.0493, |
| "step": 4160 |
| }, |
| { |
| "epoch": 16.03846153846154, |
| "grad_norm": 2.979895830154419, |
| "learning_rate": 2.7987486517854396e-05, |
| "loss": 0.0604, |
| "step": 4170 |
| }, |
| { |
| "epoch": 16.076923076923077, |
| "grad_norm": 2.135582447052002, |
| "learning_rate": 2.7975059460270037e-05, |
| "loss": 0.0618, |
| "step": 4180 |
| }, |
| { |
| "epoch": 16.115384615384617, |
| "grad_norm": 2.363574743270874, |
| "learning_rate": 2.7962596929368566e-05, |
| "loss": 0.0581, |
| "step": 4190 |
| }, |
| { |
| "epoch": 16.153846153846153, |
| "grad_norm": 2.2562291622161865, |
| "learning_rate": 2.795009895922207e-05, |
| "loss": 0.0541, |
| "step": 4200 |
| }, |
| { |
| "epoch": 16.192307692307693, |
| "grad_norm": 2.4633421897888184, |
| "learning_rate": 2.7937565583999513e-05, |
| "loss": 0.0501, |
| "step": 4210 |
| }, |
| { |
| "epoch": 16.23076923076923, |
| "grad_norm": 2.5683891773223877, |
| "learning_rate": 2.792499683796667e-05, |
| "loss": 0.0537, |
| "step": 4220 |
| }, |
| { |
| "epoch": 16.26923076923077, |
| "grad_norm": 2.360839605331421, |
| "learning_rate": 2.791239275548601e-05, |
| "loss": 0.0478, |
| "step": 4230 |
| }, |
| { |
| "epoch": 16.307692307692307, |
| "grad_norm": 2.2146573066711426, |
| "learning_rate": 2.789975337101662e-05, |
| "loss": 0.0591, |
| "step": 4240 |
| }, |
| { |
| "epoch": 16.346153846153847, |
| "grad_norm": 2.4830048084259033, |
| "learning_rate": 2.788707871911409e-05, |
| "loss": 0.0597, |
| "step": 4250 |
| }, |
| { |
| "epoch": 16.384615384615383, |
| "grad_norm": 2.256363868713379, |
| "learning_rate": 2.7874368834430426e-05, |
| "loss": 0.0549, |
| "step": 4260 |
| }, |
| { |
| "epoch": 16.423076923076923, |
| "grad_norm": 2.8379452228546143, |
| "learning_rate": 2.7861623751713982e-05, |
| "loss": 0.0524, |
| "step": 4270 |
| }, |
| { |
| "epoch": 16.46153846153846, |
| "grad_norm": 2.0764126777648926, |
| "learning_rate": 2.7848843505809317e-05, |
| "loss": 0.0553, |
| "step": 4280 |
| }, |
| { |
| "epoch": 16.5, |
| "grad_norm": 2.321157217025757, |
| "learning_rate": 2.7836028131657142e-05, |
| "loss": 0.0512, |
| "step": 4290 |
| }, |
| { |
| "epoch": 16.53846153846154, |
| "grad_norm": 2.401200532913208, |
| "learning_rate": 2.7823177664294197e-05, |
| "loss": 0.0541, |
| "step": 4300 |
| }, |
| { |
| "epoch": 16.576923076923077, |
| "grad_norm": 2.6631157398223877, |
| "learning_rate": 2.7810292138853168e-05, |
| "loss": 0.0532, |
| "step": 4310 |
| }, |
| { |
| "epoch": 16.615384615384617, |
| "grad_norm": 2.2633821964263916, |
| "learning_rate": 2.779737159056259e-05, |
| "loss": 0.0536, |
| "step": 4320 |
| }, |
| { |
| "epoch": 16.653846153846153, |
| "grad_norm": 2.5215775966644287, |
| "learning_rate": 2.7784416054746753e-05, |
| "loss": 0.0526, |
| "step": 4330 |
| }, |
| { |
| "epoch": 16.692307692307693, |
| "grad_norm": 2.0003058910369873, |
| "learning_rate": 2.7771425566825593e-05, |
| "loss": 0.0531, |
| "step": 4340 |
| }, |
| { |
| "epoch": 16.73076923076923, |
| "grad_norm": 2.353093385696411, |
| "learning_rate": 2.7758400162314605e-05, |
| "loss": 0.0544, |
| "step": 4350 |
| }, |
| { |
| "epoch": 16.76923076923077, |
| "grad_norm": 2.478005886077881, |
| "learning_rate": 2.7745339876824756e-05, |
| "loss": 0.0503, |
| "step": 4360 |
| }, |
| { |
| "epoch": 16.807692307692307, |
| "grad_norm": 2.6122934818267822, |
| "learning_rate": 2.7732244746062363e-05, |
| "loss": 0.0543, |
| "step": 4370 |
| }, |
| { |
| "epoch": 16.846153846153847, |
| "grad_norm": 2.5369300842285156, |
| "learning_rate": 2.7719114805829015e-05, |
| "loss": 0.052, |
| "step": 4380 |
| }, |
| { |
| "epoch": 16.884615384615383, |
| "grad_norm": 2.575892925262451, |
| "learning_rate": 2.7705950092021465e-05, |
| "loss": 0.0577, |
| "step": 4390 |
| }, |
| { |
| "epoch": 16.923076923076923, |
| "grad_norm": 3.0408105850219727, |
| "learning_rate": 2.7692750640631533e-05, |
| "loss": 0.0659, |
| "step": 4400 |
| }, |
| { |
| "epoch": 16.96153846153846, |
| "grad_norm": 2.7433135509490967, |
| "learning_rate": 2.767951648774603e-05, |
| "loss": 0.0558, |
| "step": 4410 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 2.37965989112854, |
| "learning_rate": 2.766624766954661e-05, |
| "loss": 0.0563, |
| "step": 4420 |
| }, |
| { |
| "epoch": 17.03846153846154, |
| "grad_norm": 2.1070268154144287, |
| "learning_rate": 2.7652944222309727e-05, |
| "loss": 0.0541, |
| "step": 4430 |
| }, |
| { |
| "epoch": 17.076923076923077, |
| "grad_norm": 2.4608118534088135, |
| "learning_rate": 2.7639606182406484e-05, |
| "loss": 0.0619, |
| "step": 4440 |
| }, |
| { |
| "epoch": 17.115384615384617, |
| "grad_norm": 2.633108615875244, |
| "learning_rate": 2.7626233586302583e-05, |
| "loss": 0.0496, |
| "step": 4450 |
| }, |
| { |
| "epoch": 17.153846153846153, |
| "grad_norm": 2.1537821292877197, |
| "learning_rate": 2.7612826470558192e-05, |
| "loss": 0.0578, |
| "step": 4460 |
| }, |
| { |
| "epoch": 17.192307692307693, |
| "grad_norm": 2.589418411254883, |
| "learning_rate": 2.7599384871827846e-05, |
| "loss": 0.0547, |
| "step": 4470 |
| }, |
| { |
| "epoch": 17.23076923076923, |
| "grad_norm": 3.257871389389038, |
| "learning_rate": 2.7585908826860368e-05, |
| "loss": 0.0537, |
| "step": 4480 |
| }, |
| { |
| "epoch": 17.26923076923077, |
| "grad_norm": 2.2862274646759033, |
| "learning_rate": 2.757239837249875e-05, |
| "loss": 0.0552, |
| "step": 4490 |
| }, |
| { |
| "epoch": 17.307692307692307, |
| "grad_norm": 2.6018054485321045, |
| "learning_rate": 2.7558853545680057e-05, |
| "loss": 0.0523, |
| "step": 4500 |
| }, |
| { |
| "epoch": 17.346153846153847, |
| "grad_norm": 2.4538543224334717, |
| "learning_rate": 2.754527438343533e-05, |
| "loss": 0.0621, |
| "step": 4510 |
| }, |
| { |
| "epoch": 17.384615384615383, |
| "grad_norm": 2.5169732570648193, |
| "learning_rate": 2.7531660922889477e-05, |
| "loss": 0.0532, |
| "step": 4520 |
| }, |
| { |
| "epoch": 17.423076923076923, |
| "grad_norm": 2.893049955368042, |
| "learning_rate": 2.751801320126118e-05, |
| "loss": 0.0559, |
| "step": 4530 |
| }, |
| { |
| "epoch": 17.46153846153846, |
| "grad_norm": 2.932993173599243, |
| "learning_rate": 2.750433125586279e-05, |
| "loss": 0.0512, |
| "step": 4540 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 2.4409329891204834, |
| "learning_rate": 2.7490615124100225e-05, |
| "loss": 0.0478, |
| "step": 4550 |
| }, |
| { |
| "epoch": 17.53846153846154, |
| "grad_norm": 2.2984349727630615, |
| "learning_rate": 2.747686484347286e-05, |
| "loss": 0.0484, |
| "step": 4560 |
| }, |
| { |
| "epoch": 17.576923076923077, |
| "grad_norm": 2.0560920238494873, |
| "learning_rate": 2.7463080451573447e-05, |
| "loss": 0.0545, |
| "step": 4570 |
| }, |
| { |
| "epoch": 17.615384615384617, |
| "grad_norm": 2.1629302501678467, |
| "learning_rate": 2.744926198608798e-05, |
| "loss": 0.0522, |
| "step": 4580 |
| }, |
| { |
| "epoch": 17.653846153846153, |
| "grad_norm": 2.228933811187744, |
| "learning_rate": 2.743540948479561e-05, |
| "loss": 0.0534, |
| "step": 4590 |
| }, |
| { |
| "epoch": 17.692307692307693, |
| "grad_norm": 2.0013070106506348, |
| "learning_rate": 2.7421522985568562e-05, |
| "loss": 0.0538, |
| "step": 4600 |
| }, |
| { |
| "epoch": 17.73076923076923, |
| "grad_norm": 2.4562020301818848, |
| "learning_rate": 2.7407602526371983e-05, |
| "loss": 0.0549, |
| "step": 4610 |
| }, |
| { |
| "epoch": 17.76923076923077, |
| "grad_norm": 2.1591811180114746, |
| "learning_rate": 2.7393648145263873e-05, |
| "loss": 0.055, |
| "step": 4620 |
| }, |
| { |
| "epoch": 17.807692307692307, |
| "grad_norm": 2.4402129650115967, |
| "learning_rate": 2.7379659880394996e-05, |
| "loss": 0.0546, |
| "step": 4630 |
| }, |
| { |
| "epoch": 17.846153846153847, |
| "grad_norm": 2.294828176498413, |
| "learning_rate": 2.7365637770008717e-05, |
| "loss": 0.0538, |
| "step": 4640 |
| }, |
| { |
| "epoch": 17.884615384615383, |
| "grad_norm": 2.2330775260925293, |
| "learning_rate": 2.7351581852440953e-05, |
| "loss": 0.0527, |
| "step": 4650 |
| }, |
| { |
| "epoch": 17.923076923076923, |
| "grad_norm": 2.5576770305633545, |
| "learning_rate": 2.7337492166120053e-05, |
| "loss": 0.0633, |
| "step": 4660 |
| }, |
| { |
| "epoch": 17.96153846153846, |
| "grad_norm": 2.897631883621216, |
| "learning_rate": 2.732336874956667e-05, |
| "loss": 0.0551, |
| "step": 4670 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 2.2748541831970215, |
| "learning_rate": 2.7309211641393696e-05, |
| "loss": 0.0444, |
| "step": 4680 |
| }, |
| { |
| "epoch": 18.03846153846154, |
| "grad_norm": 2.8125858306884766, |
| "learning_rate": 2.7295020880306123e-05, |
| "loss": 0.0575, |
| "step": 4690 |
| }, |
| { |
| "epoch": 18.076923076923077, |
| "grad_norm": 2.6854939460754395, |
| "learning_rate": 2.7280796505100946e-05, |
| "loss": 0.0586, |
| "step": 4700 |
| }, |
| { |
| "epoch": 18.115384615384617, |
| "grad_norm": 2.2231080532073975, |
| "learning_rate": 2.7266538554667065e-05, |
| "loss": 0.0524, |
| "step": 4710 |
| }, |
| { |
| "epoch": 18.153846153846153, |
| "grad_norm": 1.815577745437622, |
| "learning_rate": 2.725224706798517e-05, |
| "loss": 0.0568, |
| "step": 4720 |
| }, |
| { |
| "epoch": 18.192307692307693, |
| "grad_norm": 2.6000752449035645, |
| "learning_rate": 2.7237922084127643e-05, |
| "loss": 0.0534, |
| "step": 4730 |
| }, |
| { |
| "epoch": 18.23076923076923, |
| "grad_norm": 2.511378526687622, |
| "learning_rate": 2.7223563642258446e-05, |
| "loss": 0.0497, |
| "step": 4740 |
| }, |
| { |
| "epoch": 18.26923076923077, |
| "grad_norm": 2.3093998432159424, |
| "learning_rate": 2.7209171781633e-05, |
| "loss": 0.0528, |
| "step": 4750 |
| }, |
| { |
| "epoch": 18.307692307692307, |
| "grad_norm": 2.629744529724121, |
| "learning_rate": 2.7194746541598113e-05, |
| "loss": 0.0513, |
| "step": 4760 |
| }, |
| { |
| "epoch": 18.346153846153847, |
| "grad_norm": 2.0061843395233154, |
| "learning_rate": 2.7180287961591835e-05, |
| "loss": 0.0527, |
| "step": 4770 |
| }, |
| { |
| "epoch": 18.384615384615383, |
| "grad_norm": 2.1390366554260254, |
| "learning_rate": 2.7165796081143377e-05, |
| "loss": 0.0537, |
| "step": 4780 |
| }, |
| { |
| "epoch": 18.423076923076923, |
| "grad_norm": 1.9976611137390137, |
| "learning_rate": 2.715127093987298e-05, |
| "loss": 0.0498, |
| "step": 4790 |
| }, |
| { |
| "epoch": 18.46153846153846, |
| "grad_norm": 2.1415915489196777, |
| "learning_rate": 2.713671257749183e-05, |
| "loss": 0.0518, |
| "step": 4800 |
| }, |
| { |
| "epoch": 18.5, |
| "grad_norm": 2.861093044281006, |
| "learning_rate": 2.712212103380193e-05, |
| "loss": 0.0499, |
| "step": 4810 |
| }, |
| { |
| "epoch": 18.53846153846154, |
| "grad_norm": 2.4393386840820312, |
| "learning_rate": 2.7107496348696004e-05, |
| "loss": 0.0469, |
| "step": 4820 |
| }, |
| { |
| "epoch": 18.576923076923077, |
| "grad_norm": 2.497103691101074, |
| "learning_rate": 2.7092838562157386e-05, |
| "loss": 0.0608, |
| "step": 4830 |
| }, |
| { |
| "epoch": 18.615384615384617, |
| "grad_norm": 2.7781546115875244, |
| "learning_rate": 2.7078147714259905e-05, |
| "loss": 0.0613, |
| "step": 4840 |
| }, |
| { |
| "epoch": 18.653846153846153, |
| "grad_norm": 2.271496057510376, |
| "learning_rate": 2.7063423845167773e-05, |
| "loss": 0.0519, |
| "step": 4850 |
| }, |
| { |
| "epoch": 18.692307692307693, |
| "grad_norm": 2.31038761138916, |
| "learning_rate": 2.7048666995135494e-05, |
| "loss": 0.0521, |
| "step": 4860 |
| }, |
| { |
| "epoch": 18.73076923076923, |
| "grad_norm": 2.466082811355591, |
| "learning_rate": 2.7033877204507722e-05, |
| "loss": 0.0517, |
| "step": 4870 |
| }, |
| { |
| "epoch": 18.76923076923077, |
| "grad_norm": 2.2604198455810547, |
| "learning_rate": 2.701905451371919e-05, |
| "loss": 0.0554, |
| "step": 4880 |
| }, |
| { |
| "epoch": 18.807692307692307, |
| "grad_norm": 2.663639545440674, |
| "learning_rate": 2.7004198963294558e-05, |
| "loss": 0.0548, |
| "step": 4890 |
| }, |
| { |
| "epoch": 18.846153846153847, |
| "grad_norm": 2.3590447902679443, |
| "learning_rate": 2.6989310593848345e-05, |
| "loss": 0.0547, |
| "step": 4900 |
| }, |
| { |
| "epoch": 18.884615384615383, |
| "grad_norm": 2.3990252017974854, |
| "learning_rate": 2.6974389446084776e-05, |
| "loss": 0.0483, |
| "step": 4910 |
| }, |
| { |
| "epoch": 18.923076923076923, |
| "grad_norm": 2.5169942378997803, |
| "learning_rate": 2.6959435560797706e-05, |
| "loss": 0.0487, |
| "step": 4920 |
| }, |
| { |
| "epoch": 18.96153846153846, |
| "grad_norm": 2.5757737159729004, |
| "learning_rate": 2.6944448978870478e-05, |
| "loss": 0.0564, |
| "step": 4930 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 2.7602148056030273, |
| "learning_rate": 2.6929429741275845e-05, |
| "loss": 0.0496, |
| "step": 4940 |
| }, |
| { |
| "epoch": 19.03846153846154, |
| "grad_norm": 2.638188123703003, |
| "learning_rate": 2.691437788907582e-05, |
| "loss": 0.0519, |
| "step": 4950 |
| }, |
| { |
| "epoch": 19.076923076923077, |
| "grad_norm": 2.3091073036193848, |
| "learning_rate": 2.689929346342159e-05, |
| "loss": 0.0457, |
| "step": 4960 |
| }, |
| { |
| "epoch": 19.115384615384617, |
| "grad_norm": 2.0856239795684814, |
| "learning_rate": 2.688417650555341e-05, |
| "loss": 0.0588, |
| "step": 4970 |
| }, |
| { |
| "epoch": 19.153846153846153, |
| "grad_norm": 1.9847303628921509, |
| "learning_rate": 2.686902705680046e-05, |
| "loss": 0.0571, |
| "step": 4980 |
| }, |
| { |
| "epoch": 19.192307692307693, |
| "grad_norm": 2.181013822555542, |
| "learning_rate": 2.6853845158580756e-05, |
| "loss": 0.0518, |
| "step": 4990 |
| }, |
| { |
| "epoch": 19.23076923076923, |
| "grad_norm": 2.1970889568328857, |
| "learning_rate": 2.6838630852401028e-05, |
| "loss": 0.0531, |
| "step": 5000 |
| }, |
| { |
| "epoch": 19.26923076923077, |
| "grad_norm": 2.3489420413970947, |
| "learning_rate": 2.6823384179856602e-05, |
| "loss": 0.052, |
| "step": 5010 |
| }, |
| { |
| "epoch": 19.307692307692307, |
| "grad_norm": 2.067884683609009, |
| "learning_rate": 2.6808105182631303e-05, |
| "loss": 0.0545, |
| "step": 5020 |
| }, |
| { |
| "epoch": 19.346153846153847, |
| "grad_norm": 2.4723005294799805, |
| "learning_rate": 2.6792793902497328e-05, |
| "loss": 0.0506, |
| "step": 5030 |
| }, |
| { |
| "epoch": 19.384615384615383, |
| "grad_norm": 2.328829050064087, |
| "learning_rate": 2.6777450381315133e-05, |
| "loss": 0.0499, |
| "step": 5040 |
| }, |
| { |
| "epoch": 19.423076923076923, |
| "grad_norm": 2.5263142585754395, |
| "learning_rate": 2.676207466103331e-05, |
| "loss": 0.052, |
| "step": 5050 |
| }, |
| { |
| "epoch": 19.46153846153846, |
| "grad_norm": 3.0785343647003174, |
| "learning_rate": 2.6746666783688503e-05, |
| "loss": 0.0507, |
| "step": 5060 |
| }, |
| { |
| "epoch": 19.5, |
| "grad_norm": 2.390286684036255, |
| "learning_rate": 2.673122679140525e-05, |
| "loss": 0.0421, |
| "step": 5070 |
| }, |
| { |
| "epoch": 19.53846153846154, |
| "grad_norm": 2.432105302810669, |
| "learning_rate": 2.671575472639591e-05, |
| "loss": 0.0515, |
| "step": 5080 |
| }, |
| { |
| "epoch": 19.576923076923077, |
| "grad_norm": 1.704558253288269, |
| "learning_rate": 2.6700250630960506e-05, |
| "loss": 0.0527, |
| "step": 5090 |
| }, |
| { |
| "epoch": 19.615384615384617, |
| "grad_norm": 2.047685384750366, |
| "learning_rate": 2.6684714547486654e-05, |
| "loss": 0.0538, |
| "step": 5100 |
| }, |
| { |
| "epoch": 19.653846153846153, |
| "grad_norm": 2.3358631134033203, |
| "learning_rate": 2.6669146518449407e-05, |
| "loss": 0.0479, |
| "step": 5110 |
| }, |
| { |
| "epoch": 19.692307692307693, |
| "grad_norm": 1.9280929565429688, |
| "learning_rate": 2.665354658641117e-05, |
| "loss": 0.0599, |
| "step": 5120 |
| }, |
| { |
| "epoch": 19.73076923076923, |
| "grad_norm": 1.7903410196304321, |
| "learning_rate": 2.6637914794021552e-05, |
| "loss": 0.0457, |
| "step": 5130 |
| }, |
| { |
| "epoch": 19.76923076923077, |
| "grad_norm": 2.035609483718872, |
| "learning_rate": 2.6622251184017274e-05, |
| "loss": 0.0483, |
| "step": 5140 |
| }, |
| { |
| "epoch": 19.807692307692307, |
| "grad_norm": 2.2856998443603516, |
| "learning_rate": 2.660655579922206e-05, |
| "loss": 0.0488, |
| "step": 5150 |
| }, |
| { |
| "epoch": 19.846153846153847, |
| "grad_norm": 2.0502729415893555, |
| "learning_rate": 2.6590828682546487e-05, |
| "loss": 0.0545, |
| "step": 5160 |
| }, |
| { |
| "epoch": 19.884615384615383, |
| "grad_norm": 2.134788990020752, |
| "learning_rate": 2.657506987698789e-05, |
| "loss": 0.0409, |
| "step": 5170 |
| }, |
| { |
| "epoch": 19.923076923076923, |
| "grad_norm": 2.1987850666046143, |
| "learning_rate": 2.655927942563024e-05, |
| "loss": 0.0503, |
| "step": 5180 |
| }, |
| { |
| "epoch": 19.96153846153846, |
| "grad_norm": 2.169670581817627, |
| "learning_rate": 2.6543457371644027e-05, |
| "loss": 0.0515, |
| "step": 5190 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 2.150855779647827, |
| "learning_rate": 2.652760375828615e-05, |
| "loss": 0.0554, |
| "step": 5200 |
| }, |
| { |
| "epoch": 20.03846153846154, |
| "grad_norm": 2.300340414047241, |
| "learning_rate": 2.651171862889978e-05, |
| "loss": 0.0507, |
| "step": 5210 |
| }, |
| { |
| "epoch": 20.076923076923077, |
| "grad_norm": 2.2516908645629883, |
| "learning_rate": 2.649580202691425e-05, |
| "loss": 0.0514, |
| "step": 5220 |
| }, |
| { |
| "epoch": 20.115384615384617, |
| "grad_norm": 2.0438835620880127, |
| "learning_rate": 2.6479853995844942e-05, |
| "loss": 0.052, |
| "step": 5230 |
| }, |
| { |
| "epoch": 20.153846153846153, |
| "grad_norm": 2.1609389781951904, |
| "learning_rate": 2.646387457929317e-05, |
| "loss": 0.0489, |
| "step": 5240 |
| }, |
| { |
| "epoch": 20.192307692307693, |
| "grad_norm": 1.979067087173462, |
| "learning_rate": 2.6447863820946047e-05, |
| "loss": 0.0455, |
| "step": 5250 |
| }, |
| { |
| "epoch": 20.23076923076923, |
| "grad_norm": 2.4219133853912354, |
| "learning_rate": 2.6431821764576367e-05, |
| "loss": 0.0505, |
| "step": 5260 |
| }, |
| { |
| "epoch": 20.26923076923077, |
| "grad_norm": 1.8512394428253174, |
| "learning_rate": 2.641574845404251e-05, |
| "loss": 0.0499, |
| "step": 5270 |
| }, |
| { |
| "epoch": 20.307692307692307, |
| "grad_norm": 1.9653434753417969, |
| "learning_rate": 2.639964393328829e-05, |
| "loss": 0.0533, |
| "step": 5280 |
| }, |
| { |
| "epoch": 20.346153846153847, |
| "grad_norm": 2.2867801189422607, |
| "learning_rate": 2.6383508246342844e-05, |
| "loss": 0.0503, |
| "step": 5290 |
| }, |
| { |
| "epoch": 20.384615384615383, |
| "grad_norm": 1.9846916198730469, |
| "learning_rate": 2.636734143732054e-05, |
| "loss": 0.0493, |
| "step": 5300 |
| }, |
| { |
| "epoch": 20.423076923076923, |
| "grad_norm": 2.2939069271087646, |
| "learning_rate": 2.63511435504208e-05, |
| "loss": 0.0499, |
| "step": 5310 |
| }, |
| { |
| "epoch": 20.46153846153846, |
| "grad_norm": 1.8951475620269775, |
| "learning_rate": 2.633491462992804e-05, |
| "loss": 0.0508, |
| "step": 5320 |
| }, |
| { |
| "epoch": 20.5, |
| "grad_norm": 2.4194393157958984, |
| "learning_rate": 2.63186547202115e-05, |
| "loss": 0.0461, |
| "step": 5330 |
| }, |
| { |
| "epoch": 20.53846153846154, |
| "grad_norm": 2.025209665298462, |
| "learning_rate": 2.6302363865725158e-05, |
| "loss": 0.0518, |
| "step": 5340 |
| }, |
| { |
| "epoch": 20.576923076923077, |
| "grad_norm": 3.0230369567871094, |
| "learning_rate": 2.628604211100759e-05, |
| "loss": 0.051, |
| "step": 5350 |
| }, |
| { |
| "epoch": 20.615384615384617, |
| "grad_norm": 2.486722707748413, |
| "learning_rate": 2.6269689500681846e-05, |
| "loss": 0.052, |
| "step": 5360 |
| }, |
| { |
| "epoch": 20.653846153846153, |
| "grad_norm": 2.9617080688476562, |
| "learning_rate": 2.6253306079455337e-05, |
| "loss": 0.0545, |
| "step": 5370 |
| }, |
| { |
| "epoch": 20.692307692307693, |
| "grad_norm": 2.7517824172973633, |
| "learning_rate": 2.6236891892119713e-05, |
| "loss": 0.0501, |
| "step": 5380 |
| }, |
| { |
| "epoch": 20.73076923076923, |
| "grad_norm": 2.343135118484497, |
| "learning_rate": 2.6220446983550738e-05, |
| "loss": 0.0486, |
| "step": 5390 |
| }, |
| { |
| "epoch": 20.76923076923077, |
| "grad_norm": 2.22013258934021, |
| "learning_rate": 2.6203971398708162e-05, |
| "loss": 0.0491, |
| "step": 5400 |
| }, |
| { |
| "epoch": 20.807692307692307, |
| "grad_norm": 2.3863282203674316, |
| "learning_rate": 2.6187465182635598e-05, |
| "loss": 0.052, |
| "step": 5410 |
| }, |
| { |
| "epoch": 20.846153846153847, |
| "grad_norm": 2.3278868198394775, |
| "learning_rate": 2.6170928380460424e-05, |
| "loss": 0.046, |
| "step": 5420 |
| }, |
| { |
| "epoch": 20.884615384615383, |
| "grad_norm": 2.228511095046997, |
| "learning_rate": 2.615436103739362e-05, |
| "loss": 0.0498, |
| "step": 5430 |
| }, |
| { |
| "epoch": 20.923076923076923, |
| "grad_norm": 2.5615031719207764, |
| "learning_rate": 2.6137763198729665e-05, |
| "loss": 0.0472, |
| "step": 5440 |
| }, |
| { |
| "epoch": 20.96153846153846, |
| "grad_norm": 2.5572619438171387, |
| "learning_rate": 2.6121134909846416e-05, |
| "loss": 0.0549, |
| "step": 5450 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 2.7003588676452637, |
| "learning_rate": 2.6104476216204985e-05, |
| "loss": 0.0451, |
| "step": 5460 |
| }, |
| { |
| "epoch": 21.03846153846154, |
| "grad_norm": 2.0107014179229736, |
| "learning_rate": 2.6087787163349605e-05, |
| "loss": 0.0481, |
| "step": 5470 |
| }, |
| { |
| "epoch": 21.076923076923077, |
| "grad_norm": 2.2759485244750977, |
| "learning_rate": 2.60710677969075e-05, |
| "loss": 0.0473, |
| "step": 5480 |
| }, |
| { |
| "epoch": 21.115384615384617, |
| "grad_norm": 2.0311803817749023, |
| "learning_rate": 2.6054318162588792e-05, |
| "loss": 0.0437, |
| "step": 5490 |
| }, |
| { |
| "epoch": 21.153846153846153, |
| "grad_norm": 2.2505581378936768, |
| "learning_rate": 2.6037538306186337e-05, |
| "loss": 0.0445, |
| "step": 5500 |
| }, |
| { |
| "epoch": 21.192307692307693, |
| "grad_norm": 2.1031274795532227, |
| "learning_rate": 2.602072827357562e-05, |
| "loss": 0.0491, |
| "step": 5510 |
| }, |
| { |
| "epoch": 21.23076923076923, |
| "grad_norm": 2.2294819355010986, |
| "learning_rate": 2.6003888110714624e-05, |
| "loss": 0.0405, |
| "step": 5520 |
| }, |
| { |
| "epoch": 21.26923076923077, |
| "grad_norm": 2.206677198410034, |
| "learning_rate": 2.5987017863643714e-05, |
| "loss": 0.054, |
| "step": 5530 |
| }, |
| { |
| "epoch": 21.307692307692307, |
| "grad_norm": 1.9745968580245972, |
| "learning_rate": 2.5970117578485506e-05, |
| "loss": 0.0469, |
| "step": 5540 |
| }, |
| { |
| "epoch": 21.346153846153847, |
| "grad_norm": 1.8446788787841797, |
| "learning_rate": 2.5953187301444733e-05, |
| "loss": 0.0431, |
| "step": 5550 |
| }, |
| { |
| "epoch": 21.384615384615383, |
| "grad_norm": 2.375255584716797, |
| "learning_rate": 2.5936227078808123e-05, |
| "loss": 0.0489, |
| "step": 5560 |
| }, |
| { |
| "epoch": 21.423076923076923, |
| "grad_norm": 2.1657443046569824, |
| "learning_rate": 2.5919236956944277e-05, |
| "loss": 0.0442, |
| "step": 5570 |
| }, |
| { |
| "epoch": 21.46153846153846, |
| "grad_norm": 2.1673221588134766, |
| "learning_rate": 2.5902216982303544e-05, |
| "loss": 0.0489, |
| "step": 5580 |
| }, |
| { |
| "epoch": 21.5, |
| "grad_norm": 2.0828003883361816, |
| "learning_rate": 2.588516720141788e-05, |
| "loss": 0.0465, |
| "step": 5590 |
| }, |
| { |
| "epoch": 21.53846153846154, |
| "grad_norm": 2.3611319065093994, |
| "learning_rate": 2.5868087660900735e-05, |
| "loss": 0.0506, |
| "step": 5600 |
| }, |
| { |
| "epoch": 21.576923076923077, |
| "grad_norm": 2.302957773208618, |
| "learning_rate": 2.5850978407446924e-05, |
| "loss": 0.0542, |
| "step": 5610 |
| }, |
| { |
| "epoch": 21.615384615384617, |
| "grad_norm": 2.3799068927764893, |
| "learning_rate": 2.5833839487832488e-05, |
| "loss": 0.0513, |
| "step": 5620 |
| }, |
| { |
| "epoch": 21.653846153846153, |
| "grad_norm": 2.2304797172546387, |
| "learning_rate": 2.5816670948914583e-05, |
| "loss": 0.0439, |
| "step": 5630 |
| }, |
| { |
| "epoch": 21.692307692307693, |
| "grad_norm": 2.4063093662261963, |
| "learning_rate": 2.5799472837631338e-05, |
| "loss": 0.0521, |
| "step": 5640 |
| }, |
| { |
| "epoch": 21.73076923076923, |
| "grad_norm": 1.8401644229888916, |
| "learning_rate": 2.578224520100173e-05, |
| "loss": 0.0416, |
| "step": 5650 |
| }, |
| { |
| "epoch": 21.76923076923077, |
| "grad_norm": 2.4482624530792236, |
| "learning_rate": 2.576498808612546e-05, |
| "loss": 0.0477, |
| "step": 5660 |
| }, |
| { |
| "epoch": 21.807692307692307, |
| "grad_norm": 2.242112398147583, |
| "learning_rate": 2.5747701540182825e-05, |
| "loss": 0.051, |
| "step": 5670 |
| }, |
| { |
| "epoch": 21.846153846153847, |
| "grad_norm": 2.264697790145874, |
| "learning_rate": 2.573038561043458e-05, |
| "loss": 0.0463, |
| "step": 5680 |
| }, |
| { |
| "epoch": 21.884615384615383, |
| "grad_norm": 2.5730340480804443, |
| "learning_rate": 2.5713040344221815e-05, |
| "loss": 0.0539, |
| "step": 5690 |
| }, |
| { |
| "epoch": 21.923076923076923, |
| "grad_norm": 2.2590243816375732, |
| "learning_rate": 2.5695665788965823e-05, |
| "loss": 0.0423, |
| "step": 5700 |
| }, |
| { |
| "epoch": 21.96153846153846, |
| "grad_norm": 2.2034289836883545, |
| "learning_rate": 2.5678261992167978e-05, |
| "loss": 0.0539, |
| "step": 5710 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 2.3551292419433594, |
| "learning_rate": 2.5660829001409594e-05, |
| "loss": 0.0555, |
| "step": 5720 |
| }, |
| { |
| "epoch": 22.03846153846154, |
| "grad_norm": 1.970501184463501, |
| "learning_rate": 2.5643366864351806e-05, |
| "loss": 0.0554, |
| "step": 5730 |
| }, |
| { |
| "epoch": 22.076923076923077, |
| "grad_norm": 2.2068798542022705, |
| "learning_rate": 2.5625875628735423e-05, |
| "loss": 0.0461, |
| "step": 5740 |
| }, |
| { |
| "epoch": 22.115384615384617, |
| "grad_norm": 2.198699712753296, |
| "learning_rate": 2.560835534238082e-05, |
| "loss": 0.0463, |
| "step": 5750 |
| }, |
| { |
| "epoch": 22.153846153846153, |
| "grad_norm": 1.864600658416748, |
| "learning_rate": 2.5590806053187793e-05, |
| "loss": 0.0474, |
| "step": 5760 |
| }, |
| { |
| "epoch": 22.192307692307693, |
| "grad_norm": 2.2763874530792236, |
| "learning_rate": 2.557322780913542e-05, |
| "loss": 0.0493, |
| "step": 5770 |
| }, |
| { |
| "epoch": 22.23076923076923, |
| "grad_norm": 2.336698293685913, |
| "learning_rate": 2.555562065828196e-05, |
| "loss": 0.0532, |
| "step": 5780 |
| }, |
| { |
| "epoch": 22.26923076923077, |
| "grad_norm": 2.111300230026245, |
| "learning_rate": 2.5537984648764684e-05, |
| "loss": 0.0461, |
| "step": 5790 |
| }, |
| { |
| "epoch": 22.307692307692307, |
| "grad_norm": 1.9061249494552612, |
| "learning_rate": 2.5520319828799766e-05, |
| "loss": 0.0478, |
| "step": 5800 |
| }, |
| { |
| "epoch": 22.346153846153847, |
| "grad_norm": 1.8777246475219727, |
| "learning_rate": 2.550262624668216e-05, |
| "loss": 0.0433, |
| "step": 5810 |
| }, |
| { |
| "epoch": 22.384615384615383, |
| "grad_norm": 1.811094880104065, |
| "learning_rate": 2.5484903950785432e-05, |
| "loss": 0.0443, |
| "step": 5820 |
| }, |
| { |
| "epoch": 22.423076923076923, |
| "grad_norm": 2.2233502864837646, |
| "learning_rate": 2.546715298956167e-05, |
| "loss": 0.0534, |
| "step": 5830 |
| }, |
| { |
| "epoch": 22.46153846153846, |
| "grad_norm": 1.968826174736023, |
| "learning_rate": 2.5449373411541322e-05, |
| "loss": 0.05, |
| "step": 5840 |
| }, |
| { |
| "epoch": 22.5, |
| "grad_norm": 2.2315022945404053, |
| "learning_rate": 2.5431565265333074e-05, |
| "loss": 0.0558, |
| "step": 5850 |
| }, |
| { |
| "epoch": 22.53846153846154, |
| "grad_norm": 1.852952480316162, |
| "learning_rate": 2.541372859962372e-05, |
| "loss": 0.0478, |
| "step": 5860 |
| }, |
| { |
| "epoch": 22.576923076923077, |
| "grad_norm": 2.2287561893463135, |
| "learning_rate": 2.5395863463178023e-05, |
| "loss": 0.0466, |
| "step": 5870 |
| }, |
| { |
| "epoch": 22.615384615384617, |
| "grad_norm": 2.396843671798706, |
| "learning_rate": 2.537796990483858e-05, |
| "loss": 0.0601, |
| "step": 5880 |
| }, |
| { |
| "epoch": 22.653846153846153, |
| "grad_norm": 2.18788480758667, |
| "learning_rate": 2.53600479735257e-05, |
| "loss": 0.0458, |
| "step": 5890 |
| }, |
| { |
| "epoch": 22.692307692307693, |
| "grad_norm": 1.890351414680481, |
| "learning_rate": 2.5342097718237262e-05, |
| "loss": 0.0492, |
| "step": 5900 |
| }, |
| { |
| "epoch": 22.73076923076923, |
| "grad_norm": 1.6278502941131592, |
| "learning_rate": 2.5324119188048567e-05, |
| "loss": 0.0476, |
| "step": 5910 |
| }, |
| { |
| "epoch": 22.76923076923077, |
| "grad_norm": 2.3576247692108154, |
| "learning_rate": 2.530611243211224e-05, |
| "loss": 0.0477, |
| "step": 5920 |
| }, |
| { |
| "epoch": 22.807692307692307, |
| "grad_norm": 1.9282854795455933, |
| "learning_rate": 2.5288077499658064e-05, |
| "loss": 0.0442, |
| "step": 5930 |
| }, |
| { |
| "epoch": 22.846153846153847, |
| "grad_norm": 1.774977684020996, |
| "learning_rate": 2.527001443999285e-05, |
| "loss": 0.0466, |
| "step": 5940 |
| }, |
| { |
| "epoch": 22.884615384615383, |
| "grad_norm": 1.9546281099319458, |
| "learning_rate": 2.5251923302500318e-05, |
| "loss": 0.0446, |
| "step": 5950 |
| }, |
| { |
| "epoch": 22.923076923076923, |
| "grad_norm": 2.2042019367218018, |
| "learning_rate": 2.523380413664095e-05, |
| "loss": 0.0459, |
| "step": 5960 |
| }, |
| { |
| "epoch": 22.96153846153846, |
| "grad_norm": 1.848617672920227, |
| "learning_rate": 2.5215656991951844e-05, |
| "loss": 0.0433, |
| "step": 5970 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 2.180008888244629, |
| "learning_rate": 2.5197481918046606e-05, |
| "loss": 0.0476, |
| "step": 5980 |
| }, |
| { |
| "epoch": 23.03846153846154, |
| "grad_norm": 2.211040735244751, |
| "learning_rate": 2.5179278964615192e-05, |
| "loss": 0.0448, |
| "step": 5990 |
| }, |
| { |
| "epoch": 23.076923076923077, |
| "grad_norm": 2.2788994312286377, |
| "learning_rate": 2.516104818142379e-05, |
| "loss": 0.0494, |
| "step": 6000 |
| }, |
| { |
| "epoch": 23.115384615384617, |
| "grad_norm": 1.680962324142456, |
| "learning_rate": 2.5142789618314654e-05, |
| "loss": 0.0409, |
| "step": 6010 |
| }, |
| { |
| "epoch": 23.153846153846153, |
| "grad_norm": 1.8511399030685425, |
| "learning_rate": 2.5124503325206006e-05, |
| "loss": 0.0469, |
| "step": 6020 |
| }, |
| { |
| "epoch": 23.192307692307693, |
| "grad_norm": 2.0306243896484375, |
| "learning_rate": 2.5106189352091867e-05, |
| "loss": 0.0427, |
| "step": 6030 |
| }, |
| { |
| "epoch": 23.23076923076923, |
| "grad_norm": 1.6079951524734497, |
| "learning_rate": 2.5087847749041944e-05, |
| "loss": 0.0501, |
| "step": 6040 |
| }, |
| { |
| "epoch": 23.26923076923077, |
| "grad_norm": 1.870809555053711, |
| "learning_rate": 2.506947856620148e-05, |
| "loss": 0.04, |
| "step": 6050 |
| }, |
| { |
| "epoch": 23.307692307692307, |
| "grad_norm": 1.878944993019104, |
| "learning_rate": 2.505108185379111e-05, |
| "loss": 0.0463, |
| "step": 6060 |
| }, |
| { |
| "epoch": 23.346153846153847, |
| "grad_norm": 2.2728562355041504, |
| "learning_rate": 2.503265766210676e-05, |
| "loss": 0.0436, |
| "step": 6070 |
| }, |
| { |
| "epoch": 23.384615384615383, |
| "grad_norm": 2.2612860202789307, |
| "learning_rate": 2.5014206041519456e-05, |
| "loss": 0.0517, |
| "step": 6080 |
| }, |
| { |
| "epoch": 23.423076923076923, |
| "grad_norm": 1.6607364416122437, |
| "learning_rate": 2.499572704247523e-05, |
| "loss": 0.0456, |
| "step": 6090 |
| }, |
| { |
| "epoch": 23.46153846153846, |
| "grad_norm": 2.1083016395568848, |
| "learning_rate": 2.497722071549495e-05, |
| "loss": 0.0414, |
| "step": 6100 |
| }, |
| { |
| "epoch": 23.5, |
| "grad_norm": 1.410343050956726, |
| "learning_rate": 2.4958687111174216e-05, |
| "loss": 0.0413, |
| "step": 6110 |
| }, |
| { |
| "epoch": 23.53846153846154, |
| "grad_norm": 1.9180458784103394, |
| "learning_rate": 2.494012628018319e-05, |
| "loss": 0.0479, |
| "step": 6120 |
| }, |
| { |
| "epoch": 23.576923076923077, |
| "grad_norm": 1.7604724168777466, |
| "learning_rate": 2.4921538273266475e-05, |
| "loss": 0.0486, |
| "step": 6130 |
| }, |
| { |
| "epoch": 23.615384615384617, |
| "grad_norm": 2.1101067066192627, |
| "learning_rate": 2.490292314124298e-05, |
| "loss": 0.0413, |
| "step": 6140 |
| }, |
| { |
| "epoch": 23.653846153846153, |
| "grad_norm": 1.9359272718429565, |
| "learning_rate": 2.4884280935005755e-05, |
| "loss": 0.0396, |
| "step": 6150 |
| }, |
| { |
| "epoch": 23.692307692307693, |
| "grad_norm": 1.9525443315505981, |
| "learning_rate": 2.486561170552188e-05, |
| "loss": 0.0503, |
| "step": 6160 |
| }, |
| { |
| "epoch": 23.73076923076923, |
| "grad_norm": 2.2428786754608154, |
| "learning_rate": 2.4846915503832326e-05, |
| "loss": 0.05, |
| "step": 6170 |
| }, |
| { |
| "epoch": 23.76923076923077, |
| "grad_norm": 2.0034830570220947, |
| "learning_rate": 2.4828192381051787e-05, |
| "loss": 0.0492, |
| "step": 6180 |
| }, |
| { |
| "epoch": 23.807692307692307, |
| "grad_norm": 2.311720132827759, |
| "learning_rate": 2.480944238836857e-05, |
| "loss": 0.0531, |
| "step": 6190 |
| }, |
| { |
| "epoch": 23.846153846153847, |
| "grad_norm": 1.5703109502792358, |
| "learning_rate": 2.4790665577044428e-05, |
| "loss": 0.0453, |
| "step": 6200 |
| }, |
| { |
| "epoch": 23.884615384615383, |
| "grad_norm": 1.7515316009521484, |
| "learning_rate": 2.4771861998414458e-05, |
| "loss": 0.046, |
| "step": 6210 |
| }, |
| { |
| "epoch": 23.923076923076923, |
| "grad_norm": 2.0526859760284424, |
| "learning_rate": 2.475303170388692e-05, |
| "loss": 0.0445, |
| "step": 6220 |
| }, |
| { |
| "epoch": 23.96153846153846, |
| "grad_norm": 1.8120594024658203, |
| "learning_rate": 2.4734174744943122e-05, |
| "loss": 0.0451, |
| "step": 6230 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 2.8347058296203613, |
| "learning_rate": 2.471529117313727e-05, |
| "loss": 0.0456, |
| "step": 6240 |
| }, |
| { |
| "epoch": 24.03846153846154, |
| "grad_norm": 2.466557502746582, |
| "learning_rate": 2.4696381040096335e-05, |
| "loss": 0.0481, |
| "step": 6250 |
| }, |
| { |
| "epoch": 24.076923076923077, |
| "grad_norm": 2.347093105316162, |
| "learning_rate": 2.4677444397519883e-05, |
| "loss": 0.0475, |
| "step": 6260 |
| }, |
| { |
| "epoch": 24.115384615384617, |
| "grad_norm": 1.7908564805984497, |
| "learning_rate": 2.4658481297179987e-05, |
| "loss": 0.0468, |
| "step": 6270 |
| }, |
| { |
| "epoch": 24.153846153846153, |
| "grad_norm": 2.0475430488586426, |
| "learning_rate": 2.4639491790921028e-05, |
| "loss": 0.0474, |
| "step": 6280 |
| }, |
| { |
| "epoch": 24.192307692307693, |
| "grad_norm": 2.225431442260742, |
| "learning_rate": 2.4620475930659596e-05, |
| "loss": 0.0488, |
| "step": 6290 |
| }, |
| { |
| "epoch": 24.23076923076923, |
| "grad_norm": 1.9803553819656372, |
| "learning_rate": 2.4601433768384327e-05, |
| "loss": 0.0421, |
| "step": 6300 |
| }, |
| { |
| "epoch": 24.26923076923077, |
| "grad_norm": 2.0825839042663574, |
| "learning_rate": 2.4582365356155766e-05, |
| "loss": 0.0452, |
| "step": 6310 |
| }, |
| { |
| "epoch": 24.307692307692307, |
| "grad_norm": 1.9435968399047852, |
| "learning_rate": 2.4563270746106224e-05, |
| "loss": 0.0423, |
| "step": 6320 |
| }, |
| { |
| "epoch": 24.346153846153847, |
| "grad_norm": 2.0983896255493164, |
| "learning_rate": 2.4544149990439632e-05, |
| "loss": 0.0431, |
| "step": 6330 |
| }, |
| { |
| "epoch": 24.384615384615383, |
| "grad_norm": 1.7998305559158325, |
| "learning_rate": 2.4525003141431413e-05, |
| "loss": 0.0434, |
| "step": 6340 |
| }, |
| { |
| "epoch": 24.423076923076923, |
| "grad_norm": 2.194087028503418, |
| "learning_rate": 2.450583025142831e-05, |
| "loss": 0.0406, |
| "step": 6350 |
| }, |
| { |
| "epoch": 24.46153846153846, |
| "grad_norm": 2.305424690246582, |
| "learning_rate": 2.4486631372848286e-05, |
| "loss": 0.0461, |
| "step": 6360 |
| }, |
| { |
| "epoch": 24.5, |
| "grad_norm": 2.211747646331787, |
| "learning_rate": 2.4467406558180328e-05, |
| "loss": 0.0454, |
| "step": 6370 |
| }, |
| { |
| "epoch": 24.53846153846154, |
| "grad_norm": 2.2912757396698, |
| "learning_rate": 2.4448155859984357e-05, |
| "loss": 0.0425, |
| "step": 6380 |
| }, |
| { |
| "epoch": 24.576923076923077, |
| "grad_norm": 1.930274248123169, |
| "learning_rate": 2.442887933089104e-05, |
| "loss": 0.0495, |
| "step": 6390 |
| }, |
| { |
| "epoch": 24.615384615384617, |
| "grad_norm": 2.166144609451294, |
| "learning_rate": 2.440957702360167e-05, |
| "loss": 0.0453, |
| "step": 6400 |
| }, |
| { |
| "epoch": 24.653846153846153, |
| "grad_norm": 1.9380121231079102, |
| "learning_rate": 2.4390248990888026e-05, |
| "loss": 0.0467, |
| "step": 6410 |
| }, |
| { |
| "epoch": 24.692307692307693, |
| "grad_norm": 1.9483615159988403, |
| "learning_rate": 2.4370895285592202e-05, |
| "loss": 0.0461, |
| "step": 6420 |
| }, |
| { |
| "epoch": 24.73076923076923, |
| "grad_norm": 1.7530697584152222, |
| "learning_rate": 2.43515159606265e-05, |
| "loss": 0.0404, |
| "step": 6430 |
| }, |
| { |
| "epoch": 24.76923076923077, |
| "grad_norm": 2.1312994956970215, |
| "learning_rate": 2.4332111068973243e-05, |
| "loss": 0.0447, |
| "step": 6440 |
| }, |
| { |
| "epoch": 24.807692307692307, |
| "grad_norm": 1.7659395933151245, |
| "learning_rate": 2.4312680663684674e-05, |
| "loss": 0.0409, |
| "step": 6450 |
| }, |
| { |
| "epoch": 24.846153846153847, |
| "grad_norm": 1.9349921941757202, |
| "learning_rate": 2.429322479788277e-05, |
| "loss": 0.042, |
| "step": 6460 |
| }, |
| { |
| "epoch": 24.884615384615383, |
| "grad_norm": 1.856867790222168, |
| "learning_rate": 2.4273743524759132e-05, |
| "loss": 0.0494, |
| "step": 6470 |
| }, |
| { |
| "epoch": 24.923076923076923, |
| "grad_norm": 2.1651554107666016, |
| "learning_rate": 2.4254236897574818e-05, |
| "loss": 0.0423, |
| "step": 6480 |
| }, |
| { |
| "epoch": 24.96153846153846, |
| "grad_norm": 1.993500828742981, |
| "learning_rate": 2.4234704969660192e-05, |
| "loss": 0.0415, |
| "step": 6490 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 1.9535496234893799, |
| "learning_rate": 2.4215147794414806e-05, |
| "loss": 0.0425, |
| "step": 6500 |
| }, |
| { |
| "epoch": 25.03846153846154, |
| "grad_norm": 1.6581015586853027, |
| "learning_rate": 2.419556542530723e-05, |
| "loss": 0.0461, |
| "step": 6510 |
| }, |
| { |
| "epoch": 25.076923076923077, |
| "grad_norm": 1.8992817401885986, |
| "learning_rate": 2.4175957915874916e-05, |
| "loss": 0.0488, |
| "step": 6520 |
| }, |
| { |
| "epoch": 25.115384615384617, |
| "grad_norm": 1.6976550817489624, |
| "learning_rate": 2.4156325319724037e-05, |
| "loss": 0.0471, |
| "step": 6530 |
| }, |
| { |
| "epoch": 25.153846153846153, |
| "grad_norm": 1.9944517612457275, |
| "learning_rate": 2.4136667690529372e-05, |
| "loss": 0.0429, |
| "step": 6540 |
| }, |
| { |
| "epoch": 25.192307692307693, |
| "grad_norm": 1.7885385751724243, |
| "learning_rate": 2.4116985082034126e-05, |
| "loss": 0.0444, |
| "step": 6550 |
| }, |
| { |
| "epoch": 25.23076923076923, |
| "grad_norm": 1.9043548107147217, |
| "learning_rate": 2.409727754804979e-05, |
| "loss": 0.0483, |
| "step": 6560 |
| }, |
| { |
| "epoch": 25.26923076923077, |
| "grad_norm": 1.5960673093795776, |
| "learning_rate": 2.4077545142456025e-05, |
| "loss": 0.0481, |
| "step": 6570 |
| }, |
| { |
| "epoch": 25.307692307692307, |
| "grad_norm": 1.7031124830245972, |
| "learning_rate": 2.405778791920046e-05, |
| "loss": 0.0569, |
| "step": 6580 |
| }, |
| { |
| "epoch": 25.346153846153847, |
| "grad_norm": 2.0520358085632324, |
| "learning_rate": 2.4038005932298594e-05, |
| "loss": 0.0444, |
| "step": 6590 |
| }, |
| { |
| "epoch": 25.384615384615383, |
| "grad_norm": 2.1107587814331055, |
| "learning_rate": 2.4018199235833624e-05, |
| "loss": 0.0429, |
| "step": 6600 |
| }, |
| { |
| "epoch": 25.423076923076923, |
| "grad_norm": 1.6484862565994263, |
| "learning_rate": 2.3998367883956306e-05, |
| "loss": 0.0463, |
| "step": 6610 |
| }, |
| { |
| "epoch": 25.46153846153846, |
| "grad_norm": 1.5690255165100098, |
| "learning_rate": 2.3978511930884795e-05, |
| "loss": 0.0423, |
| "step": 6620 |
| }, |
| { |
| "epoch": 25.5, |
| "grad_norm": 1.9984314441680908, |
| "learning_rate": 2.3958631430904504e-05, |
| "loss": 0.0494, |
| "step": 6630 |
| }, |
| { |
| "epoch": 25.53846153846154, |
| "grad_norm": 1.9124306440353394, |
| "learning_rate": 2.393872643836797e-05, |
| "loss": 0.0469, |
| "step": 6640 |
| }, |
| { |
| "epoch": 25.576923076923077, |
| "grad_norm": 1.6842821836471558, |
| "learning_rate": 2.3918797007694675e-05, |
| "loss": 0.041, |
| "step": 6650 |
| }, |
| { |
| "epoch": 25.615384615384617, |
| "grad_norm": 2.1914467811584473, |
| "learning_rate": 2.3898843193370923e-05, |
| "loss": 0.0452, |
| "step": 6660 |
| }, |
| { |
| "epoch": 25.653846153846153, |
| "grad_norm": 1.7262556552886963, |
| "learning_rate": 2.387886504994969e-05, |
| "loss": 0.0436, |
| "step": 6670 |
| }, |
| { |
| "epoch": 25.692307692307693, |
| "grad_norm": 2.0586323738098145, |
| "learning_rate": 2.385886263205044e-05, |
| "loss": 0.0465, |
| "step": 6680 |
| }, |
| { |
| "epoch": 25.73076923076923, |
| "grad_norm": 1.9069286584854126, |
| "learning_rate": 2.3838835994359036e-05, |
| "loss": 0.0443, |
| "step": 6690 |
| }, |
| { |
| "epoch": 25.76923076923077, |
| "grad_norm": 2.2853593826293945, |
| "learning_rate": 2.3818785191627525e-05, |
| "loss": 0.0445, |
| "step": 6700 |
| }, |
| { |
| "epoch": 25.807692307692307, |
| "grad_norm": 1.947430968284607, |
| "learning_rate": 2.379871027867405e-05, |
| "loss": 0.0449, |
| "step": 6710 |
| }, |
| { |
| "epoch": 25.846153846153847, |
| "grad_norm": 1.848595142364502, |
| "learning_rate": 2.3778611310382653e-05, |
| "loss": 0.0433, |
| "step": 6720 |
| }, |
| { |
| "epoch": 25.884615384615383, |
| "grad_norm": 2.047389507293701, |
| "learning_rate": 2.3758488341703137e-05, |
| "loss": 0.0451, |
| "step": 6730 |
| }, |
| { |
| "epoch": 25.923076923076923, |
| "grad_norm": 1.6822788715362549, |
| "learning_rate": 2.3738341427650945e-05, |
| "loss": 0.0504, |
| "step": 6740 |
| }, |
| { |
| "epoch": 25.96153846153846, |
| "grad_norm": 1.673630714416504, |
| "learning_rate": 2.3718170623306955e-05, |
| "loss": 0.0422, |
| "step": 6750 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 1.6114610433578491, |
| "learning_rate": 2.369797598381739e-05, |
| "loss": 0.041, |
| "step": 6760 |
| }, |
| { |
| "epoch": 26.03846153846154, |
| "grad_norm": 1.9758553504943848, |
| "learning_rate": 2.3677757564393612e-05, |
| "loss": 0.0398, |
| "step": 6770 |
| }, |
| { |
| "epoch": 26.076923076923077, |
| "grad_norm": 1.816084623336792, |
| "learning_rate": 2.3657515420312015e-05, |
| "loss": 0.0408, |
| "step": 6780 |
| }, |
| { |
| "epoch": 26.115384615384617, |
| "grad_norm": 1.7615054845809937, |
| "learning_rate": 2.3637249606913847e-05, |
| "loss": 0.0445, |
| "step": 6790 |
| }, |
| { |
| "epoch": 26.153846153846153, |
| "grad_norm": 2.0626277923583984, |
| "learning_rate": 2.3616960179605064e-05, |
| "loss": 0.0527, |
| "step": 6800 |
| }, |
| { |
| "epoch": 26.192307692307693, |
| "grad_norm": 1.707031011581421, |
| "learning_rate": 2.3596647193856188e-05, |
| "loss": 0.04, |
| "step": 6810 |
| }, |
| { |
| "epoch": 26.23076923076923, |
| "grad_norm": 1.8059414625167847, |
| "learning_rate": 2.3576310705202143e-05, |
| "loss": 0.0394, |
| "step": 6820 |
| }, |
| { |
| "epoch": 26.26923076923077, |
| "grad_norm": 2.0285634994506836, |
| "learning_rate": 2.3555950769242122e-05, |
| "loss": 0.037, |
| "step": 6830 |
| }, |
| { |
| "epoch": 26.307692307692307, |
| "grad_norm": 1.6764180660247803, |
| "learning_rate": 2.3535567441639396e-05, |
| "loss": 0.041, |
| "step": 6840 |
| }, |
| { |
| "epoch": 26.346153846153847, |
| "grad_norm": 1.839801549911499, |
| "learning_rate": 2.351516077812122e-05, |
| "loss": 0.0385, |
| "step": 6850 |
| }, |
| { |
| "epoch": 26.384615384615383, |
| "grad_norm": 2.2130534648895264, |
| "learning_rate": 2.349473083447863e-05, |
| "loss": 0.0481, |
| "step": 6860 |
| }, |
| { |
| "epoch": 26.423076923076923, |
| "grad_norm": 1.7864869832992554, |
| "learning_rate": 2.3474277666566307e-05, |
| "loss": 0.0495, |
| "step": 6870 |
| }, |
| { |
| "epoch": 26.46153846153846, |
| "grad_norm": 1.2948951721191406, |
| "learning_rate": 2.345380133030243e-05, |
| "loss": 0.0503, |
| "step": 6880 |
| }, |
| { |
| "epoch": 26.5, |
| "grad_norm": 1.299521803855896, |
| "learning_rate": 2.343330188166853e-05, |
| "loss": 0.0441, |
| "step": 6890 |
| }, |
| { |
| "epoch": 26.53846153846154, |
| "grad_norm": 2.135817289352417, |
| "learning_rate": 2.3412779376709304e-05, |
| "loss": 0.0435, |
| "step": 6900 |
| }, |
| { |
| "epoch": 26.576923076923077, |
| "grad_norm": 1.7558770179748535, |
| "learning_rate": 2.3392233871532504e-05, |
| "loss": 0.0471, |
| "step": 6910 |
| }, |
| { |
| "epoch": 26.615384615384617, |
| "grad_norm": 2.0203568935394287, |
| "learning_rate": 2.337166542230876e-05, |
| "loss": 0.0517, |
| "step": 6920 |
| }, |
| { |
| "epoch": 26.653846153846153, |
| "grad_norm": 2.188866138458252, |
| "learning_rate": 2.335107408527142e-05, |
| "loss": 0.0398, |
| "step": 6930 |
| }, |
| { |
| "epoch": 26.692307692307693, |
| "grad_norm": 1.6940990686416626, |
| "learning_rate": 2.3330459916716417e-05, |
| "loss": 0.0392, |
| "step": 6940 |
| }, |
| { |
| "epoch": 26.73076923076923, |
| "grad_norm": 1.8685543537139893, |
| "learning_rate": 2.3309822973002097e-05, |
| "loss": 0.0482, |
| "step": 6950 |
| }, |
| { |
| "epoch": 26.76923076923077, |
| "grad_norm": 1.6639153957366943, |
| "learning_rate": 2.328916331054908e-05, |
| "loss": 0.044, |
| "step": 6960 |
| }, |
| { |
| "epoch": 26.807692307692307, |
| "grad_norm": 1.8613269329071045, |
| "learning_rate": 2.3268480985840093e-05, |
| "loss": 0.0431, |
| "step": 6970 |
| }, |
| { |
| "epoch": 26.846153846153847, |
| "grad_norm": 1.5119969844818115, |
| "learning_rate": 2.3247776055419826e-05, |
| "loss": 0.0422, |
| "step": 6980 |
| }, |
| { |
| "epoch": 26.884615384615383, |
| "grad_norm": 1.6979011297225952, |
| "learning_rate": 2.3227048575894758e-05, |
| "loss": 0.0446, |
| "step": 6990 |
| }, |
| { |
| "epoch": 26.923076923076923, |
| "grad_norm": 2.1201868057250977, |
| "learning_rate": 2.3206298603933037e-05, |
| "loss": 0.0394, |
| "step": 7000 |
| }, |
| { |
| "epoch": 26.96153846153846, |
| "grad_norm": 1.884023666381836, |
| "learning_rate": 2.3185526196264288e-05, |
| "loss": 0.0433, |
| "step": 7010 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 1.637145757675171, |
| "learning_rate": 2.3164731409679476e-05, |
| "loss": 0.0359, |
| "step": 7020 |
| }, |
| { |
| "epoch": 27.03846153846154, |
| "grad_norm": 1.3160382509231567, |
| "learning_rate": 2.3143914301030765e-05, |
| "loss": 0.0439, |
| "step": 7030 |
| }, |
| { |
| "epoch": 27.076923076923077, |
| "grad_norm": 1.8100475072860718, |
| "learning_rate": 2.3123074927231332e-05, |
| "loss": 0.0452, |
| "step": 7040 |
| }, |
| { |
| "epoch": 27.115384615384617, |
| "grad_norm": 2.004098653793335, |
| "learning_rate": 2.310221334525522e-05, |
| "loss": 0.0513, |
| "step": 7050 |
| }, |
| { |
| "epoch": 27.153846153846153, |
| "grad_norm": 2.4186182022094727, |
| "learning_rate": 2.3081329612137207e-05, |
| "loss": 0.0411, |
| "step": 7060 |
| }, |
| { |
| "epoch": 27.192307692307693, |
| "grad_norm": 1.9002735614776611, |
| "learning_rate": 2.3060423784972625e-05, |
| "loss": 0.0456, |
| "step": 7070 |
| }, |
| { |
| "epoch": 27.23076923076923, |
| "grad_norm": 2.0039029121398926, |
| "learning_rate": 2.3039495920917193e-05, |
| "loss": 0.0395, |
| "step": 7080 |
| }, |
| { |
| "epoch": 27.26923076923077, |
| "grad_norm": 1.979544758796692, |
| "learning_rate": 2.301854607718691e-05, |
| "loss": 0.039, |
| "step": 7090 |
| }, |
| { |
| "epoch": 27.307692307692307, |
| "grad_norm": 1.5195621252059937, |
| "learning_rate": 2.299757431105783e-05, |
| "loss": 0.0426, |
| "step": 7100 |
| }, |
| { |
| "epoch": 27.346153846153847, |
| "grad_norm": 1.6798497438430786, |
| "learning_rate": 2.2976580679865972e-05, |
| "loss": 0.0433, |
| "step": 7110 |
| }, |
| { |
| "epoch": 27.384615384615383, |
| "grad_norm": 1.4978078603744507, |
| "learning_rate": 2.2955565241007123e-05, |
| "loss": 0.045, |
| "step": 7120 |
| }, |
| { |
| "epoch": 27.423076923076923, |
| "grad_norm": 1.5086406469345093, |
| "learning_rate": 2.293452805193669e-05, |
| "loss": 0.0403, |
| "step": 7130 |
| }, |
| { |
| "epoch": 27.46153846153846, |
| "grad_norm": 1.614099144935608, |
| "learning_rate": 2.291346917016954e-05, |
| "loss": 0.0455, |
| "step": 7140 |
| }, |
| { |
| "epoch": 27.5, |
| "grad_norm": 1.391572117805481, |
| "learning_rate": 2.289238865327985e-05, |
| "loss": 0.0446, |
| "step": 7150 |
| }, |
| { |
| "epoch": 27.53846153846154, |
| "grad_norm": 1.4410821199417114, |
| "learning_rate": 2.2871286558900956e-05, |
| "loss": 0.0456, |
| "step": 7160 |
| }, |
| { |
| "epoch": 27.576923076923077, |
| "grad_norm": 1.736544132232666, |
| "learning_rate": 2.285016294472517e-05, |
| "loss": 0.0448, |
| "step": 7170 |
| }, |
| { |
| "epoch": 27.615384615384617, |
| "grad_norm": 1.6949403285980225, |
| "learning_rate": 2.2829017868503658e-05, |
| "loss": 0.0417, |
| "step": 7180 |
| }, |
| { |
| "epoch": 27.653846153846153, |
| "grad_norm": 1.5383449792861938, |
| "learning_rate": 2.280785138804624e-05, |
| "loss": 0.0511, |
| "step": 7190 |
| }, |
| { |
| "epoch": 27.692307692307693, |
| "grad_norm": 1.8487229347229004, |
| "learning_rate": 2.2786663561221265e-05, |
| "loss": 0.0414, |
| "step": 7200 |
| }, |
| { |
| "epoch": 27.73076923076923, |
| "grad_norm": 1.7891006469726562, |
| "learning_rate": 2.2765454445955452e-05, |
| "loss": 0.0377, |
| "step": 7210 |
| }, |
| { |
| "epoch": 27.76923076923077, |
| "grad_norm": 1.737947940826416, |
| "learning_rate": 2.2744224100233705e-05, |
| "loss": 0.0492, |
| "step": 7220 |
| }, |
| { |
| "epoch": 27.807692307692307, |
| "grad_norm": 1.625235915184021, |
| "learning_rate": 2.2722972582098984e-05, |
| "loss": 0.0412, |
| "step": 7230 |
| }, |
| { |
| "epoch": 27.846153846153847, |
| "grad_norm": 1.8091816902160645, |
| "learning_rate": 2.2701699949652118e-05, |
| "loss": 0.0417, |
| "step": 7240 |
| }, |
| { |
| "epoch": 27.884615384615383, |
| "grad_norm": 2.394031047821045, |
| "learning_rate": 2.2680406261051685e-05, |
| "loss": 0.0388, |
| "step": 7250 |
| }, |
| { |
| "epoch": 27.923076923076923, |
| "grad_norm": 1.3932080268859863, |
| "learning_rate": 2.2659091574513805e-05, |
| "loss": 0.0438, |
| "step": 7260 |
| }, |
| { |
| "epoch": 27.96153846153846, |
| "grad_norm": 1.760535717010498, |
| "learning_rate": 2.263775594831202e-05, |
| "loss": 0.0432, |
| "step": 7270 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 1.6721904277801514, |
| "learning_rate": 2.2616399440777128e-05, |
| "loss": 0.0384, |
| "step": 7280 |
| }, |
| { |
| "epoch": 28.03846153846154, |
| "grad_norm": 1.8300305604934692, |
| "learning_rate": 2.2595022110296988e-05, |
| "loss": 0.0415, |
| "step": 7290 |
| }, |
| { |
| "epoch": 28.076923076923077, |
| "grad_norm": 1.8963747024536133, |
| "learning_rate": 2.2573624015316418e-05, |
| "loss": 0.0457, |
| "step": 7300 |
| }, |
| { |
| "epoch": 28.115384615384617, |
| "grad_norm": 1.676701545715332, |
| "learning_rate": 2.2552205214336986e-05, |
| "loss": 0.0448, |
| "step": 7310 |
| }, |
| { |
| "epoch": 28.153846153846153, |
| "grad_norm": 1.6721583604812622, |
| "learning_rate": 2.253076576591688e-05, |
| "loss": 0.0415, |
| "step": 7320 |
| }, |
| { |
| "epoch": 28.192307692307693, |
| "grad_norm": 2.002814531326294, |
| "learning_rate": 2.2509305728670733e-05, |
| "loss": 0.0462, |
| "step": 7330 |
| }, |
| { |
| "epoch": 28.23076923076923, |
| "grad_norm": 1.5034431219100952, |
| "learning_rate": 2.2487825161269463e-05, |
| "loss": 0.0416, |
| "step": 7340 |
| }, |
| { |
| "epoch": 28.26923076923077, |
| "grad_norm": 2.0287587642669678, |
| "learning_rate": 2.2466324122440125e-05, |
| "loss": 0.0401, |
| "step": 7350 |
| }, |
| { |
| "epoch": 28.307692307692307, |
| "grad_norm": 1.6312346458435059, |
| "learning_rate": 2.2444802670965732e-05, |
| "loss": 0.0403, |
| "step": 7360 |
| }, |
| { |
| "epoch": 28.346153846153847, |
| "grad_norm": 1.8766647577285767, |
| "learning_rate": 2.2423260865685124e-05, |
| "loss": 0.0404, |
| "step": 7370 |
| }, |
| { |
| "epoch": 28.384615384615383, |
| "grad_norm": 1.6648188829421997, |
| "learning_rate": 2.2401698765492762e-05, |
| "loss": 0.0437, |
| "step": 7380 |
| }, |
| { |
| "epoch": 28.423076923076923, |
| "grad_norm": 1.7376813888549805, |
| "learning_rate": 2.2380116429338612e-05, |
| "loss": 0.0443, |
| "step": 7390 |
| }, |
| { |
| "epoch": 28.46153846153846, |
| "grad_norm": 1.6222645044326782, |
| "learning_rate": 2.2358513916227945e-05, |
| "loss": 0.0414, |
| "step": 7400 |
| }, |
| { |
| "epoch": 28.5, |
| "grad_norm": 1.5098398923873901, |
| "learning_rate": 2.233689128522122e-05, |
| "loss": 0.0369, |
| "step": 7410 |
| }, |
| { |
| "epoch": 28.53846153846154, |
| "grad_norm": 1.603192925453186, |
| "learning_rate": 2.2315248595433883e-05, |
| "loss": 0.0457, |
| "step": 7420 |
| }, |
| { |
| "epoch": 28.576923076923077, |
| "grad_norm": 1.890964388847351, |
| "learning_rate": 2.2293585906036214e-05, |
| "loss": 0.0427, |
| "step": 7430 |
| }, |
| { |
| "epoch": 28.615384615384617, |
| "grad_norm": 1.8507992029190063, |
| "learning_rate": 2.2271903276253183e-05, |
| "loss": 0.0426, |
| "step": 7440 |
| }, |
| { |
| "epoch": 28.653846153846153, |
| "grad_norm": 1.867964744567871, |
| "learning_rate": 2.2250200765364273e-05, |
| "loss": 0.0391, |
| "step": 7450 |
| }, |
| { |
| "epoch": 28.692307692307693, |
| "grad_norm": 1.5608159303665161, |
| "learning_rate": 2.2228478432703317e-05, |
| "loss": 0.043, |
| "step": 7460 |
| }, |
| { |
| "epoch": 28.73076923076923, |
| "grad_norm": 1.7745044231414795, |
| "learning_rate": 2.2206736337658348e-05, |
| "loss": 0.0421, |
| "step": 7470 |
| }, |
| { |
| "epoch": 28.76923076923077, |
| "grad_norm": 1.6739052534103394, |
| "learning_rate": 2.2184974539671417e-05, |
| "loss": 0.0419, |
| "step": 7480 |
| }, |
| { |
| "epoch": 28.807692307692307, |
| "grad_norm": 2.010806083679199, |
| "learning_rate": 2.2163193098238453e-05, |
| "loss": 0.0465, |
| "step": 7490 |
| }, |
| { |
| "epoch": 28.846153846153847, |
| "grad_norm": 1.8989545106887817, |
| "learning_rate": 2.2141392072909082e-05, |
| "loss": 0.0423, |
| "step": 7500 |
| }, |
| { |
| "epoch": 28.884615384615383, |
| "grad_norm": 1.8697062730789185, |
| "learning_rate": 2.2119571523286484e-05, |
| "loss": 0.0371, |
| "step": 7510 |
| }, |
| { |
| "epoch": 28.923076923076923, |
| "grad_norm": 1.5613651275634766, |
| "learning_rate": 2.2097731509027196e-05, |
| "loss": 0.0444, |
| "step": 7520 |
| }, |
| { |
| "epoch": 28.96153846153846, |
| "grad_norm": 1.36122465133667, |
| "learning_rate": 2.207587208984099e-05, |
| "loss": 0.0442, |
| "step": 7530 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 1.5111128091812134, |
| "learning_rate": 2.205399332549068e-05, |
| "loss": 0.0383, |
| "step": 7540 |
| }, |
| { |
| "epoch": 29.03846153846154, |
| "grad_norm": 1.6100459098815918, |
| "learning_rate": 2.2032095275791974e-05, |
| "loss": 0.0403, |
| "step": 7550 |
| }, |
| { |
| "epoch": 29.076923076923077, |
| "grad_norm": 1.7022507190704346, |
| "learning_rate": 2.2010178000613307e-05, |
| "loss": 0.0408, |
| "step": 7560 |
| }, |
| { |
| "epoch": 29.115384615384617, |
| "grad_norm": 1.8633601665496826, |
| "learning_rate": 2.1988241559875666e-05, |
| "loss": 0.0422, |
| "step": 7570 |
| }, |
| { |
| "epoch": 29.153846153846153, |
| "grad_norm": 1.7628028392791748, |
| "learning_rate": 2.1966286013552448e-05, |
| "loss": 0.0476, |
| "step": 7580 |
| }, |
| { |
| "epoch": 29.192307692307693, |
| "grad_norm": 1.9117072820663452, |
| "learning_rate": 2.1944311421669274e-05, |
| "loss": 0.0436, |
| "step": 7590 |
| }, |
| { |
| "epoch": 29.23076923076923, |
| "grad_norm": 1.4103853702545166, |
| "learning_rate": 2.1922317844303846e-05, |
| "loss": 0.0456, |
| "step": 7600 |
| }, |
| { |
| "epoch": 29.26923076923077, |
| "grad_norm": 1.4408249855041504, |
| "learning_rate": 2.1900305341585756e-05, |
| "loss": 0.037, |
| "step": 7610 |
| }, |
| { |
| "epoch": 29.307692307692307, |
| "grad_norm": 1.3809062242507935, |
| "learning_rate": 2.187827397369635e-05, |
| "loss": 0.0385, |
| "step": 7620 |
| }, |
| { |
| "epoch": 29.346153846153847, |
| "grad_norm": 1.9578503370285034, |
| "learning_rate": 2.1856223800868542e-05, |
| "loss": 0.041, |
| "step": 7630 |
| }, |
| { |
| "epoch": 29.384615384615383, |
| "grad_norm": 1.955604910850525, |
| "learning_rate": 2.183415488338667e-05, |
| "loss": 0.0424, |
| "step": 7640 |
| }, |
| { |
| "epoch": 29.423076923076923, |
| "grad_norm": 1.5779365301132202, |
| "learning_rate": 2.1812067281586312e-05, |
| "loss": 0.0444, |
| "step": 7650 |
| }, |
| { |
| "epoch": 29.46153846153846, |
| "grad_norm": 1.7006980180740356, |
| "learning_rate": 2.178996105585412e-05, |
| "loss": 0.0413, |
| "step": 7660 |
| }, |
| { |
| "epoch": 29.5, |
| "grad_norm": 1.7158124446868896, |
| "learning_rate": 2.1767836266627676e-05, |
| "loss": 0.0418, |
| "step": 7670 |
| }, |
| { |
| "epoch": 29.53846153846154, |
| "grad_norm": 1.5377243757247925, |
| "learning_rate": 2.174569297439531e-05, |
| "loss": 0.0379, |
| "step": 7680 |
| }, |
| { |
| "epoch": 29.576923076923077, |
| "grad_norm": 1.7341413497924805, |
| "learning_rate": 2.1723531239695932e-05, |
| "loss": 0.0411, |
| "step": 7690 |
| }, |
| { |
| "epoch": 29.615384615384617, |
| "grad_norm": 1.6391249895095825, |
| "learning_rate": 2.1701351123118886e-05, |
| "loss": 0.0403, |
| "step": 7700 |
| }, |
| { |
| "epoch": 29.653846153846153, |
| "grad_norm": 1.8113683462142944, |
| "learning_rate": 2.167915268530376e-05, |
| "loss": 0.0433, |
| "step": 7710 |
| }, |
| { |
| "epoch": 29.692307692307693, |
| "grad_norm": 1.8385941982269287, |
| "learning_rate": 2.165693598694023e-05, |
| "loss": 0.0396, |
| "step": 7720 |
| }, |
| { |
| "epoch": 29.73076923076923, |
| "grad_norm": 1.5339360237121582, |
| "learning_rate": 2.163470108876791e-05, |
| "loss": 0.0473, |
| "step": 7730 |
| }, |
| { |
| "epoch": 29.76923076923077, |
| "grad_norm": 1.451357126235962, |
| "learning_rate": 2.161244805157616e-05, |
| "loss": 0.0363, |
| "step": 7740 |
| }, |
| { |
| "epoch": 29.807692307692307, |
| "grad_norm": 1.3908246755599976, |
| "learning_rate": 2.159017693620393e-05, |
| "loss": 0.0415, |
| "step": 7750 |
| }, |
| { |
| "epoch": 29.846153846153847, |
| "grad_norm": 1.707660436630249, |
| "learning_rate": 2.15678878035396e-05, |
| "loss": 0.0398, |
| "step": 7760 |
| }, |
| { |
| "epoch": 29.884615384615383, |
| "grad_norm": 1.7854512929916382, |
| "learning_rate": 2.1545580714520817e-05, |
| "loss": 0.0355, |
| "step": 7770 |
| }, |
| { |
| "epoch": 29.923076923076923, |
| "grad_norm": 1.6504265069961548, |
| "learning_rate": 2.1523255730134294e-05, |
| "loss": 0.0428, |
| "step": 7780 |
| }, |
| { |
| "epoch": 29.96153846153846, |
| "grad_norm": 1.5774441957473755, |
| "learning_rate": 2.15009129114157e-05, |
| "loss": 0.0371, |
| "step": 7790 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 1.6256803274154663, |
| "learning_rate": 2.1478552319449443e-05, |
| "loss": 0.0405, |
| "step": 7800 |
| }, |
| { |
| "epoch": 30.03846153846154, |
| "grad_norm": 1.8955668210983276, |
| "learning_rate": 2.1456174015368527e-05, |
| "loss": 0.0426, |
| "step": 7810 |
| }, |
| { |
| "epoch": 30.076923076923077, |
| "grad_norm": 1.6124283075332642, |
| "learning_rate": 2.1433778060354375e-05, |
| "loss": 0.0466, |
| "step": 7820 |
| }, |
| { |
| "epoch": 30.115384615384617, |
| "grad_norm": 1.5108370780944824, |
| "learning_rate": 2.1411364515636685e-05, |
| "loss": 0.0513, |
| "step": 7830 |
| }, |
| { |
| "epoch": 30.153846153846153, |
| "grad_norm": 1.566898226737976, |
| "learning_rate": 2.1388933442493232e-05, |
| "loss": 0.0437, |
| "step": 7840 |
| }, |
| { |
| "epoch": 30.192307692307693, |
| "grad_norm": 1.4218852519989014, |
| "learning_rate": 2.13664849022497e-05, |
| "loss": 0.0394, |
| "step": 7850 |
| }, |
| { |
| "epoch": 30.23076923076923, |
| "grad_norm": 1.6596649885177612, |
| "learning_rate": 2.1344018956279547e-05, |
| "loss": 0.0365, |
| "step": 7860 |
| }, |
| { |
| "epoch": 30.26923076923077, |
| "grad_norm": 1.4985148906707764, |
| "learning_rate": 2.1321535666003817e-05, |
| "loss": 0.0389, |
| "step": 7870 |
| }, |
| { |
| "epoch": 30.307692307692307, |
| "grad_norm": 1.470139980316162, |
| "learning_rate": 2.1299035092890966e-05, |
| "loss": 0.0446, |
| "step": 7880 |
| }, |
| { |
| "epoch": 30.346153846153847, |
| "grad_norm": 1.7060062885284424, |
| "learning_rate": 2.12765172984567e-05, |
| "loss": 0.0415, |
| "step": 7890 |
| }, |
| { |
| "epoch": 30.384615384615383, |
| "grad_norm": 1.4624581336975098, |
| "learning_rate": 2.1253982344263803e-05, |
| "loss": 0.0375, |
| "step": 7900 |
| }, |
| { |
| "epoch": 30.423076923076923, |
| "grad_norm": 1.9595462083816528, |
| "learning_rate": 2.1231430291921987e-05, |
| "loss": 0.0461, |
| "step": 7910 |
| }, |
| { |
| "epoch": 30.46153846153846, |
| "grad_norm": 1.7338889837265015, |
| "learning_rate": 2.1208861203087695e-05, |
| "loss": 0.0406, |
| "step": 7920 |
| }, |
| { |
| "epoch": 30.5, |
| "grad_norm": 1.601374864578247, |
| "learning_rate": 2.1186275139463967e-05, |
| "loss": 0.0418, |
| "step": 7930 |
| }, |
| { |
| "epoch": 30.53846153846154, |
| "grad_norm": 1.823254108428955, |
| "learning_rate": 2.1163672162800222e-05, |
| "loss": 0.0403, |
| "step": 7940 |
| }, |
| { |
| "epoch": 30.576923076923077, |
| "grad_norm": 1.9945504665374756, |
| "learning_rate": 2.114105233489215e-05, |
| "loss": 0.0508, |
| "step": 7950 |
| }, |
| { |
| "epoch": 30.615384615384617, |
| "grad_norm": 1.810983657836914, |
| "learning_rate": 2.1118415717581487e-05, |
| "loss": 0.0509, |
| "step": 7960 |
| }, |
| { |
| "epoch": 30.653846153846153, |
| "grad_norm": 1.639147400856018, |
| "learning_rate": 2.1095762372755885e-05, |
| "loss": 0.043, |
| "step": 7970 |
| }, |
| { |
| "epoch": 30.692307692307693, |
| "grad_norm": 1.810479998588562, |
| "learning_rate": 2.1073092362348716e-05, |
| "loss": 0.0496, |
| "step": 7980 |
| }, |
| { |
| "epoch": 30.73076923076923, |
| "grad_norm": 1.596810221672058, |
| "learning_rate": 2.1050405748338933e-05, |
| "loss": 0.0423, |
| "step": 7990 |
| }, |
| { |
| "epoch": 30.76923076923077, |
| "grad_norm": 1.752320408821106, |
| "learning_rate": 2.102770259275087e-05, |
| "loss": 0.0391, |
| "step": 8000 |
| }, |
| { |
| "epoch": 30.807692307692307, |
| "grad_norm": 1.5737982988357544, |
| "learning_rate": 2.100498295765408e-05, |
| "loss": 0.0447, |
| "step": 8010 |
| }, |
| { |
| "epoch": 30.846153846153847, |
| "grad_norm": 1.6139024496078491, |
| "learning_rate": 2.098224690516319e-05, |
| "loss": 0.0404, |
| "step": 8020 |
| }, |
| { |
| "epoch": 30.884615384615383, |
| "grad_norm": 1.4000976085662842, |
| "learning_rate": 2.0959494497437688e-05, |
| "loss": 0.0424, |
| "step": 8030 |
| }, |
| { |
| "epoch": 30.923076923076923, |
| "grad_norm": 1.5702110528945923, |
| "learning_rate": 2.0936725796681796e-05, |
| "loss": 0.0379, |
| "step": 8040 |
| }, |
| { |
| "epoch": 30.96153846153846, |
| "grad_norm": 1.8499011993408203, |
| "learning_rate": 2.0913940865144266e-05, |
| "loss": 0.0373, |
| "step": 8050 |
| }, |
| { |
| "epoch": 31.0, |
| "grad_norm": 1.890944242477417, |
| "learning_rate": 2.0891139765118235e-05, |
| "loss": 0.0416, |
| "step": 8060 |
| }, |
| { |
| "epoch": 31.03846153846154, |
| "grad_norm": 1.509231448173523, |
| "learning_rate": 2.086832255894104e-05, |
| "loss": 0.0383, |
| "step": 8070 |
| }, |
| { |
| "epoch": 31.076923076923077, |
| "grad_norm": 1.7421714067459106, |
| "learning_rate": 2.084548930899405e-05, |
| "loss": 0.0386, |
| "step": 8080 |
| }, |
| { |
| "epoch": 31.115384615384617, |
| "grad_norm": 1.293750524520874, |
| "learning_rate": 2.08226400777025e-05, |
| "loss": 0.0365, |
| "step": 8090 |
| }, |
| { |
| "epoch": 31.153846153846153, |
| "grad_norm": 1.332698106765747, |
| "learning_rate": 2.0799774927535313e-05, |
| "loss": 0.0353, |
| "step": 8100 |
| }, |
| { |
| "epoch": 31.192307692307693, |
| "grad_norm": 1.566386342048645, |
| "learning_rate": 2.0776893921004936e-05, |
| "loss": 0.0386, |
| "step": 8110 |
| }, |
| { |
| "epoch": 31.23076923076923, |
| "grad_norm": 1.5467346906661987, |
| "learning_rate": 2.0753997120667172e-05, |
| "loss": 0.0423, |
| "step": 8120 |
| }, |
| { |
| "epoch": 31.26923076923077, |
| "grad_norm": 1.2984790802001953, |
| "learning_rate": 2.0731084589120995e-05, |
| "loss": 0.0351, |
| "step": 8130 |
| }, |
| { |
| "epoch": 31.307692307692307, |
| "grad_norm": 1.6691569089889526, |
| "learning_rate": 2.070815638900839e-05, |
| "loss": 0.0441, |
| "step": 8140 |
| }, |
| { |
| "epoch": 31.346153846153847, |
| "grad_norm": 1.67073392868042, |
| "learning_rate": 2.0685212583014186e-05, |
| "loss": 0.0408, |
| "step": 8150 |
| }, |
| { |
| "epoch": 31.384615384615383, |
| "grad_norm": 1.7874646186828613, |
| "learning_rate": 2.0662253233865866e-05, |
| "loss": 0.0401, |
| "step": 8160 |
| }, |
| { |
| "epoch": 31.423076923076923, |
| "grad_norm": 1.6154216527938843, |
| "learning_rate": 2.063927840433342e-05, |
| "loss": 0.0395, |
| "step": 8170 |
| }, |
| { |
| "epoch": 31.46153846153846, |
| "grad_norm": 1.4242808818817139, |
| "learning_rate": 2.0616288157229154e-05, |
| "loss": 0.0424, |
| "step": 8180 |
| }, |
| { |
| "epoch": 31.5, |
| "grad_norm": 1.4706525802612305, |
| "learning_rate": 2.0593282555407522e-05, |
| "loss": 0.0405, |
| "step": 8190 |
| }, |
| { |
| "epoch": 31.53846153846154, |
| "grad_norm": 1.7018382549285889, |
| "learning_rate": 2.057026166176496e-05, |
| "loss": 0.0397, |
| "step": 8200 |
| }, |
| { |
| "epoch": 31.576923076923077, |
| "grad_norm": 1.3753076791763306, |
| "learning_rate": 2.0547225539239715e-05, |
| "loss": 0.0398, |
| "step": 8210 |
| }, |
| { |
| "epoch": 31.615384615384617, |
| "grad_norm": 1.6274869441986084, |
| "learning_rate": 2.0524174250811665e-05, |
| "loss": 0.0442, |
| "step": 8220 |
| }, |
| { |
| "epoch": 31.653846153846153, |
| "grad_norm": 1.7797141075134277, |
| "learning_rate": 2.050110785950216e-05, |
| "loss": 0.041, |
| "step": 8230 |
| }, |
| { |
| "epoch": 31.692307692307693, |
| "grad_norm": 1.6109033823013306, |
| "learning_rate": 2.047802642837382e-05, |
| "loss": 0.0377, |
| "step": 8240 |
| }, |
| { |
| "epoch": 31.73076923076923, |
| "grad_norm": 1.7530934810638428, |
| "learning_rate": 2.0454930020530403e-05, |
| "loss": 0.038, |
| "step": 8250 |
| }, |
| { |
| "epoch": 31.76923076923077, |
| "grad_norm": 1.6273490190505981, |
| "learning_rate": 2.0431818699116606e-05, |
| "loss": 0.0372, |
| "step": 8260 |
| }, |
| { |
| "epoch": 31.807692307692307, |
| "grad_norm": 1.5764533281326294, |
| "learning_rate": 2.04086925273179e-05, |
| "loss": 0.0515, |
| "step": 8270 |
| }, |
| { |
| "epoch": 31.846153846153847, |
| "grad_norm": 1.7474285364151, |
| "learning_rate": 2.0385551568360357e-05, |
| "loss": 0.0389, |
| "step": 8280 |
| }, |
| { |
| "epoch": 31.884615384615383, |
| "grad_norm": 1.3446240425109863, |
| "learning_rate": 2.036239588551047e-05, |
| "loss": 0.0404, |
| "step": 8290 |
| }, |
| { |
| "epoch": 31.923076923076923, |
| "grad_norm": 1.7598605155944824, |
| "learning_rate": 2.0339225542074996e-05, |
| "loss": 0.0351, |
| "step": 8300 |
| }, |
| { |
| "epoch": 31.96153846153846, |
| "grad_norm": 1.6149331331253052, |
| "learning_rate": 2.0316040601400765e-05, |
| "loss": 0.0431, |
| "step": 8310 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 2.087171792984009, |
| "learning_rate": 2.029284112687453e-05, |
| "loss": 0.0421, |
| "step": 8320 |
| }, |
| { |
| "epoch": 32.03846153846154, |
| "grad_norm": 1.3845360279083252, |
| "learning_rate": 2.0269627181922752e-05, |
| "loss": 0.0416, |
| "step": 8330 |
| }, |
| { |
| "epoch": 32.07692307692308, |
| "grad_norm": 1.316144347190857, |
| "learning_rate": 2.0246398830011482e-05, |
| "loss": 0.038, |
| "step": 8340 |
| }, |
| { |
| "epoch": 32.11538461538461, |
| "grad_norm": 1.228372573852539, |
| "learning_rate": 2.0223156134646142e-05, |
| "loss": 0.0416, |
| "step": 8350 |
| }, |
| { |
| "epoch": 32.15384615384615, |
| "grad_norm": 1.584571361541748, |
| "learning_rate": 2.019989915937138e-05, |
| "loss": 0.0395, |
| "step": 8360 |
| }, |
| { |
| "epoch": 32.19230769230769, |
| "grad_norm": 1.7417460680007935, |
| "learning_rate": 2.0176627967770873e-05, |
| "loss": 0.0343, |
| "step": 8370 |
| }, |
| { |
| "epoch": 32.23076923076923, |
| "grad_norm": 1.6731925010681152, |
| "learning_rate": 2.015334262346717e-05, |
| "loss": 0.0407, |
| "step": 8380 |
| }, |
| { |
| "epoch": 32.26923076923077, |
| "grad_norm": 1.829028844833374, |
| "learning_rate": 2.0130043190121515e-05, |
| "loss": 0.0402, |
| "step": 8390 |
| }, |
| { |
| "epoch": 32.30769230769231, |
| "grad_norm": 1.5785481929779053, |
| "learning_rate": 2.0106729731433663e-05, |
| "loss": 0.0342, |
| "step": 8400 |
| }, |
| { |
| "epoch": 32.34615384615385, |
| "grad_norm": 1.5879954099655151, |
| "learning_rate": 2.008340231114173e-05, |
| "loss": 0.0388, |
| "step": 8410 |
| }, |
| { |
| "epoch": 32.38461538461539, |
| "grad_norm": 1.334164023399353, |
| "learning_rate": 2.006006099302199e-05, |
| "loss": 0.0377, |
| "step": 8420 |
| }, |
| { |
| "epoch": 32.42307692307692, |
| "grad_norm": 1.397067904472351, |
| "learning_rate": 2.003670584088871e-05, |
| "loss": 0.0381, |
| "step": 8430 |
| }, |
| { |
| "epoch": 32.46153846153846, |
| "grad_norm": 1.5849800109863281, |
| "learning_rate": 2.001333691859399e-05, |
| "loss": 0.0352, |
| "step": 8440 |
| }, |
| { |
| "epoch": 32.5, |
| "grad_norm": 1.4086744785308838, |
| "learning_rate": 1.9989954290027565e-05, |
| "loss": 0.0381, |
| "step": 8450 |
| }, |
| { |
| "epoch": 32.53846153846154, |
| "grad_norm": 1.6298279762268066, |
| "learning_rate": 1.9966558019116654e-05, |
| "loss": 0.0345, |
| "step": 8460 |
| }, |
| { |
| "epoch": 32.57692307692308, |
| "grad_norm": 1.541572093963623, |
| "learning_rate": 1.9943148169825766e-05, |
| "loss": 0.0393, |
| "step": 8470 |
| }, |
| { |
| "epoch": 32.61538461538461, |
| "grad_norm": 1.4838348627090454, |
| "learning_rate": 1.991972480615653e-05, |
| "loss": 0.0461, |
| "step": 8480 |
| }, |
| { |
| "epoch": 32.65384615384615, |
| "grad_norm": 1.341556429862976, |
| "learning_rate": 1.989628799214754e-05, |
| "loss": 0.0411, |
| "step": 8490 |
| }, |
| { |
| "epoch": 32.69230769230769, |
| "grad_norm": 1.6740994453430176, |
| "learning_rate": 1.987283779187414e-05, |
| "loss": 0.0354, |
| "step": 8500 |
| }, |
| { |
| "epoch": 32.73076923076923, |
| "grad_norm": 1.841543436050415, |
| "learning_rate": 1.9849374269448288e-05, |
| "loss": 0.0407, |
| "step": 8510 |
| }, |
| { |
| "epoch": 32.76923076923077, |
| "grad_norm": 1.9429466724395752, |
| "learning_rate": 1.982589748901836e-05, |
| "loss": 0.0389, |
| "step": 8520 |
| }, |
| { |
| "epoch": 32.80769230769231, |
| "grad_norm": 1.6010141372680664, |
| "learning_rate": 1.9802407514768964e-05, |
| "loss": 0.0367, |
| "step": 8530 |
| }, |
| { |
| "epoch": 32.84615384615385, |
| "grad_norm": 1.6420562267303467, |
| "learning_rate": 1.9778904410920808e-05, |
| "loss": 0.0353, |
| "step": 8540 |
| }, |
| { |
| "epoch": 32.88461538461539, |
| "grad_norm": 1.6107007265090942, |
| "learning_rate": 1.9755388241730475e-05, |
| "loss": 0.0411, |
| "step": 8550 |
| }, |
| { |
| "epoch": 32.92307692307692, |
| "grad_norm": 1.331921935081482, |
| "learning_rate": 1.973185907149027e-05, |
| "loss": 0.0356, |
| "step": 8560 |
| }, |
| { |
| "epoch": 32.96153846153846, |
| "grad_norm": 2.029869556427002, |
| "learning_rate": 1.970831696452805e-05, |
| "loss": 0.0426, |
| "step": 8570 |
| }, |
| { |
| "epoch": 33.0, |
| "grad_norm": 1.8805733919143677, |
| "learning_rate": 1.9684761985207038e-05, |
| "loss": 0.0369, |
| "step": 8580 |
| }, |
| { |
| "epoch": 33.03846153846154, |
| "grad_norm": 1.8220821619033813, |
| "learning_rate": 1.9661194197925644e-05, |
| "loss": 0.0357, |
| "step": 8590 |
| }, |
| { |
| "epoch": 33.07692307692308, |
| "grad_norm": 1.6471298933029175, |
| "learning_rate": 1.9637613667117303e-05, |
| "loss": 0.0432, |
| "step": 8600 |
| }, |
| { |
| "epoch": 33.11538461538461, |
| "grad_norm": 1.5916423797607422, |
| "learning_rate": 1.961402045725028e-05, |
| "loss": 0.0405, |
| "step": 8610 |
| }, |
| { |
| "epoch": 33.15384615384615, |
| "grad_norm": 1.7723677158355713, |
| "learning_rate": 1.9590414632827513e-05, |
| "loss": 0.041, |
| "step": 8620 |
| }, |
| { |
| "epoch": 33.19230769230769, |
| "grad_norm": 1.4833166599273682, |
| "learning_rate": 1.9566796258386424e-05, |
| "loss": 0.0368, |
| "step": 8630 |
| }, |
| { |
| "epoch": 33.23076923076923, |
| "grad_norm": 1.5924839973449707, |
| "learning_rate": 1.9543165398498743e-05, |
| "loss": 0.0378, |
| "step": 8640 |
| }, |
| { |
| "epoch": 33.26923076923077, |
| "grad_norm": 1.5793626308441162, |
| "learning_rate": 1.9519522117770355e-05, |
| "loss": 0.041, |
| "step": 8650 |
| }, |
| { |
| "epoch": 33.30769230769231, |
| "grad_norm": 1.8710663318634033, |
| "learning_rate": 1.9495866480841063e-05, |
| "loss": 0.0384, |
| "step": 8660 |
| }, |
| { |
| "epoch": 33.34615384615385, |
| "grad_norm": 1.8821028470993042, |
| "learning_rate": 1.9472198552384494e-05, |
| "loss": 0.0377, |
| "step": 8670 |
| }, |
| { |
| "epoch": 33.38461538461539, |
| "grad_norm": 1.6655566692352295, |
| "learning_rate": 1.9448518397107848e-05, |
| "loss": 0.0443, |
| "step": 8680 |
| }, |
| { |
| "epoch": 33.42307692307692, |
| "grad_norm": 1.593344807624817, |
| "learning_rate": 1.942482607975177e-05, |
| "loss": 0.0384, |
| "step": 8690 |
| }, |
| { |
| "epoch": 33.46153846153846, |
| "grad_norm": 1.651685118675232, |
| "learning_rate": 1.940112166509016e-05, |
| "loss": 0.0383, |
| "step": 8700 |
| }, |
| { |
| "epoch": 33.5, |
| "grad_norm": 1.4256179332733154, |
| "learning_rate": 1.937740521792996e-05, |
| "loss": 0.0343, |
| "step": 8710 |
| }, |
| { |
| "epoch": 33.53846153846154, |
| "grad_norm": 1.3333659172058105, |
| "learning_rate": 1.935367680311106e-05, |
| "loss": 0.0383, |
| "step": 8720 |
| }, |
| { |
| "epoch": 33.57692307692308, |
| "grad_norm": 1.2656036615371704, |
| "learning_rate": 1.9329936485506012e-05, |
| "loss": 0.0402, |
| "step": 8730 |
| }, |
| { |
| "epoch": 33.61538461538461, |
| "grad_norm": 1.6089822053909302, |
| "learning_rate": 1.930618433001996e-05, |
| "loss": 0.0392, |
| "step": 8740 |
| }, |
| { |
| "epoch": 33.65384615384615, |
| "grad_norm": 1.4962538480758667, |
| "learning_rate": 1.9282420401590377e-05, |
| "loss": 0.0408, |
| "step": 8750 |
| }, |
| { |
| "epoch": 33.69230769230769, |
| "grad_norm": 2.0058789253234863, |
| "learning_rate": 1.925864476518694e-05, |
| "loss": 0.043, |
| "step": 8760 |
| }, |
| { |
| "epoch": 33.73076923076923, |
| "grad_norm": 1.5615150928497314, |
| "learning_rate": 1.9234857485811336e-05, |
| "loss": 0.0369, |
| "step": 8770 |
| }, |
| { |
| "epoch": 33.76923076923077, |
| "grad_norm": 1.595367193222046, |
| "learning_rate": 1.9211058628497066e-05, |
| "loss": 0.041, |
| "step": 8780 |
| }, |
| { |
| "epoch": 33.80769230769231, |
| "grad_norm": 1.1622068881988525, |
| "learning_rate": 1.918724825830931e-05, |
| "loss": 0.0438, |
| "step": 8790 |
| }, |
| { |
| "epoch": 33.84615384615385, |
| "grad_norm": 1.4720163345336914, |
| "learning_rate": 1.9163426440344702e-05, |
| "loss": 0.0397, |
| "step": 8800 |
| }, |
| { |
| "epoch": 33.88461538461539, |
| "grad_norm": 1.658189058303833, |
| "learning_rate": 1.913959323973119e-05, |
| "loss": 0.0411, |
| "step": 8810 |
| }, |
| { |
| "epoch": 33.92307692307692, |
| "grad_norm": 1.8597990274429321, |
| "learning_rate": 1.9115748721627827e-05, |
| "loss": 0.0419, |
| "step": 8820 |
| }, |
| { |
| "epoch": 33.96153846153846, |
| "grad_norm": 1.6139363050460815, |
| "learning_rate": 1.9091892951224614e-05, |
| "loss": 0.0394, |
| "step": 8830 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 1.4286227226257324, |
| "learning_rate": 1.906802599374233e-05, |
| "loss": 0.0352, |
| "step": 8840 |
| }, |
| { |
| "epoch": 34.03846153846154, |
| "grad_norm": 1.7807533740997314, |
| "learning_rate": 1.904414791443231e-05, |
| "loss": 0.0425, |
| "step": 8850 |
| }, |
| { |
| "epoch": 34.07692307692308, |
| "grad_norm": 1.8693132400512695, |
| "learning_rate": 1.9020258778576324e-05, |
| "loss": 0.0416, |
| "step": 8860 |
| }, |
| { |
| "epoch": 34.11538461538461, |
| "grad_norm": 1.5255342721939087, |
| "learning_rate": 1.8996358651486347e-05, |
| "loss": 0.0364, |
| "step": 8870 |
| }, |
| { |
| "epoch": 34.15384615384615, |
| "grad_norm": 1.4123107194900513, |
| "learning_rate": 1.8972447598504417e-05, |
| "loss": 0.0409, |
| "step": 8880 |
| }, |
| { |
| "epoch": 34.19230769230769, |
| "grad_norm": 1.831511378288269, |
| "learning_rate": 1.8948525685002438e-05, |
| "loss": 0.0353, |
| "step": 8890 |
| }, |
| { |
| "epoch": 34.23076923076923, |
| "grad_norm": 1.5092872381210327, |
| "learning_rate": 1.892459297638201e-05, |
| "loss": 0.043, |
| "step": 8900 |
| }, |
| { |
| "epoch": 34.26923076923077, |
| "grad_norm": 1.5909144878387451, |
| "learning_rate": 1.890064953807425e-05, |
| "loss": 0.034, |
| "step": 8910 |
| }, |
| { |
| "epoch": 34.30769230769231, |
| "grad_norm": 1.4315061569213867, |
| "learning_rate": 1.8876695435539596e-05, |
| "loss": 0.0371, |
| "step": 8920 |
| }, |
| { |
| "epoch": 34.34615384615385, |
| "grad_norm": 1.372524380683899, |
| "learning_rate": 1.8852730734267653e-05, |
| "loss": 0.0348, |
| "step": 8930 |
| }, |
| { |
| "epoch": 34.38461538461539, |
| "grad_norm": 1.4289276599884033, |
| "learning_rate": 1.8828755499776997e-05, |
| "loss": 0.0376, |
| "step": 8940 |
| }, |
| { |
| "epoch": 34.42307692307692, |
| "grad_norm": 1.375580072402954, |
| "learning_rate": 1.8804769797615007e-05, |
| "loss": 0.0386, |
| "step": 8950 |
| }, |
| { |
| "epoch": 34.46153846153846, |
| "grad_norm": 1.6561743021011353, |
| "learning_rate": 1.8780773693357675e-05, |
| "loss": 0.0415, |
| "step": 8960 |
| }, |
| { |
| "epoch": 34.5, |
| "grad_norm": 1.3034884929656982, |
| "learning_rate": 1.8756767252609433e-05, |
| "loss": 0.0337, |
| "step": 8970 |
| }, |
| { |
| "epoch": 34.53846153846154, |
| "grad_norm": 1.5481013059616089, |
| "learning_rate": 1.8732750541002974e-05, |
| "loss": 0.0389, |
| "step": 8980 |
| }, |
| { |
| "epoch": 34.57692307692308, |
| "grad_norm": 1.4725347757339478, |
| "learning_rate": 1.870872362419907e-05, |
| "loss": 0.0392, |
| "step": 8990 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "grad_norm": 1.3678796291351318, |
| "learning_rate": 1.8684686567886398e-05, |
| "loss": 0.0432, |
| "step": 9000 |
| }, |
| { |
| "epoch": 34.65384615384615, |
| "grad_norm": 1.5329874753952026, |
| "learning_rate": 1.8660639437781344e-05, |
| "loss": 0.0354, |
| "step": 9010 |
| }, |
| { |
| "epoch": 34.69230769230769, |
| "grad_norm": 1.523851990699768, |
| "learning_rate": 1.8636582299627854e-05, |
| "loss": 0.0356, |
| "step": 9020 |
| }, |
| { |
| "epoch": 34.73076923076923, |
| "grad_norm": 1.2790080308914185, |
| "learning_rate": 1.8612515219197215e-05, |
| "loss": 0.0354, |
| "step": 9030 |
| }, |
| { |
| "epoch": 34.76923076923077, |
| "grad_norm": 1.50131356716156, |
| "learning_rate": 1.858843826228791e-05, |
| "loss": 0.035, |
| "step": 9040 |
| }, |
| { |
| "epoch": 34.80769230769231, |
| "grad_norm": 1.5479196310043335, |
| "learning_rate": 1.8564351494725423e-05, |
| "loss": 0.0364, |
| "step": 9050 |
| }, |
| { |
| "epoch": 34.84615384615385, |
| "grad_norm": 1.5020997524261475, |
| "learning_rate": 1.8540254982362053e-05, |
| "loss": 0.0375, |
| "step": 9060 |
| }, |
| { |
| "epoch": 34.88461538461539, |
| "grad_norm": 1.6902251243591309, |
| "learning_rate": 1.8516148791076743e-05, |
| "loss": 0.0386, |
| "step": 9070 |
| }, |
| { |
| "epoch": 34.92307692307692, |
| "grad_norm": 1.5879284143447876, |
| "learning_rate": 1.8492032986774904e-05, |
| "loss": 0.0357, |
| "step": 9080 |
| }, |
| { |
| "epoch": 34.96153846153846, |
| "grad_norm": 1.6842821836471558, |
| "learning_rate": 1.8467907635388225e-05, |
| "loss": 0.037, |
| "step": 9090 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 1.4614428281784058, |
| "learning_rate": 1.844377280287449e-05, |
| "loss": 0.038, |
| "step": 9100 |
| }, |
| { |
| "epoch": 35.03846153846154, |
| "grad_norm": 1.5030885934829712, |
| "learning_rate": 1.8419628555217407e-05, |
| "loss": 0.0402, |
| "step": 9110 |
| }, |
| { |
| "epoch": 35.07692307692308, |
| "grad_norm": 1.419340968132019, |
| "learning_rate": 1.839547495842644e-05, |
| "loss": 0.0385, |
| "step": 9120 |
| }, |
| { |
| "epoch": 35.11538461538461, |
| "grad_norm": 1.6496049165725708, |
| "learning_rate": 1.8371312078536587e-05, |
| "loss": 0.0315, |
| "step": 9130 |
| }, |
| { |
| "epoch": 35.15384615384615, |
| "grad_norm": 1.2874219417572021, |
| "learning_rate": 1.834713998160825e-05, |
| "loss": 0.0388, |
| "step": 9140 |
| }, |
| { |
| "epoch": 35.19230769230769, |
| "grad_norm": 1.6375945806503296, |
| "learning_rate": 1.832295873372701e-05, |
| "loss": 0.0419, |
| "step": 9150 |
| }, |
| { |
| "epoch": 35.23076923076923, |
| "grad_norm": 1.6813386678695679, |
| "learning_rate": 1.8298768401003477e-05, |
| "loss": 0.0379, |
| "step": 9160 |
| }, |
| { |
| "epoch": 35.26923076923077, |
| "grad_norm": 1.5185751914978027, |
| "learning_rate": 1.8274569049573103e-05, |
| "loss": 0.0415, |
| "step": 9170 |
| }, |
| { |
| "epoch": 35.30769230769231, |
| "grad_norm": 1.4227298498153687, |
| "learning_rate": 1.8250360745595983e-05, |
| "loss": 0.0394, |
| "step": 9180 |
| }, |
| { |
| "epoch": 35.34615384615385, |
| "grad_norm": 1.1683084964752197, |
| "learning_rate": 1.8226143555256703e-05, |
| "loss": 0.0341, |
| "step": 9190 |
| }, |
| { |
| "epoch": 35.38461538461539, |
| "grad_norm": 1.2086158990859985, |
| "learning_rate": 1.820191754476413e-05, |
| "loss": 0.0395, |
| "step": 9200 |
| }, |
| { |
| "epoch": 35.42307692307692, |
| "grad_norm": 1.3567473888397217, |
| "learning_rate": 1.8177682780351256e-05, |
| "loss": 0.0374, |
| "step": 9210 |
| }, |
| { |
| "epoch": 35.46153846153846, |
| "grad_norm": 1.413490653038025, |
| "learning_rate": 1.8153439328275e-05, |
| "loss": 0.0435, |
| "step": 9220 |
| }, |
| { |
| "epoch": 35.5, |
| "grad_norm": 1.8671621084213257, |
| "learning_rate": 1.8129187254816035e-05, |
| "loss": 0.0396, |
| "step": 9230 |
| }, |
| { |
| "epoch": 35.53846153846154, |
| "grad_norm": 1.7603775262832642, |
| "learning_rate": 1.81049266262786e-05, |
| "loss": 0.0367, |
| "step": 9240 |
| }, |
| { |
| "epoch": 35.57692307692308, |
| "grad_norm": 1.467278003692627, |
| "learning_rate": 1.808065750899033e-05, |
| "loss": 0.041, |
| "step": 9250 |
| }, |
| { |
| "epoch": 35.61538461538461, |
| "grad_norm": 1.8476868867874146, |
| "learning_rate": 1.8056379969302066e-05, |
| "loss": 0.0373, |
| "step": 9260 |
| }, |
| { |
| "epoch": 35.65384615384615, |
| "grad_norm": 1.9753694534301758, |
| "learning_rate": 1.8032094073587675e-05, |
| "loss": 0.0374, |
| "step": 9270 |
| }, |
| { |
| "epoch": 35.69230769230769, |
| "grad_norm": 1.9146374464035034, |
| "learning_rate": 1.800779988824387e-05, |
| "loss": 0.0344, |
| "step": 9280 |
| }, |
| { |
| "epoch": 35.73076923076923, |
| "grad_norm": 1.2956942319869995, |
| "learning_rate": 1.7983497479690018e-05, |
| "loss": 0.0348, |
| "step": 9290 |
| }, |
| { |
| "epoch": 35.76923076923077, |
| "grad_norm": 1.381451964378357, |
| "learning_rate": 1.795918691436798e-05, |
| "loss": 0.0394, |
| "step": 9300 |
| }, |
| { |
| "epoch": 35.80769230769231, |
| "grad_norm": 1.3659331798553467, |
| "learning_rate": 1.7934868258741917e-05, |
| "loss": 0.0366, |
| "step": 9310 |
| }, |
| { |
| "epoch": 35.84615384615385, |
| "grad_norm": 1.5138591527938843, |
| "learning_rate": 1.79105415792981e-05, |
| "loss": 0.037, |
| "step": 9320 |
| }, |
| { |
| "epoch": 35.88461538461539, |
| "grad_norm": 1.5586953163146973, |
| "learning_rate": 1.788620694254475e-05, |
| "loss": 0.0409, |
| "step": 9330 |
| }, |
| { |
| "epoch": 35.92307692307692, |
| "grad_norm": 1.2028257846832275, |
| "learning_rate": 1.7861864415011827e-05, |
| "loss": 0.0409, |
| "step": 9340 |
| }, |
| { |
| "epoch": 35.96153846153846, |
| "grad_norm": 1.3556921482086182, |
| "learning_rate": 1.783751406325087e-05, |
| "loss": 0.0334, |
| "step": 9350 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 1.3308838605880737, |
| "learning_rate": 1.7813155953834814e-05, |
| "loss": 0.0397, |
| "step": 9360 |
| }, |
| { |
| "epoch": 36.03846153846154, |
| "grad_norm": 1.681963324546814, |
| "learning_rate": 1.7788790153357803e-05, |
| "loss": 0.0436, |
| "step": 9370 |
| }, |
| { |
| "epoch": 36.07692307692308, |
| "grad_norm": 1.1227318048477173, |
| "learning_rate": 1.7764416728435e-05, |
| "loss": 0.0422, |
| "step": 9380 |
| }, |
| { |
| "epoch": 36.11538461538461, |
| "grad_norm": 1.438764214515686, |
| "learning_rate": 1.774003574570242e-05, |
| "loss": 0.0415, |
| "step": 9390 |
| }, |
| { |
| "epoch": 36.15384615384615, |
| "grad_norm": 1.7580561637878418, |
| "learning_rate": 1.7715647271816744e-05, |
| "loss": 0.0349, |
| "step": 9400 |
| }, |
| { |
| "epoch": 36.19230769230769, |
| "grad_norm": 1.7516335248947144, |
| "learning_rate": 1.769125137345512e-05, |
| "loss": 0.0359, |
| "step": 9410 |
| }, |
| { |
| "epoch": 36.23076923076923, |
| "grad_norm": 1.7185901403427124, |
| "learning_rate": 1.7666848117315008e-05, |
| "loss": 0.0396, |
| "step": 9420 |
| }, |
| { |
| "epoch": 36.26923076923077, |
| "grad_norm": 1.6472246646881104, |
| "learning_rate": 1.7642437570113974e-05, |
| "loss": 0.0354, |
| "step": 9430 |
| }, |
| { |
| "epoch": 36.30769230769231, |
| "grad_norm": 1.1611000299453735, |
| "learning_rate": 1.7618019798589525e-05, |
| "loss": 0.0426, |
| "step": 9440 |
| }, |
| { |
| "epoch": 36.34615384615385, |
| "grad_norm": 1.5789954662322998, |
| "learning_rate": 1.7593594869498915e-05, |
| "loss": 0.0346, |
| "step": 9450 |
| }, |
| { |
| "epoch": 36.38461538461539, |
| "grad_norm": 1.232185959815979, |
| "learning_rate": 1.7569162849618966e-05, |
| "loss": 0.0392, |
| "step": 9460 |
| }, |
| { |
| "epoch": 36.42307692307692, |
| "grad_norm": 1.2570834159851074, |
| "learning_rate": 1.75447238057459e-05, |
| "loss": 0.0379, |
| "step": 9470 |
| }, |
| { |
| "epoch": 36.46153846153846, |
| "grad_norm": 1.4947396516799927, |
| "learning_rate": 1.752027780469511e-05, |
| "loss": 0.0367, |
| "step": 9480 |
| }, |
| { |
| "epoch": 36.5, |
| "grad_norm": 1.4545466899871826, |
| "learning_rate": 1.7495824913301043e-05, |
| "loss": 0.0419, |
| "step": 9490 |
| }, |
| { |
| "epoch": 36.53846153846154, |
| "grad_norm": 1.4352824687957764, |
| "learning_rate": 1.7471365198416957e-05, |
| "loss": 0.0381, |
| "step": 9500 |
| }, |
| { |
| "epoch": 36.57692307692308, |
| "grad_norm": 1.5588959455490112, |
| "learning_rate": 1.7446898726914797e-05, |
| "loss": 0.0366, |
| "step": 9510 |
| }, |
| { |
| "epoch": 36.61538461538461, |
| "grad_norm": 1.6907953023910522, |
| "learning_rate": 1.742242556568495e-05, |
| "loss": 0.0425, |
| "step": 9520 |
| }, |
| { |
| "epoch": 36.65384615384615, |
| "grad_norm": 1.4294540882110596, |
| "learning_rate": 1.73979457816361e-05, |
| "loss": 0.0387, |
| "step": 9530 |
| }, |
| { |
| "epoch": 36.69230769230769, |
| "grad_norm": 1.4833825826644897, |
| "learning_rate": 1.7373459441695058e-05, |
| "loss": 0.0427, |
| "step": 9540 |
| }, |
| { |
| "epoch": 36.73076923076923, |
| "grad_norm": 1.466599702835083, |
| "learning_rate": 1.7348966612806524e-05, |
| "loss": 0.0419, |
| "step": 9550 |
| }, |
| { |
| "epoch": 36.76923076923077, |
| "grad_norm": 1.2378714084625244, |
| "learning_rate": 1.7324467361932973e-05, |
| "loss": 0.0392, |
| "step": 9560 |
| }, |
| { |
| "epoch": 36.80769230769231, |
| "grad_norm": 1.413780689239502, |
| "learning_rate": 1.729996175605441e-05, |
| "loss": 0.0402, |
| "step": 9570 |
| }, |
| { |
| "epoch": 36.84615384615385, |
| "grad_norm": 1.374563217163086, |
| "learning_rate": 1.7275449862168235e-05, |
| "loss": 0.0395, |
| "step": 9580 |
| }, |
| { |
| "epoch": 36.88461538461539, |
| "grad_norm": 1.3563233613967896, |
| "learning_rate": 1.725093174728902e-05, |
| "loss": 0.0323, |
| "step": 9590 |
| }, |
| { |
| "epoch": 36.92307692307692, |
| "grad_norm": 1.4940588474273682, |
| "learning_rate": 1.7226407478448357e-05, |
| "loss": 0.0384, |
| "step": 9600 |
| }, |
| { |
| "epoch": 36.96153846153846, |
| "grad_norm": 1.593131422996521, |
| "learning_rate": 1.7201877122694666e-05, |
| "loss": 0.0421, |
| "step": 9610 |
| }, |
| { |
| "epoch": 37.0, |
| "grad_norm": 1.7007324695587158, |
| "learning_rate": 1.7177340747093e-05, |
| "loss": 0.0394, |
| "step": 9620 |
| }, |
| { |
| "epoch": 37.03846153846154, |
| "grad_norm": 1.4172247648239136, |
| "learning_rate": 1.7152798418724873e-05, |
| "loss": 0.0414, |
| "step": 9630 |
| }, |
| { |
| "epoch": 37.07692307692308, |
| "grad_norm": 1.3566648960113525, |
| "learning_rate": 1.712825020468807e-05, |
| "loss": 0.0356, |
| "step": 9640 |
| }, |
| { |
| "epoch": 37.11538461538461, |
| "grad_norm": 1.1719372272491455, |
| "learning_rate": 1.710369617209648e-05, |
| "loss": 0.0407, |
| "step": 9650 |
| }, |
| { |
| "epoch": 37.15384615384615, |
| "grad_norm": 1.3853756189346313, |
| "learning_rate": 1.7079136388079884e-05, |
| "loss": 0.0379, |
| "step": 9660 |
| }, |
| { |
| "epoch": 37.19230769230769, |
| "grad_norm": 1.4619250297546387, |
| "learning_rate": 1.7054570919783796e-05, |
| "loss": 0.0378, |
| "step": 9670 |
| }, |
| { |
| "epoch": 37.23076923076923, |
| "grad_norm": 1.4194920063018799, |
| "learning_rate": 1.7029999834369264e-05, |
| "loss": 0.0381, |
| "step": 9680 |
| }, |
| { |
| "epoch": 37.26923076923077, |
| "grad_norm": 1.2735564708709717, |
| "learning_rate": 1.7005423199012696e-05, |
| "loss": 0.0339, |
| "step": 9690 |
| }, |
| { |
| "epoch": 37.30769230769231, |
| "grad_norm": 1.3071863651275635, |
| "learning_rate": 1.6980841080905687e-05, |
| "loss": 0.0344, |
| "step": 9700 |
| }, |
| { |
| "epoch": 37.34615384615385, |
| "grad_norm": 1.2849153280258179, |
| "learning_rate": 1.6956253547254798e-05, |
| "loss": 0.0406, |
| "step": 9710 |
| }, |
| { |
| "epoch": 37.38461538461539, |
| "grad_norm": 1.2601699829101562, |
| "learning_rate": 1.693166066528141e-05, |
| "loss": 0.0373, |
| "step": 9720 |
| }, |
| { |
| "epoch": 37.42307692307692, |
| "grad_norm": 1.2954739332199097, |
| "learning_rate": 1.690706250222152e-05, |
| "loss": 0.0404, |
| "step": 9730 |
| }, |
| { |
| "epoch": 37.46153846153846, |
| "grad_norm": 1.5108919143676758, |
| "learning_rate": 1.6882459125325573e-05, |
| "loss": 0.0372, |
| "step": 9740 |
| }, |
| { |
| "epoch": 37.5, |
| "grad_norm": 1.3737967014312744, |
| "learning_rate": 1.685785060185826e-05, |
| "loss": 0.036, |
| "step": 9750 |
| }, |
| { |
| "epoch": 37.53846153846154, |
| "grad_norm": 1.3580888509750366, |
| "learning_rate": 1.683323699909834e-05, |
| "loss": 0.0366, |
| "step": 9760 |
| }, |
| { |
| "epoch": 37.57692307692308, |
| "grad_norm": 1.485553503036499, |
| "learning_rate": 1.6808618384338472e-05, |
| "loss": 0.0369, |
| "step": 9770 |
| }, |
| { |
| "epoch": 37.61538461538461, |
| "grad_norm": 1.2370328903198242, |
| "learning_rate": 1.6783994824885e-05, |
| "loss": 0.0355, |
| "step": 9780 |
| }, |
| { |
| "epoch": 37.65384615384615, |
| "grad_norm": 1.3363810777664185, |
| "learning_rate": 1.6759366388057795e-05, |
| "loss": 0.0399, |
| "step": 9790 |
| }, |
| { |
| "epoch": 37.69230769230769, |
| "grad_norm": 1.4240365028381348, |
| "learning_rate": 1.6734733141190073e-05, |
| "loss": 0.0322, |
| "step": 9800 |
| }, |
| { |
| "epoch": 37.73076923076923, |
| "grad_norm": 1.1581053733825684, |
| "learning_rate": 1.6710095151628182e-05, |
| "loss": 0.0398, |
| "step": 9810 |
| }, |
| { |
| "epoch": 37.76923076923077, |
| "grad_norm": 1.2987163066864014, |
| "learning_rate": 1.668545248673144e-05, |
| "loss": 0.0326, |
| "step": 9820 |
| }, |
| { |
| "epoch": 37.80769230769231, |
| "grad_norm": 1.334222674369812, |
| "learning_rate": 1.6660805213871962e-05, |
| "loss": 0.0434, |
| "step": 9830 |
| }, |
| { |
| "epoch": 37.84615384615385, |
| "grad_norm": 1.4126960039138794, |
| "learning_rate": 1.663615340043445e-05, |
| "loss": 0.0402, |
| "step": 9840 |
| }, |
| { |
| "epoch": 37.88461538461539, |
| "grad_norm": 1.3437000513076782, |
| "learning_rate": 1.6611497113816014e-05, |
| "loss": 0.0424, |
| "step": 9850 |
| }, |
| { |
| "epoch": 37.92307692307692, |
| "grad_norm": 1.3839226961135864, |
| "learning_rate": 1.6586836421426007e-05, |
| "loss": 0.038, |
| "step": 9860 |
| }, |
| { |
| "epoch": 37.96153846153846, |
| "grad_norm": 1.3781486749649048, |
| "learning_rate": 1.6562171390685815e-05, |
| "loss": 0.0328, |
| "step": 9870 |
| }, |
| { |
| "epoch": 38.0, |
| "grad_norm": 1.212611436843872, |
| "learning_rate": 1.653750208902869e-05, |
| "loss": 0.0372, |
| "step": 9880 |
| }, |
| { |
| "epoch": 38.03846153846154, |
| "grad_norm": 1.7444937229156494, |
| "learning_rate": 1.6512828583899562e-05, |
| "loss": 0.042, |
| "step": 9890 |
| }, |
| { |
| "epoch": 38.07692307692308, |
| "grad_norm": 1.4450607299804688, |
| "learning_rate": 1.648815094275486e-05, |
| "loss": 0.0368, |
| "step": 9900 |
| }, |
| { |
| "epoch": 38.11538461538461, |
| "grad_norm": 1.3216570615768433, |
| "learning_rate": 1.6463469233062302e-05, |
| "loss": 0.0377, |
| "step": 9910 |
| }, |
| { |
| "epoch": 38.15384615384615, |
| "grad_norm": 1.340665578842163, |
| "learning_rate": 1.6438783522300742e-05, |
| "loss": 0.0398, |
| "step": 9920 |
| }, |
| { |
| "epoch": 38.19230769230769, |
| "grad_norm": 1.386986255645752, |
| "learning_rate": 1.641409387795997e-05, |
| "loss": 0.0314, |
| "step": 9930 |
| }, |
| { |
| "epoch": 38.23076923076923, |
| "grad_norm": 1.4249019622802734, |
| "learning_rate": 1.6389400367540534e-05, |
| "loss": 0.0438, |
| "step": 9940 |
| }, |
| { |
| "epoch": 38.26923076923077, |
| "grad_norm": 1.537424087524414, |
| "learning_rate": 1.6364703058553552e-05, |
| "loss": 0.0404, |
| "step": 9950 |
| }, |
| { |
| "epoch": 38.30769230769231, |
| "grad_norm": 1.465016484260559, |
| "learning_rate": 1.6340002018520512e-05, |
| "loss": 0.0371, |
| "step": 9960 |
| }, |
| { |
| "epoch": 38.34615384615385, |
| "grad_norm": 1.5465450286865234, |
| "learning_rate": 1.6315297314973126e-05, |
| "loss": 0.0351, |
| "step": 9970 |
| }, |
| { |
| "epoch": 38.38461538461539, |
| "grad_norm": 1.1357992887496948, |
| "learning_rate": 1.6290589015453102e-05, |
| "loss": 0.0358, |
| "step": 9980 |
| }, |
| { |
| "epoch": 38.42307692307692, |
| "grad_norm": 1.0869678258895874, |
| "learning_rate": 1.6265877187511993e-05, |
| "loss": 0.0376, |
| "step": 9990 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 1.5823304653167725, |
| "learning_rate": 1.6241161898710993e-05, |
| "loss": 0.0408, |
| "step": 10000 |
| }, |
| { |
| "epoch": 38.5, |
| "grad_norm": 1.1973952054977417, |
| "learning_rate": 1.6216443216620752e-05, |
| "loss": 0.0364, |
| "step": 10010 |
| }, |
| { |
| "epoch": 38.53846153846154, |
| "grad_norm": 1.229051947593689, |
| "learning_rate": 1.6191721208821208e-05, |
| "loss": 0.0372, |
| "step": 10020 |
| }, |
| { |
| "epoch": 38.57692307692308, |
| "grad_norm": 1.330804467201233, |
| "learning_rate": 1.6166995942901382e-05, |
| "loss": 0.0371, |
| "step": 10030 |
| }, |
| { |
| "epoch": 38.61538461538461, |
| "grad_norm": 1.4124090671539307, |
| "learning_rate": 1.614226748645921e-05, |
| "loss": 0.0433, |
| "step": 10040 |
| }, |
| { |
| "epoch": 38.65384615384615, |
| "grad_norm": 1.0600825548171997, |
| "learning_rate": 1.6117535907101354e-05, |
| "loss": 0.0326, |
| "step": 10050 |
| }, |
| { |
| "epoch": 38.69230769230769, |
| "grad_norm": 1.338943600654602, |
| "learning_rate": 1.6092801272442996e-05, |
| "loss": 0.0392, |
| "step": 10060 |
| }, |
| { |
| "epoch": 38.73076923076923, |
| "grad_norm": 1.3412127494812012, |
| "learning_rate": 1.606806365010769e-05, |
| "loss": 0.0368, |
| "step": 10070 |
| }, |
| { |
| "epoch": 38.76923076923077, |
| "grad_norm": 1.6434335708618164, |
| "learning_rate": 1.6043323107727143e-05, |
| "loss": 0.0362, |
| "step": 10080 |
| }, |
| { |
| "epoch": 38.80769230769231, |
| "grad_norm": 1.3812055587768555, |
| "learning_rate": 1.6018579712941064e-05, |
| "loss": 0.0343, |
| "step": 10090 |
| }, |
| { |
| "epoch": 38.84615384615385, |
| "grad_norm": 1.2723709344863892, |
| "learning_rate": 1.599383353339694e-05, |
| "loss": 0.0361, |
| "step": 10100 |
| }, |
| { |
| "epoch": 38.88461538461539, |
| "grad_norm": 1.6107410192489624, |
| "learning_rate": 1.596908463674989e-05, |
| "loss": 0.0385, |
| "step": 10110 |
| }, |
| { |
| "epoch": 38.92307692307692, |
| "grad_norm": 1.566305160522461, |
| "learning_rate": 1.5944333090662442e-05, |
| "loss": 0.0364, |
| "step": 10120 |
| }, |
| { |
| "epoch": 38.96153846153846, |
| "grad_norm": 1.3718688488006592, |
| "learning_rate": 1.5919578962804386e-05, |
| "loss": 0.0349, |
| "step": 10130 |
| }, |
| { |
| "epoch": 39.0, |
| "grad_norm": 1.5631097555160522, |
| "learning_rate": 1.5894822320852563e-05, |
| "loss": 0.0347, |
| "step": 10140 |
| }, |
| { |
| "epoch": 39.03846153846154, |
| "grad_norm": 1.1739816665649414, |
| "learning_rate": 1.5870063232490677e-05, |
| "loss": 0.0339, |
| "step": 10150 |
| }, |
| { |
| "epoch": 39.07692307692308, |
| "grad_norm": 1.4080265760421753, |
| "learning_rate": 1.5845301765409144e-05, |
| "loss": 0.0341, |
| "step": 10160 |
| }, |
| { |
| "epoch": 39.11538461538461, |
| "grad_norm": 1.1734020709991455, |
| "learning_rate": 1.5820537987304856e-05, |
| "loss": 0.0375, |
| "step": 10170 |
| }, |
| { |
| "epoch": 39.15384615384615, |
| "grad_norm": 1.4352694749832153, |
| "learning_rate": 1.5795771965881044e-05, |
| "loss": 0.0369, |
| "step": 10180 |
| }, |
| { |
| "epoch": 39.19230769230769, |
| "grad_norm": 1.4638075828552246, |
| "learning_rate": 1.577100376884707e-05, |
| "loss": 0.0376, |
| "step": 10190 |
| }, |
| { |
| "epoch": 39.23076923076923, |
| "grad_norm": 1.5176154375076294, |
| "learning_rate": 1.5746233463918226e-05, |
| "loss": 0.0351, |
| "step": 10200 |
| }, |
| { |
| "epoch": 39.26923076923077, |
| "grad_norm": 1.6033369302749634, |
| "learning_rate": 1.572146111881559e-05, |
| "loss": 0.0384, |
| "step": 10210 |
| }, |
| { |
| "epoch": 39.30769230769231, |
| "grad_norm": 1.4504886865615845, |
| "learning_rate": 1.56966868012658e-05, |
| "loss": 0.0355, |
| "step": 10220 |
| }, |
| { |
| "epoch": 39.34615384615385, |
| "grad_norm": 1.1136956214904785, |
| "learning_rate": 1.56719105790009e-05, |
| "loss": 0.0376, |
| "step": 10230 |
| }, |
| { |
| "epoch": 39.38461538461539, |
| "grad_norm": 1.2752516269683838, |
| "learning_rate": 1.5647132519758135e-05, |
| "loss": 0.034, |
| "step": 10240 |
| }, |
| { |
| "epoch": 39.42307692307692, |
| "grad_norm": 1.298332929611206, |
| "learning_rate": 1.562235269127977e-05, |
| "loss": 0.0348, |
| "step": 10250 |
| }, |
| { |
| "epoch": 39.46153846153846, |
| "grad_norm": 1.5165218114852905, |
| "learning_rate": 1.5597571161312914e-05, |
| "loss": 0.0365, |
| "step": 10260 |
| }, |
| { |
| "epoch": 39.5, |
| "grad_norm": 1.3248893022537231, |
| "learning_rate": 1.557278799760932e-05, |
| "loss": 0.0345, |
| "step": 10270 |
| }, |
| { |
| "epoch": 39.53846153846154, |
| "grad_norm": 1.1644551753997803, |
| "learning_rate": 1.5548003267925214e-05, |
| "loss": 0.045, |
| "step": 10280 |
| }, |
| { |
| "epoch": 39.57692307692308, |
| "grad_norm": 1.4049543142318726, |
| "learning_rate": 1.5523217040021094e-05, |
| "loss": 0.0405, |
| "step": 10290 |
| }, |
| { |
| "epoch": 39.61538461538461, |
| "grad_norm": 1.3734569549560547, |
| "learning_rate": 1.549842938166157e-05, |
| "loss": 0.0342, |
| "step": 10300 |
| }, |
| { |
| "epoch": 39.65384615384615, |
| "grad_norm": 1.476423978805542, |
| "learning_rate": 1.5473640360615146e-05, |
| "loss": 0.0358, |
| "step": 10310 |
| }, |
| { |
| "epoch": 39.69230769230769, |
| "grad_norm": 1.3189231157302856, |
| "learning_rate": 1.5448850044654063e-05, |
| "loss": 0.0391, |
| "step": 10320 |
| }, |
| { |
| "epoch": 39.73076923076923, |
| "grad_norm": 1.5631731748580933, |
| "learning_rate": 1.5424058501554102e-05, |
| "loss": 0.0347, |
| "step": 10330 |
| }, |
| { |
| "epoch": 39.76923076923077, |
| "grad_norm": 1.3164517879486084, |
| "learning_rate": 1.5399265799094383e-05, |
| "loss": 0.0333, |
| "step": 10340 |
| }, |
| { |
| "epoch": 39.80769230769231, |
| "grad_norm": 1.293961524963379, |
| "learning_rate": 1.537447200505722e-05, |
| "loss": 0.0358, |
| "step": 10350 |
| }, |
| { |
| "epoch": 39.84615384615385, |
| "grad_norm": 1.3249964714050293, |
| "learning_rate": 1.5349677187227892e-05, |
| "loss": 0.0297, |
| "step": 10360 |
| }, |
| { |
| "epoch": 39.88461538461539, |
| "grad_norm": 1.552517056465149, |
| "learning_rate": 1.532488141339449e-05, |
| "loss": 0.0396, |
| "step": 10370 |
| }, |
| { |
| "epoch": 39.92307692307692, |
| "grad_norm": 1.540193796157837, |
| "learning_rate": 1.5300084751347703e-05, |
| "loss": 0.0368, |
| "step": 10380 |
| }, |
| { |
| "epoch": 39.96153846153846, |
| "grad_norm": 1.5178617238998413, |
| "learning_rate": 1.527528726888067e-05, |
| "loss": 0.032, |
| "step": 10390 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 1.5166221857070923, |
| "learning_rate": 1.5250489033788757e-05, |
| "loss": 0.0371, |
| "step": 10400 |
| }, |
| { |
| "epoch": 40.03846153846154, |
| "grad_norm": 1.6149933338165283, |
| "learning_rate": 1.5225690113869383e-05, |
| "loss": 0.0355, |
| "step": 10410 |
| }, |
| { |
| "epoch": 40.07692307692308, |
| "grad_norm": 1.4362809658050537, |
| "learning_rate": 1.5200890576921863e-05, |
| "loss": 0.0354, |
| "step": 10420 |
| }, |
| { |
| "epoch": 40.11538461538461, |
| "grad_norm": 1.6698564291000366, |
| "learning_rate": 1.5176090490747174e-05, |
| "loss": 0.0343, |
| "step": 10430 |
| }, |
| { |
| "epoch": 40.15384615384615, |
| "grad_norm": 1.6468229293823242, |
| "learning_rate": 1.5151289923147806e-05, |
| "loss": 0.0376, |
| "step": 10440 |
| }, |
| { |
| "epoch": 40.19230769230769, |
| "grad_norm": 1.571349859237671, |
| "learning_rate": 1.5126488941927568e-05, |
| "loss": 0.0365, |
| "step": 10450 |
| }, |
| { |
| "epoch": 40.23076923076923, |
| "grad_norm": 1.5099705457687378, |
| "learning_rate": 1.5101687614891385e-05, |
| "loss": 0.0349, |
| "step": 10460 |
| }, |
| { |
| "epoch": 40.26923076923077, |
| "grad_norm": 1.4673430919647217, |
| "learning_rate": 1.5076886009845157e-05, |
| "loss": 0.0357, |
| "step": 10470 |
| }, |
| { |
| "epoch": 40.30769230769231, |
| "grad_norm": 1.2761073112487793, |
| "learning_rate": 1.5052084194595507e-05, |
| "loss": 0.0353, |
| "step": 10480 |
| }, |
| { |
| "epoch": 40.34615384615385, |
| "grad_norm": 1.3340052366256714, |
| "learning_rate": 1.5027282236949662e-05, |
| "loss": 0.0337, |
| "step": 10490 |
| }, |
| { |
| "epoch": 40.38461538461539, |
| "grad_norm": 1.1699473857879639, |
| "learning_rate": 1.5002480204715218e-05, |
| "loss": 0.0321, |
| "step": 10500 |
| }, |
| { |
| "epoch": 40.42307692307692, |
| "grad_norm": 1.432531714439392, |
| "learning_rate": 1.4977678165699992e-05, |
| "loss": 0.038, |
| "step": 10510 |
| }, |
| { |
| "epoch": 40.46153846153846, |
| "grad_norm": 1.7699759006500244, |
| "learning_rate": 1.4952876187711806e-05, |
| "loss": 0.0413, |
| "step": 10520 |
| }, |
| { |
| "epoch": 40.5, |
| "grad_norm": 1.4600393772125244, |
| "learning_rate": 1.4928074338558326e-05, |
| "loss": 0.039, |
| "step": 10530 |
| }, |
| { |
| "epoch": 40.53846153846154, |
| "grad_norm": 1.0337119102478027, |
| "learning_rate": 1.4903272686046857e-05, |
| "loss": 0.0335, |
| "step": 10540 |
| }, |
| { |
| "epoch": 40.57692307692308, |
| "grad_norm": 1.0465848445892334, |
| "learning_rate": 1.4878471297984174e-05, |
| "loss": 0.0327, |
| "step": 10550 |
| }, |
| { |
| "epoch": 40.61538461538461, |
| "grad_norm": 1.2741106748580933, |
| "learning_rate": 1.4853670242176318e-05, |
| "loss": 0.0359, |
| "step": 10560 |
| }, |
| { |
| "epoch": 40.65384615384615, |
| "grad_norm": 1.2669447660446167, |
| "learning_rate": 1.4828869586428433e-05, |
| "loss": 0.0355, |
| "step": 10570 |
| }, |
| { |
| "epoch": 40.69230769230769, |
| "grad_norm": 1.4156800508499146, |
| "learning_rate": 1.4804069398544563e-05, |
| "loss": 0.0332, |
| "step": 10580 |
| }, |
| { |
| "epoch": 40.73076923076923, |
| "grad_norm": 1.3298932313919067, |
| "learning_rate": 1.477926974632748e-05, |
| "loss": 0.0373, |
| "step": 10590 |
| }, |
| { |
| "epoch": 40.76923076923077, |
| "grad_norm": 1.3701391220092773, |
| "learning_rate": 1.4754470697578478e-05, |
| "loss": 0.035, |
| "step": 10600 |
| }, |
| { |
| "epoch": 40.80769230769231, |
| "grad_norm": 1.5692551136016846, |
| "learning_rate": 1.4729672320097214e-05, |
| "loss": 0.0356, |
| "step": 10610 |
| }, |
| { |
| "epoch": 40.84615384615385, |
| "grad_norm": 1.1283255815505981, |
| "learning_rate": 1.4704874681681504e-05, |
| "loss": 0.0339, |
| "step": 10620 |
| }, |
| { |
| "epoch": 40.88461538461539, |
| "grad_norm": 1.3027880191802979, |
| "learning_rate": 1.4680077850127146e-05, |
| "loss": 0.0373, |
| "step": 10630 |
| }, |
| { |
| "epoch": 40.92307692307692, |
| "grad_norm": 1.37151300907135, |
| "learning_rate": 1.465528189322773e-05, |
| "loss": 0.0337, |
| "step": 10640 |
| }, |
| { |
| "epoch": 40.96153846153846, |
| "grad_norm": 1.3166391849517822, |
| "learning_rate": 1.4630486878774455e-05, |
| "loss": 0.0318, |
| "step": 10650 |
| }, |
| { |
| "epoch": 41.0, |
| "grad_norm": 1.0613034963607788, |
| "learning_rate": 1.4605692874555942e-05, |
| "loss": 0.0355, |
| "step": 10660 |
| }, |
| { |
| "epoch": 41.03846153846154, |
| "grad_norm": 1.1301933526992798, |
| "learning_rate": 1.4580899948358054e-05, |
| "loss": 0.0335, |
| "step": 10670 |
| }, |
| { |
| "epoch": 41.07692307692308, |
| "grad_norm": 1.2418525218963623, |
| "learning_rate": 1.4556108167963702e-05, |
| "loss": 0.0368, |
| "step": 10680 |
| }, |
| { |
| "epoch": 41.11538461538461, |
| "grad_norm": 1.166088342666626, |
| "learning_rate": 1.4531317601152675e-05, |
| "loss": 0.0327, |
| "step": 10690 |
| }, |
| { |
| "epoch": 41.15384615384615, |
| "grad_norm": 1.1724998950958252, |
| "learning_rate": 1.4506528315701425e-05, |
| "loss": 0.0347, |
| "step": 10700 |
| }, |
| { |
| "epoch": 41.19230769230769, |
| "grad_norm": 1.4540597200393677, |
| "learning_rate": 1.4481740379382916e-05, |
| "loss": 0.0395, |
| "step": 10710 |
| }, |
| { |
| "epoch": 41.23076923076923, |
| "grad_norm": 1.3581476211547852, |
| "learning_rate": 1.445695385996642e-05, |
| "loss": 0.0305, |
| "step": 10720 |
| }, |
| { |
| "epoch": 41.26923076923077, |
| "grad_norm": 0.9946508407592773, |
| "learning_rate": 1.4432168825217344e-05, |
| "loss": 0.0321, |
| "step": 10730 |
| }, |
| { |
| "epoch": 41.30769230769231, |
| "grad_norm": 1.1900794506072998, |
| "learning_rate": 1.4407385342897005e-05, |
| "loss": 0.038, |
| "step": 10740 |
| }, |
| { |
| "epoch": 41.34615384615385, |
| "grad_norm": 1.0426830053329468, |
| "learning_rate": 1.4382603480762514e-05, |
| "loss": 0.0325, |
| "step": 10750 |
| }, |
| { |
| "epoch": 41.38461538461539, |
| "grad_norm": 1.3630422353744507, |
| "learning_rate": 1.4357823306566529e-05, |
| "loss": 0.0376, |
| "step": 10760 |
| }, |
| { |
| "epoch": 41.42307692307692, |
| "grad_norm": 1.1893590688705444, |
| "learning_rate": 1.4333044888057104e-05, |
| "loss": 0.036, |
| "step": 10770 |
| }, |
| { |
| "epoch": 41.46153846153846, |
| "grad_norm": 1.2472355365753174, |
| "learning_rate": 1.4308268292977496e-05, |
| "loss": 0.0351, |
| "step": 10780 |
| }, |
| { |
| "epoch": 41.5, |
| "grad_norm": 1.2175953388214111, |
| "learning_rate": 1.4283493589065948e-05, |
| "loss": 0.0363, |
| "step": 10790 |
| }, |
| { |
| "epoch": 41.53846153846154, |
| "grad_norm": 1.1068720817565918, |
| "learning_rate": 1.4258720844055573e-05, |
| "loss": 0.0353, |
| "step": 10800 |
| }, |
| { |
| "epoch": 41.57692307692308, |
| "grad_norm": 1.0755929946899414, |
| "learning_rate": 1.4233950125674105e-05, |
| "loss": 0.0351, |
| "step": 10810 |
| }, |
| { |
| "epoch": 41.61538461538461, |
| "grad_norm": 1.193617820739746, |
| "learning_rate": 1.420918150164374e-05, |
| "loss": 0.0384, |
| "step": 10820 |
| }, |
| { |
| "epoch": 41.65384615384615, |
| "grad_norm": 1.4359036684036255, |
| "learning_rate": 1.4184415039680958e-05, |
| "loss": 0.0387, |
| "step": 10830 |
| }, |
| { |
| "epoch": 41.69230769230769, |
| "grad_norm": 1.1195001602172852, |
| "learning_rate": 1.415965080749631e-05, |
| "loss": 0.0368, |
| "step": 10840 |
| }, |
| { |
| "epoch": 41.73076923076923, |
| "grad_norm": 1.4046603441238403, |
| "learning_rate": 1.4134888872794265e-05, |
| "loss": 0.0339, |
| "step": 10850 |
| }, |
| { |
| "epoch": 41.76923076923077, |
| "grad_norm": 0.9813646078109741, |
| "learning_rate": 1.411012930327301e-05, |
| "loss": 0.0338, |
| "step": 10860 |
| }, |
| { |
| "epoch": 41.80769230769231, |
| "grad_norm": 1.283523678779602, |
| "learning_rate": 1.4085372166624263e-05, |
| "loss": 0.0331, |
| "step": 10870 |
| }, |
| { |
| "epoch": 41.84615384615385, |
| "grad_norm": 1.3945362567901611, |
| "learning_rate": 1.406061753053308e-05, |
| "loss": 0.0342, |
| "step": 10880 |
| }, |
| { |
| "epoch": 41.88461538461539, |
| "grad_norm": 1.5126872062683105, |
| "learning_rate": 1.40358654626777e-05, |
| "loss": 0.0336, |
| "step": 10890 |
| }, |
| { |
| "epoch": 41.92307692307692, |
| "grad_norm": 1.2869038581848145, |
| "learning_rate": 1.4011116030729333e-05, |
| "loss": 0.0349, |
| "step": 10900 |
| }, |
| { |
| "epoch": 41.96153846153846, |
| "grad_norm": 1.316064715385437, |
| "learning_rate": 1.3986369302351974e-05, |
| "loss": 0.0329, |
| "step": 10910 |
| }, |
| { |
| "epoch": 42.0, |
| "grad_norm": 1.2845642566680908, |
| "learning_rate": 1.3961625345202245e-05, |
| "loss": 0.0303, |
| "step": 10920 |
| }, |
| { |
| "epoch": 42.03846153846154, |
| "grad_norm": 1.0855242013931274, |
| "learning_rate": 1.3936884226929163e-05, |
| "loss": 0.038, |
| "step": 10930 |
| }, |
| { |
| "epoch": 42.07692307692308, |
| "grad_norm": 1.4729291200637817, |
| "learning_rate": 1.391214601517401e-05, |
| "loss": 0.0336, |
| "step": 10940 |
| }, |
| { |
| "epoch": 42.11538461538461, |
| "grad_norm": 1.260464072227478, |
| "learning_rate": 1.3887410777570116e-05, |
| "loss": 0.0355, |
| "step": 10950 |
| }, |
| { |
| "epoch": 42.15384615384615, |
| "grad_norm": 1.0688891410827637, |
| "learning_rate": 1.3862678581742667e-05, |
| "loss": 0.0342, |
| "step": 10960 |
| }, |
| { |
| "epoch": 42.19230769230769, |
| "grad_norm": 1.4798998832702637, |
| "learning_rate": 1.3837949495308558e-05, |
| "loss": 0.033, |
| "step": 10970 |
| }, |
| { |
| "epoch": 42.23076923076923, |
| "grad_norm": 1.2672547101974487, |
| "learning_rate": 1.3813223585876145e-05, |
| "loss": 0.0362, |
| "step": 10980 |
| }, |
| { |
| "epoch": 42.26923076923077, |
| "grad_norm": 1.104485034942627, |
| "learning_rate": 1.3788500921045135e-05, |
| "loss": 0.0355, |
| "step": 10990 |
| }, |
| { |
| "epoch": 42.30769230769231, |
| "grad_norm": 1.1904023885726929, |
| "learning_rate": 1.3763781568406343e-05, |
| "loss": 0.0423, |
| "step": 11000 |
| }, |
| { |
| "epoch": 42.34615384615385, |
| "grad_norm": 1.222254991531372, |
| "learning_rate": 1.3739065595541548e-05, |
| "loss": 0.0317, |
| "step": 11010 |
| }, |
| { |
| "epoch": 42.38461538461539, |
| "grad_norm": 1.4406765699386597, |
| "learning_rate": 1.3714353070023257e-05, |
| "loss": 0.0347, |
| "step": 11020 |
| }, |
| { |
| "epoch": 42.42307692307692, |
| "grad_norm": 1.3593086004257202, |
| "learning_rate": 1.368964405941458e-05, |
| "loss": 0.0402, |
| "step": 11030 |
| }, |
| { |
| "epoch": 42.46153846153846, |
| "grad_norm": 1.4239052534103394, |
| "learning_rate": 1.366493863126901e-05, |
| "loss": 0.0313, |
| "step": 11040 |
| }, |
| { |
| "epoch": 42.5, |
| "grad_norm": 1.1864207983016968, |
| "learning_rate": 1.3640236853130243e-05, |
| "loss": 0.0363, |
| "step": 11050 |
| }, |
| { |
| "epoch": 42.53846153846154, |
| "grad_norm": 1.0722951889038086, |
| "learning_rate": 1.3615538792532002e-05, |
| "loss": 0.0297, |
| "step": 11060 |
| }, |
| { |
| "epoch": 42.57692307692308, |
| "grad_norm": 1.3497544527053833, |
| "learning_rate": 1.3590844516997832e-05, |
| "loss": 0.0397, |
| "step": 11070 |
| }, |
| { |
| "epoch": 42.61538461538461, |
| "grad_norm": 1.445272445678711, |
| "learning_rate": 1.356615409404094e-05, |
| "loss": 0.036, |
| "step": 11080 |
| }, |
| { |
| "epoch": 42.65384615384615, |
| "grad_norm": 1.157558798789978, |
| "learning_rate": 1.354146759116401e-05, |
| "loss": 0.0345, |
| "step": 11090 |
| }, |
| { |
| "epoch": 42.69230769230769, |
| "grad_norm": 1.1287713050842285, |
| "learning_rate": 1.3516785075858988e-05, |
| "loss": 0.0357, |
| "step": 11100 |
| }, |
| { |
| "epoch": 42.73076923076923, |
| "grad_norm": 1.0962657928466797, |
| "learning_rate": 1.3492106615606941e-05, |
| "loss": 0.0415, |
| "step": 11110 |
| }, |
| { |
| "epoch": 42.76923076923077, |
| "grad_norm": 1.3343199491500854, |
| "learning_rate": 1.346743227787782e-05, |
| "loss": 0.0351, |
| "step": 11120 |
| }, |
| { |
| "epoch": 42.80769230769231, |
| "grad_norm": 1.0660537481307983, |
| "learning_rate": 1.344276213013033e-05, |
| "loss": 0.033, |
| "step": 11130 |
| }, |
| { |
| "epoch": 42.84615384615385, |
| "grad_norm": 1.3008800745010376, |
| "learning_rate": 1.3418096239811712e-05, |
| "loss": 0.0387, |
| "step": 11140 |
| }, |
| { |
| "epoch": 42.88461538461539, |
| "grad_norm": 1.3870251178741455, |
| "learning_rate": 1.3393434674357579e-05, |
| "loss": 0.0424, |
| "step": 11150 |
| }, |
| { |
| "epoch": 42.92307692307692, |
| "grad_norm": 1.0750961303710938, |
| "learning_rate": 1.3368777501191692e-05, |
| "loss": 0.0383, |
| "step": 11160 |
| }, |
| { |
| "epoch": 42.96153846153846, |
| "grad_norm": 1.3516203165054321, |
| "learning_rate": 1.334412478772583e-05, |
| "loss": 0.0366, |
| "step": 11170 |
| }, |
| { |
| "epoch": 43.0, |
| "grad_norm": 1.1265571117401123, |
| "learning_rate": 1.3319476601359565e-05, |
| "loss": 0.0341, |
| "step": 11180 |
| }, |
| { |
| "epoch": 43.03846153846154, |
| "grad_norm": 1.2286325693130493, |
| "learning_rate": 1.3294833009480105e-05, |
| "loss": 0.0407, |
| "step": 11190 |
| }, |
| { |
| "epoch": 43.07692307692308, |
| "grad_norm": 1.0944629907608032, |
| "learning_rate": 1.3270194079462091e-05, |
| "loss": 0.0393, |
| "step": 11200 |
| }, |
| { |
| "epoch": 43.11538461538461, |
| "grad_norm": 1.2205157279968262, |
| "learning_rate": 1.3245559878667405e-05, |
| "loss": 0.0428, |
| "step": 11210 |
| }, |
| { |
| "epoch": 43.15384615384615, |
| "grad_norm": 1.2515414953231812, |
| "learning_rate": 1.3220930474445019e-05, |
| "loss": 0.0343, |
| "step": 11220 |
| }, |
| { |
| "epoch": 43.19230769230769, |
| "grad_norm": 1.3750489950180054, |
| "learning_rate": 1.3196305934130778e-05, |
| "loss": 0.0401, |
| "step": 11230 |
| }, |
| { |
| "epoch": 43.23076923076923, |
| "grad_norm": 1.156083583831787, |
| "learning_rate": 1.3171686325047241e-05, |
| "loss": 0.0327, |
| "step": 11240 |
| }, |
| { |
| "epoch": 43.26923076923077, |
| "grad_norm": 1.4203742742538452, |
| "learning_rate": 1.3147071714503484e-05, |
| "loss": 0.0399, |
| "step": 11250 |
| }, |
| { |
| "epoch": 43.30769230769231, |
| "grad_norm": 1.0962517261505127, |
| "learning_rate": 1.3122462169794903e-05, |
| "loss": 0.0337, |
| "step": 11260 |
| }, |
| { |
| "epoch": 43.34615384615385, |
| "grad_norm": 1.2661428451538086, |
| "learning_rate": 1.3097857758203053e-05, |
| "loss": 0.0329, |
| "step": 11270 |
| }, |
| { |
| "epoch": 43.38461538461539, |
| "grad_norm": 1.1555863618850708, |
| "learning_rate": 1.3073258546995455e-05, |
| "loss": 0.0344, |
| "step": 11280 |
| }, |
| { |
| "epoch": 43.42307692307692, |
| "grad_norm": 1.220564842224121, |
| "learning_rate": 1.3048664603425429e-05, |
| "loss": 0.0366, |
| "step": 11290 |
| }, |
| { |
| "epoch": 43.46153846153846, |
| "grad_norm": 1.1790637969970703, |
| "learning_rate": 1.3024075994731859e-05, |
| "loss": 0.0363, |
| "step": 11300 |
| }, |
| { |
| "epoch": 43.5, |
| "grad_norm": 1.1303735971450806, |
| "learning_rate": 1.2999492788139068e-05, |
| "loss": 0.0337, |
| "step": 11310 |
| }, |
| { |
| "epoch": 43.53846153846154, |
| "grad_norm": 0.850884199142456, |
| "learning_rate": 1.2974915050856605e-05, |
| "loss": 0.0315, |
| "step": 11320 |
| }, |
| { |
| "epoch": 43.57692307692308, |
| "grad_norm": 1.1740418672561646, |
| "learning_rate": 1.2950342850079061e-05, |
| "loss": 0.0371, |
| "step": 11330 |
| }, |
| { |
| "epoch": 43.61538461538461, |
| "grad_norm": 1.2208044528961182, |
| "learning_rate": 1.292577625298591e-05, |
| "loss": 0.0381, |
| "step": 11340 |
| }, |
| { |
| "epoch": 43.65384615384615, |
| "grad_norm": 1.4097691774368286, |
| "learning_rate": 1.2901215326741273e-05, |
| "loss": 0.0321, |
| "step": 11350 |
| }, |
| { |
| "epoch": 43.69230769230769, |
| "grad_norm": 1.2222874164581299, |
| "learning_rate": 1.287666013849379e-05, |
| "loss": 0.0324, |
| "step": 11360 |
| }, |
| { |
| "epoch": 43.73076923076923, |
| "grad_norm": 1.394254207611084, |
| "learning_rate": 1.285211075537641e-05, |
| "loss": 0.0391, |
| "step": 11370 |
| }, |
| { |
| "epoch": 43.76923076923077, |
| "grad_norm": 1.0281349420547485, |
| "learning_rate": 1.2827567244506203e-05, |
| "loss": 0.0314, |
| "step": 11380 |
| }, |
| { |
| "epoch": 43.80769230769231, |
| "grad_norm": 1.1789246797561646, |
| "learning_rate": 1.2803029672984208e-05, |
| "loss": 0.0315, |
| "step": 11390 |
| }, |
| { |
| "epoch": 43.84615384615385, |
| "grad_norm": 1.0867520570755005, |
| "learning_rate": 1.2778498107895186e-05, |
| "loss": 0.0295, |
| "step": 11400 |
| }, |
| { |
| "epoch": 43.88461538461539, |
| "grad_norm": 1.2576417922973633, |
| "learning_rate": 1.275397261630751e-05, |
| "loss": 0.0364, |
| "step": 11410 |
| }, |
| { |
| "epoch": 43.92307692307692, |
| "grad_norm": 1.128233551979065, |
| "learning_rate": 1.2729453265272935e-05, |
| "loss": 0.0372, |
| "step": 11420 |
| }, |
| { |
| "epoch": 43.96153846153846, |
| "grad_norm": 1.1675398349761963, |
| "learning_rate": 1.270494012182644e-05, |
| "loss": 0.0333, |
| "step": 11430 |
| }, |
| { |
| "epoch": 44.0, |
| "grad_norm": 1.2023512125015259, |
| "learning_rate": 1.268043325298601e-05, |
| "loss": 0.0329, |
| "step": 11440 |
| }, |
| { |
| "epoch": 44.03846153846154, |
| "grad_norm": 1.3402878046035767, |
| "learning_rate": 1.2655932725752494e-05, |
| "loss": 0.0367, |
| "step": 11450 |
| }, |
| { |
| "epoch": 44.07692307692308, |
| "grad_norm": 1.0831363201141357, |
| "learning_rate": 1.26314386071094e-05, |
| "loss": 0.043, |
| "step": 11460 |
| }, |
| { |
| "epoch": 44.11538461538461, |
| "grad_norm": 1.265649676322937, |
| "learning_rate": 1.2606950964022701e-05, |
| "loss": 0.0376, |
| "step": 11470 |
| }, |
| { |
| "epoch": 44.15384615384615, |
| "grad_norm": 1.0680443048477173, |
| "learning_rate": 1.2582469863440704e-05, |
| "loss": 0.0344, |
| "step": 11480 |
| }, |
| { |
| "epoch": 44.19230769230769, |
| "grad_norm": 1.0312443971633911, |
| "learning_rate": 1.2557995372293778e-05, |
| "loss": 0.0308, |
| "step": 11490 |
| }, |
| { |
| "epoch": 44.23076923076923, |
| "grad_norm": 1.2013682126998901, |
| "learning_rate": 1.2533527557494257e-05, |
| "loss": 0.0361, |
| "step": 11500 |
| }, |
| { |
| "epoch": 44.26923076923077, |
| "grad_norm": 1.2492719888687134, |
| "learning_rate": 1.250906648593621e-05, |
| "loss": 0.0326, |
| "step": 11510 |
| }, |
| { |
| "epoch": 44.30769230769231, |
| "grad_norm": 0.8632230162620544, |
| "learning_rate": 1.2484612224495275e-05, |
| "loss": 0.0342, |
| "step": 11520 |
| }, |
| { |
| "epoch": 44.34615384615385, |
| "grad_norm": 1.1540902853012085, |
| "learning_rate": 1.2460164840028477e-05, |
| "loss": 0.0365, |
| "step": 11530 |
| }, |
| { |
| "epoch": 44.38461538461539, |
| "grad_norm": 1.1001176834106445, |
| "learning_rate": 1.2435724399374016e-05, |
| "loss": 0.0311, |
| "step": 11540 |
| }, |
| { |
| "epoch": 44.42307692307692, |
| "grad_norm": 1.1563735008239746, |
| "learning_rate": 1.2411290969351129e-05, |
| "loss": 0.0363, |
| "step": 11550 |
| }, |
| { |
| "epoch": 44.46153846153846, |
| "grad_norm": 1.2087491750717163, |
| "learning_rate": 1.2386864616759883e-05, |
| "loss": 0.0348, |
| "step": 11560 |
| }, |
| { |
| "epoch": 44.5, |
| "grad_norm": 1.2683261632919312, |
| "learning_rate": 1.2362445408380996e-05, |
| "loss": 0.0329, |
| "step": 11570 |
| }, |
| { |
| "epoch": 44.53846153846154, |
| "grad_norm": 1.2558414936065674, |
| "learning_rate": 1.2338033410975644e-05, |
| "loss": 0.0303, |
| "step": 11580 |
| }, |
| { |
| "epoch": 44.57692307692308, |
| "grad_norm": 1.2375128269195557, |
| "learning_rate": 1.2313628691285301e-05, |
| "loss": 0.0362, |
| "step": 11590 |
| }, |
| { |
| "epoch": 44.61538461538461, |
| "grad_norm": 1.0311369895935059, |
| "learning_rate": 1.2289231316031536e-05, |
| "loss": 0.033, |
| "step": 11600 |
| }, |
| { |
| "epoch": 44.65384615384615, |
| "grad_norm": 1.0355188846588135, |
| "learning_rate": 1.2264841351915842e-05, |
| "loss": 0.0335, |
| "step": 11610 |
| }, |
| { |
| "epoch": 44.69230769230769, |
| "grad_norm": 1.3842549324035645, |
| "learning_rate": 1.2240458865619455e-05, |
| "loss": 0.0371, |
| "step": 11620 |
| }, |
| { |
| "epoch": 44.73076923076923, |
| "grad_norm": 1.252090573310852, |
| "learning_rate": 1.2216083923803152e-05, |
| "loss": 0.0316, |
| "step": 11630 |
| }, |
| { |
| "epoch": 44.76923076923077, |
| "grad_norm": 1.099759817123413, |
| "learning_rate": 1.2191716593107097e-05, |
| "loss": 0.036, |
| "step": 11640 |
| }, |
| { |
| "epoch": 44.80769230769231, |
| "grad_norm": 1.2019200325012207, |
| "learning_rate": 1.2167356940150645e-05, |
| "loss": 0.0329, |
| "step": 11650 |
| }, |
| { |
| "epoch": 44.84615384615385, |
| "grad_norm": 1.1980955600738525, |
| "learning_rate": 1.2143005031532152e-05, |
| "loss": 0.0317, |
| "step": 11660 |
| }, |
| { |
| "epoch": 44.88461538461539, |
| "grad_norm": 1.3130910396575928, |
| "learning_rate": 1.2118660933828813e-05, |
| "loss": 0.0357, |
| "step": 11670 |
| }, |
| { |
| "epoch": 44.92307692307692, |
| "grad_norm": 1.326754093170166, |
| "learning_rate": 1.2094324713596453e-05, |
| "loss": 0.0317, |
| "step": 11680 |
| }, |
| { |
| "epoch": 44.96153846153846, |
| "grad_norm": 1.204189419746399, |
| "learning_rate": 1.2069996437369374e-05, |
| "loss": 0.0355, |
| "step": 11690 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 0.9756190180778503, |
| "learning_rate": 1.2045676171660154e-05, |
| "loss": 0.035, |
| "step": 11700 |
| }, |
| { |
| "epoch": 45.03846153846154, |
| "grad_norm": 1.243010401725769, |
| "learning_rate": 1.2021363982959472e-05, |
| "loss": 0.0399, |
| "step": 11710 |
| }, |
| { |
| "epoch": 45.07692307692308, |
| "grad_norm": 1.2061409950256348, |
| "learning_rate": 1.1997059937735919e-05, |
| "loss": 0.0308, |
| "step": 11720 |
| }, |
| { |
| "epoch": 45.11538461538461, |
| "grad_norm": 1.214267611503601, |
| "learning_rate": 1.197276410243583e-05, |
| "loss": 0.0321, |
| "step": 11730 |
| }, |
| { |
| "epoch": 45.15384615384615, |
| "grad_norm": 1.2562459707260132, |
| "learning_rate": 1.194847654348309e-05, |
| "loss": 0.0322, |
| "step": 11740 |
| }, |
| { |
| "epoch": 45.19230769230769, |
| "grad_norm": 1.1353240013122559, |
| "learning_rate": 1.1924197327278957e-05, |
| "loss": 0.0334, |
| "step": 11750 |
| }, |
| { |
| "epoch": 45.23076923076923, |
| "grad_norm": 1.0406856536865234, |
| "learning_rate": 1.1899926520201885e-05, |
| "loss": 0.0348, |
| "step": 11760 |
| }, |
| { |
| "epoch": 45.26923076923077, |
| "grad_norm": 1.4781416654586792, |
| "learning_rate": 1.1875664188607327e-05, |
| "loss": 0.0287, |
| "step": 11770 |
| }, |
| { |
| "epoch": 45.30769230769231, |
| "grad_norm": 1.0640053749084473, |
| "learning_rate": 1.1851410398827578e-05, |
| "loss": 0.0311, |
| "step": 11780 |
| }, |
| { |
| "epoch": 45.34615384615385, |
| "grad_norm": 1.1978460550308228, |
| "learning_rate": 1.1827165217171567e-05, |
| "loss": 0.0345, |
| "step": 11790 |
| }, |
| { |
| "epoch": 45.38461538461539, |
| "grad_norm": 1.0306118726730347, |
| "learning_rate": 1.18029287099247e-05, |
| "loss": 0.0328, |
| "step": 11800 |
| }, |
| { |
| "epoch": 45.42307692307692, |
| "grad_norm": 1.2475486993789673, |
| "learning_rate": 1.1778700943348662e-05, |
| "loss": 0.0321, |
| "step": 11810 |
| }, |
| { |
| "epoch": 45.46153846153846, |
| "grad_norm": 1.11435067653656, |
| "learning_rate": 1.1754481983681238e-05, |
| "loss": 0.0371, |
| "step": 11820 |
| }, |
| { |
| "epoch": 45.5, |
| "grad_norm": 1.4166765213012695, |
| "learning_rate": 1.173027189713614e-05, |
| "loss": 0.0348, |
| "step": 11830 |
| }, |
| { |
| "epoch": 45.53846153846154, |
| "grad_norm": 1.6289052963256836, |
| "learning_rate": 1.170607074990282e-05, |
| "loss": 0.0305, |
| "step": 11840 |
| }, |
| { |
| "epoch": 45.57692307692308, |
| "grad_norm": 1.254708170890808, |
| "learning_rate": 1.1681878608146297e-05, |
| "loss": 0.0345, |
| "step": 11850 |
| }, |
| { |
| "epoch": 45.61538461538461, |
| "grad_norm": 1.2652217149734497, |
| "learning_rate": 1.1657695538006952e-05, |
| "loss": 0.0389, |
| "step": 11860 |
| }, |
| { |
| "epoch": 45.65384615384615, |
| "grad_norm": 1.1574680805206299, |
| "learning_rate": 1.163352160560038e-05, |
| "loss": 0.0382, |
| "step": 11870 |
| }, |
| { |
| "epoch": 45.69230769230769, |
| "grad_norm": 1.2947102785110474, |
| "learning_rate": 1.1609356877017191e-05, |
| "loss": 0.0323, |
| "step": 11880 |
| }, |
| { |
| "epoch": 45.73076923076923, |
| "grad_norm": 1.23800790309906, |
| "learning_rate": 1.1585201418322828e-05, |
| "loss": 0.0364, |
| "step": 11890 |
| }, |
| { |
| "epoch": 45.76923076923077, |
| "grad_norm": 1.157383918762207, |
| "learning_rate": 1.1561055295557397e-05, |
| "loss": 0.0365, |
| "step": 11900 |
| }, |
| { |
| "epoch": 45.80769230769231, |
| "grad_norm": 1.0947489738464355, |
| "learning_rate": 1.1536918574735469e-05, |
| "loss": 0.0347, |
| "step": 11910 |
| }, |
| { |
| "epoch": 45.84615384615385, |
| "grad_norm": 1.083691120147705, |
| "learning_rate": 1.1512791321845921e-05, |
| "loss": 0.033, |
| "step": 11920 |
| }, |
| { |
| "epoch": 45.88461538461539, |
| "grad_norm": 0.9972357749938965, |
| "learning_rate": 1.148867360285174e-05, |
| "loss": 0.0296, |
| "step": 11930 |
| }, |
| { |
| "epoch": 45.92307692307692, |
| "grad_norm": 1.1867698431015015, |
| "learning_rate": 1.1464565483689853e-05, |
| "loss": 0.0357, |
| "step": 11940 |
| }, |
| { |
| "epoch": 45.96153846153846, |
| "grad_norm": 1.1101211309432983, |
| "learning_rate": 1.144046703027093e-05, |
| "loss": 0.0318, |
| "step": 11950 |
| }, |
| { |
| "epoch": 46.0, |
| "grad_norm": 0.9498093128204346, |
| "learning_rate": 1.1416378308479223e-05, |
| "loss": 0.0326, |
| "step": 11960 |
| }, |
| { |
| "epoch": 46.03846153846154, |
| "grad_norm": 0.9711620807647705, |
| "learning_rate": 1.1392299384172383e-05, |
| "loss": 0.0354, |
| "step": 11970 |
| }, |
| { |
| "epoch": 46.07692307692308, |
| "grad_norm": 1.2946242094039917, |
| "learning_rate": 1.1368230323181267e-05, |
| "loss": 0.0331, |
| "step": 11980 |
| }, |
| { |
| "epoch": 46.11538461538461, |
| "grad_norm": 1.1568892002105713, |
| "learning_rate": 1.1344171191309772e-05, |
| "loss": 0.032, |
| "step": 11990 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "grad_norm": 1.16463303565979, |
| "learning_rate": 1.1320122054334636e-05, |
| "loss": 0.0314, |
| "step": 12000 |
| }, |
| { |
| "epoch": 46.19230769230769, |
| "grad_norm": 1.1152621507644653, |
| "learning_rate": 1.1296082978005292e-05, |
| "loss": 0.0357, |
| "step": 12010 |
| }, |
| { |
| "epoch": 46.23076923076923, |
| "grad_norm": 1.2109414339065552, |
| "learning_rate": 1.127205402804365e-05, |
| "loss": 0.0385, |
| "step": 12020 |
| }, |
| { |
| "epoch": 46.26923076923077, |
| "grad_norm": 1.1345036029815674, |
| "learning_rate": 1.1248035270143946e-05, |
| "loss": 0.0335, |
| "step": 12030 |
| }, |
| { |
| "epoch": 46.30769230769231, |
| "grad_norm": 0.9837266802787781, |
| "learning_rate": 1.1224026769972545e-05, |
| "loss": 0.0303, |
| "step": 12040 |
| }, |
| { |
| "epoch": 46.34615384615385, |
| "grad_norm": 1.2618961334228516, |
| "learning_rate": 1.1200028593167769e-05, |
| "loss": 0.0343, |
| "step": 12050 |
| }, |
| { |
| "epoch": 46.38461538461539, |
| "grad_norm": 1.2574706077575684, |
| "learning_rate": 1.1176040805339718e-05, |
| "loss": 0.0371, |
| "step": 12060 |
| }, |
| { |
| "epoch": 46.42307692307692, |
| "grad_norm": 1.127816081047058, |
| "learning_rate": 1.1152063472070086e-05, |
| "loss": 0.0335, |
| "step": 12070 |
| }, |
| { |
| "epoch": 46.46153846153846, |
| "grad_norm": 1.2891298532485962, |
| "learning_rate": 1.1128096658911992e-05, |
| "loss": 0.0313, |
| "step": 12080 |
| }, |
| { |
| "epoch": 46.5, |
| "grad_norm": 1.1167298555374146, |
| "learning_rate": 1.1104140431389782e-05, |
| "loss": 0.0291, |
| "step": 12090 |
| }, |
| { |
| "epoch": 46.53846153846154, |
| "grad_norm": 1.112906575202942, |
| "learning_rate": 1.1080194854998868e-05, |
| "loss": 0.0357, |
| "step": 12100 |
| }, |
| { |
| "epoch": 46.57692307692308, |
| "grad_norm": 1.082861304283142, |
| "learning_rate": 1.1056259995205545e-05, |
| "loss": 0.0323, |
| "step": 12110 |
| }, |
| { |
| "epoch": 46.61538461538461, |
| "grad_norm": 1.2894811630249023, |
| "learning_rate": 1.1032335917446803e-05, |
| "loss": 0.0348, |
| "step": 12120 |
| }, |
| { |
| "epoch": 46.65384615384615, |
| "grad_norm": 1.0083926916122437, |
| "learning_rate": 1.100842268713016e-05, |
| "loss": 0.0316, |
| "step": 12130 |
| }, |
| { |
| "epoch": 46.69230769230769, |
| "grad_norm": 1.1639894247055054, |
| "learning_rate": 1.098452036963347e-05, |
| "loss": 0.0332, |
| "step": 12140 |
| }, |
| { |
| "epoch": 46.73076923076923, |
| "grad_norm": 1.2733254432678223, |
| "learning_rate": 1.096062903030476e-05, |
| "loss": 0.0353, |
| "step": 12150 |
| }, |
| { |
| "epoch": 46.76923076923077, |
| "grad_norm": 1.142136812210083, |
| "learning_rate": 1.0936748734462036e-05, |
| "loss": 0.0402, |
| "step": 12160 |
| }, |
| { |
| "epoch": 46.80769230769231, |
| "grad_norm": 1.0598315000534058, |
| "learning_rate": 1.0912879547393119e-05, |
| "loss": 0.033, |
| "step": 12170 |
| }, |
| { |
| "epoch": 46.84615384615385, |
| "grad_norm": 0.8986493945121765, |
| "learning_rate": 1.0889021534355456e-05, |
| "loss": 0.0361, |
| "step": 12180 |
| }, |
| { |
| "epoch": 46.88461538461539, |
| "grad_norm": 1.1551433801651, |
| "learning_rate": 1.0865174760575936e-05, |
| "loss": 0.0367, |
| "step": 12190 |
| }, |
| { |
| "epoch": 46.92307692307692, |
| "grad_norm": 1.0737844705581665, |
| "learning_rate": 1.0841339291250733e-05, |
| "loss": 0.0295, |
| "step": 12200 |
| }, |
| { |
| "epoch": 46.96153846153846, |
| "grad_norm": 1.2422358989715576, |
| "learning_rate": 1.081751519154511e-05, |
| "loss": 0.0379, |
| "step": 12210 |
| }, |
| { |
| "epoch": 47.0, |
| "grad_norm": 1.1002851724624634, |
| "learning_rate": 1.079370252659325e-05, |
| "loss": 0.0335, |
| "step": 12220 |
| }, |
| { |
| "epoch": 47.03846153846154, |
| "grad_norm": 1.1535612344741821, |
| "learning_rate": 1.076990136149806e-05, |
| "loss": 0.0389, |
| "step": 12230 |
| }, |
| { |
| "epoch": 47.07692307692308, |
| "grad_norm": 1.1302235126495361, |
| "learning_rate": 1.0746111761331021e-05, |
| "loss": 0.0317, |
| "step": 12240 |
| }, |
| { |
| "epoch": 47.11538461538461, |
| "grad_norm": 1.2010834217071533, |
| "learning_rate": 1.0722333791131996e-05, |
| "loss": 0.0329, |
| "step": 12250 |
| }, |
| { |
| "epoch": 47.15384615384615, |
| "grad_norm": 1.2279101610183716, |
| "learning_rate": 1.0698567515909041e-05, |
| "loss": 0.0352, |
| "step": 12260 |
| }, |
| { |
| "epoch": 47.19230769230769, |
| "grad_norm": 1.3264673948287964, |
| "learning_rate": 1.0674813000638252e-05, |
| "loss": 0.0338, |
| "step": 12270 |
| }, |
| { |
| "epoch": 47.23076923076923, |
| "grad_norm": 1.1362801790237427, |
| "learning_rate": 1.0651070310263559e-05, |
| "loss": 0.0333, |
| "step": 12280 |
| }, |
| { |
| "epoch": 47.26923076923077, |
| "grad_norm": 1.2410087585449219, |
| "learning_rate": 1.0627339509696574e-05, |
| "loss": 0.0302, |
| "step": 12290 |
| }, |
| { |
| "epoch": 47.30769230769231, |
| "grad_norm": 0.9610795378684998, |
| "learning_rate": 1.06036206638164e-05, |
| "loss": 0.0318, |
| "step": 12300 |
| }, |
| { |
| "epoch": 47.34615384615385, |
| "grad_norm": 0.9377633929252625, |
| "learning_rate": 1.0579913837469455e-05, |
| "loss": 0.0313, |
| "step": 12310 |
| }, |
| { |
| "epoch": 47.38461538461539, |
| "grad_norm": 1.0323609113693237, |
| "learning_rate": 1.0556219095469303e-05, |
| "loss": 0.0322, |
| "step": 12320 |
| }, |
| { |
| "epoch": 47.42307692307692, |
| "grad_norm": 0.9905584454536438, |
| "learning_rate": 1.0532536502596455e-05, |
| "loss": 0.0304, |
| "step": 12330 |
| }, |
| { |
| "epoch": 47.46153846153846, |
| "grad_norm": 1.1498249769210815, |
| "learning_rate": 1.0508866123598218e-05, |
| "loss": 0.0315, |
| "step": 12340 |
| }, |
| { |
| "epoch": 47.5, |
| "grad_norm": 0.8419849872589111, |
| "learning_rate": 1.0485208023188505e-05, |
| "loss": 0.0331, |
| "step": 12350 |
| }, |
| { |
| "epoch": 47.53846153846154, |
| "grad_norm": 1.1979166269302368, |
| "learning_rate": 1.0461562266047668e-05, |
| "loss": 0.0295, |
| "step": 12360 |
| }, |
| { |
| "epoch": 47.57692307692308, |
| "grad_norm": 1.025926947593689, |
| "learning_rate": 1.0437928916822286e-05, |
| "loss": 0.0285, |
| "step": 12370 |
| }, |
| { |
| "epoch": 47.61538461538461, |
| "grad_norm": 1.1680309772491455, |
| "learning_rate": 1.0414308040125043e-05, |
| "loss": 0.0391, |
| "step": 12380 |
| }, |
| { |
| "epoch": 47.65384615384615, |
| "grad_norm": 1.0929526090621948, |
| "learning_rate": 1.0390699700534517e-05, |
| "loss": 0.0306, |
| "step": 12390 |
| }, |
| { |
| "epoch": 47.69230769230769, |
| "grad_norm": 1.0418400764465332, |
| "learning_rate": 1.0367103962595003e-05, |
| "loss": 0.0375, |
| "step": 12400 |
| }, |
| { |
| "epoch": 47.73076923076923, |
| "grad_norm": 1.1047964096069336, |
| "learning_rate": 1.0343520890816356e-05, |
| "loss": 0.0352, |
| "step": 12410 |
| }, |
| { |
| "epoch": 47.76923076923077, |
| "grad_norm": 1.0839290618896484, |
| "learning_rate": 1.0319950549673779e-05, |
| "loss": 0.0346, |
| "step": 12420 |
| }, |
| { |
| "epoch": 47.80769230769231, |
| "grad_norm": 1.1870118379592896, |
| "learning_rate": 1.0296393003607692e-05, |
| "loss": 0.0339, |
| "step": 12430 |
| }, |
| { |
| "epoch": 47.84615384615385, |
| "grad_norm": 1.1544498205184937, |
| "learning_rate": 1.0272848317023526e-05, |
| "loss": 0.0316, |
| "step": 12440 |
| }, |
| { |
| "epoch": 47.88461538461539, |
| "grad_norm": 1.0045503377914429, |
| "learning_rate": 1.0249316554291556e-05, |
| "loss": 0.0295, |
| "step": 12450 |
| }, |
| { |
| "epoch": 47.92307692307692, |
| "grad_norm": 1.0486705303192139, |
| "learning_rate": 1.022579777974673e-05, |
| "loss": 0.0348, |
| "step": 12460 |
| }, |
| { |
| "epoch": 47.96153846153846, |
| "grad_norm": 1.0861692428588867, |
| "learning_rate": 1.0202292057688462e-05, |
| "loss": 0.031, |
| "step": 12470 |
| }, |
| { |
| "epoch": 48.0, |
| "grad_norm": 0.9276844263076782, |
| "learning_rate": 1.0178799452380511e-05, |
| "loss": 0.0356, |
| "step": 12480 |
| }, |
| { |
| "epoch": 48.03846153846154, |
| "grad_norm": 1.1706113815307617, |
| "learning_rate": 1.0155320028050757e-05, |
| "loss": 0.0316, |
| "step": 12490 |
| }, |
| { |
| "epoch": 48.07692307692308, |
| "grad_norm": 0.9763121008872986, |
| "learning_rate": 1.0131853848891063e-05, |
| "loss": 0.0318, |
| "step": 12500 |
| }, |
| { |
| "epoch": 48.11538461538461, |
| "grad_norm": 0.9173984527587891, |
| "learning_rate": 1.0108400979057048e-05, |
| "loss": 0.03, |
| "step": 12510 |
| }, |
| { |
| "epoch": 48.15384615384615, |
| "grad_norm": 0.9412733316421509, |
| "learning_rate": 1.008496148266797e-05, |
| "loss": 0.0302, |
| "step": 12520 |
| }, |
| { |
| "epoch": 48.19230769230769, |
| "grad_norm": 1.0302488803863525, |
| "learning_rate": 1.0061535423806519e-05, |
| "loss": 0.0299, |
| "step": 12530 |
| }, |
| { |
| "epoch": 48.23076923076923, |
| "grad_norm": 1.2443172931671143, |
| "learning_rate": 1.0038122866518647e-05, |
| "loss": 0.0306, |
| "step": 12540 |
| }, |
| { |
| "epoch": 48.26923076923077, |
| "grad_norm": 1.1415942907333374, |
| "learning_rate": 1.0014723874813394e-05, |
| "loss": 0.0327, |
| "step": 12550 |
| }, |
| { |
| "epoch": 48.30769230769231, |
| "grad_norm": 1.1814067363739014, |
| "learning_rate": 9.991338512662696e-06, |
| "loss": 0.0283, |
| "step": 12560 |
| }, |
| { |
| "epoch": 48.34615384615385, |
| "grad_norm": 1.1458048820495605, |
| "learning_rate": 9.96796684400125e-06, |
| "loss": 0.0351, |
| "step": 12570 |
| }, |
| { |
| "epoch": 48.38461538461539, |
| "grad_norm": 1.0160274505615234, |
| "learning_rate": 9.944608932726306e-06, |
| "loss": 0.0329, |
| "step": 12580 |
| }, |
| { |
| "epoch": 48.42307692307692, |
| "grad_norm": 1.0080182552337646, |
| "learning_rate": 9.921264842697501e-06, |
| "loss": 0.0303, |
| "step": 12590 |
| }, |
| { |
| "epoch": 48.46153846153846, |
| "grad_norm": 1.039967656135559, |
| "learning_rate": 9.897934637736692e-06, |
| "loss": 0.034, |
| "step": 12600 |
| }, |
| { |
| "epoch": 48.5, |
| "grad_norm": 1.1883785724639893, |
| "learning_rate": 9.874618381627751e-06, |
| "loss": 0.0298, |
| "step": 12610 |
| }, |
| { |
| "epoch": 48.53846153846154, |
| "grad_norm": 0.9066627621650696, |
| "learning_rate": 9.851316138116446e-06, |
| "loss": 0.0317, |
| "step": 12620 |
| }, |
| { |
| "epoch": 48.57692307692308, |
| "grad_norm": 1.3488620519638062, |
| "learning_rate": 9.828027970910217e-06, |
| "loss": 0.034, |
| "step": 12630 |
| }, |
| { |
| "epoch": 48.61538461538461, |
| "grad_norm": 1.255429744720459, |
| "learning_rate": 9.80475394367803e-06, |
| "loss": 0.031, |
| "step": 12640 |
| }, |
| { |
| "epoch": 48.65384615384615, |
| "grad_norm": 1.0005918741226196, |
| "learning_rate": 9.781494120050176e-06, |
| "loss": 0.0319, |
| "step": 12650 |
| }, |
| { |
| "epoch": 48.69230769230769, |
| "grad_norm": 1.221487045288086, |
| "learning_rate": 9.758248563618126e-06, |
| "loss": 0.0339, |
| "step": 12660 |
| }, |
| { |
| "epoch": 48.73076923076923, |
| "grad_norm": 1.1094986200332642, |
| "learning_rate": 9.735017337934349e-06, |
| "loss": 0.0351, |
| "step": 12670 |
| }, |
| { |
| "epoch": 48.76923076923077, |
| "grad_norm": 0.8227080702781677, |
| "learning_rate": 9.71180050651213e-06, |
| "loss": 0.0284, |
| "step": 12680 |
| }, |
| { |
| "epoch": 48.80769230769231, |
| "grad_norm": 0.9973031282424927, |
| "learning_rate": 9.688598132825402e-06, |
| "loss": 0.039, |
| "step": 12690 |
| }, |
| { |
| "epoch": 48.84615384615385, |
| "grad_norm": 1.0556597709655762, |
| "learning_rate": 9.665410280308555e-06, |
| "loss": 0.037, |
| "step": 12700 |
| }, |
| { |
| "epoch": 48.88461538461539, |
| "grad_norm": 0.8174852132797241, |
| "learning_rate": 9.642237012356302e-06, |
| "loss": 0.0278, |
| "step": 12710 |
| }, |
| { |
| "epoch": 48.92307692307692, |
| "grad_norm": 1.2452213764190674, |
| "learning_rate": 9.619078392323471e-06, |
| "loss": 0.0337, |
| "step": 12720 |
| }, |
| { |
| "epoch": 48.96153846153846, |
| "grad_norm": 1.3187814950942993, |
| "learning_rate": 9.595934483524847e-06, |
| "loss": 0.0317, |
| "step": 12730 |
| }, |
| { |
| "epoch": 49.0, |
| "grad_norm": 1.147231936454773, |
| "learning_rate": 9.572805349234997e-06, |
| "loss": 0.0321, |
| "step": 12740 |
| }, |
| { |
| "epoch": 49.03846153846154, |
| "grad_norm": 0.9327066540718079, |
| "learning_rate": 9.54969105268808e-06, |
| "loss": 0.0309, |
| "step": 12750 |
| }, |
| { |
| "epoch": 49.07692307692308, |
| "grad_norm": 1.0206278562545776, |
| "learning_rate": 9.526591657077701e-06, |
| "loss": 0.0334, |
| "step": 12760 |
| }, |
| { |
| "epoch": 49.11538461538461, |
| "grad_norm": 0.7847154140472412, |
| "learning_rate": 9.503507225556734e-06, |
| "loss": 0.0301, |
| "step": 12770 |
| }, |
| { |
| "epoch": 49.15384615384615, |
| "grad_norm": 0.9998318552970886, |
| "learning_rate": 9.480437821237134e-06, |
| "loss": 0.0379, |
| "step": 12780 |
| }, |
| { |
| "epoch": 49.19230769230769, |
| "grad_norm": 1.1885015964508057, |
| "learning_rate": 9.457383507189763e-06, |
| "loss": 0.0367, |
| "step": 12790 |
| }, |
| { |
| "epoch": 49.23076923076923, |
| "grad_norm": 0.9944354295730591, |
| "learning_rate": 9.434344346444237e-06, |
| "loss": 0.0355, |
| "step": 12800 |
| }, |
| { |
| "epoch": 49.26923076923077, |
| "grad_norm": 1.2915024757385254, |
| "learning_rate": 9.411320401988744e-06, |
| "loss": 0.0369, |
| "step": 12810 |
| }, |
| { |
| "epoch": 49.30769230769231, |
| "grad_norm": 1.0678672790527344, |
| "learning_rate": 9.388311736769867e-06, |
| "loss": 0.0354, |
| "step": 12820 |
| }, |
| { |
| "epoch": 49.34615384615385, |
| "grad_norm": 1.4048540592193604, |
| "learning_rate": 9.365318413692429e-06, |
| "loss": 0.0328, |
| "step": 12830 |
| }, |
| { |
| "epoch": 49.38461538461539, |
| "grad_norm": 1.215981364250183, |
| "learning_rate": 9.34234049561928e-06, |
| "loss": 0.0337, |
| "step": 12840 |
| }, |
| { |
| "epoch": 49.42307692307692, |
| "grad_norm": 1.0116572380065918, |
| "learning_rate": 9.31937804537118e-06, |
| "loss": 0.0298, |
| "step": 12850 |
| }, |
| { |
| "epoch": 49.46153846153846, |
| "grad_norm": 1.2934813499450684, |
| "learning_rate": 9.296431125726587e-06, |
| "loss": 0.0328, |
| "step": 12860 |
| }, |
| { |
| "epoch": 49.5, |
| "grad_norm": 1.2118585109710693, |
| "learning_rate": 9.27349979942151e-06, |
| "loss": 0.0332, |
| "step": 12870 |
| }, |
| { |
| "epoch": 49.53846153846154, |
| "grad_norm": 1.0535308122634888, |
| "learning_rate": 9.250584129149321e-06, |
| "loss": 0.0326, |
| "step": 12880 |
| }, |
| { |
| "epoch": 49.57692307692308, |
| "grad_norm": 1.0874793529510498, |
| "learning_rate": 9.227684177560575e-06, |
| "loss": 0.0328, |
| "step": 12890 |
| }, |
| { |
| "epoch": 49.61538461538461, |
| "grad_norm": 0.9859468936920166, |
| "learning_rate": 9.204800007262874e-06, |
| "loss": 0.0298, |
| "step": 12900 |
| }, |
| { |
| "epoch": 49.65384615384615, |
| "grad_norm": 0.9996634721755981, |
| "learning_rate": 9.18193168082066e-06, |
| "loss": 0.0307, |
| "step": 12910 |
| }, |
| { |
| "epoch": 49.69230769230769, |
| "grad_norm": 1.0833901166915894, |
| "learning_rate": 9.159079260755079e-06, |
| "loss": 0.0357, |
| "step": 12920 |
| }, |
| { |
| "epoch": 49.73076923076923, |
| "grad_norm": 1.2677006721496582, |
| "learning_rate": 9.136242809543754e-06, |
| "loss": 0.0325, |
| "step": 12930 |
| }, |
| { |
| "epoch": 49.76923076923077, |
| "grad_norm": 1.1370261907577515, |
| "learning_rate": 9.113422389620685e-06, |
| "loss": 0.0269, |
| "step": 12940 |
| }, |
| { |
| "epoch": 49.80769230769231, |
| "grad_norm": 0.9824889898300171, |
| "learning_rate": 9.090618063376021e-06, |
| "loss": 0.033, |
| "step": 12950 |
| }, |
| { |
| "epoch": 49.84615384615385, |
| "grad_norm": 1.1071754693984985, |
| "learning_rate": 9.067829893155922e-06, |
| "loss": 0.0324, |
| "step": 12960 |
| }, |
| { |
| "epoch": 49.88461538461539, |
| "grad_norm": 1.0568147897720337, |
| "learning_rate": 9.045057941262384e-06, |
| "loss": 0.0341, |
| "step": 12970 |
| }, |
| { |
| "epoch": 49.92307692307692, |
| "grad_norm": 1.1383357048034668, |
| "learning_rate": 9.02230226995304e-06, |
| "loss": 0.034, |
| "step": 12980 |
| }, |
| { |
| "epoch": 49.96153846153846, |
| "grad_norm": 1.0124088525772095, |
| "learning_rate": 8.999562941441031e-06, |
| "loss": 0.0329, |
| "step": 12990 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 1.2345850467681885, |
| "learning_rate": 8.976840017894814e-06, |
| "loss": 0.0346, |
| "step": 13000 |
| }, |
| { |
| "epoch": 50.03846153846154, |
| "grad_norm": 1.0567175149917603, |
| "learning_rate": 8.954133561437993e-06, |
| "loss": 0.0362, |
| "step": 13010 |
| }, |
| { |
| "epoch": 50.07692307692308, |
| "grad_norm": 1.0732157230377197, |
| "learning_rate": 8.931443634149163e-06, |
| "loss": 0.0321, |
| "step": 13020 |
| }, |
| { |
| "epoch": 50.11538461538461, |
| "grad_norm": 1.1036709547042847, |
| "learning_rate": 8.908770298061702e-06, |
| "loss": 0.0335, |
| "step": 13030 |
| }, |
| { |
| "epoch": 50.15384615384615, |
| "grad_norm": 0.9429640173912048, |
| "learning_rate": 8.886113615163655e-06, |
| "loss": 0.0304, |
| "step": 13040 |
| }, |
| { |
| "epoch": 50.19230769230769, |
| "grad_norm": 0.9867258667945862, |
| "learning_rate": 8.863473647397522e-06, |
| "loss": 0.0319, |
| "step": 13050 |
| }, |
| { |
| "epoch": 50.23076923076923, |
| "grad_norm": 0.8746216297149658, |
| "learning_rate": 8.840850456660121e-06, |
| "loss": 0.0342, |
| "step": 13060 |
| }, |
| { |
| "epoch": 50.26923076923077, |
| "grad_norm": 0.8380419611930847, |
| "learning_rate": 8.818244104802384e-06, |
| "loss": 0.0246, |
| "step": 13070 |
| }, |
| { |
| "epoch": 50.30769230769231, |
| "grad_norm": 1.0145636796951294, |
| "learning_rate": 8.79565465362921e-06, |
| "loss": 0.0308, |
| "step": 13080 |
| }, |
| { |
| "epoch": 50.34615384615385, |
| "grad_norm": 1.0411081314086914, |
| "learning_rate": 8.773082164899305e-06, |
| "loss": 0.0334, |
| "step": 13090 |
| }, |
| { |
| "epoch": 50.38461538461539, |
| "grad_norm": 1.0380659103393555, |
| "learning_rate": 8.750526700324984e-06, |
| "loss": 0.0332, |
| "step": 13100 |
| }, |
| { |
| "epoch": 50.42307692307692, |
| "grad_norm": 1.0183615684509277, |
| "learning_rate": 8.72798832157203e-06, |
| "loss": 0.0321, |
| "step": 13110 |
| }, |
| { |
| "epoch": 50.46153846153846, |
| "grad_norm": 1.1746301651000977, |
| "learning_rate": 8.705467090259507e-06, |
| "loss": 0.0348, |
| "step": 13120 |
| }, |
| { |
| "epoch": 50.5, |
| "grad_norm": 1.037621259689331, |
| "learning_rate": 8.682963067959607e-06, |
| "loss": 0.0342, |
| "step": 13130 |
| }, |
| { |
| "epoch": 50.53846153846154, |
| "grad_norm": 1.1670657396316528, |
| "learning_rate": 8.660476316197457e-06, |
| "loss": 0.0306, |
| "step": 13140 |
| }, |
| { |
| "epoch": 50.57692307692308, |
| "grad_norm": 0.877946674823761, |
| "learning_rate": 8.638006896450991e-06, |
| "loss": 0.0302, |
| "step": 13150 |
| }, |
| { |
| "epoch": 50.61538461538461, |
| "grad_norm": 0.8860836625099182, |
| "learning_rate": 8.61555487015074e-06, |
| "loss": 0.0261, |
| "step": 13160 |
| }, |
| { |
| "epoch": 50.65384615384615, |
| "grad_norm": 0.7965879440307617, |
| "learning_rate": 8.593120298679676e-06, |
| "loss": 0.0337, |
| "step": 13170 |
| }, |
| { |
| "epoch": 50.69230769230769, |
| "grad_norm": 0.8773680329322815, |
| "learning_rate": 8.570703243373076e-06, |
| "loss": 0.0311, |
| "step": 13180 |
| }, |
| { |
| "epoch": 50.73076923076923, |
| "grad_norm": 0.9037876725196838, |
| "learning_rate": 8.5483037655183e-06, |
| "loss": 0.0278, |
| "step": 13190 |
| }, |
| { |
| "epoch": 50.76923076923077, |
| "grad_norm": 1.0279417037963867, |
| "learning_rate": 8.525921926354686e-06, |
| "loss": 0.0396, |
| "step": 13200 |
| }, |
| { |
| "epoch": 50.80769230769231, |
| "grad_norm": 1.109102487564087, |
| "learning_rate": 8.5035577870733e-06, |
| "loss": 0.0347, |
| "step": 13210 |
| }, |
| { |
| "epoch": 50.84615384615385, |
| "grad_norm": 1.1594183444976807, |
| "learning_rate": 8.481211408816858e-06, |
| "loss": 0.0339, |
| "step": 13220 |
| }, |
| { |
| "epoch": 50.88461538461539, |
| "grad_norm": 1.18452787399292, |
| "learning_rate": 8.45888285267951e-06, |
| "loss": 0.0344, |
| "step": 13230 |
| }, |
| { |
| "epoch": 50.92307692307692, |
| "grad_norm": 0.88536137342453, |
| "learning_rate": 8.436572179706666e-06, |
| "loss": 0.0358, |
| "step": 13240 |
| }, |
| { |
| "epoch": 50.96153846153846, |
| "grad_norm": 1.2674336433410645, |
| "learning_rate": 8.41427945089487e-06, |
| "loss": 0.0337, |
| "step": 13250 |
| }, |
| { |
| "epoch": 51.0, |
| "grad_norm": 0.9828641414642334, |
| "learning_rate": 8.39200472719157e-06, |
| "loss": 0.0371, |
| "step": 13260 |
| }, |
| { |
| "epoch": 51.03846153846154, |
| "grad_norm": 0.9601910710334778, |
| "learning_rate": 8.369748069495017e-06, |
| "loss": 0.0387, |
| "step": 13270 |
| }, |
| { |
| "epoch": 51.07692307692308, |
| "grad_norm": 1.1602641344070435, |
| "learning_rate": 8.347509538654074e-06, |
| "loss": 0.0327, |
| "step": 13280 |
| }, |
| { |
| "epoch": 51.11538461538461, |
| "grad_norm": 0.9831449389457703, |
| "learning_rate": 8.325289195468023e-06, |
| "loss": 0.0359, |
| "step": 13290 |
| }, |
| { |
| "epoch": 51.15384615384615, |
| "grad_norm": 1.0050350427627563, |
| "learning_rate": 8.303087100686449e-06, |
| "loss": 0.0377, |
| "step": 13300 |
| }, |
| { |
| "epoch": 51.19230769230769, |
| "grad_norm": 0.9317255616188049, |
| "learning_rate": 8.280903315009009e-06, |
| "loss": 0.0281, |
| "step": 13310 |
| }, |
| { |
| "epoch": 51.23076923076923, |
| "grad_norm": 1.0531281232833862, |
| "learning_rate": 8.258737899085338e-06, |
| "loss": 0.0324, |
| "step": 13320 |
| }, |
| { |
| "epoch": 51.26923076923077, |
| "grad_norm": 0.9911993741989136, |
| "learning_rate": 8.236590913514841e-06, |
| "loss": 0.0346, |
| "step": 13330 |
| }, |
| { |
| "epoch": 51.30769230769231, |
| "grad_norm": 0.8987839221954346, |
| "learning_rate": 8.214462418846529e-06, |
| "loss": 0.0314, |
| "step": 13340 |
| }, |
| { |
| "epoch": 51.34615384615385, |
| "grad_norm": 0.8279987573623657, |
| "learning_rate": 8.192352475578857e-06, |
| "loss": 0.0273, |
| "step": 13350 |
| }, |
| { |
| "epoch": 51.38461538461539, |
| "grad_norm": 0.9038864970207214, |
| "learning_rate": 8.170261144159563e-06, |
| "loss": 0.0316, |
| "step": 13360 |
| }, |
| { |
| "epoch": 51.42307692307692, |
| "grad_norm": 0.9167174100875854, |
| "learning_rate": 8.148188484985505e-06, |
| "loss": 0.0298, |
| "step": 13370 |
| }, |
| { |
| "epoch": 51.46153846153846, |
| "grad_norm": 1.0757681131362915, |
| "learning_rate": 8.126134558402501e-06, |
| "loss": 0.0327, |
| "step": 13380 |
| }, |
| { |
| "epoch": 51.5, |
| "grad_norm": 1.0499067306518555, |
| "learning_rate": 8.104099424705135e-06, |
| "loss": 0.0324, |
| "step": 13390 |
| }, |
| { |
| "epoch": 51.53846153846154, |
| "grad_norm": 0.9457489252090454, |
| "learning_rate": 8.082083144136625e-06, |
| "loss": 0.0282, |
| "step": 13400 |
| }, |
| { |
| "epoch": 51.57692307692308, |
| "grad_norm": 1.1252720355987549, |
| "learning_rate": 8.060085776888634e-06, |
| "loss": 0.0321, |
| "step": 13410 |
| }, |
| { |
| "epoch": 51.61538461538461, |
| "grad_norm": 1.111789584159851, |
| "learning_rate": 8.038107383101126e-06, |
| "loss": 0.0339, |
| "step": 13420 |
| }, |
| { |
| "epoch": 51.65384615384615, |
| "grad_norm": 0.7417734861373901, |
| "learning_rate": 8.016148022862201e-06, |
| "loss": 0.0351, |
| "step": 13430 |
| }, |
| { |
| "epoch": 51.69230769230769, |
| "grad_norm": 1.0458786487579346, |
| "learning_rate": 7.994207756207903e-06, |
| "loss": 0.0363, |
| "step": 13440 |
| }, |
| { |
| "epoch": 51.73076923076923, |
| "grad_norm": 1.1386157274246216, |
| "learning_rate": 7.972286643122083e-06, |
| "loss": 0.0339, |
| "step": 13450 |
| }, |
| { |
| "epoch": 51.76923076923077, |
| "grad_norm": 1.0273871421813965, |
| "learning_rate": 7.950384743536225e-06, |
| "loss": 0.034, |
| "step": 13460 |
| }, |
| { |
| "epoch": 51.80769230769231, |
| "grad_norm": 1.0202971696853638, |
| "learning_rate": 7.928502117329285e-06, |
| "loss": 0.0341, |
| "step": 13470 |
| }, |
| { |
| "epoch": 51.84615384615385, |
| "grad_norm": 0.9581331014633179, |
| "learning_rate": 7.906638824327545e-06, |
| "loss": 0.0284, |
| "step": 13480 |
| }, |
| { |
| "epoch": 51.88461538461539, |
| "grad_norm": 1.155298113822937, |
| "learning_rate": 7.884794924304392e-06, |
| "loss": 0.0417, |
| "step": 13490 |
| }, |
| { |
| "epoch": 51.92307692307692, |
| "grad_norm": 0.8334839940071106, |
| "learning_rate": 7.862970476980218e-06, |
| "loss": 0.0356, |
| "step": 13500 |
| }, |
| { |
| "epoch": 51.96153846153846, |
| "grad_norm": 1.1004962921142578, |
| "learning_rate": 7.841165542022242e-06, |
| "loss": 0.0381, |
| "step": 13510 |
| }, |
| { |
| "epoch": 52.0, |
| "grad_norm": 1.0550732612609863, |
| "learning_rate": 7.819380179044307e-06, |
| "loss": 0.0277, |
| "step": 13520 |
| }, |
| { |
| "epoch": 52.03846153846154, |
| "grad_norm": 0.8858346939086914, |
| "learning_rate": 7.797614447606783e-06, |
| "loss": 0.0303, |
| "step": 13530 |
| }, |
| { |
| "epoch": 52.07692307692308, |
| "grad_norm": 1.1065115928649902, |
| "learning_rate": 7.775868407216326e-06, |
| "loss": 0.0379, |
| "step": 13540 |
| }, |
| { |
| "epoch": 52.11538461538461, |
| "grad_norm": 0.9579010009765625, |
| "learning_rate": 7.754142117325792e-06, |
| "loss": 0.0326, |
| "step": 13550 |
| }, |
| { |
| "epoch": 52.15384615384615, |
| "grad_norm": 1.0247814655303955, |
| "learning_rate": 7.73243563733403e-06, |
| "loss": 0.0356, |
| "step": 13560 |
| }, |
| { |
| "epoch": 52.19230769230769, |
| "grad_norm": 0.9102254509925842, |
| "learning_rate": 7.710749026585726e-06, |
| "loss": 0.0275, |
| "step": 13570 |
| }, |
| { |
| "epoch": 52.23076923076923, |
| "grad_norm": 0.701599657535553, |
| "learning_rate": 7.689082344371244e-06, |
| "loss": 0.0353, |
| "step": 13580 |
| }, |
| { |
| "epoch": 52.26923076923077, |
| "grad_norm": 0.9362711906433105, |
| "learning_rate": 7.667435649926459e-06, |
| "loss": 0.0314, |
| "step": 13590 |
| }, |
| { |
| "epoch": 52.30769230769231, |
| "grad_norm": 0.8040527701377869, |
| "learning_rate": 7.645809002432616e-06, |
| "loss": 0.0331, |
| "step": 13600 |
| }, |
| { |
| "epoch": 52.34615384615385, |
| "grad_norm": 0.7203378081321716, |
| "learning_rate": 7.6242024610161454e-06, |
| "loss": 0.0318, |
| "step": 13610 |
| }, |
| { |
| "epoch": 52.38461538461539, |
| "grad_norm": 0.9988213777542114, |
| "learning_rate": 7.602616084748501e-06, |
| "loss": 0.0323, |
| "step": 13620 |
| }, |
| { |
| "epoch": 52.42307692307692, |
| "grad_norm": 0.9332322478294373, |
| "learning_rate": 7.5810499326460145e-06, |
| "loss": 0.0356, |
| "step": 13630 |
| }, |
| { |
| "epoch": 52.46153846153846, |
| "grad_norm": 1.0301990509033203, |
| "learning_rate": 7.5595040636697145e-06, |
| "loss": 0.034, |
| "step": 13640 |
| }, |
| { |
| "epoch": 52.5, |
| "grad_norm": 0.9222707152366638, |
| "learning_rate": 7.537978536725189e-06, |
| "loss": 0.029, |
| "step": 13650 |
| }, |
| { |
| "epoch": 52.53846153846154, |
| "grad_norm": 1.0901727676391602, |
| "learning_rate": 7.5164734106624135e-06, |
| "loss": 0.0343, |
| "step": 13660 |
| }, |
| { |
| "epoch": 52.57692307692308, |
| "grad_norm": 0.9981234073638916, |
| "learning_rate": 7.494988744275575e-06, |
| "loss": 0.0283, |
| "step": 13670 |
| }, |
| { |
| "epoch": 52.61538461538461, |
| "grad_norm": 0.922626256942749, |
| "learning_rate": 7.473524596302931e-06, |
| "loss": 0.037, |
| "step": 13680 |
| }, |
| { |
| "epoch": 52.65384615384615, |
| "grad_norm": 0.9884187579154968, |
| "learning_rate": 7.452081025426639e-06, |
| "loss": 0.031, |
| "step": 13690 |
| }, |
| { |
| "epoch": 52.69230769230769, |
| "grad_norm": 1.013758659362793, |
| "learning_rate": 7.430658090272606e-06, |
| "loss": 0.0333, |
| "step": 13700 |
| }, |
| { |
| "epoch": 52.73076923076923, |
| "grad_norm": 1.1022049188613892, |
| "learning_rate": 7.40925584941033e-06, |
| "loss": 0.0358, |
| "step": 13710 |
| }, |
| { |
| "epoch": 52.76923076923077, |
| "grad_norm": 0.9264612197875977, |
| "learning_rate": 7.3878743613527e-06, |
| "loss": 0.0284, |
| "step": 13720 |
| }, |
| { |
| "epoch": 52.80769230769231, |
| "grad_norm": 1.111238718032837, |
| "learning_rate": 7.366513684555903e-06, |
| "loss": 0.0342, |
| "step": 13730 |
| }, |
| { |
| "epoch": 52.84615384615385, |
| "grad_norm": 1.2126115560531616, |
| "learning_rate": 7.345173877419204e-06, |
| "loss": 0.031, |
| "step": 13740 |
| }, |
| { |
| "epoch": 52.88461538461539, |
| "grad_norm": 1.1086037158966064, |
| "learning_rate": 7.323854998284823e-06, |
| "loss": 0.0348, |
| "step": 13750 |
| }, |
| { |
| "epoch": 52.92307692307692, |
| "grad_norm": 1.1465508937835693, |
| "learning_rate": 7.302557105437775e-06, |
| "loss": 0.0304, |
| "step": 13760 |
| }, |
| { |
| "epoch": 52.96153846153846, |
| "grad_norm": 0.9518792033195496, |
| "learning_rate": 7.281280257105666e-06, |
| "loss": 0.0306, |
| "step": 13770 |
| }, |
| { |
| "epoch": 53.0, |
| "grad_norm": 1.0208402872085571, |
| "learning_rate": 7.260024511458599e-06, |
| "loss": 0.0296, |
| "step": 13780 |
| }, |
| { |
| "epoch": 53.03846153846154, |
| "grad_norm": 0.9359050393104553, |
| "learning_rate": 7.238789926608963e-06, |
| "loss": 0.0331, |
| "step": 13790 |
| }, |
| { |
| "epoch": 53.07692307692308, |
| "grad_norm": 1.0515170097351074, |
| "learning_rate": 7.217576560611305e-06, |
| "loss": 0.0345, |
| "step": 13800 |
| }, |
| { |
| "epoch": 53.11538461538461, |
| "grad_norm": 0.9996568560600281, |
| "learning_rate": 7.196384471462172e-06, |
| "loss": 0.0364, |
| "step": 13810 |
| }, |
| { |
| "epoch": 53.15384615384615, |
| "grad_norm": 1.300593614578247, |
| "learning_rate": 7.175213717099903e-06, |
| "loss": 0.0321, |
| "step": 13820 |
| }, |
| { |
| "epoch": 53.19230769230769, |
| "grad_norm": 1.1695557832717896, |
| "learning_rate": 7.154064355404547e-06, |
| "loss": 0.0327, |
| "step": 13830 |
| }, |
| { |
| "epoch": 53.23076923076923, |
| "grad_norm": 0.9456730484962463, |
| "learning_rate": 7.132936444197641e-06, |
| "loss": 0.0319, |
| "step": 13840 |
| }, |
| { |
| "epoch": 53.26923076923077, |
| "grad_norm": 0.7543469071388245, |
| "learning_rate": 7.111830041242101e-06, |
| "loss": 0.029, |
| "step": 13850 |
| }, |
| { |
| "epoch": 53.30769230769231, |
| "grad_norm": 0.9508689045906067, |
| "learning_rate": 7.090745204242018e-06, |
| "loss": 0.0353, |
| "step": 13860 |
| }, |
| { |
| "epoch": 53.34615384615385, |
| "grad_norm": 0.8398585319519043, |
| "learning_rate": 7.069681990842527e-06, |
| "loss": 0.0276, |
| "step": 13870 |
| }, |
| { |
| "epoch": 53.38461538461539, |
| "grad_norm": 0.7608618140220642, |
| "learning_rate": 7.04864045862966e-06, |
| "loss": 0.0313, |
| "step": 13880 |
| }, |
| { |
| "epoch": 53.42307692307692, |
| "grad_norm": 1.0014206171035767, |
| "learning_rate": 7.027620665130152e-06, |
| "loss": 0.0331, |
| "step": 13890 |
| }, |
| { |
| "epoch": 53.46153846153846, |
| "grad_norm": 0.7588220834732056, |
| "learning_rate": 7.006622667811326e-06, |
| "loss": 0.0351, |
| "step": 13900 |
| }, |
| { |
| "epoch": 53.5, |
| "grad_norm": 0.9668156504631042, |
| "learning_rate": 6.9856465240809e-06, |
| "loss": 0.0292, |
| "step": 13910 |
| }, |
| { |
| "epoch": 53.53846153846154, |
| "grad_norm": 1.0986829996109009, |
| "learning_rate": 6.964692291286844e-06, |
| "loss": 0.0347, |
| "step": 13920 |
| }, |
| { |
| "epoch": 53.57692307692308, |
| "grad_norm": 0.9580195546150208, |
| "learning_rate": 6.943760026717243e-06, |
| "loss": 0.0311, |
| "step": 13930 |
| }, |
| { |
| "epoch": 53.61538461538461, |
| "grad_norm": 1.2510932683944702, |
| "learning_rate": 6.922849787600097e-06, |
| "loss": 0.0319, |
| "step": 13940 |
| }, |
| { |
| "epoch": 53.65384615384615, |
| "grad_norm": 1.139769434928894, |
| "learning_rate": 6.90196163110321e-06, |
| "loss": 0.0357, |
| "step": 13950 |
| }, |
| { |
| "epoch": 53.69230769230769, |
| "grad_norm": 0.9529709815979004, |
| "learning_rate": 6.881095614334002e-06, |
| "loss": 0.0312, |
| "step": 13960 |
| }, |
| { |
| "epoch": 53.73076923076923, |
| "grad_norm": 0.8552069067955017, |
| "learning_rate": 6.860251794339359e-06, |
| "loss": 0.0295, |
| "step": 13970 |
| }, |
| { |
| "epoch": 53.76923076923077, |
| "grad_norm": 0.783135175704956, |
| "learning_rate": 6.839430228105501e-06, |
| "loss": 0.0294, |
| "step": 13980 |
| }, |
| { |
| "epoch": 53.80769230769231, |
| "grad_norm": 0.7776913046836853, |
| "learning_rate": 6.818630972557788e-06, |
| "loss": 0.0309, |
| "step": 13990 |
| }, |
| { |
| "epoch": 53.84615384615385, |
| "grad_norm": 0.7431850433349609, |
| "learning_rate": 6.797854084560585e-06, |
| "loss": 0.0286, |
| "step": 14000 |
| }, |
| { |
| "epoch": 53.88461538461539, |
| "grad_norm": 0.7169124484062195, |
| "learning_rate": 6.777099620917124e-06, |
| "loss": 0.0315, |
| "step": 14010 |
| }, |
| { |
| "epoch": 53.92307692307692, |
| "grad_norm": 1.0331408977508545, |
| "learning_rate": 6.756367638369301e-06, |
| "loss": 0.0337, |
| "step": 14020 |
| }, |
| { |
| "epoch": 53.96153846153846, |
| "grad_norm": 0.8683583736419678, |
| "learning_rate": 6.735658193597579e-06, |
| "loss": 0.0308, |
| "step": 14030 |
| }, |
| { |
| "epoch": 54.0, |
| "grad_norm": 0.9476208090782166, |
| "learning_rate": 6.7149713432207825e-06, |
| "loss": 0.0294, |
| "step": 14040 |
| }, |
| { |
| "epoch": 54.03846153846154, |
| "grad_norm": 0.8476566076278687, |
| "learning_rate": 6.694307143795966e-06, |
| "loss": 0.0268, |
| "step": 14050 |
| }, |
| { |
| "epoch": 54.07692307692308, |
| "grad_norm": 0.9866935014724731, |
| "learning_rate": 6.6736656518182704e-06, |
| "loss": 0.0315, |
| "step": 14060 |
| }, |
| { |
| "epoch": 54.11538461538461, |
| "grad_norm": 0.9184465408325195, |
| "learning_rate": 6.6530469237207375e-06, |
| "loss": 0.0328, |
| "step": 14070 |
| }, |
| { |
| "epoch": 54.15384615384615, |
| "grad_norm": 0.872621476650238, |
| "learning_rate": 6.632451015874193e-06, |
| "loss": 0.0286, |
| "step": 14080 |
| }, |
| { |
| "epoch": 54.19230769230769, |
| "grad_norm": 0.8452703356742859, |
| "learning_rate": 6.611877984587058e-06, |
| "loss": 0.029, |
| "step": 14090 |
| }, |
| { |
| "epoch": 54.23076923076923, |
| "grad_norm": 0.9648960828781128, |
| "learning_rate": 6.591327886105207e-06, |
| "loss": 0.0314, |
| "step": 14100 |
| }, |
| { |
| "epoch": 54.26923076923077, |
| "grad_norm": 1.0145609378814697, |
| "learning_rate": 6.570800776611836e-06, |
| "loss": 0.0264, |
| "step": 14110 |
| }, |
| { |
| "epoch": 54.30769230769231, |
| "grad_norm": 0.7880923748016357, |
| "learning_rate": 6.55029671222727e-06, |
| "loss": 0.0291, |
| "step": 14120 |
| }, |
| { |
| "epoch": 54.34615384615385, |
| "grad_norm": 0.9834394454956055, |
| "learning_rate": 6.529815749008846e-06, |
| "loss": 0.0321, |
| "step": 14130 |
| }, |
| { |
| "epoch": 54.38461538461539, |
| "grad_norm": 0.9993003606796265, |
| "learning_rate": 6.50935794295073e-06, |
| "loss": 0.0341, |
| "step": 14140 |
| }, |
| { |
| "epoch": 54.42307692307692, |
| "grad_norm": 0.8572825789451599, |
| "learning_rate": 6.488923349983779e-06, |
| "loss": 0.0292, |
| "step": 14150 |
| }, |
| { |
| "epoch": 54.46153846153846, |
| "grad_norm": 0.9555164575576782, |
| "learning_rate": 6.468512025975401e-06, |
| "loss": 0.0298, |
| "step": 14160 |
| }, |
| { |
| "epoch": 54.5, |
| "grad_norm": 0.8961513638496399, |
| "learning_rate": 6.448124026729363e-06, |
| "loss": 0.032, |
| "step": 14170 |
| }, |
| { |
| "epoch": 54.53846153846154, |
| "grad_norm": 0.9303173422813416, |
| "learning_rate": 6.427759407985691e-06, |
| "loss": 0.0308, |
| "step": 14180 |
| }, |
| { |
| "epoch": 54.57692307692308, |
| "grad_norm": 0.9030703902244568, |
| "learning_rate": 6.407418225420465e-06, |
| "loss": 0.0306, |
| "step": 14190 |
| }, |
| { |
| "epoch": 54.61538461538461, |
| "grad_norm": 1.0203205347061157, |
| "learning_rate": 6.387100534645698e-06, |
| "loss": 0.0312, |
| "step": 14200 |
| }, |
| { |
| "epoch": 54.65384615384615, |
| "grad_norm": 0.9905704259872437, |
| "learning_rate": 6.366806391209194e-06, |
| "loss": 0.0317, |
| "step": 14210 |
| }, |
| { |
| "epoch": 54.69230769230769, |
| "grad_norm": 0.8993654251098633, |
| "learning_rate": 6.346535850594352e-06, |
| "loss": 0.033, |
| "step": 14220 |
| }, |
| { |
| "epoch": 54.73076923076923, |
| "grad_norm": 0.888511061668396, |
| "learning_rate": 6.326288968220069e-06, |
| "loss": 0.0337, |
| "step": 14230 |
| }, |
| { |
| "epoch": 54.76923076923077, |
| "grad_norm": 0.8489452600479126, |
| "learning_rate": 6.306065799440542e-06, |
| "loss": 0.0283, |
| "step": 14240 |
| }, |
| { |
| "epoch": 54.80769230769231, |
| "grad_norm": 1.16481351852417, |
| "learning_rate": 6.285866399545137e-06, |
| "loss": 0.0328, |
| "step": 14250 |
| }, |
| { |
| "epoch": 54.84615384615385, |
| "grad_norm": 0.9366856813430786, |
| "learning_rate": 6.2656908237582515e-06, |
| "loss": 0.0341, |
| "step": 14260 |
| }, |
| { |
| "epoch": 54.88461538461539, |
| "grad_norm": 0.7751666307449341, |
| "learning_rate": 6.245539127239135e-06, |
| "loss": 0.0284, |
| "step": 14270 |
| }, |
| { |
| "epoch": 54.92307692307692, |
| "grad_norm": 1.0874589681625366, |
| "learning_rate": 6.225411365081752e-06, |
| "loss": 0.0303, |
| "step": 14280 |
| }, |
| { |
| "epoch": 54.96153846153846, |
| "grad_norm": 0.7190085053443909, |
| "learning_rate": 6.205307592314645e-06, |
| "loss": 0.0254, |
| "step": 14290 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 0.9163426756858826, |
| "learning_rate": 6.185227863900751e-06, |
| "loss": 0.0299, |
| "step": 14300 |
| }, |
| { |
| "epoch": 55.03846153846154, |
| "grad_norm": 0.8473014831542969, |
| "learning_rate": 6.165172234737291e-06, |
| "loss": 0.0277, |
| "step": 14310 |
| }, |
| { |
| "epoch": 55.07692307692308, |
| "grad_norm": 0.6970618367195129, |
| "learning_rate": 6.145140759655586e-06, |
| "loss": 0.0369, |
| "step": 14320 |
| }, |
| { |
| "epoch": 55.11538461538461, |
| "grad_norm": 0.8919505476951599, |
| "learning_rate": 6.125133493420914e-06, |
| "loss": 0.0342, |
| "step": 14330 |
| }, |
| { |
| "epoch": 55.15384615384615, |
| "grad_norm": 0.9447165727615356, |
| "learning_rate": 6.1051504907323915e-06, |
| "loss": 0.0335, |
| "step": 14340 |
| }, |
| { |
| "epoch": 55.19230769230769, |
| "grad_norm": 0.7969279289245605, |
| "learning_rate": 6.085191806222774e-06, |
| "loss": 0.0297, |
| "step": 14350 |
| }, |
| { |
| "epoch": 55.23076923076923, |
| "grad_norm": 0.8206071853637695, |
| "learning_rate": 6.065257494458352e-06, |
| "loss": 0.0347, |
| "step": 14360 |
| }, |
| { |
| "epoch": 55.26923076923077, |
| "grad_norm": 0.7703580856323242, |
| "learning_rate": 6.045347609938767e-06, |
| "loss": 0.0317, |
| "step": 14370 |
| }, |
| { |
| "epoch": 55.30769230769231, |
| "grad_norm": 0.6906372904777527, |
| "learning_rate": 6.025462207096879e-06, |
| "loss": 0.0326, |
| "step": 14380 |
| }, |
| { |
| "epoch": 55.34615384615385, |
| "grad_norm": 0.7366732954978943, |
| "learning_rate": 6.005601340298631e-06, |
| "loss": 0.0297, |
| "step": 14390 |
| }, |
| { |
| "epoch": 55.38461538461539, |
| "grad_norm": 0.9685719609260559, |
| "learning_rate": 5.985765063842862e-06, |
| "loss": 0.031, |
| "step": 14400 |
| }, |
| { |
| "epoch": 55.42307692307692, |
| "grad_norm": 0.7533148527145386, |
| "learning_rate": 5.965953431961206e-06, |
| "loss": 0.0322, |
| "step": 14410 |
| }, |
| { |
| "epoch": 55.46153846153846, |
| "grad_norm": 0.7429641485214233, |
| "learning_rate": 5.946166498817903e-06, |
| "loss": 0.0303, |
| "step": 14420 |
| }, |
| { |
| "epoch": 55.5, |
| "grad_norm": 0.6613432765007019, |
| "learning_rate": 5.926404318509668e-06, |
| "loss": 0.0263, |
| "step": 14430 |
| }, |
| { |
| "epoch": 55.53846153846154, |
| "grad_norm": 0.8684971332550049, |
| "learning_rate": 5.906666945065556e-06, |
| "loss": 0.0278, |
| "step": 14440 |
| }, |
| { |
| "epoch": 55.57692307692308, |
| "grad_norm": 0.8969733119010925, |
| "learning_rate": 5.886954432446784e-06, |
| "loss": 0.0381, |
| "step": 14450 |
| }, |
| { |
| "epoch": 55.61538461538461, |
| "grad_norm": 0.8112402558326721, |
| "learning_rate": 5.867266834546617e-06, |
| "loss": 0.0286, |
| "step": 14460 |
| }, |
| { |
| "epoch": 55.65384615384615, |
| "grad_norm": 1.0435789823532104, |
| "learning_rate": 5.847604205190192e-06, |
| "loss": 0.0328, |
| "step": 14470 |
| }, |
| { |
| "epoch": 55.69230769230769, |
| "grad_norm": 0.8987309336662292, |
| "learning_rate": 5.827966598134383e-06, |
| "loss": 0.0339, |
| "step": 14480 |
| }, |
| { |
| "epoch": 55.73076923076923, |
| "grad_norm": 0.8202237486839294, |
| "learning_rate": 5.808354067067665e-06, |
| "loss": 0.0319, |
| "step": 14490 |
| }, |
| { |
| "epoch": 55.76923076923077, |
| "grad_norm": 0.8230668306350708, |
| "learning_rate": 5.788766665609941e-06, |
| "loss": 0.0255, |
| "step": 14500 |
| }, |
| { |
| "epoch": 55.80769230769231, |
| "grad_norm": 0.9060791730880737, |
| "learning_rate": 5.7692044473124276e-06, |
| "loss": 0.0308, |
| "step": 14510 |
| }, |
| { |
| "epoch": 55.84615384615385, |
| "grad_norm": 1.0444339513778687, |
| "learning_rate": 5.749667465657479e-06, |
| "loss": 0.027, |
| "step": 14520 |
| }, |
| { |
| "epoch": 55.88461538461539, |
| "grad_norm": 0.7329652309417725, |
| "learning_rate": 5.730155774058451e-06, |
| "loss": 0.0313, |
| "step": 14530 |
| }, |
| { |
| "epoch": 55.92307692307692, |
| "grad_norm": 1.0695486068725586, |
| "learning_rate": 5.710669425859575e-06, |
| "loss": 0.0357, |
| "step": 14540 |
| }, |
| { |
| "epoch": 55.96153846153846, |
| "grad_norm": 0.780979335308075, |
| "learning_rate": 5.691208474335774e-06, |
| "loss": 0.0326, |
| "step": 14550 |
| }, |
| { |
| "epoch": 56.0, |
| "grad_norm": 0.8867959380149841, |
| "learning_rate": 5.6717729726925446e-06, |
| "loss": 0.0308, |
| "step": 14560 |
| }, |
| { |
| "epoch": 56.03846153846154, |
| "grad_norm": 0.8821061849594116, |
| "learning_rate": 5.652362974065816e-06, |
| "loss": 0.0321, |
| "step": 14570 |
| }, |
| { |
| "epoch": 56.07692307692308, |
| "grad_norm": 0.7196897268295288, |
| "learning_rate": 5.6329785315217726e-06, |
| "loss": 0.0316, |
| "step": 14580 |
| }, |
| { |
| "epoch": 56.11538461538461, |
| "grad_norm": 0.7857591509819031, |
| "learning_rate": 5.6136196980567495e-06, |
| "loss": 0.0286, |
| "step": 14590 |
| }, |
| { |
| "epoch": 56.15384615384615, |
| "grad_norm": 0.7501246929168701, |
| "learning_rate": 5.594286526597054e-06, |
| "loss": 0.0301, |
| "step": 14600 |
| }, |
| { |
| "epoch": 56.19230769230769, |
| "grad_norm": 0.9493558406829834, |
| "learning_rate": 5.574979069998833e-06, |
| "loss": 0.0313, |
| "step": 14610 |
| }, |
| { |
| "epoch": 56.23076923076923, |
| "grad_norm": 0.8972458839416504, |
| "learning_rate": 5.5556973810479486e-06, |
| "loss": 0.0293, |
| "step": 14620 |
| }, |
| { |
| "epoch": 56.26923076923077, |
| "grad_norm": 0.7602220773696899, |
| "learning_rate": 5.536441512459787e-06, |
| "loss": 0.036, |
| "step": 14630 |
| }, |
| { |
| "epoch": 56.30769230769231, |
| "grad_norm": 1.1218823194503784, |
| "learning_rate": 5.517211516879172e-06, |
| "loss": 0.0319, |
| "step": 14640 |
| }, |
| { |
| "epoch": 56.34615384615385, |
| "grad_norm": 0.908366858959198, |
| "learning_rate": 5.49800744688017e-06, |
| "loss": 0.0302, |
| "step": 14650 |
| }, |
| { |
| "epoch": 56.38461538461539, |
| "grad_norm": 0.8256409764289856, |
| "learning_rate": 5.4788293549659694e-06, |
| "loss": 0.0289, |
| "step": 14660 |
| }, |
| { |
| "epoch": 56.42307692307692, |
| "grad_norm": 0.7066758275032043, |
| "learning_rate": 5.459677293568753e-06, |
| "loss": 0.0279, |
| "step": 14670 |
| }, |
| { |
| "epoch": 56.46153846153846, |
| "grad_norm": 0.6973010301589966, |
| "learning_rate": 5.440551315049515e-06, |
| "loss": 0.0307, |
| "step": 14680 |
| }, |
| { |
| "epoch": 56.5, |
| "grad_norm": 0.7377819418907166, |
| "learning_rate": 5.421451471697966e-06, |
| "loss": 0.0281, |
| "step": 14690 |
| }, |
| { |
| "epoch": 56.53846153846154, |
| "grad_norm": 0.8551780581474304, |
| "learning_rate": 5.402377815732326e-06, |
| "loss": 0.0308, |
| "step": 14700 |
| }, |
| { |
| "epoch": 56.57692307692308, |
| "grad_norm": 0.8823787569999695, |
| "learning_rate": 5.383330399299253e-06, |
| "loss": 0.0412, |
| "step": 14710 |
| }, |
| { |
| "epoch": 56.61538461538461, |
| "grad_norm": 0.9362679719924927, |
| "learning_rate": 5.364309274473663e-06, |
| "loss": 0.0339, |
| "step": 14720 |
| }, |
| { |
| "epoch": 56.65384615384615, |
| "grad_norm": 1.0090643167495728, |
| "learning_rate": 5.345314493258573e-06, |
| "loss": 0.031, |
| "step": 14730 |
| }, |
| { |
| "epoch": 56.69230769230769, |
| "grad_norm": 1.0990750789642334, |
| "learning_rate": 5.326346107585e-06, |
| "loss": 0.0381, |
| "step": 14740 |
| }, |
| { |
| "epoch": 56.73076923076923, |
| "grad_norm": 1.0385936498641968, |
| "learning_rate": 5.307404169311782e-06, |
| "loss": 0.0328, |
| "step": 14750 |
| }, |
| { |
| "epoch": 56.76923076923077, |
| "grad_norm": 1.0119373798370361, |
| "learning_rate": 5.288488730225449e-06, |
| "loss": 0.03, |
| "step": 14760 |
| }, |
| { |
| "epoch": 56.80769230769231, |
| "grad_norm": 0.8520657420158386, |
| "learning_rate": 5.2695998420401e-06, |
| "loss": 0.0367, |
| "step": 14770 |
| }, |
| { |
| "epoch": 56.84615384615385, |
| "grad_norm": 0.8646618127822876, |
| "learning_rate": 5.2507375563972236e-06, |
| "loss": 0.0306, |
| "step": 14780 |
| }, |
| { |
| "epoch": 56.88461538461539, |
| "grad_norm": 0.890970766544342, |
| "learning_rate": 5.231901924865596e-06, |
| "loss": 0.0324, |
| "step": 14790 |
| }, |
| { |
| "epoch": 56.92307692307692, |
| "grad_norm": 0.7873624563217163, |
| "learning_rate": 5.213092998941113e-06, |
| "loss": 0.0246, |
| "step": 14800 |
| }, |
| { |
| "epoch": 56.96153846153846, |
| "grad_norm": 0.8681284189224243, |
| "learning_rate": 5.1943108300466555e-06, |
| "loss": 0.0284, |
| "step": 14810 |
| }, |
| { |
| "epoch": 57.0, |
| "grad_norm": 0.9095093607902527, |
| "learning_rate": 5.175555469531964e-06, |
| "loss": 0.0304, |
| "step": 14820 |
| }, |
| { |
| "epoch": 57.03846153846154, |
| "grad_norm": 0.9900658130645752, |
| "learning_rate": 5.1568269686734716e-06, |
| "loss": 0.0312, |
| "step": 14830 |
| }, |
| { |
| "epoch": 57.07692307692308, |
| "grad_norm": 1.1000691652297974, |
| "learning_rate": 5.138125378674182e-06, |
| "loss": 0.0297, |
| "step": 14840 |
| }, |
| { |
| "epoch": 57.11538461538461, |
| "grad_norm": 0.9001245498657227, |
| "learning_rate": 5.119450750663539e-06, |
| "loss": 0.0286, |
| "step": 14850 |
| }, |
| { |
| "epoch": 57.15384615384615, |
| "grad_norm": 0.6249764561653137, |
| "learning_rate": 5.100803135697248e-06, |
| "loss": 0.0303, |
| "step": 14860 |
| }, |
| { |
| "epoch": 57.19230769230769, |
| "grad_norm": 0.7594456076622009, |
| "learning_rate": 5.0821825847571904e-06, |
| "loss": 0.0309, |
| "step": 14870 |
| }, |
| { |
| "epoch": 57.23076923076923, |
| "grad_norm": 0.7925312519073486, |
| "learning_rate": 5.063589148751236e-06, |
| "loss": 0.0309, |
| "step": 14880 |
| }, |
| { |
| "epoch": 57.26923076923077, |
| "grad_norm": 0.7072622776031494, |
| "learning_rate": 5.045022878513122e-06, |
| "loss": 0.0283, |
| "step": 14890 |
| }, |
| { |
| "epoch": 57.30769230769231, |
| "grad_norm": 0.8310312628746033, |
| "learning_rate": 5.026483824802333e-06, |
| "loss": 0.0277, |
| "step": 14900 |
| }, |
| { |
| "epoch": 57.34615384615385, |
| "grad_norm": 0.8580349683761597, |
| "learning_rate": 5.0079720383039245e-06, |
| "loss": 0.028, |
| "step": 14910 |
| }, |
| { |
| "epoch": 57.38461538461539, |
| "grad_norm": 0.9539895057678223, |
| "learning_rate": 4.989487569628425e-06, |
| "loss": 0.0378, |
| "step": 14920 |
| }, |
| { |
| "epoch": 57.42307692307692, |
| "grad_norm": 0.6495718359947205, |
| "learning_rate": 4.971030469311658e-06, |
| "loss": 0.0283, |
| "step": 14930 |
| }, |
| { |
| "epoch": 57.46153846153846, |
| "grad_norm": 0.9080126285552979, |
| "learning_rate": 4.952600787814628e-06, |
| "loss": 0.0321, |
| "step": 14940 |
| }, |
| { |
| "epoch": 57.5, |
| "grad_norm": 1.1204878091812134, |
| "learning_rate": 4.934198575523391e-06, |
| "loss": 0.0302, |
| "step": 14950 |
| }, |
| { |
| "epoch": 57.53846153846154, |
| "grad_norm": 0.8599961996078491, |
| "learning_rate": 4.915823882748882e-06, |
| "loss": 0.0263, |
| "step": 14960 |
| }, |
| { |
| "epoch": 57.57692307692308, |
| "grad_norm": 0.7675483226776123, |
| "learning_rate": 4.897476759726823e-06, |
| "loss": 0.0269, |
| "step": 14970 |
| }, |
| { |
| "epoch": 57.61538461538461, |
| "grad_norm": 0.7085859179496765, |
| "learning_rate": 4.87915725661753e-06, |
| "loss": 0.028, |
| "step": 14980 |
| }, |
| { |
| "epoch": 57.65384615384615, |
| "grad_norm": 0.9856685400009155, |
| "learning_rate": 4.860865423505833e-06, |
| "loss": 0.0348, |
| "step": 14990 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 0.9436843991279602, |
| "learning_rate": 4.842601310400912e-06, |
| "loss": 0.0287, |
| "step": 15000 |
| }, |
| { |
| "epoch": 57.73076923076923, |
| "grad_norm": 0.8575741648674011, |
| "learning_rate": 4.824364967236145e-06, |
| "loss": 0.0299, |
| "step": 15010 |
| }, |
| { |
| "epoch": 57.76923076923077, |
| "grad_norm": 0.7252999544143677, |
| "learning_rate": 4.8061564438690095e-06, |
| "loss": 0.0268, |
| "step": 15020 |
| }, |
| { |
| "epoch": 57.80769230769231, |
| "grad_norm": 0.9593229293823242, |
| "learning_rate": 4.787975790080896e-06, |
| "loss": 0.033, |
| "step": 15030 |
| }, |
| { |
| "epoch": 57.84615384615385, |
| "grad_norm": 0.7520903944969177, |
| "learning_rate": 4.769823055577029e-06, |
| "loss": 0.0319, |
| "step": 15040 |
| }, |
| { |
| "epoch": 57.88461538461539, |
| "grad_norm": 0.6410161256790161, |
| "learning_rate": 4.7516982899862934e-06, |
| "loss": 0.0307, |
| "step": 15050 |
| }, |
| { |
| "epoch": 57.92307692307692, |
| "grad_norm": 0.7836447358131409, |
| "learning_rate": 4.733601542861098e-06, |
| "loss": 0.0288, |
| "step": 15060 |
| }, |
| { |
| "epoch": 57.96153846153846, |
| "grad_norm": 0.7842332124710083, |
| "learning_rate": 4.7155328636772735e-06, |
| "loss": 0.0311, |
| "step": 15070 |
| }, |
| { |
| "epoch": 58.0, |
| "grad_norm": 0.7095579504966736, |
| "learning_rate": 4.697492301833878e-06, |
| "loss": 0.027, |
| "step": 15080 |
| }, |
| { |
| "epoch": 58.03846153846154, |
| "grad_norm": 0.6566604375839233, |
| "learning_rate": 4.679479906653128e-06, |
| "loss": 0.0308, |
| "step": 15090 |
| }, |
| { |
| "epoch": 58.07692307692308, |
| "grad_norm": 0.7291385531425476, |
| "learning_rate": 4.661495727380232e-06, |
| "loss": 0.0282, |
| "step": 15100 |
| }, |
| { |
| "epoch": 58.11538461538461, |
| "grad_norm": 0.7904289960861206, |
| "learning_rate": 4.64353981318324e-06, |
| "loss": 0.0363, |
| "step": 15110 |
| }, |
| { |
| "epoch": 58.15384615384615, |
| "grad_norm": 0.7773877382278442, |
| "learning_rate": 4.62561221315294e-06, |
| "loss": 0.0357, |
| "step": 15120 |
| }, |
| { |
| "epoch": 58.19230769230769, |
| "grad_norm": 0.7698177695274353, |
| "learning_rate": 4.6077129763026995e-06, |
| "loss": 0.0314, |
| "step": 15130 |
| }, |
| { |
| "epoch": 58.23076923076923, |
| "grad_norm": 0.8109748959541321, |
| "learning_rate": 4.589842151568354e-06, |
| "loss": 0.0267, |
| "step": 15140 |
| }, |
| { |
| "epoch": 58.26923076923077, |
| "grad_norm": 0.606159508228302, |
| "learning_rate": 4.571999787808057e-06, |
| "loss": 0.0282, |
| "step": 15150 |
| }, |
| { |
| "epoch": 58.30769230769231, |
| "grad_norm": 0.7719907760620117, |
| "learning_rate": 4.554185933802151e-06, |
| "loss": 0.028, |
| "step": 15160 |
| }, |
| { |
| "epoch": 58.34615384615385, |
| "grad_norm": 0.8883272409439087, |
| "learning_rate": 4.5364006382530285e-06, |
| "loss": 0.0287, |
| "step": 15170 |
| }, |
| { |
| "epoch": 58.38461538461539, |
| "grad_norm": 0.782050609588623, |
| "learning_rate": 4.518643949785004e-06, |
| "loss": 0.0277, |
| "step": 15180 |
| }, |
| { |
| "epoch": 58.42307692307692, |
| "grad_norm": 0.7449761033058167, |
| "learning_rate": 4.500915916944193e-06, |
| "loss": 0.03, |
| "step": 15190 |
| }, |
| { |
| "epoch": 58.46153846153846, |
| "grad_norm": 0.7998998761177063, |
| "learning_rate": 4.483216588198366e-06, |
| "loss": 0.0325, |
| "step": 15200 |
| }, |
| { |
| "epoch": 58.5, |
| "grad_norm": 0.6946242451667786, |
| "learning_rate": 4.465546011936797e-06, |
| "loss": 0.0274, |
| "step": 15210 |
| }, |
| { |
| "epoch": 58.53846153846154, |
| "grad_norm": 0.6466119885444641, |
| "learning_rate": 4.447904236470177e-06, |
| "loss": 0.0284, |
| "step": 15220 |
| }, |
| { |
| "epoch": 58.57692307692308, |
| "grad_norm": 0.7280353903770447, |
| "learning_rate": 4.43029131003044e-06, |
| "loss": 0.0293, |
| "step": 15230 |
| }, |
| { |
| "epoch": 58.61538461538461, |
| "grad_norm": 0.8519057631492615, |
| "learning_rate": 4.412707280770658e-06, |
| "loss": 0.0313, |
| "step": 15240 |
| }, |
| { |
| "epoch": 58.65384615384615, |
| "grad_norm": 0.9241837859153748, |
| "learning_rate": 4.395152196764905e-06, |
| "loss": 0.0338, |
| "step": 15250 |
| }, |
| { |
| "epoch": 58.69230769230769, |
| "grad_norm": 0.8291633129119873, |
| "learning_rate": 4.3776261060080916e-06, |
| "loss": 0.0295, |
| "step": 15260 |
| }, |
| { |
| "epoch": 58.73076923076923, |
| "grad_norm": 0.8491876721382141, |
| "learning_rate": 4.360129056415895e-06, |
| "loss": 0.0324, |
| "step": 15270 |
| }, |
| { |
| "epoch": 58.76923076923077, |
| "grad_norm": 0.8303586840629578, |
| "learning_rate": 4.34266109582457e-06, |
| "loss": 0.0285, |
| "step": 15280 |
| }, |
| { |
| "epoch": 58.80769230769231, |
| "grad_norm": 1.0164424180984497, |
| "learning_rate": 4.325222271990861e-06, |
| "loss": 0.0326, |
| "step": 15290 |
| }, |
| { |
| "epoch": 58.84615384615385, |
| "grad_norm": 0.967418372631073, |
| "learning_rate": 4.307812632591853e-06, |
| "loss": 0.0295, |
| "step": 15300 |
| }, |
| { |
| "epoch": 58.88461538461539, |
| "grad_norm": 0.6895191073417664, |
| "learning_rate": 4.2904322252248186e-06, |
| "loss": 0.0329, |
| "step": 15310 |
| }, |
| { |
| "epoch": 58.92307692307692, |
| "grad_norm": 0.9052116870880127, |
| "learning_rate": 4.273081097407142e-06, |
| "loss": 0.037, |
| "step": 15320 |
| }, |
| { |
| "epoch": 58.96153846153846, |
| "grad_norm": 1.0200068950653076, |
| "learning_rate": 4.255759296576133e-06, |
| "loss": 0.0335, |
| "step": 15330 |
| }, |
| { |
| "epoch": 59.0, |
| "grad_norm": 0.899724006652832, |
| "learning_rate": 4.238466870088945e-06, |
| "loss": 0.0305, |
| "step": 15340 |
| }, |
| { |
| "epoch": 59.03846153846154, |
| "grad_norm": 0.9742430448532104, |
| "learning_rate": 4.221203865222405e-06, |
| "loss": 0.0289, |
| "step": 15350 |
| }, |
| { |
| "epoch": 59.07692307692308, |
| "grad_norm": 0.8280800580978394, |
| "learning_rate": 4.203970329172907e-06, |
| "loss": 0.0286, |
| "step": 15360 |
| }, |
| { |
| "epoch": 59.11538461538461, |
| "grad_norm": 0.7113635540008545, |
| "learning_rate": 4.186766309056286e-06, |
| "loss": 0.0274, |
| "step": 15370 |
| }, |
| { |
| "epoch": 59.15384615384615, |
| "grad_norm": 0.7944599390029907, |
| "learning_rate": 4.16959185190767e-06, |
| "loss": 0.0285, |
| "step": 15380 |
| }, |
| { |
| "epoch": 59.19230769230769, |
| "grad_norm": 0.8309881091117859, |
| "learning_rate": 4.152447004681379e-06, |
| "loss": 0.0363, |
| "step": 15390 |
| }, |
| { |
| "epoch": 59.23076923076923, |
| "grad_norm": 0.8225540518760681, |
| "learning_rate": 4.135331814250764e-06, |
| "loss": 0.0345, |
| "step": 15400 |
| }, |
| { |
| "epoch": 59.26923076923077, |
| "grad_norm": 0.603495180606842, |
| "learning_rate": 4.118246327408095e-06, |
| "loss": 0.0277, |
| "step": 15410 |
| }, |
| { |
| "epoch": 59.30769230769231, |
| "grad_norm": 0.8480184078216553, |
| "learning_rate": 4.101190590864457e-06, |
| "loss": 0.0369, |
| "step": 15420 |
| }, |
| { |
| "epoch": 59.34615384615385, |
| "grad_norm": 0.7788599729537964, |
| "learning_rate": 4.084164651249566e-06, |
| "loss": 0.0275, |
| "step": 15430 |
| }, |
| { |
| "epoch": 59.38461538461539, |
| "grad_norm": 0.7458527088165283, |
| "learning_rate": 4.0671685551117035e-06, |
| "loss": 0.0274, |
| "step": 15440 |
| }, |
| { |
| "epoch": 59.42307692307692, |
| "grad_norm": 0.7309711575508118, |
| "learning_rate": 4.050202348917544e-06, |
| "loss": 0.0303, |
| "step": 15450 |
| }, |
| { |
| "epoch": 59.46153846153846, |
| "grad_norm": 0.7994899749755859, |
| "learning_rate": 4.033266079052039e-06, |
| "loss": 0.0291, |
| "step": 15460 |
| }, |
| { |
| "epoch": 59.5, |
| "grad_norm": 0.891418993473053, |
| "learning_rate": 4.016359791818314e-06, |
| "loss": 0.0302, |
| "step": 15470 |
| }, |
| { |
| "epoch": 59.53846153846154, |
| "grad_norm": 1.124557375907898, |
| "learning_rate": 3.999483533437511e-06, |
| "loss": 0.0272, |
| "step": 15480 |
| }, |
| { |
| "epoch": 59.57692307692308, |
| "grad_norm": 0.9168891310691833, |
| "learning_rate": 3.982637350048669e-06, |
| "loss": 0.0337, |
| "step": 15490 |
| }, |
| { |
| "epoch": 59.61538461538461, |
| "grad_norm": 0.8245673775672913, |
| "learning_rate": 3.965821287708619e-06, |
| "loss": 0.0289, |
| "step": 15500 |
| }, |
| { |
| "epoch": 59.65384615384615, |
| "grad_norm": 0.7510520815849304, |
| "learning_rate": 3.949035392391825e-06, |
| "loss": 0.0278, |
| "step": 15510 |
| }, |
| { |
| "epoch": 59.69230769230769, |
| "grad_norm": 0.7584001421928406, |
| "learning_rate": 3.932279709990293e-06, |
| "loss": 0.0306, |
| "step": 15520 |
| }, |
| { |
| "epoch": 59.73076923076923, |
| "grad_norm": 0.7586838006973267, |
| "learning_rate": 3.915554286313413e-06, |
| "loss": 0.0369, |
| "step": 15530 |
| }, |
| { |
| "epoch": 59.76923076923077, |
| "grad_norm": 0.9130982160568237, |
| "learning_rate": 3.898859167087853e-06, |
| "loss": 0.0343, |
| "step": 15540 |
| }, |
| { |
| "epoch": 59.80769230769231, |
| "grad_norm": 0.8298722505569458, |
| "learning_rate": 3.882194397957437e-06, |
| "loss": 0.0307, |
| "step": 15550 |
| }, |
| { |
| "epoch": 59.84615384615385, |
| "grad_norm": 0.7778029441833496, |
| "learning_rate": 3.865560024483002e-06, |
| "loss": 0.0285, |
| "step": 15560 |
| }, |
| { |
| "epoch": 59.88461538461539, |
| "grad_norm": 0.6167328953742981, |
| "learning_rate": 3.848956092142294e-06, |
| "loss": 0.0271, |
| "step": 15570 |
| }, |
| { |
| "epoch": 59.92307692307692, |
| "grad_norm": 0.7541554570198059, |
| "learning_rate": 3.832382646329831e-06, |
| "loss": 0.0297, |
| "step": 15580 |
| }, |
| { |
| "epoch": 59.96153846153846, |
| "grad_norm": 0.7008756399154663, |
| "learning_rate": 3.8158397323567725e-06, |
| "loss": 0.0311, |
| "step": 15590 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 0.6774673461914062, |
| "learning_rate": 3.7993273954508262e-06, |
| "loss": 0.0281, |
| "step": 15600 |
| }, |
| { |
| "epoch": 60.03846153846154, |
| "grad_norm": 0.7706513404846191, |
| "learning_rate": 3.782845680756078e-06, |
| "loss": 0.03, |
| "step": 15610 |
| }, |
| { |
| "epoch": 60.07692307692308, |
| "grad_norm": 0.6021534204483032, |
| "learning_rate": 3.7663946333329186e-06, |
| "loss": 0.0287, |
| "step": 15620 |
| }, |
| { |
| "epoch": 60.11538461538461, |
| "grad_norm": 0.8721636533737183, |
| "learning_rate": 3.7499742981578753e-06, |
| "loss": 0.0247, |
| "step": 15630 |
| }, |
| { |
| "epoch": 60.15384615384615, |
| "grad_norm": 0.6210073828697205, |
| "learning_rate": 3.7335847201235166e-06, |
| "loss": 0.0288, |
| "step": 15640 |
| }, |
| { |
| "epoch": 60.19230769230769, |
| "grad_norm": 0.8508103489875793, |
| "learning_rate": 3.717225944038331e-06, |
| "loss": 0.0323, |
| "step": 15650 |
| }, |
| { |
| "epoch": 60.23076923076923, |
| "grad_norm": 0.6650373339653015, |
| "learning_rate": 3.7008980146265776e-06, |
| "loss": 0.0291, |
| "step": 15660 |
| }, |
| { |
| "epoch": 60.26923076923077, |
| "grad_norm": 0.5672306418418884, |
| "learning_rate": 3.6846009765282013e-06, |
| "loss": 0.0293, |
| "step": 15670 |
| }, |
| { |
| "epoch": 60.30769230769231, |
| "grad_norm": 0.6800737977027893, |
| "learning_rate": 3.6683348742986784e-06, |
| "loss": 0.0285, |
| "step": 15680 |
| }, |
| { |
| "epoch": 60.34615384615385, |
| "grad_norm": 0.8498525619506836, |
| "learning_rate": 3.6520997524089057e-06, |
| "loss": 0.0303, |
| "step": 15690 |
| }, |
| { |
| "epoch": 60.38461538461539, |
| "grad_norm": 0.6448276042938232, |
| "learning_rate": 3.635895655245096e-06, |
| "loss": 0.0276, |
| "step": 15700 |
| }, |
| { |
| "epoch": 60.42307692307692, |
| "grad_norm": 0.7374983429908752, |
| "learning_rate": 3.619722627108624e-06, |
| "loss": 0.0305, |
| "step": 15710 |
| }, |
| { |
| "epoch": 60.46153846153846, |
| "grad_norm": 0.9034025073051453, |
| "learning_rate": 3.603580712215937e-06, |
| "loss": 0.0301, |
| "step": 15720 |
| }, |
| { |
| "epoch": 60.5, |
| "grad_norm": 0.6876755356788635, |
| "learning_rate": 3.587469954698413e-06, |
| "loss": 0.0272, |
| "step": 15730 |
| }, |
| { |
| "epoch": 60.53846153846154, |
| "grad_norm": 0.628259539604187, |
| "learning_rate": 3.5713903986022425e-06, |
| "loss": 0.0309, |
| "step": 15740 |
| }, |
| { |
| "epoch": 60.57692307692308, |
| "grad_norm": 0.8934166431427002, |
| "learning_rate": 3.555342087888326e-06, |
| "loss": 0.0286, |
| "step": 15750 |
| }, |
| { |
| "epoch": 60.61538461538461, |
| "grad_norm": 0.7909426689147949, |
| "learning_rate": 3.539325066432127e-06, |
| "loss": 0.0308, |
| "step": 15760 |
| }, |
| { |
| "epoch": 60.65384615384615, |
| "grad_norm": 0.8825272917747498, |
| "learning_rate": 3.523339378023569e-06, |
| "loss": 0.0322, |
| "step": 15770 |
| }, |
| { |
| "epoch": 60.69230769230769, |
| "grad_norm": 0.7353061437606812, |
| "learning_rate": 3.5073850663669193e-06, |
| "loss": 0.0325, |
| "step": 15780 |
| }, |
| { |
| "epoch": 60.73076923076923, |
| "grad_norm": 0.5934805274009705, |
| "learning_rate": 3.4914621750806503e-06, |
| "loss": 0.0306, |
| "step": 15790 |
| }, |
| { |
| "epoch": 60.76923076923077, |
| "grad_norm": 0.7742717266082764, |
| "learning_rate": 3.475570747697346e-06, |
| "loss": 0.0305, |
| "step": 15800 |
| }, |
| { |
| "epoch": 60.80769230769231, |
| "grad_norm": 0.8100197911262512, |
| "learning_rate": 3.4597108276635577e-06, |
| "loss": 0.0281, |
| "step": 15810 |
| }, |
| { |
| "epoch": 60.84615384615385, |
| "grad_norm": 0.5236333012580872, |
| "learning_rate": 3.443882458339699e-06, |
| "loss": 0.0336, |
| "step": 15820 |
| }, |
| { |
| "epoch": 60.88461538461539, |
| "grad_norm": 0.8024904131889343, |
| "learning_rate": 3.4280856829999323e-06, |
| "loss": 0.0299, |
| "step": 15830 |
| }, |
| { |
| "epoch": 60.92307692307692, |
| "grad_norm": 0.654270350933075, |
| "learning_rate": 3.412320544832033e-06, |
| "loss": 0.027, |
| "step": 15840 |
| }, |
| { |
| "epoch": 60.96153846153846, |
| "grad_norm": 0.6151342988014221, |
| "learning_rate": 3.396587086937294e-06, |
| "loss": 0.0311, |
| "step": 15850 |
| }, |
| { |
| "epoch": 61.0, |
| "grad_norm": 0.7870935201644897, |
| "learning_rate": 3.380885352330383e-06, |
| "loss": 0.0271, |
| "step": 15860 |
| }, |
| { |
| "epoch": 61.03846153846154, |
| "grad_norm": 0.5324817299842834, |
| "learning_rate": 3.3652153839392414e-06, |
| "loss": 0.0276, |
| "step": 15870 |
| }, |
| { |
| "epoch": 61.07692307692308, |
| "grad_norm": 0.6367908716201782, |
| "learning_rate": 3.34957722460497e-06, |
| "loss": 0.0352, |
| "step": 15880 |
| }, |
| { |
| "epoch": 61.11538461538461, |
| "grad_norm": 0.8792974948883057, |
| "learning_rate": 3.333970917081691e-06, |
| "loss": 0.0275, |
| "step": 15890 |
| }, |
| { |
| "epoch": 61.15384615384615, |
| "grad_norm": 0.7430537939071655, |
| "learning_rate": 3.318396504036465e-06, |
| "loss": 0.0325, |
| "step": 15900 |
| }, |
| { |
| "epoch": 61.19230769230769, |
| "grad_norm": 0.5728083848953247, |
| "learning_rate": 3.302854028049134e-06, |
| "loss": 0.034, |
| "step": 15910 |
| }, |
| { |
| "epoch": 61.23076923076923, |
| "grad_norm": 0.6408311724662781, |
| "learning_rate": 3.287343531612233e-06, |
| "loss": 0.0301, |
| "step": 15920 |
| }, |
| { |
| "epoch": 61.26923076923077, |
| "grad_norm": 0.7218260169029236, |
| "learning_rate": 3.271865057130874e-06, |
| "loss": 0.0386, |
| "step": 15930 |
| }, |
| { |
| "epoch": 61.30769230769231, |
| "grad_norm": 0.7336766719818115, |
| "learning_rate": 3.2564186469226064e-06, |
| "loss": 0.0279, |
| "step": 15940 |
| }, |
| { |
| "epoch": 61.34615384615385, |
| "grad_norm": 0.7276951670646667, |
| "learning_rate": 3.2410043432173353e-06, |
| "loss": 0.0299, |
| "step": 15950 |
| }, |
| { |
| "epoch": 61.38461538461539, |
| "grad_norm": 0.8573492765426636, |
| "learning_rate": 3.2256221881571734e-06, |
| "loss": 0.0343, |
| "step": 15960 |
| }, |
| { |
| "epoch": 61.42307692307692, |
| "grad_norm": 0.7509027719497681, |
| "learning_rate": 3.210272223796341e-06, |
| "loss": 0.0303, |
| "step": 15970 |
| }, |
| { |
| "epoch": 61.46153846153846, |
| "grad_norm": 0.741800844669342, |
| "learning_rate": 3.1949544921010637e-06, |
| "loss": 0.0299, |
| "step": 15980 |
| }, |
| { |
| "epoch": 61.5, |
| "grad_norm": 0.580657958984375, |
| "learning_rate": 3.1796690349494273e-06, |
| "loss": 0.028, |
| "step": 15990 |
| }, |
| { |
| "epoch": 61.53846153846154, |
| "grad_norm": 0.7470554709434509, |
| "learning_rate": 3.1644158941312935e-06, |
| "loss": 0.0278, |
| "step": 16000 |
| }, |
| { |
| "epoch": 61.57692307692308, |
| "grad_norm": 0.6147487163543701, |
| "learning_rate": 3.149195111348166e-06, |
| "loss": 0.0278, |
| "step": 16010 |
| }, |
| { |
| "epoch": 61.61538461538461, |
| "grad_norm": 0.9255980849266052, |
| "learning_rate": 3.1340067282130765e-06, |
| "loss": 0.0248, |
| "step": 16020 |
| }, |
| { |
| "epoch": 61.65384615384615, |
| "grad_norm": 0.6000899076461792, |
| "learning_rate": 3.118850786250495e-06, |
| "loss": 0.027, |
| "step": 16030 |
| }, |
| { |
| "epoch": 61.69230769230769, |
| "grad_norm": 0.5992773175239563, |
| "learning_rate": 3.1037273268961836e-06, |
| "loss": 0.0319, |
| "step": 16040 |
| }, |
| { |
| "epoch": 61.73076923076923, |
| "grad_norm": 0.5813210606575012, |
| "learning_rate": 3.0886363914970994e-06, |
| "loss": 0.0312, |
| "step": 16050 |
| }, |
| { |
| "epoch": 61.76923076923077, |
| "grad_norm": 0.6334101557731628, |
| "learning_rate": 3.0735780213112896e-06, |
| "loss": 0.0266, |
| "step": 16060 |
| }, |
| { |
| "epoch": 61.80769230769231, |
| "grad_norm": 0.6605973243713379, |
| "learning_rate": 3.0585522575077558e-06, |
| "loss": 0.0302, |
| "step": 16070 |
| }, |
| { |
| "epoch": 61.84615384615385, |
| "grad_norm": 0.716547429561615, |
| "learning_rate": 3.043559141166372e-06, |
| "loss": 0.0328, |
| "step": 16080 |
| }, |
| { |
| "epoch": 61.88461538461539, |
| "grad_norm": 0.7207222580909729, |
| "learning_rate": 3.02859871327774e-06, |
| "loss": 0.0299, |
| "step": 16090 |
| }, |
| { |
| "epoch": 61.92307692307692, |
| "grad_norm": 0.5931048393249512, |
| "learning_rate": 3.0136710147430945e-06, |
| "loss": 0.0287, |
| "step": 16100 |
| }, |
| { |
| "epoch": 61.96153846153846, |
| "grad_norm": 0.6544404625892639, |
| "learning_rate": 2.998776086374202e-06, |
| "loss": 0.0286, |
| "step": 16110 |
| }, |
| { |
| "epoch": 62.0, |
| "grad_norm": 0.7510085701942444, |
| "learning_rate": 2.983913968893221e-06, |
| "loss": 0.0299, |
| "step": 16120 |
| }, |
| { |
| "epoch": 62.03846153846154, |
| "grad_norm": 0.6854420900344849, |
| "learning_rate": 2.969084702932619e-06, |
| "loss": 0.0283, |
| "step": 16130 |
| }, |
| { |
| "epoch": 62.07692307692308, |
| "grad_norm": 0.7260907292366028, |
| "learning_rate": 2.95428832903504e-06, |
| "loss": 0.0311, |
| "step": 16140 |
| }, |
| { |
| "epoch": 62.11538461538461, |
| "grad_norm": 0.6026385426521301, |
| "learning_rate": 2.939524887653201e-06, |
| "loss": 0.0288, |
| "step": 16150 |
| }, |
| { |
| "epoch": 62.15384615384615, |
| "grad_norm": 0.5275495052337646, |
| "learning_rate": 2.924794419149796e-06, |
| "loss": 0.0326, |
| "step": 16160 |
| }, |
| { |
| "epoch": 62.19230769230769, |
| "grad_norm": 0.597240149974823, |
| "learning_rate": 2.910096963797354e-06, |
| "loss": 0.0272, |
| "step": 16170 |
| }, |
| { |
| "epoch": 62.23076923076923, |
| "grad_norm": 0.6819244623184204, |
| "learning_rate": 2.895432561778164e-06, |
| "loss": 0.0344, |
| "step": 16180 |
| }, |
| { |
| "epoch": 62.26923076923077, |
| "grad_norm": 0.624423623085022, |
| "learning_rate": 2.8808012531841393e-06, |
| "loss": 0.0297, |
| "step": 16190 |
| }, |
| { |
| "epoch": 62.30769230769231, |
| "grad_norm": 0.5835046768188477, |
| "learning_rate": 2.8662030780167138e-06, |
| "loss": 0.033, |
| "step": 16200 |
| }, |
| { |
| "epoch": 62.34615384615385, |
| "grad_norm": 0.6391960382461548, |
| "learning_rate": 2.85163807618675e-06, |
| "loss": 0.0254, |
| "step": 16210 |
| }, |
| { |
| "epoch": 62.38461538461539, |
| "grad_norm": 0.6837576031684875, |
| "learning_rate": 2.837106287514397e-06, |
| "loss": 0.029, |
| "step": 16220 |
| }, |
| { |
| "epoch": 62.42307692307692, |
| "grad_norm": 0.8031293153762817, |
| "learning_rate": 2.822607751729018e-06, |
| "loss": 0.0304, |
| "step": 16230 |
| }, |
| { |
| "epoch": 62.46153846153846, |
| "grad_norm": 0.7235280871391296, |
| "learning_rate": 2.808142508469054e-06, |
| "loss": 0.0302, |
| "step": 16240 |
| }, |
| { |
| "epoch": 62.5, |
| "grad_norm": 0.7115002870559692, |
| "learning_rate": 2.7937105972819237e-06, |
| "loss": 0.0287, |
| "step": 16250 |
| }, |
| { |
| "epoch": 62.53846153846154, |
| "grad_norm": 0.7001069784164429, |
| "learning_rate": 2.7793120576239285e-06, |
| "loss": 0.0312, |
| "step": 16260 |
| }, |
| { |
| "epoch": 62.57692307692308, |
| "grad_norm": 0.6503643989562988, |
| "learning_rate": 2.7649469288601175e-06, |
| "loss": 0.0292, |
| "step": 16270 |
| }, |
| { |
| "epoch": 62.61538461538461, |
| "grad_norm": 0.5469167232513428, |
| "learning_rate": 2.7506152502642125e-06, |
| "loss": 0.0271, |
| "step": 16280 |
| }, |
| { |
| "epoch": 62.65384615384615, |
| "grad_norm": 0.6128395199775696, |
| "learning_rate": 2.7363170610184716e-06, |
| "loss": 0.0321, |
| "step": 16290 |
| }, |
| { |
| "epoch": 62.69230769230769, |
| "grad_norm": 0.726383626461029, |
| "learning_rate": 2.722052400213595e-06, |
| "loss": 0.0298, |
| "step": 16300 |
| }, |
| { |
| "epoch": 62.73076923076923, |
| "grad_norm": 0.7536424398422241, |
| "learning_rate": 2.707821306848627e-06, |
| "loss": 0.0309, |
| "step": 16310 |
| }, |
| { |
| "epoch": 62.76923076923077, |
| "grad_norm": 0.7245988249778748, |
| "learning_rate": 2.6936238198308318e-06, |
| "loss": 0.0283, |
| "step": 16320 |
| }, |
| { |
| "epoch": 62.80769230769231, |
| "grad_norm": 0.6714974641799927, |
| "learning_rate": 2.67945997797559e-06, |
| "loss": 0.0323, |
| "step": 16330 |
| }, |
| { |
| "epoch": 62.84615384615385, |
| "grad_norm": 0.5724020600318909, |
| "learning_rate": 2.665329820006314e-06, |
| "loss": 0.0282, |
| "step": 16340 |
| }, |
| { |
| "epoch": 62.88461538461539, |
| "grad_norm": 0.7046947479248047, |
| "learning_rate": 2.6512333845543086e-06, |
| "loss": 0.0274, |
| "step": 16350 |
| }, |
| { |
| "epoch": 62.92307692307692, |
| "grad_norm": 0.6365247368812561, |
| "learning_rate": 2.637170710158697e-06, |
| "loss": 0.0315, |
| "step": 16360 |
| }, |
| { |
| "epoch": 62.96153846153846, |
| "grad_norm": 0.7064822316169739, |
| "learning_rate": 2.6231418352662895e-06, |
| "loss": 0.0296, |
| "step": 16370 |
| }, |
| { |
| "epoch": 63.0, |
| "grad_norm": 0.6772658228874207, |
| "learning_rate": 2.609146798231493e-06, |
| "loss": 0.033, |
| "step": 16380 |
| }, |
| { |
| "epoch": 63.03846153846154, |
| "grad_norm": 0.48029395937919617, |
| "learning_rate": 2.5951856373162097e-06, |
| "loss": 0.0295, |
| "step": 16390 |
| }, |
| { |
| "epoch": 63.07692307692308, |
| "grad_norm": 0.6232547163963318, |
| "learning_rate": 2.581258390689712e-06, |
| "loss": 0.0288, |
| "step": 16400 |
| }, |
| { |
| "epoch": 63.11538461538461, |
| "grad_norm": 0.8133201599121094, |
| "learning_rate": 2.5673650964285718e-06, |
| "loss": 0.0326, |
| "step": 16410 |
| }, |
| { |
| "epoch": 63.15384615384615, |
| "grad_norm": 0.8444731831550598, |
| "learning_rate": 2.553505792516518e-06, |
| "loss": 0.0265, |
| "step": 16420 |
| }, |
| { |
| "epoch": 63.19230769230769, |
| "grad_norm": 0.4815030097961426, |
| "learning_rate": 2.539680516844356e-06, |
| "loss": 0.0249, |
| "step": 16430 |
| }, |
| { |
| "epoch": 63.23076923076923, |
| "grad_norm": 0.6386057138442993, |
| "learning_rate": 2.5258893072098678e-06, |
| "loss": 0.0282, |
| "step": 16440 |
| }, |
| { |
| "epoch": 63.26923076923077, |
| "grad_norm": 0.701069176197052, |
| "learning_rate": 2.512132201317688e-06, |
| "loss": 0.0269, |
| "step": 16450 |
| }, |
| { |
| "epoch": 63.30769230769231, |
| "grad_norm": 1.0269007682800293, |
| "learning_rate": 2.4984092367792272e-06, |
| "loss": 0.0301, |
| "step": 16460 |
| }, |
| { |
| "epoch": 63.34615384615385, |
| "grad_norm": 0.6145013570785522, |
| "learning_rate": 2.484720451112536e-06, |
| "loss": 0.0282, |
| "step": 16470 |
| }, |
| { |
| "epoch": 63.38461538461539, |
| "grad_norm": 0.6777820587158203, |
| "learning_rate": 2.471065881742236e-06, |
| "loss": 0.0306, |
| "step": 16480 |
| }, |
| { |
| "epoch": 63.42307692307692, |
| "grad_norm": 0.7301444411277771, |
| "learning_rate": 2.4574455659994023e-06, |
| "loss": 0.0326, |
| "step": 16490 |
| }, |
| { |
| "epoch": 63.46153846153846, |
| "grad_norm": 0.6660694479942322, |
| "learning_rate": 2.4438595411214528e-06, |
| "loss": 0.029, |
| "step": 16500 |
| }, |
| { |
| "epoch": 63.5, |
| "grad_norm": 0.6402868032455444, |
| "learning_rate": 2.430307844252069e-06, |
| "loss": 0.0262, |
| "step": 16510 |
| }, |
| { |
| "epoch": 63.53846153846154, |
| "grad_norm": 0.7359407544136047, |
| "learning_rate": 2.4167905124410587e-06, |
| "loss": 0.0276, |
| "step": 16520 |
| }, |
| { |
| "epoch": 63.57692307692308, |
| "grad_norm": 0.5874005556106567, |
| "learning_rate": 2.4033075826442995e-06, |
| "loss": 0.0279, |
| "step": 16530 |
| }, |
| { |
| "epoch": 63.61538461538461, |
| "grad_norm": 0.7026081085205078, |
| "learning_rate": 2.389859091723608e-06, |
| "loss": 0.0284, |
| "step": 16540 |
| }, |
| { |
| "epoch": 63.65384615384615, |
| "grad_norm": 0.7727426886558533, |
| "learning_rate": 2.376445076446641e-06, |
| "loss": 0.0307, |
| "step": 16550 |
| }, |
| { |
| "epoch": 63.69230769230769, |
| "grad_norm": 0.7311943769454956, |
| "learning_rate": 2.3630655734868117e-06, |
| "loss": 0.0275, |
| "step": 16560 |
| }, |
| { |
| "epoch": 63.73076923076923, |
| "grad_norm": 0.7108755111694336, |
| "learning_rate": 2.349720619423158e-06, |
| "loss": 0.0293, |
| "step": 16570 |
| }, |
| { |
| "epoch": 63.76923076923077, |
| "grad_norm": 0.981634795665741, |
| "learning_rate": 2.3364102507402817e-06, |
| "loss": 0.0329, |
| "step": 16580 |
| }, |
| { |
| "epoch": 63.80769230769231, |
| "grad_norm": 0.7809610366821289, |
| "learning_rate": 2.3231345038282243e-06, |
| "loss": 0.0287, |
| "step": 16590 |
| }, |
| { |
| "epoch": 63.84615384615385, |
| "grad_norm": 0.9093500971794128, |
| "learning_rate": 2.3098934149823686e-06, |
| "loss": 0.0291, |
| "step": 16600 |
| }, |
| { |
| "epoch": 63.88461538461539, |
| "grad_norm": 0.6164126992225647, |
| "learning_rate": 2.296687020403346e-06, |
| "loss": 0.0306, |
| "step": 16610 |
| }, |
| { |
| "epoch": 63.92307692307692, |
| "grad_norm": 0.7200257778167725, |
| "learning_rate": 2.2835153561969322e-06, |
| "loss": 0.0309, |
| "step": 16620 |
| }, |
| { |
| "epoch": 63.96153846153846, |
| "grad_norm": 0.5784043669700623, |
| "learning_rate": 2.270378458373956e-06, |
| "loss": 0.027, |
| "step": 16630 |
| }, |
| { |
| "epoch": 64.0, |
| "grad_norm": 0.5341262221336365, |
| "learning_rate": 2.257276362850199e-06, |
| "loss": 0.0283, |
| "step": 16640 |
| }, |
| { |
| "epoch": 64.03846153846153, |
| "grad_norm": 0.47131168842315674, |
| "learning_rate": 2.244209105446286e-06, |
| "loss": 0.0304, |
| "step": 16650 |
| }, |
| { |
| "epoch": 64.07692307692308, |
| "grad_norm": 0.5508049726486206, |
| "learning_rate": 2.2311767218875995e-06, |
| "loss": 0.0285, |
| "step": 16660 |
| }, |
| { |
| "epoch": 64.11538461538461, |
| "grad_norm": 0.6628520488739014, |
| "learning_rate": 2.218179247804177e-06, |
| "loss": 0.0306, |
| "step": 16670 |
| }, |
| { |
| "epoch": 64.15384615384616, |
| "grad_norm": 0.6644569635391235, |
| "learning_rate": 2.2052167187306167e-06, |
| "loss": 0.0269, |
| "step": 16680 |
| }, |
| { |
| "epoch": 64.1923076923077, |
| "grad_norm": 0.8234758973121643, |
| "learning_rate": 2.192289170105989e-06, |
| "loss": 0.0286, |
| "step": 16690 |
| }, |
| { |
| "epoch": 64.23076923076923, |
| "grad_norm": 0.7737919688224792, |
| "learning_rate": 2.1793966372737003e-06, |
| "loss": 0.0339, |
| "step": 16700 |
| }, |
| { |
| "epoch": 64.26923076923077, |
| "grad_norm": 0.5969371795654297, |
| "learning_rate": 2.166539155481455e-06, |
| "loss": 0.025, |
| "step": 16710 |
| }, |
| { |
| "epoch": 64.3076923076923, |
| "grad_norm": 0.5700958371162415, |
| "learning_rate": 2.1537167598811118e-06, |
| "loss": 0.0321, |
| "step": 16720 |
| }, |
| { |
| "epoch": 64.34615384615384, |
| "grad_norm": 0.625689685344696, |
| "learning_rate": 2.140929485528612e-06, |
| "loss": 0.0267, |
| "step": 16730 |
| }, |
| { |
| "epoch": 64.38461538461539, |
| "grad_norm": 0.504895031452179, |
| "learning_rate": 2.1281773673838838e-06, |
| "loss": 0.0244, |
| "step": 16740 |
| }, |
| { |
| "epoch": 64.42307692307692, |
| "grad_norm": 0.5708885788917542, |
| "learning_rate": 2.1154604403107175e-06, |
| "loss": 0.0261, |
| "step": 16750 |
| }, |
| { |
| "epoch": 64.46153846153847, |
| "grad_norm": 0.7020063400268555, |
| "learning_rate": 2.102778739076715e-06, |
| "loss": 0.0299, |
| "step": 16760 |
| }, |
| { |
| "epoch": 64.5, |
| "grad_norm": 0.5950106978416443, |
| "learning_rate": 2.0901322983531574e-06, |
| "loss": 0.0323, |
| "step": 16770 |
| }, |
| { |
| "epoch": 64.53846153846153, |
| "grad_norm": 0.6007116436958313, |
| "learning_rate": 2.0775211527149357e-06, |
| "loss": 0.0272, |
| "step": 16780 |
| }, |
| { |
| "epoch": 64.57692307692308, |
| "grad_norm": 0.7846446633338928, |
| "learning_rate": 2.0649453366404438e-06, |
| "loss": 0.03, |
| "step": 16790 |
| }, |
| { |
| "epoch": 64.61538461538461, |
| "grad_norm": 0.6095095276832581, |
| "learning_rate": 2.052404884511472e-06, |
| "loss": 0.0284, |
| "step": 16800 |
| }, |
| { |
| "epoch": 64.65384615384616, |
| "grad_norm": 0.6284276247024536, |
| "learning_rate": 2.039899830613145e-06, |
| "loss": 0.0331, |
| "step": 16810 |
| }, |
| { |
| "epoch": 64.6923076923077, |
| "grad_norm": 0.6151442527770996, |
| "learning_rate": 2.0274302091337987e-06, |
| "loss": 0.0297, |
| "step": 16820 |
| }, |
| { |
| "epoch": 64.73076923076923, |
| "grad_norm": 0.6171903610229492, |
| "learning_rate": 2.0149960541649076e-06, |
| "loss": 0.0279, |
| "step": 16830 |
| }, |
| { |
| "epoch": 64.76923076923077, |
| "grad_norm": 0.6822643876075745, |
| "learning_rate": 2.002597399700974e-06, |
| "loss": 0.0295, |
| "step": 16840 |
| }, |
| { |
| "epoch": 64.8076923076923, |
| "grad_norm": 0.6247174143791199, |
| "learning_rate": 1.990234279639441e-06, |
| "loss": 0.029, |
| "step": 16850 |
| }, |
| { |
| "epoch": 64.84615384615384, |
| "grad_norm": 0.6442417502403259, |
| "learning_rate": 1.977906727780614e-06, |
| "loss": 0.0281, |
| "step": 16860 |
| }, |
| { |
| "epoch": 64.88461538461539, |
| "grad_norm": 0.45084184408187866, |
| "learning_rate": 1.9656147778275423e-06, |
| "loss": 0.0276, |
| "step": 16870 |
| }, |
| { |
| "epoch": 64.92307692307692, |
| "grad_norm": 0.5375664830207825, |
| "learning_rate": 1.953358463385954e-06, |
| "loss": 0.0329, |
| "step": 16880 |
| }, |
| { |
| "epoch": 64.96153846153847, |
| "grad_norm": 0.6008625626564026, |
| "learning_rate": 1.9411378179641435e-06, |
| "loss": 0.0349, |
| "step": 16890 |
| }, |
| { |
| "epoch": 65.0, |
| "grad_norm": 0.7332527041435242, |
| "learning_rate": 1.9289528749728834e-06, |
| "loss": 0.0374, |
| "step": 16900 |
| }, |
| { |
| "epoch": 65.03846153846153, |
| "grad_norm": 0.6559941172599792, |
| "learning_rate": 1.916803667725351e-06, |
| "loss": 0.0289, |
| "step": 16910 |
| }, |
| { |
| "epoch": 65.07692307692308, |
| "grad_norm": 0.6436246633529663, |
| "learning_rate": 1.9046902294370044e-06, |
| "loss": 0.0286, |
| "step": 16920 |
| }, |
| { |
| "epoch": 65.11538461538461, |
| "grad_norm": 0.7137200832366943, |
| "learning_rate": 1.8926125932255328e-06, |
| "loss": 0.0278, |
| "step": 16930 |
| }, |
| { |
| "epoch": 65.15384615384616, |
| "grad_norm": 0.7713292837142944, |
| "learning_rate": 1.8805707921107262e-06, |
| "loss": 0.0296, |
| "step": 16940 |
| }, |
| { |
| "epoch": 65.1923076923077, |
| "grad_norm": 0.4665347635746002, |
| "learning_rate": 1.8685648590144066e-06, |
| "loss": 0.0272, |
| "step": 16950 |
| }, |
| { |
| "epoch": 65.23076923076923, |
| "grad_norm": 0.5581397414207458, |
| "learning_rate": 1.8565948267603444e-06, |
| "loss": 0.0327, |
| "step": 16960 |
| }, |
| { |
| "epoch": 65.26923076923077, |
| "grad_norm": 0.4912269711494446, |
| "learning_rate": 1.8446607280741435e-06, |
| "loss": 0.0339, |
| "step": 16970 |
| }, |
| { |
| "epoch": 65.3076923076923, |
| "grad_norm": 0.6458134651184082, |
| "learning_rate": 1.8327625955831763e-06, |
| "loss": 0.0272, |
| "step": 16980 |
| }, |
| { |
| "epoch": 65.34615384615384, |
| "grad_norm": 0.6138810515403748, |
| "learning_rate": 1.8209004618164837e-06, |
| "loss": 0.0306, |
| "step": 16990 |
| }, |
| { |
| "epoch": 65.38461538461539, |
| "grad_norm": 0.6538389921188354, |
| "learning_rate": 1.8090743592046843e-06, |
| "loss": 0.0342, |
| "step": 17000 |
| }, |
| { |
| "epoch": 65.42307692307692, |
| "grad_norm": 0.5684748888015747, |
| "learning_rate": 1.7972843200798932e-06, |
| "loss": 0.0281, |
| "step": 17010 |
| }, |
| { |
| "epoch": 65.46153846153847, |
| "grad_norm": 0.6487337350845337, |
| "learning_rate": 1.7855303766756316e-06, |
| "loss": 0.0303, |
| "step": 17020 |
| }, |
| { |
| "epoch": 65.5, |
| "grad_norm": 0.6523866057395935, |
| "learning_rate": 1.7738125611267204e-06, |
| "loss": 0.029, |
| "step": 17030 |
| }, |
| { |
| "epoch": 65.53846153846153, |
| "grad_norm": 0.624439001083374, |
| "learning_rate": 1.7621309054692302e-06, |
| "loss": 0.027, |
| "step": 17040 |
| }, |
| { |
| "epoch": 65.57692307692308, |
| "grad_norm": 0.45990708470344543, |
| "learning_rate": 1.7504854416403542e-06, |
| "loss": 0.0288, |
| "step": 17050 |
| }, |
| { |
| "epoch": 65.61538461538461, |
| "grad_norm": 0.7313023209571838, |
| "learning_rate": 1.7388762014783493e-06, |
| "loss": 0.0287, |
| "step": 17060 |
| }, |
| { |
| "epoch": 65.65384615384616, |
| "grad_norm": 0.7057033777236938, |
| "learning_rate": 1.7273032167224418e-06, |
| "loss": 0.0322, |
| "step": 17070 |
| }, |
| { |
| "epoch": 65.6923076923077, |
| "grad_norm": 0.6121751070022583, |
| "learning_rate": 1.7157665190127154e-06, |
| "loss": 0.0325, |
| "step": 17080 |
| }, |
| { |
| "epoch": 65.73076923076923, |
| "grad_norm": 0.4792765974998474, |
| "learning_rate": 1.7042661398900733e-06, |
| "loss": 0.0253, |
| "step": 17090 |
| }, |
| { |
| "epoch": 65.76923076923077, |
| "grad_norm": 0.6455287933349609, |
| "learning_rate": 1.692802110796105e-06, |
| "loss": 0.0292, |
| "step": 17100 |
| }, |
| { |
| "epoch": 65.8076923076923, |
| "grad_norm": 0.8495745658874512, |
| "learning_rate": 1.6813744630730343e-06, |
| "loss": 0.0326, |
| "step": 17110 |
| }, |
| { |
| "epoch": 65.84615384615384, |
| "grad_norm": 0.7053533792495728, |
| "learning_rate": 1.6699832279636113e-06, |
| "loss": 0.0312, |
| "step": 17120 |
| }, |
| { |
| "epoch": 65.88461538461539, |
| "grad_norm": 0.6184305548667908, |
| "learning_rate": 1.6586284366110355e-06, |
| "loss": 0.0362, |
| "step": 17130 |
| }, |
| { |
| "epoch": 65.92307692307692, |
| "grad_norm": 0.4716685712337494, |
| "learning_rate": 1.647310120058878e-06, |
| "loss": 0.0339, |
| "step": 17140 |
| }, |
| { |
| "epoch": 65.96153846153847, |
| "grad_norm": 0.7297793030738831, |
| "learning_rate": 1.6360283092509765e-06, |
| "loss": 0.0292, |
| "step": 17150 |
| }, |
| { |
| "epoch": 66.0, |
| "grad_norm": 0.6194086074829102, |
| "learning_rate": 1.6247830350313797e-06, |
| "loss": 0.0309, |
| "step": 17160 |
| }, |
| { |
| "epoch": 66.03846153846153, |
| "grad_norm": 0.5371058583259583, |
| "learning_rate": 1.6135743281442333e-06, |
| "loss": 0.028, |
| "step": 17170 |
| }, |
| { |
| "epoch": 66.07692307692308, |
| "grad_norm": 0.6116629838943481, |
| "learning_rate": 1.6024022192337112e-06, |
| "loss": 0.0293, |
| "step": 17180 |
| }, |
| { |
| "epoch": 66.11538461538461, |
| "grad_norm": 0.5863935947418213, |
| "learning_rate": 1.591266738843939e-06, |
| "loss": 0.0276, |
| "step": 17190 |
| }, |
| { |
| "epoch": 66.15384615384616, |
| "grad_norm": 0.7205275893211365, |
| "learning_rate": 1.5801679174188888e-06, |
| "loss": 0.0295, |
| "step": 17200 |
| }, |
| { |
| "epoch": 66.1923076923077, |
| "grad_norm": 0.5224143266677856, |
| "learning_rate": 1.5691057853023199e-06, |
| "loss": 0.0313, |
| "step": 17210 |
| }, |
| { |
| "epoch": 66.23076923076923, |
| "grad_norm": 0.49154120683670044, |
| "learning_rate": 1.5580803727376786e-06, |
| "loss": 0.0278, |
| "step": 17220 |
| }, |
| { |
| "epoch": 66.26923076923077, |
| "grad_norm": 0.43938741087913513, |
| "learning_rate": 1.5470917098680142e-06, |
| "loss": 0.0275, |
| "step": 17230 |
| }, |
| { |
| "epoch": 66.3076923076923, |
| "grad_norm": 0.5548313856124878, |
| "learning_rate": 1.5361398267359205e-06, |
| "loss": 0.0291, |
| "step": 17240 |
| }, |
| { |
| "epoch": 66.34615384615384, |
| "grad_norm": 0.5422559976577759, |
| "learning_rate": 1.5252247532834246e-06, |
| "loss": 0.0318, |
| "step": 17250 |
| }, |
| { |
| "epoch": 66.38461538461539, |
| "grad_norm": 0.6111128330230713, |
| "learning_rate": 1.5143465193519173e-06, |
| "loss": 0.0294, |
| "step": 17260 |
| }, |
| { |
| "epoch": 66.42307692307692, |
| "grad_norm": 0.5277599692344666, |
| "learning_rate": 1.5035051546820821e-06, |
| "loss": 0.0297, |
| "step": 17270 |
| }, |
| { |
| "epoch": 66.46153846153847, |
| "grad_norm": 0.7245263457298279, |
| "learning_rate": 1.4927006889137862e-06, |
| "loss": 0.0317, |
| "step": 17280 |
| }, |
| { |
| "epoch": 66.5, |
| "grad_norm": 0.5456480383872986, |
| "learning_rate": 1.4819331515860357e-06, |
| "loss": 0.0279, |
| "step": 17290 |
| }, |
| { |
| "epoch": 66.53846153846153, |
| "grad_norm": 0.5236904621124268, |
| "learning_rate": 1.4712025721368644e-06, |
| "loss": 0.0324, |
| "step": 17300 |
| }, |
| { |
| "epoch": 66.57692307692308, |
| "grad_norm": 0.45201101899147034, |
| "learning_rate": 1.46050897990326e-06, |
| "loss": 0.0316, |
| "step": 17310 |
| }, |
| { |
| "epoch": 66.61538461538461, |
| "grad_norm": 0.41527774930000305, |
| "learning_rate": 1.449852404121103e-06, |
| "loss": 0.026, |
| "step": 17320 |
| }, |
| { |
| "epoch": 66.65384615384616, |
| "grad_norm": 0.5794553756713867, |
| "learning_rate": 1.4392328739250615e-06, |
| "loss": 0.0311, |
| "step": 17330 |
| }, |
| { |
| "epoch": 66.6923076923077, |
| "grad_norm": 0.4518781006336212, |
| "learning_rate": 1.4286504183485277e-06, |
| "loss": 0.0306, |
| "step": 17340 |
| }, |
| { |
| "epoch": 66.73076923076923, |
| "grad_norm": 0.7854360342025757, |
| "learning_rate": 1.4181050663235284e-06, |
| "loss": 0.0315, |
| "step": 17350 |
| }, |
| { |
| "epoch": 66.76923076923077, |
| "grad_norm": 0.6389064788818359, |
| "learning_rate": 1.4075968466806533e-06, |
| "loss": 0.0302, |
| "step": 17360 |
| }, |
| { |
| "epoch": 66.8076923076923, |
| "grad_norm": 0.4904123544692993, |
| "learning_rate": 1.3971257881489762e-06, |
| "loss": 0.0279, |
| "step": 17370 |
| }, |
| { |
| "epoch": 66.84615384615384, |
| "grad_norm": 0.5139650702476501, |
| "learning_rate": 1.386691919355968e-06, |
| "loss": 0.0258, |
| "step": 17380 |
| }, |
| { |
| "epoch": 66.88461538461539, |
| "grad_norm": 0.5151085257530212, |
| "learning_rate": 1.3762952688274316e-06, |
| "loss": 0.0256, |
| "step": 17390 |
| }, |
| { |
| "epoch": 66.92307692307692, |
| "grad_norm": 0.5301920175552368, |
| "learning_rate": 1.3659358649874104e-06, |
| "loss": 0.026, |
| "step": 17400 |
| }, |
| { |
| "epoch": 66.96153846153847, |
| "grad_norm": 0.7160604000091553, |
| "learning_rate": 1.3556137361581155e-06, |
| "loss": 0.0295, |
| "step": 17410 |
| }, |
| { |
| "epoch": 67.0, |
| "grad_norm": 0.7122892141342163, |
| "learning_rate": 1.3453289105598616e-06, |
| "loss": 0.0316, |
| "step": 17420 |
| }, |
| { |
| "epoch": 67.03846153846153, |
| "grad_norm": 0.6626392602920532, |
| "learning_rate": 1.3350814163109592e-06, |
| "loss": 0.0333, |
| "step": 17430 |
| }, |
| { |
| "epoch": 67.07692307692308, |
| "grad_norm": 0.5471773147583008, |
| "learning_rate": 1.3248712814276732e-06, |
| "loss": 0.026, |
| "step": 17440 |
| }, |
| { |
| "epoch": 67.11538461538461, |
| "grad_norm": 0.4416269361972809, |
| "learning_rate": 1.3146985338241207e-06, |
| "loss": 0.0281, |
| "step": 17450 |
| }, |
| { |
| "epoch": 67.15384615384616, |
| "grad_norm": 0.5858498811721802, |
| "learning_rate": 1.3045632013122032e-06, |
| "loss": 0.0295, |
| "step": 17460 |
| }, |
| { |
| "epoch": 67.1923076923077, |
| "grad_norm": 0.5736910104751587, |
| "learning_rate": 1.294465311601537e-06, |
| "loss": 0.0285, |
| "step": 17470 |
| }, |
| { |
| "epoch": 67.23076923076923, |
| "grad_norm": 0.547957718372345, |
| "learning_rate": 1.2844048922993602e-06, |
| "loss": 0.0311, |
| "step": 17480 |
| }, |
| { |
| "epoch": 67.26923076923077, |
| "grad_norm": 0.5907455682754517, |
| "learning_rate": 1.2743819709104826e-06, |
| "loss": 0.0282, |
| "step": 17490 |
| }, |
| { |
| "epoch": 67.3076923076923, |
| "grad_norm": 0.5498229265213013, |
| "learning_rate": 1.264396574837185e-06, |
| "loss": 0.0239, |
| "step": 17500 |
| }, |
| { |
| "epoch": 67.34615384615384, |
| "grad_norm": 0.3337724804878235, |
| "learning_rate": 1.2544487313791564e-06, |
| "loss": 0.0257, |
| "step": 17510 |
| }, |
| { |
| "epoch": 67.38461538461539, |
| "grad_norm": 0.5026791095733643, |
| "learning_rate": 1.2445384677334282e-06, |
| "loss": 0.0293, |
| "step": 17520 |
| }, |
| { |
| "epoch": 67.42307692307692, |
| "grad_norm": 0.5449739098548889, |
| "learning_rate": 1.2346658109942755e-06, |
| "loss": 0.0312, |
| "step": 17530 |
| }, |
| { |
| "epoch": 67.46153846153847, |
| "grad_norm": 0.5108739733695984, |
| "learning_rate": 1.2248307881531656e-06, |
| "loss": 0.0338, |
| "step": 17540 |
| }, |
| { |
| "epoch": 67.5, |
| "grad_norm": 0.6045510768890381, |
| "learning_rate": 1.2150334260986818e-06, |
| "loss": 0.0329, |
| "step": 17550 |
| }, |
| { |
| "epoch": 67.53846153846153, |
| "grad_norm": 0.6545273661613464, |
| "learning_rate": 1.2052737516164292e-06, |
| "loss": 0.0303, |
| "step": 17560 |
| }, |
| { |
| "epoch": 67.57692307692308, |
| "grad_norm": 0.558460533618927, |
| "learning_rate": 1.1955517913889924e-06, |
| "loss": 0.0286, |
| "step": 17570 |
| }, |
| { |
| "epoch": 67.61538461538461, |
| "grad_norm": 0.704518735408783, |
| "learning_rate": 1.185867571995835e-06, |
| "loss": 0.0304, |
| "step": 17580 |
| }, |
| { |
| "epoch": 67.65384615384616, |
| "grad_norm": 0.7233688831329346, |
| "learning_rate": 1.1762211199132433e-06, |
| "loss": 0.0262, |
| "step": 17590 |
| }, |
| { |
| "epoch": 67.6923076923077, |
| "grad_norm": 0.45494726300239563, |
| "learning_rate": 1.1666124615142525e-06, |
| "loss": 0.0253, |
| "step": 17600 |
| }, |
| { |
| "epoch": 67.73076923076923, |
| "grad_norm": 0.4010249972343445, |
| "learning_rate": 1.1570416230685627e-06, |
| "loss": 0.0254, |
| "step": 17610 |
| }, |
| { |
| "epoch": 67.76923076923077, |
| "grad_norm": 0.4817158877849579, |
| "learning_rate": 1.147508630742486e-06, |
| "loss": 0.0273, |
| "step": 17620 |
| }, |
| { |
| "epoch": 67.8076923076923, |
| "grad_norm": 0.5177744030952454, |
| "learning_rate": 1.1380135105988576e-06, |
| "loss": 0.0287, |
| "step": 17630 |
| }, |
| { |
| "epoch": 67.84615384615384, |
| "grad_norm": 0.5829024314880371, |
| "learning_rate": 1.128556288596969e-06, |
| "loss": 0.0316, |
| "step": 17640 |
| }, |
| { |
| "epoch": 67.88461538461539, |
| "grad_norm": 0.5057424902915955, |
| "learning_rate": 1.1191369905925096e-06, |
| "loss": 0.0276, |
| "step": 17650 |
| }, |
| { |
| "epoch": 67.92307692307692, |
| "grad_norm": 0.6341408491134644, |
| "learning_rate": 1.1097556423374765e-06, |
| "loss": 0.0304, |
| "step": 17660 |
| }, |
| { |
| "epoch": 67.96153846153847, |
| "grad_norm": 0.6067168116569519, |
| "learning_rate": 1.1004122694801233e-06, |
| "loss": 0.0364, |
| "step": 17670 |
| }, |
| { |
| "epoch": 68.0, |
| "grad_norm": 0.4588167071342468, |
| "learning_rate": 1.0911068975648697e-06, |
| "loss": 0.026, |
| "step": 17680 |
| }, |
| { |
| "epoch": 68.03846153846153, |
| "grad_norm": 0.5819829106330872, |
| "learning_rate": 1.0818395520322456e-06, |
| "loss": 0.0262, |
| "step": 17690 |
| }, |
| { |
| "epoch": 68.07692307692308, |
| "grad_norm": 0.5707022547721863, |
| "learning_rate": 1.072610258218825e-06, |
| "loss": 0.0361, |
| "step": 17700 |
| }, |
| { |
| "epoch": 68.11538461538461, |
| "grad_norm": 0.5363244414329529, |
| "learning_rate": 1.0634190413571415e-06, |
| "loss": 0.0281, |
| "step": 17710 |
| }, |
| { |
| "epoch": 68.15384615384616, |
| "grad_norm": 0.6520892381668091, |
| "learning_rate": 1.0542659265756337e-06, |
| "loss": 0.0303, |
| "step": 17720 |
| }, |
| { |
| "epoch": 68.1923076923077, |
| "grad_norm": 0.6424570083618164, |
| "learning_rate": 1.0451509388985663e-06, |
| "loss": 0.0327, |
| "step": 17730 |
| }, |
| { |
| "epoch": 68.23076923076923, |
| "grad_norm": 0.5023139715194702, |
| "learning_rate": 1.0360741032459636e-06, |
| "loss": 0.0309, |
| "step": 17740 |
| }, |
| { |
| "epoch": 68.26923076923077, |
| "grad_norm": 0.4547223448753357, |
| "learning_rate": 1.027035444433555e-06, |
| "loss": 0.0261, |
| "step": 17750 |
| }, |
| { |
| "epoch": 68.3076923076923, |
| "grad_norm": 0.440268337726593, |
| "learning_rate": 1.0180349871726819e-06, |
| "loss": 0.0304, |
| "step": 17760 |
| }, |
| { |
| "epoch": 68.34615384615384, |
| "grad_norm": 0.4488914906978607, |
| "learning_rate": 1.0090727560702572e-06, |
| "loss": 0.0321, |
| "step": 17770 |
| }, |
| { |
| "epoch": 68.38461538461539, |
| "grad_norm": 0.47670313715934753, |
| "learning_rate": 1.0001487756286748e-06, |
| "loss": 0.0292, |
| "step": 17780 |
| }, |
| { |
| "epoch": 68.42307692307692, |
| "grad_norm": 0.6052711009979248, |
| "learning_rate": 9.912630702457548e-07, |
| "loss": 0.0279, |
| "step": 17790 |
| }, |
| { |
| "epoch": 68.46153846153847, |
| "grad_norm": 0.38048213720321655, |
| "learning_rate": 9.824156642146798e-07, |
| "loss": 0.032, |
| "step": 17800 |
| }, |
| { |
| "epoch": 68.5, |
| "grad_norm": 0.5576040744781494, |
| "learning_rate": 9.736065817239192e-07, |
| "loss": 0.0292, |
| "step": 17810 |
| }, |
| { |
| "epoch": 68.53846153846153, |
| "grad_norm": 0.8681022524833679, |
| "learning_rate": 9.648358468571667e-07, |
| "loss": 0.0283, |
| "step": 17820 |
| }, |
| { |
| "epoch": 68.57692307692308, |
| "grad_norm": 0.6247050166130066, |
| "learning_rate": 9.561034835932774e-07, |
| "loss": 0.0285, |
| "step": 17830 |
| }, |
| { |
| "epoch": 68.61538461538461, |
| "grad_norm": 0.5081879496574402, |
| "learning_rate": 9.474095158061996e-07, |
| "loss": 0.032, |
| "step": 17840 |
| }, |
| { |
| "epoch": 68.65384615384616, |
| "grad_norm": 0.631948709487915, |
| "learning_rate": 9.387539672649082e-07, |
| "loss": 0.0398, |
| "step": 17850 |
| }, |
| { |
| "epoch": 68.6923076923077, |
| "grad_norm": 0.5832291841506958, |
| "learning_rate": 9.301368616333456e-07, |
| "loss": 0.0268, |
| "step": 17860 |
| }, |
| { |
| "epoch": 68.73076923076923, |
| "grad_norm": 0.5857760310173035, |
| "learning_rate": 9.215582224703417e-07, |
| "loss": 0.0268, |
| "step": 17870 |
| }, |
| { |
| "epoch": 68.76923076923077, |
| "grad_norm": 0.63872891664505, |
| "learning_rate": 9.13018073229579e-07, |
| "loss": 0.0329, |
| "step": 17880 |
| }, |
| { |
| "epoch": 68.8076923076923, |
| "grad_norm": 0.34974420070648193, |
| "learning_rate": 9.045164372594889e-07, |
| "loss": 0.0268, |
| "step": 17890 |
| }, |
| { |
| "epoch": 68.84615384615384, |
| "grad_norm": 0.4673135578632355, |
| "learning_rate": 8.960533378032288e-07, |
| "loss": 0.0308, |
| "step": 17900 |
| }, |
| { |
| "epoch": 68.88461538461539, |
| "grad_norm": 0.496360719203949, |
| "learning_rate": 8.876287979985853e-07, |
| "loss": 0.0268, |
| "step": 17910 |
| }, |
| { |
| "epoch": 68.92307692307692, |
| "grad_norm": 0.4563571810722351, |
| "learning_rate": 8.792428408779246e-07, |
| "loss": 0.033, |
| "step": 17920 |
| }, |
| { |
| "epoch": 68.96153846153847, |
| "grad_norm": 0.5708230137825012, |
| "learning_rate": 8.708954893681421e-07, |
| "loss": 0.0303, |
| "step": 17930 |
| }, |
| { |
| "epoch": 69.0, |
| "grad_norm": 0.624557614326477, |
| "learning_rate": 8.62586766290569e-07, |
| "loss": 0.0317, |
| "step": 17940 |
| }, |
| { |
| "epoch": 69.03846153846153, |
| "grad_norm": 0.3131464123725891, |
| "learning_rate": 8.543166943609448e-07, |
| "loss": 0.027, |
| "step": 17950 |
| }, |
| { |
| "epoch": 69.07692307692308, |
| "grad_norm": 0.8714306950569153, |
| "learning_rate": 8.460852961893234e-07, |
| "loss": 0.0309, |
| "step": 17960 |
| }, |
| { |
| "epoch": 69.11538461538461, |
| "grad_norm": 0.6325943470001221, |
| "learning_rate": 8.378925942800364e-07, |
| "loss": 0.0349, |
| "step": 17970 |
| }, |
| { |
| "epoch": 69.15384615384616, |
| "grad_norm": 0.44092488288879395, |
| "learning_rate": 8.297386110316202e-07, |
| "loss": 0.0275, |
| "step": 17980 |
| }, |
| { |
| "epoch": 69.1923076923077, |
| "grad_norm": 0.5496897101402283, |
| "learning_rate": 8.216233687367491e-07, |
| "loss": 0.0317, |
| "step": 17990 |
| }, |
| { |
| "epoch": 69.23076923076923, |
| "grad_norm": 0.615792453289032, |
| "learning_rate": 8.135468895821924e-07, |
| "loss": 0.0319, |
| "step": 18000 |
| }, |
| { |
| "epoch": 69.26923076923077, |
| "grad_norm": 0.6132825016975403, |
| "learning_rate": 8.05509195648727e-07, |
| "loss": 0.0296, |
| "step": 18010 |
| }, |
| { |
| "epoch": 69.3076923076923, |
| "grad_norm": 0.4234136641025543, |
| "learning_rate": 7.975103089111052e-07, |
| "loss": 0.0329, |
| "step": 18020 |
| }, |
| { |
| "epoch": 69.34615384615384, |
| "grad_norm": 0.5253662467002869, |
| "learning_rate": 7.895502512379805e-07, |
| "loss": 0.0327, |
| "step": 18030 |
| }, |
| { |
| "epoch": 69.38461538461539, |
| "grad_norm": 0.40497887134552, |
| "learning_rate": 7.816290443918411e-07, |
| "loss": 0.0297, |
| "step": 18040 |
| }, |
| { |
| "epoch": 69.42307692307692, |
| "grad_norm": 0.5554369688034058, |
| "learning_rate": 7.737467100289725e-07, |
| "loss": 0.0329, |
| "step": 18050 |
| }, |
| { |
| "epoch": 69.46153846153847, |
| "grad_norm": 0.5565347671508789, |
| "learning_rate": 7.659032696993661e-07, |
| "loss": 0.0307, |
| "step": 18060 |
| }, |
| { |
| "epoch": 69.5, |
| "grad_norm": 0.3294142484664917, |
| "learning_rate": 7.580987448466925e-07, |
| "loss": 0.0279, |
| "step": 18070 |
| }, |
| { |
| "epoch": 69.53846153846153, |
| "grad_norm": 0.5592007040977478, |
| "learning_rate": 7.503331568082267e-07, |
| "loss": 0.029, |
| "step": 18080 |
| }, |
| { |
| "epoch": 69.57692307692308, |
| "grad_norm": 0.6052271127700806, |
| "learning_rate": 7.426065268147875e-07, |
| "loss": 0.027, |
| "step": 18090 |
| }, |
| { |
| "epoch": 69.61538461538461, |
| "grad_norm": 0.3888969421386719, |
| "learning_rate": 7.349188759906889e-07, |
| "loss": 0.0294, |
| "step": 18100 |
| }, |
| { |
| "epoch": 69.65384615384616, |
| "grad_norm": 0.5859863758087158, |
| "learning_rate": 7.272702253536683e-07, |
| "loss": 0.0289, |
| "step": 18110 |
| }, |
| { |
| "epoch": 69.6923076923077, |
| "grad_norm": 0.5358383655548096, |
| "learning_rate": 7.196605958148505e-07, |
| "loss": 0.0288, |
| "step": 18120 |
| }, |
| { |
| "epoch": 69.73076923076923, |
| "grad_norm": 0.39235493540763855, |
| "learning_rate": 7.120900081786719e-07, |
| "loss": 0.0242, |
| "step": 18130 |
| }, |
| { |
| "epoch": 69.76923076923077, |
| "grad_norm": 0.564791202545166, |
| "learning_rate": 7.045584831428276e-07, |
| "loss": 0.0338, |
| "step": 18140 |
| }, |
| { |
| "epoch": 69.8076923076923, |
| "grad_norm": 0.6125440001487732, |
| "learning_rate": 6.970660412982199e-07, |
| "loss": 0.0254, |
| "step": 18150 |
| }, |
| { |
| "epoch": 69.84615384615384, |
| "grad_norm": 0.6010186076164246, |
| "learning_rate": 6.896127031288985e-07, |
| "loss": 0.0328, |
| "step": 18160 |
| }, |
| { |
| "epoch": 69.88461538461539, |
| "grad_norm": 0.45994505286216736, |
| "learning_rate": 6.821984890120064e-07, |
| "loss": 0.0257, |
| "step": 18170 |
| }, |
| { |
| "epoch": 69.92307692307692, |
| "grad_norm": 0.5627428889274597, |
| "learning_rate": 6.748234192177227e-07, |
| "loss": 0.0323, |
| "step": 18180 |
| }, |
| { |
| "epoch": 69.96153846153847, |
| "grad_norm": 0.6051352024078369, |
| "learning_rate": 6.674875139092051e-07, |
| "loss": 0.0307, |
| "step": 18190 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 0.3997330963611603, |
| "learning_rate": 6.601907931425388e-07, |
| "loss": 0.0247, |
| "step": 18200 |
| }, |
| { |
| "epoch": 70.03846153846153, |
| "grad_norm": 0.4721720516681671, |
| "learning_rate": 6.529332768666779e-07, |
| "loss": 0.026, |
| "step": 18210 |
| }, |
| { |
| "epoch": 70.07692307692308, |
| "grad_norm": 0.536614716053009, |
| "learning_rate": 6.457149849233973e-07, |
| "loss": 0.0282, |
| "step": 18220 |
| }, |
| { |
| "epoch": 70.11538461538461, |
| "grad_norm": 0.4795149564743042, |
| "learning_rate": 6.385359370472343e-07, |
| "loss": 0.0309, |
| "step": 18230 |
| }, |
| { |
| "epoch": 70.15384615384616, |
| "grad_norm": 0.6099283695220947, |
| "learning_rate": 6.313961528654239e-07, |
| "loss": 0.0295, |
| "step": 18240 |
| }, |
| { |
| "epoch": 70.1923076923077, |
| "grad_norm": 0.6247525215148926, |
| "learning_rate": 6.242956518978682e-07, |
| "loss": 0.0325, |
| "step": 18250 |
| }, |
| { |
| "epoch": 70.23076923076923, |
| "grad_norm": 0.3841114640235901, |
| "learning_rate": 6.172344535570673e-07, |
| "loss": 0.0253, |
| "step": 18260 |
| }, |
| { |
| "epoch": 70.26923076923077, |
| "grad_norm": 0.4295583963394165, |
| "learning_rate": 6.102125771480655e-07, |
| "loss": 0.0283, |
| "step": 18270 |
| }, |
| { |
| "epoch": 70.3076923076923, |
| "grad_norm": 0.41051971912384033, |
| "learning_rate": 6.032300418684062e-07, |
| "loss": 0.0255, |
| "step": 18280 |
| }, |
| { |
| "epoch": 70.34615384615384, |
| "grad_norm": 0.5418064594268799, |
| "learning_rate": 5.962868668080706e-07, |
| "loss": 0.0311, |
| "step": 18290 |
| }, |
| { |
| "epoch": 70.38461538461539, |
| "grad_norm": 0.6504908800125122, |
| "learning_rate": 5.89383070949438e-07, |
| "loss": 0.0319, |
| "step": 18300 |
| }, |
| { |
| "epoch": 70.42307692307692, |
| "grad_norm": 0.4007869064807892, |
| "learning_rate": 5.825186731672217e-07, |
| "loss": 0.0287, |
| "step": 18310 |
| }, |
| { |
| "epoch": 70.46153846153847, |
| "grad_norm": 0.5333831310272217, |
| "learning_rate": 5.756936922284228e-07, |
| "loss": 0.0239, |
| "step": 18320 |
| }, |
| { |
| "epoch": 70.5, |
| "grad_norm": 0.5186364650726318, |
| "learning_rate": 5.689081467922791e-07, |
| "loss": 0.03, |
| "step": 18330 |
| }, |
| { |
| "epoch": 70.53846153846153, |
| "grad_norm": 0.7441152334213257, |
| "learning_rate": 5.621620554102108e-07, |
| "loss": 0.0267, |
| "step": 18340 |
| }, |
| { |
| "epoch": 70.57692307692308, |
| "grad_norm": 0.4066428244113922, |
| "learning_rate": 5.554554365257747e-07, |
| "loss": 0.0256, |
| "step": 18350 |
| }, |
| { |
| "epoch": 70.61538461538461, |
| "grad_norm": 0.3941934108734131, |
| "learning_rate": 5.487883084746137e-07, |
| "loss": 0.031, |
| "step": 18360 |
| }, |
| { |
| "epoch": 70.65384615384616, |
| "grad_norm": 0.5388003587722778, |
| "learning_rate": 5.421606894843989e-07, |
| "loss": 0.0335, |
| "step": 18370 |
| }, |
| { |
| "epoch": 70.6923076923077, |
| "grad_norm": 0.8006777167320251, |
| "learning_rate": 5.355725976747878e-07, |
| "loss": 0.0293, |
| "step": 18380 |
| }, |
| { |
| "epoch": 70.73076923076923, |
| "grad_norm": 0.48611146211624146, |
| "learning_rate": 5.290240510573707e-07, |
| "loss": 0.0271, |
| "step": 18390 |
| }, |
| { |
| "epoch": 70.76923076923077, |
| "grad_norm": 0.40488916635513306, |
| "learning_rate": 5.22515067535625e-07, |
| "loss": 0.0328, |
| "step": 18400 |
| }, |
| { |
| "epoch": 70.8076923076923, |
| "grad_norm": 0.5412066578865051, |
| "learning_rate": 5.160456649048656e-07, |
| "loss": 0.0338, |
| "step": 18410 |
| }, |
| { |
| "epoch": 70.84615384615384, |
| "grad_norm": 0.33006104826927185, |
| "learning_rate": 5.096158608521878e-07, |
| "loss": 0.0286, |
| "step": 18420 |
| }, |
| { |
| "epoch": 70.88461538461539, |
| "grad_norm": 0.5399073362350464, |
| "learning_rate": 5.032256729564349e-07, |
| "loss": 0.0305, |
| "step": 18430 |
| }, |
| { |
| "epoch": 70.92307692307692, |
| "grad_norm": 0.5007637739181519, |
| "learning_rate": 4.968751186881321e-07, |
| "loss": 0.0296, |
| "step": 18440 |
| }, |
| { |
| "epoch": 70.96153846153847, |
| "grad_norm": 0.4356684982776642, |
| "learning_rate": 4.905642154094526e-07, |
| "loss": 0.0281, |
| "step": 18450 |
| }, |
| { |
| "epoch": 71.0, |
| "grad_norm": 0.5559306740760803, |
| "learning_rate": 4.842929803741713e-07, |
| "loss": 0.0332, |
| "step": 18460 |
| }, |
| { |
| "epoch": 71.03846153846153, |
| "grad_norm": 0.5105197429656982, |
| "learning_rate": 4.780614307275987e-07, |
| "loss": 0.0281, |
| "step": 18470 |
| }, |
| { |
| "epoch": 71.07692307692308, |
| "grad_norm": 0.6882253885269165, |
| "learning_rate": 4.7186958350655506e-07, |
| "loss": 0.0257, |
| "step": 18480 |
| }, |
| { |
| "epoch": 71.11538461538461, |
| "grad_norm": 0.483347088098526, |
| "learning_rate": 4.65717455639314e-07, |
| "loss": 0.0297, |
| "step": 18490 |
| }, |
| { |
| "epoch": 71.15384615384616, |
| "grad_norm": 0.4287182092666626, |
| "learning_rate": 4.5960506394555956e-07, |
| "loss": 0.0301, |
| "step": 18500 |
| }, |
| { |
| "epoch": 71.1923076923077, |
| "grad_norm": 0.6860320568084717, |
| "learning_rate": 4.53532425136341e-07, |
| "loss": 0.0326, |
| "step": 18510 |
| }, |
| { |
| "epoch": 71.23076923076923, |
| "grad_norm": 0.379873126745224, |
| "learning_rate": 4.4749955581401103e-07, |
| "loss": 0.0276, |
| "step": 18520 |
| }, |
| { |
| "epoch": 71.26923076923077, |
| "grad_norm": 0.47224199771881104, |
| "learning_rate": 4.415064724722129e-07, |
| "loss": 0.0298, |
| "step": 18530 |
| }, |
| { |
| "epoch": 71.3076923076923, |
| "grad_norm": 0.5363563299179077, |
| "learning_rate": 4.355531914958016e-07, |
| "loss": 0.037, |
| "step": 18540 |
| }, |
| { |
| "epoch": 71.34615384615384, |
| "grad_norm": 0.4464838206768036, |
| "learning_rate": 4.2963972916082286e-07, |
| "loss": 0.0271, |
| "step": 18550 |
| }, |
| { |
| "epoch": 71.38461538461539, |
| "grad_norm": 0.39452752470970154, |
| "learning_rate": 4.2376610163446074e-07, |
| "loss": 0.0323, |
| "step": 18560 |
| }, |
| { |
| "epoch": 71.42307692307692, |
| "grad_norm": 0.46284347772598267, |
| "learning_rate": 4.1793232497498167e-07, |
| "loss": 0.03, |
| "step": 18570 |
| }, |
| { |
| "epoch": 71.46153846153847, |
| "grad_norm": 0.501216471195221, |
| "learning_rate": 4.1213841513171257e-07, |
| "loss": 0.0277, |
| "step": 18580 |
| }, |
| { |
| "epoch": 71.5, |
| "grad_norm": 0.6250320672988892, |
| "learning_rate": 4.0638438794497743e-07, |
| "loss": 0.0314, |
| "step": 18590 |
| }, |
| { |
| "epoch": 71.53846153846153, |
| "grad_norm": 0.5748885273933411, |
| "learning_rate": 4.0067025914607257e-07, |
| "loss": 0.0306, |
| "step": 18600 |
| }, |
| { |
| "epoch": 71.57692307692308, |
| "grad_norm": 0.7396159172058105, |
| "learning_rate": 3.9499604435720483e-07, |
| "loss": 0.0305, |
| "step": 18610 |
| }, |
| { |
| "epoch": 71.61538461538461, |
| "grad_norm": 0.47810420393943787, |
| "learning_rate": 3.8936175909146e-07, |
| "loss": 0.0255, |
| "step": 18620 |
| }, |
| { |
| "epoch": 71.65384615384616, |
| "grad_norm": 0.41282424330711365, |
| "learning_rate": 3.837674187527629e-07, |
| "loss": 0.0291, |
| "step": 18630 |
| }, |
| { |
| "epoch": 71.6923076923077, |
| "grad_norm": 0.3942478895187378, |
| "learning_rate": 3.7821303863581904e-07, |
| "loss": 0.0253, |
| "step": 18640 |
| }, |
| { |
| "epoch": 71.73076923076923, |
| "grad_norm": 0.5066636800765991, |
| "learning_rate": 3.726986339260996e-07, |
| "loss": 0.028, |
| "step": 18650 |
| }, |
| { |
| "epoch": 71.76923076923077, |
| "grad_norm": 0.6203503012657166, |
| "learning_rate": 3.672242196997733e-07, |
| "loss": 0.0283, |
| "step": 18660 |
| }, |
| { |
| "epoch": 71.8076923076923, |
| "grad_norm": 0.5737688541412354, |
| "learning_rate": 3.6178981092367615e-07, |
| "loss": 0.0384, |
| "step": 18670 |
| }, |
| { |
| "epoch": 71.84615384615384, |
| "grad_norm": 0.43425294756889343, |
| "learning_rate": 3.5639542245527847e-07, |
| "loss": 0.0291, |
| "step": 18680 |
| }, |
| { |
| "epoch": 71.88461538461539, |
| "grad_norm": 0.5544036030769348, |
| "learning_rate": 3.5104106904263134e-07, |
| "loss": 0.0303, |
| "step": 18690 |
| }, |
| { |
| "epoch": 71.92307692307692, |
| "grad_norm": 0.3179159164428711, |
| "learning_rate": 3.4572676532433345e-07, |
| "loss": 0.0311, |
| "step": 18700 |
| }, |
| { |
| "epoch": 71.96153846153847, |
| "grad_norm": 0.36036983132362366, |
| "learning_rate": 3.4045252582948603e-07, |
| "loss": 0.0306, |
| "step": 18710 |
| }, |
| { |
| "epoch": 72.0, |
| "grad_norm": 0.4262266159057617, |
| "learning_rate": 3.3521836497765803e-07, |
| "loss": 0.027, |
| "step": 18720 |
| }, |
| { |
| "epoch": 72.03846153846153, |
| "grad_norm": 0.4187345802783966, |
| "learning_rate": 3.300242970788492e-07, |
| "loss": 0.0269, |
| "step": 18730 |
| }, |
| { |
| "epoch": 72.07692307692308, |
| "grad_norm": 0.5532457232475281, |
| "learning_rate": 3.248703363334404e-07, |
| "loss": 0.0282, |
| "step": 18740 |
| }, |
| { |
| "epoch": 72.11538461538461, |
| "grad_norm": 0.4422209560871124, |
| "learning_rate": 3.197564968321637e-07, |
| "loss": 0.0257, |
| "step": 18750 |
| }, |
| { |
| "epoch": 72.15384615384616, |
| "grad_norm": 0.38217607140541077, |
| "learning_rate": 3.1468279255606027e-07, |
| "loss": 0.0267, |
| "step": 18760 |
| }, |
| { |
| "epoch": 72.1923076923077, |
| "grad_norm": 0.48791781067848206, |
| "learning_rate": 3.096492373764442e-07, |
| "loss": 0.0284, |
| "step": 18770 |
| }, |
| { |
| "epoch": 72.23076923076923, |
| "grad_norm": 0.48523175716400146, |
| "learning_rate": 3.046558450548642e-07, |
| "loss": 0.0287, |
| "step": 18780 |
| }, |
| { |
| "epoch": 72.26923076923077, |
| "grad_norm": 0.5189900994300842, |
| "learning_rate": 2.997026292430632e-07, |
| "loss": 0.0294, |
| "step": 18790 |
| }, |
| { |
| "epoch": 72.3076923076923, |
| "grad_norm": 0.5620434880256653, |
| "learning_rate": 2.9478960348294393e-07, |
| "loss": 0.0297, |
| "step": 18800 |
| }, |
| { |
| "epoch": 72.34615384615384, |
| "grad_norm": 0.3768644332885742, |
| "learning_rate": 2.8991678120653343e-07, |
| "loss": 0.0305, |
| "step": 18810 |
| }, |
| { |
| "epoch": 72.38461538461539, |
| "grad_norm": 0.4504743218421936, |
| "learning_rate": 2.850841757359385e-07, |
| "loss": 0.0287, |
| "step": 18820 |
| }, |
| { |
| "epoch": 72.42307692307692, |
| "grad_norm": 0.5062809586524963, |
| "learning_rate": 2.802918002833188e-07, |
| "loss": 0.0273, |
| "step": 18830 |
| }, |
| { |
| "epoch": 72.46153846153847, |
| "grad_norm": 0.4553619623184204, |
| "learning_rate": 2.7553966795084875e-07, |
| "loss": 0.0263, |
| "step": 18840 |
| }, |
| { |
| "epoch": 72.5, |
| "grad_norm": 0.5206001996994019, |
| "learning_rate": 2.708277917306723e-07, |
| "loss": 0.0289, |
| "step": 18850 |
| }, |
| { |
| "epoch": 72.53846153846153, |
| "grad_norm": 0.4859105050563812, |
| "learning_rate": 2.661561845048832e-07, |
| "loss": 0.0255, |
| "step": 18860 |
| }, |
| { |
| "epoch": 72.57692307692308, |
| "grad_norm": 0.36909204721450806, |
| "learning_rate": 2.615248590454733e-07, |
| "loss": 0.0282, |
| "step": 18870 |
| }, |
| { |
| "epoch": 72.61538461538461, |
| "grad_norm": 0.44843748211860657, |
| "learning_rate": 2.569338280143124e-07, |
| "loss": 0.0298, |
| "step": 18880 |
| }, |
| { |
| "epoch": 72.65384615384616, |
| "grad_norm": 0.4249100089073181, |
| "learning_rate": 2.523831039631036e-07, |
| "loss": 0.0258, |
| "step": 18890 |
| }, |
| { |
| "epoch": 72.6923076923077, |
| "grad_norm": 0.38302505016326904, |
| "learning_rate": 2.478726993333513e-07, |
| "loss": 0.0297, |
| "step": 18900 |
| }, |
| { |
| "epoch": 72.73076923076923, |
| "grad_norm": 0.4525497555732727, |
| "learning_rate": 2.434026264563299e-07, |
| "loss": 0.026, |
| "step": 18910 |
| }, |
| { |
| "epoch": 72.76923076923077, |
| "grad_norm": 0.47983717918395996, |
| "learning_rate": 2.389728975530486e-07, |
| "loss": 0.0334, |
| "step": 18920 |
| }, |
| { |
| "epoch": 72.8076923076923, |
| "grad_norm": 0.35710352659225464, |
| "learning_rate": 2.3458352473421986e-07, |
| "loss": 0.0292, |
| "step": 18930 |
| }, |
| { |
| "epoch": 72.84615384615384, |
| "grad_norm": 0.3811783790588379, |
| "learning_rate": 2.3023452000021594e-07, |
| "loss": 0.0289, |
| "step": 18940 |
| }, |
| { |
| "epoch": 72.88461538461539, |
| "grad_norm": 0.38314539194107056, |
| "learning_rate": 2.2592589524105255e-07, |
| "loss": 0.0258, |
| "step": 18950 |
| }, |
| { |
| "epoch": 72.92307692307692, |
| "grad_norm": 0.5492963194847107, |
| "learning_rate": 2.216576622363453e-07, |
| "loss": 0.0312, |
| "step": 18960 |
| }, |
| { |
| "epoch": 72.96153846153847, |
| "grad_norm": 0.4219627380371094, |
| "learning_rate": 2.1742983265527984e-07, |
| "loss": 0.0271, |
| "step": 18970 |
| }, |
| { |
| "epoch": 73.0, |
| "grad_norm": 0.2676997780799866, |
| "learning_rate": 2.1324241805658006e-07, |
| "loss": 0.0273, |
| "step": 18980 |
| }, |
| { |
| "epoch": 73.03846153846153, |
| "grad_norm": 0.5187320113182068, |
| "learning_rate": 2.0909542988848007e-07, |
| "loss": 0.0235, |
| "step": 18990 |
| }, |
| { |
| "epoch": 73.07692307692308, |
| "grad_norm": 0.39366570115089417, |
| "learning_rate": 2.0498887948868395e-07, |
| "loss": 0.0256, |
| "step": 19000 |
| }, |
| { |
| "epoch": 73.11538461538461, |
| "grad_norm": 0.5478989481925964, |
| "learning_rate": 2.009227780843459e-07, |
| "loss": 0.0271, |
| "step": 19010 |
| }, |
| { |
| "epoch": 73.15384615384616, |
| "grad_norm": 0.4115489423274994, |
| "learning_rate": 1.968971367920319e-07, |
| "loss": 0.0314, |
| "step": 19020 |
| }, |
| { |
| "epoch": 73.1923076923077, |
| "grad_norm": 0.4727810025215149, |
| "learning_rate": 1.9291196661768984e-07, |
| "loss": 0.0332, |
| "step": 19030 |
| }, |
| { |
| "epoch": 73.23076923076923, |
| "grad_norm": 0.5472187995910645, |
| "learning_rate": 1.8896727845662432e-07, |
| "loss": 0.0272, |
| "step": 19040 |
| }, |
| { |
| "epoch": 73.26923076923077, |
| "grad_norm": 0.2943132817745209, |
| "learning_rate": 1.8506308309346022e-07, |
| "loss": 0.0247, |
| "step": 19050 |
| }, |
| { |
| "epoch": 73.3076923076923, |
| "grad_norm": 0.47855880856513977, |
| "learning_rate": 1.811993912021226e-07, |
| "loss": 0.0258, |
| "step": 19060 |
| }, |
| { |
| "epoch": 73.34615384615384, |
| "grad_norm": 0.44629132747650146, |
| "learning_rate": 1.7737621334579346e-07, |
| "loss": 0.0279, |
| "step": 19070 |
| }, |
| { |
| "epoch": 73.38461538461539, |
| "grad_norm": 0.4611893892288208, |
| "learning_rate": 1.735935599768951e-07, |
| "loss": 0.0281, |
| "step": 19080 |
| }, |
| { |
| "epoch": 73.42307692307692, |
| "grad_norm": 0.4612509310245514, |
| "learning_rate": 1.6985144143706166e-07, |
| "loss": 0.0273, |
| "step": 19090 |
| }, |
| { |
| "epoch": 73.46153846153847, |
| "grad_norm": 0.651528537273407, |
| "learning_rate": 1.6614986795709774e-07, |
| "loss": 0.0288, |
| "step": 19100 |
| }, |
| { |
| "epoch": 73.5, |
| "grad_norm": 0.3900488317012787, |
| "learning_rate": 1.6248884965696654e-07, |
| "loss": 0.0268, |
| "step": 19110 |
| }, |
| { |
| "epoch": 73.53846153846153, |
| "grad_norm": 0.393657386302948, |
| "learning_rate": 1.5886839654575158e-07, |
| "loss": 0.0266, |
| "step": 19120 |
| }, |
| { |
| "epoch": 73.57692307692308, |
| "grad_norm": 0.5446521639823914, |
| "learning_rate": 1.5528851852163183e-07, |
| "loss": 0.0312, |
| "step": 19130 |
| }, |
| { |
| "epoch": 73.61538461538461, |
| "grad_norm": 0.37836071848869324, |
| "learning_rate": 1.5174922537185997e-07, |
| "loss": 0.0278, |
| "step": 19140 |
| }, |
| { |
| "epoch": 73.65384615384616, |
| "grad_norm": 0.40086400508880615, |
| "learning_rate": 1.4825052677272576e-07, |
| "loss": 0.0302, |
| "step": 19150 |
| }, |
| { |
| "epoch": 73.6923076923077, |
| "grad_norm": 0.3947674334049225, |
| "learning_rate": 1.4479243228953942e-07, |
| "loss": 0.0308, |
| "step": 19160 |
| }, |
| { |
| "epoch": 73.73076923076923, |
| "grad_norm": 0.5151625275611877, |
| "learning_rate": 1.4137495137659827e-07, |
| "loss": 0.0316, |
| "step": 19170 |
| }, |
| { |
| "epoch": 73.76923076923077, |
| "grad_norm": 0.539281964302063, |
| "learning_rate": 1.3799809337716517e-07, |
| "loss": 0.0268, |
| "step": 19180 |
| }, |
| { |
| "epoch": 73.8076923076923, |
| "grad_norm": 0.48684024810791016, |
| "learning_rate": 1.3466186752344178e-07, |
| "loss": 0.0298, |
| "step": 19190 |
| }, |
| { |
| "epoch": 73.84615384615384, |
| "grad_norm": 0.4839021861553192, |
| "learning_rate": 1.3136628293653863e-07, |
| "loss": 0.0277, |
| "step": 19200 |
| }, |
| { |
| "epoch": 73.88461538461539, |
| "grad_norm": 0.39492732286453247, |
| "learning_rate": 1.2811134862646178e-07, |
| "loss": 0.0257, |
| "step": 19210 |
| }, |
| { |
| "epoch": 73.92307692307692, |
| "grad_norm": 0.41039136052131653, |
| "learning_rate": 1.2489707349207623e-07, |
| "loss": 0.0277, |
| "step": 19220 |
| }, |
| { |
| "epoch": 73.96153846153847, |
| "grad_norm": 0.6431099772453308, |
| "learning_rate": 1.2172346632108754e-07, |
| "loss": 0.0309, |
| "step": 19230 |
| }, |
| { |
| "epoch": 74.0, |
| "grad_norm": 0.41975292563438416, |
| "learning_rate": 1.1859053579001688e-07, |
| "loss": 0.0274, |
| "step": 19240 |
| }, |
| { |
| "epoch": 74.03846153846153, |
| "grad_norm": 0.5424035787582397, |
| "learning_rate": 1.1549829046417437e-07, |
| "loss": 0.0304, |
| "step": 19250 |
| }, |
| { |
| "epoch": 74.07692307692308, |
| "grad_norm": 0.25909972190856934, |
| "learning_rate": 1.1244673879764411e-07, |
| "loss": 0.0275, |
| "step": 19260 |
| }, |
| { |
| "epoch": 74.11538461538461, |
| "grad_norm": 0.3769975006580353, |
| "learning_rate": 1.094358891332492e-07, |
| "loss": 0.0274, |
| "step": 19270 |
| }, |
| { |
| "epoch": 74.15384615384616, |
| "grad_norm": 0.46477359533309937, |
| "learning_rate": 1.0646574970253842e-07, |
| "loss": 0.0329, |
| "step": 19280 |
| }, |
| { |
| "epoch": 74.1923076923077, |
| "grad_norm": 0.5041369199752808, |
| "learning_rate": 1.0353632862576124e-07, |
| "loss": 0.031, |
| "step": 19290 |
| }, |
| { |
| "epoch": 74.23076923076923, |
| "grad_norm": 0.34948527812957764, |
| "learning_rate": 1.0064763391184118e-07, |
| "loss": 0.0266, |
| "step": 19300 |
| }, |
| { |
| "epoch": 74.26923076923077, |
| "grad_norm": 0.49228963255882263, |
| "learning_rate": 9.779967345835917e-08, |
| "loss": 0.0269, |
| "step": 19310 |
| }, |
| { |
| "epoch": 74.3076923076923, |
| "grad_norm": 0.30661070346832275, |
| "learning_rate": 9.49924550515302e-08, |
| "loss": 0.0266, |
| "step": 19320 |
| }, |
| { |
| "epoch": 74.34615384615384, |
| "grad_norm": 0.546813428401947, |
| "learning_rate": 9.22259863661834e-08, |
| "loss": 0.0282, |
| "step": 19330 |
| }, |
| { |
| "epoch": 74.38461538461539, |
| "grad_norm": 0.34433984756469727, |
| "learning_rate": 8.950027496573865e-08, |
| "loss": 0.0285, |
| "step": 19340 |
| }, |
| { |
| "epoch": 74.42307692307692, |
| "grad_norm": 0.6200993061065674, |
| "learning_rate": 8.681532830218497e-08, |
| "loss": 0.0302, |
| "step": 19350 |
| }, |
| { |
| "epoch": 74.46153846153847, |
| "grad_norm": 0.283530592918396, |
| "learning_rate": 8.417115371606554e-08, |
| "loss": 0.0258, |
| "step": 19360 |
| }, |
| { |
| "epoch": 74.5, |
| "grad_norm": 0.4857964515686035, |
| "learning_rate": 8.15677584364527e-08, |
| "loss": 0.0275, |
| "step": 19370 |
| }, |
| { |
| "epoch": 74.53846153846153, |
| "grad_norm": 0.4263962209224701, |
| "learning_rate": 7.900514958092964e-08, |
| "loss": 0.0295, |
| "step": 19380 |
| }, |
| { |
| "epoch": 74.57692307692308, |
| "grad_norm": 0.4442691206932068, |
| "learning_rate": 7.64833341555704e-08, |
| "loss": 0.0314, |
| "step": 19390 |
| }, |
| { |
| "epoch": 74.61538461538461, |
| "grad_norm": 0.4575265944004059, |
| "learning_rate": 7.400231905492328e-08, |
| "loss": 0.0275, |
| "step": 19400 |
| }, |
| { |
| "epoch": 74.65384615384616, |
| "grad_norm": 0.5982808470726013, |
| "learning_rate": 7.15621110619874e-08, |
| "loss": 0.0318, |
| "step": 19410 |
| }, |
| { |
| "epoch": 74.6923076923077, |
| "grad_norm": 0.47400107979774475, |
| "learning_rate": 6.916271684819787e-08, |
| "loss": 0.0268, |
| "step": 19420 |
| }, |
| { |
| "epoch": 74.73076923076923, |
| "grad_norm": 0.3319554030895233, |
| "learning_rate": 6.680414297340897e-08, |
| "loss": 0.0267, |
| "step": 19430 |
| }, |
| { |
| "epoch": 74.76923076923077, |
| "grad_norm": 0.4506201148033142, |
| "learning_rate": 6.448639588587103e-08, |
| "loss": 0.0296, |
| "step": 19440 |
| }, |
| { |
| "epoch": 74.8076923076923, |
| "grad_norm": 0.4350897967815399, |
| "learning_rate": 6.22094819222152e-08, |
| "loss": 0.0279, |
| "step": 19450 |
| }, |
| { |
| "epoch": 74.84615384615384, |
| "grad_norm": 0.46058401465415955, |
| "learning_rate": 5.997340730743705e-08, |
| "loss": 0.0247, |
| "step": 19460 |
| }, |
| { |
| "epoch": 74.88461538461539, |
| "grad_norm": 0.29316025972366333, |
| "learning_rate": 5.7778178154879736e-08, |
| "loss": 0.0309, |
| "step": 19470 |
| }, |
| { |
| "epoch": 74.92307692307692, |
| "grad_norm": 0.4575360119342804, |
| "learning_rate": 5.56238004662174e-08, |
| "loss": 0.0346, |
| "step": 19480 |
| }, |
| { |
| "epoch": 74.96153846153847, |
| "grad_norm": 0.4061368703842163, |
| "learning_rate": 5.351028013143355e-08, |
| "loss": 0.0345, |
| "step": 19490 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 0.38843148946762085, |
| "learning_rate": 5.1437622928814374e-08, |
| "loss": 0.027, |
| "step": 19500 |
| }, |
| { |
| "epoch": 75.03846153846153, |
| "grad_norm": 0.46850109100341797, |
| "learning_rate": 4.940583452492542e-08, |
| "loss": 0.0289, |
| "step": 19510 |
| }, |
| { |
| "epoch": 75.07692307692308, |
| "grad_norm": 0.2569507658481598, |
| "learning_rate": 4.741492047459661e-08, |
| "loss": 0.0273, |
| "step": 19520 |
| }, |
| { |
| "epoch": 75.11538461538461, |
| "grad_norm": 0.546170175075531, |
| "learning_rate": 4.5464886220912275e-08, |
| "loss": 0.0295, |
| "step": 19530 |
| }, |
| { |
| "epoch": 75.15384615384616, |
| "grad_norm": 0.35281962156295776, |
| "learning_rate": 4.355573709519112e-08, |
| "loss": 0.0277, |
| "step": 19540 |
| }, |
| { |
| "epoch": 75.1923076923077, |
| "grad_norm": 0.5647571086883545, |
| "learning_rate": 4.168747831697628e-08, |
| "loss": 0.0327, |
| "step": 19550 |
| }, |
| { |
| "epoch": 75.23076923076923, |
| "grad_norm": 0.48984843492507935, |
| "learning_rate": 3.986011499401199e-08, |
| "loss": 0.0296, |
| "step": 19560 |
| }, |
| { |
| "epoch": 75.26923076923077, |
| "grad_norm": 0.4074559807777405, |
| "learning_rate": 3.80736521222419e-08, |
| "loss": 0.0323, |
| "step": 19570 |
| }, |
| { |
| "epoch": 75.3076923076923, |
| "grad_norm": 0.35387930274009705, |
| "learning_rate": 3.6328094585789116e-08, |
| "loss": 0.0303, |
| "step": 19580 |
| }, |
| { |
| "epoch": 75.34615384615384, |
| "grad_norm": 0.37408000230789185, |
| "learning_rate": 3.462344715693788e-08, |
| "loss": 0.0254, |
| "step": 19590 |
| }, |
| { |
| "epoch": 75.38461538461539, |
| "grad_norm": 0.3567971885204315, |
| "learning_rate": 3.295971449613022e-08, |
| "loss": 0.0259, |
| "step": 19600 |
| }, |
| { |
| "epoch": 75.42307692307692, |
| "grad_norm": 0.2978547215461731, |
| "learning_rate": 3.1336901151949316e-08, |
| "loss": 0.0242, |
| "step": 19610 |
| }, |
| { |
| "epoch": 75.46153846153847, |
| "grad_norm": 0.4735611379146576, |
| "learning_rate": 2.975501156110283e-08, |
| "loss": 0.0284, |
| "step": 19620 |
| }, |
| { |
| "epoch": 75.5, |
| "grad_norm": 0.3551243245601654, |
| "learning_rate": 2.821405004841793e-08, |
| "loss": 0.0271, |
| "step": 19630 |
| }, |
| { |
| "epoch": 75.53846153846153, |
| "grad_norm": 0.478957861661911, |
| "learning_rate": 2.671402082682295e-08, |
| "loss": 0.0263, |
| "step": 19640 |
| }, |
| { |
| "epoch": 75.57692307692308, |
| "grad_norm": 0.4333643913269043, |
| "learning_rate": 2.5254927997342415e-08, |
| "loss": 0.0255, |
| "step": 19650 |
| }, |
| { |
| "epoch": 75.61538461538461, |
| "grad_norm": 0.456312358379364, |
| "learning_rate": 2.3836775549078703e-08, |
| "loss": 0.0317, |
| "step": 19660 |
| }, |
| { |
| "epoch": 75.65384615384616, |
| "grad_norm": 0.5187574625015259, |
| "learning_rate": 2.245956735920873e-08, |
| "loss": 0.0313, |
| "step": 19670 |
| }, |
| { |
| "epoch": 75.6923076923077, |
| "grad_norm": 0.3721175491809845, |
| "learning_rate": 2.1123307192965625e-08, |
| "loss": 0.0258, |
| "step": 19680 |
| }, |
| { |
| "epoch": 75.73076923076923, |
| "grad_norm": 0.42539578676223755, |
| "learning_rate": 1.9827998703632056e-08, |
| "loss": 0.0263, |
| "step": 19690 |
| }, |
| { |
| "epoch": 75.76923076923077, |
| "grad_norm": 0.594091534614563, |
| "learning_rate": 1.8573645432535258e-08, |
| "loss": 0.0347, |
| "step": 19700 |
| }, |
| { |
| "epoch": 75.8076923076923, |
| "grad_norm": 0.30675655603408813, |
| "learning_rate": 1.7360250809027034e-08, |
| "loss": 0.0275, |
| "step": 19710 |
| }, |
| { |
| "epoch": 75.84615384615384, |
| "grad_norm": 0.37368330359458923, |
| "learning_rate": 1.618781815048209e-08, |
| "loss": 0.0287, |
| "step": 19720 |
| }, |
| { |
| "epoch": 75.88461538461539, |
| "grad_norm": 0.3517482280731201, |
| "learning_rate": 1.5056350662286388e-08, |
| "loss": 0.03, |
| "step": 19730 |
| }, |
| { |
| "epoch": 75.92307692307692, |
| "grad_norm": 0.5828191041946411, |
| "learning_rate": 1.3965851437830468e-08, |
| "loss": 0.0295, |
| "step": 19740 |
| }, |
| { |
| "epoch": 75.96153846153847, |
| "grad_norm": 0.359904408454895, |
| "learning_rate": 1.2916323458494473e-08, |
| "loss": 0.0305, |
| "step": 19750 |
| }, |
| { |
| "epoch": 76.0, |
| "grad_norm": 0.5236489772796631, |
| "learning_rate": 1.1907769593651474e-08, |
| "loss": 0.0294, |
| "step": 19760 |
| }, |
| { |
| "epoch": 76.03846153846153, |
| "grad_norm": 0.4129619300365448, |
| "learning_rate": 1.0940192600647491e-08, |
| "loss": 0.03, |
| "step": 19770 |
| }, |
| { |
| "epoch": 76.07692307692308, |
| "grad_norm": 0.4157169461250305, |
| "learning_rate": 1.0013595124801488e-08, |
| "loss": 0.0294, |
| "step": 19780 |
| }, |
| { |
| "epoch": 76.11538461538461, |
| "grad_norm": 0.5382236242294312, |
| "learning_rate": 9.127979699393719e-09, |
| "loss": 0.0307, |
| "step": 19790 |
| }, |
| { |
| "epoch": 76.15384615384616, |
| "grad_norm": 0.36726507544517517, |
| "learning_rate": 8.283348745665719e-09, |
| "loss": 0.0273, |
| "step": 19800 |
| }, |
| { |
| "epoch": 76.1923076923077, |
| "grad_norm": 0.35563984513282776, |
| "learning_rate": 7.479704572805336e-09, |
| "loss": 0.0299, |
| "step": 19810 |
| }, |
| { |
| "epoch": 76.23076923076923, |
| "grad_norm": 0.5569225549697876, |
| "learning_rate": 6.717049377943374e-09, |
| "loss": 0.0284, |
| "step": 19820 |
| }, |
| { |
| "epoch": 76.26923076923077, |
| "grad_norm": 0.37364256381988525, |
| "learning_rate": 5.995385246151952e-09, |
| "loss": 0.0308, |
| "step": 19830 |
| }, |
| { |
| "epoch": 76.3076923076923, |
| "grad_norm": 0.3298107385635376, |
| "learning_rate": 5.314714150432831e-09, |
| "loss": 0.0343, |
| "step": 19840 |
| }, |
| { |
| "epoch": 76.34615384615384, |
| "grad_norm": 0.6490257978439331, |
| "learning_rate": 4.67503795171409e-09, |
| "loss": 0.0333, |
| "step": 19850 |
| }, |
| { |
| "epoch": 76.38461538461539, |
| "grad_norm": 0.6045204401016235, |
| "learning_rate": 4.076358398846791e-09, |
| "loss": 0.0331, |
| "step": 19860 |
| }, |
| { |
| "epoch": 76.42307692307692, |
| "grad_norm": 0.40658727288246155, |
| "learning_rate": 3.518677128598324e-09, |
| "loss": 0.0314, |
| "step": 19870 |
| }, |
| { |
| "epoch": 76.46153846153847, |
| "grad_norm": 0.40208566188812256, |
| "learning_rate": 3.0019956656457404e-09, |
| "loss": 0.027, |
| "step": 19880 |
| }, |
| { |
| "epoch": 76.5, |
| "grad_norm": 0.3167416453361511, |
| "learning_rate": 2.526315422579084e-09, |
| "loss": 0.0299, |
| "step": 19890 |
| }, |
| { |
| "epoch": 76.53846153846153, |
| "grad_norm": 0.5065469145774841, |
| "learning_rate": 2.091637699889737e-09, |
| "loss": 0.0288, |
| "step": 19900 |
| }, |
| { |
| "epoch": 76.57692307692308, |
| "grad_norm": 0.3822614550590515, |
| "learning_rate": 1.6979636859687509e-09, |
| "loss": 0.027, |
| "step": 19910 |
| }, |
| { |
| "epoch": 76.61538461538461, |
| "grad_norm": 0.48988813161849976, |
| "learning_rate": 1.3452944571051839e-09, |
| "loss": 0.0262, |
| "step": 19920 |
| }, |
| { |
| "epoch": 76.65384615384616, |
| "grad_norm": 0.34604522585868835, |
| "learning_rate": 1.0336309774860998e-09, |
| "loss": 0.0247, |
| "step": 19930 |
| }, |
| { |
| "epoch": 76.6923076923077, |
| "grad_norm": 0.5055863261222839, |
| "learning_rate": 7.629740991849099e-10, |
| "loss": 0.0274, |
| "step": 19940 |
| }, |
| { |
| "epoch": 76.73076923076923, |
| "grad_norm": 0.45108962059020996, |
| "learning_rate": 5.333245621680361e-10, |
| "loss": 0.0313, |
| "step": 19950 |
| }, |
| { |
| "epoch": 76.76923076923077, |
| "grad_norm": 0.4111419916152954, |
| "learning_rate": 3.446829942882479e-10, |
| "loss": 0.0278, |
| "step": 19960 |
| }, |
| { |
| "epoch": 76.8076923076923, |
| "grad_norm": 0.5759734511375427, |
| "learning_rate": 1.9704991128632887e-10, |
| "loss": 0.0312, |
| "step": 19970 |
| }, |
| { |
| "epoch": 76.84615384615384, |
| "grad_norm": 0.4901687204837799, |
| "learning_rate": 9.042571678274936e-11, |
| "loss": 0.026, |
| "step": 19980 |
| }, |
| { |
| "epoch": 76.88461538461539, |
| "grad_norm": 0.47623711824417114, |
| "learning_rate": 2.4810702284328024e-11, |
| "loss": 0.0309, |
| "step": 19990 |
| }, |
| { |
| "epoch": 76.92307692307692, |
| "grad_norm": 0.5158125162124634, |
| "learning_rate": 2.050471825665312e-13, |
| "loss": 0.0254, |
| "step": 20000 |
| }, |
| { |
| "epoch": 76.92307692307692, |
| "step": 20000, |
| "total_flos": 0.0, |
| "train_loss": 0.09504265115857125, |
| "train_runtime": 20232.6485, |
| "train_samples_per_second": 63.264, |
| "train_steps_per_second": 0.989 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 77, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|