| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "global_step": 1974, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9996834033646177e-05, | |
| "loss": 0.9175, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.998733693645213e-05, | |
| "loss": 0.7595, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.997151111381707e-05, | |
| "loss": 0.7375, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9949360574062774e-05, | |
| "loss": 0.7227, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9920890927418316e-05, | |
| "loss": 0.7147, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.988610938459917e-05, | |
| "loss": 0.7102, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9845024754980876e-05, | |
| "loss": 0.7056, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.979764744436784e-05, | |
| "loss": 0.7128, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9743989452357756e-05, | |
| "loss": 0.6982, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.968406436930243e-05, | |
| "loss": 0.7258, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.961788737286559e-05, | |
| "loss": 0.7095, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.954547522417877e-05, | |
| "loss": 0.7048, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.946684626359607e-05, | |
| "loss": 0.6805, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.938202040604898e-05, | |
| "loss": 0.6798, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.929101913600238e-05, | |
| "loss": 0.7134, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.919386550201299e-05, | |
| "loss": 0.6895, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.909058411089174e-05, | |
| "loss": 0.705, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.8981201121471356e-05, | |
| "loss": 0.6712, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.886574423798097e-05, | |
| "loss": 0.6744, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.874424270302927e-05, | |
| "loss": 0.6675, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.861672729019797e-05, | |
| "loss": 0.6726, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.848323029624761e-05, | |
| "loss": 0.6821, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.834378553293748e-05, | |
| "loss": 0.7133, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.81984283184619e-05, | |
| "loss": 0.6681, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.804719546850487e-05, | |
| "loss": 0.682, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.789012528691558e-05, | |
| "loss": 0.6755, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.772725755600682e-05, | |
| "loss": 0.68, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.755863352647909e-05, | |
| "loss": 0.6663, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.738429590697271e-05, | |
| "loss": 0.6672, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.720428885325069e-05, | |
| "loss": 0.6466, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.701865795701505e-05, | |
| "loss": 0.6668, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.684682059461469e-05, | |
| "loss": 0.6591, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.665063509461097e-05, | |
| "loss": 0.661, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.644896598002736e-05, | |
| "loss": 0.6749, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.624186432907437e-05, | |
| "loss": 0.6654, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.602938259590072e-05, | |
| "loss": 0.6716, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.581157459730783e-05, | |
| "loss": 0.6796, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.558849549911931e-05, | |
| "loss": 0.6794, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.536020180220871e-05, | |
| "loss": 0.6651, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.515032675559024e-05, | |
| "loss": 0.6823, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.4912285699446786e-05, | |
| "loss": 0.6677, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.4669201313179155e-05, | |
| "loss": 0.6704, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.442113516454638e-05, | |
| "loss": 0.6481, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.416815008307488e-05, | |
| "loss": 0.665, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.391031014414514e-05, | |
| "loss": 0.6658, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.364768065276284e-05, | |
| "loss": 0.6699, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.338032812701867e-05, | |
| "loss": 0.6664, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.310832028124069e-05, | |
| "loss": 0.6817, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.283172600884393e-05, | |
| "loss": 0.6791, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.2550615364881194e-05, | |
| "loss": 0.6679, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.226505954829973e-05, | |
| "loss": 0.6476, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.197513088390813e-05, | |
| "loss": 0.6721, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.1680902804058095e-05, | |
| "loss": 0.6563, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.138244983004574e-05, | |
| "loss": 0.6391, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.107984755323697e-05, | |
| "loss": 0.6672, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.077317261592194e-05, | |
| "loss": 0.6497, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.04625026919033e-05, | |
| "loss": 0.668, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.0147916466823174e-05, | |
| "loss": 0.6682, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.982949361823388e-05, | |
| "loss": 0.6352, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.950731479541743e-05, | |
| "loss": 0.6698, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.918146159895882e-05, | |
| "loss": 0.6549, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.8852016560078605e-05, | |
| "loss": 0.6516, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.851906311972943e-05, | |
| "loss": 0.6629, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.821647502051616e-05, | |
| "loss": 0.6764, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.787708866250794e-05, | |
| "loss": 0.6415, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.7534440830144466e-05, | |
| "loss": 0.6463, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.71886183083464e-05, | |
| "loss": 0.6508, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.683970868611123e-05, | |
| "loss": 0.6411, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.648780033432891e-05, | |
| "loss": 0.6266, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.613298238339955e-05, | |
| "loss": 0.6409, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.5775344700658705e-05, | |
| "loss": 0.6594, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.5414977867616006e-05, | |
| "loss": 0.6427, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.505197315701292e-05, | |
| "loss": 0.6462, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.468642250970547e-05, | |
| "loss": 0.6277, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.431841851137764e-05, | |
| "loss": 0.6551, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.394805436909157e-05, | |
| "loss": 0.6402, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.357542388768033e-05, | |
| "loss": 0.6515, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.3200621445989226e-05, | |
| "loss": 0.6489, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.282374197297185e-05, | |
| "loss": 0.6568, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.2444880923646674e-05, | |
| "loss": 0.615, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.20641342549205e-05, | |
| "loss": 0.6447, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.168159840128472e-05, | |
| "loss": 0.6159, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.129737025039068e-05, | |
| "loss": 0.6347, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.091154711851022e-05, | |
| "loss": 0.6361, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.052422672588765e-05, | |
| "loss": 0.6504, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.013550717198948e-05, | |
| "loss": 0.6467, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.9745486910657993e-05, | |
| "loss": 0.6364, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 2.9354264725175185e-05, | |
| "loss": 0.6361, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.8961939703243122e-05, | |
| "loss": 0.6441, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.856861121188735e-05, | |
| "loss": 0.6404, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.8174378872289446e-05, | |
| "loss": 0.6307, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.777934253455522e-05, | |
| "loss": 0.6484, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.7383602252424985e-05, | |
| "loss": 0.6237, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.6987258257932175e-05, | |
| "loss": 0.6161, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.6590410936016895e-05, | |
| "loss": 0.6381, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.619316079910063e-05, | |
| "loss": 0.6366, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.5795608461628802e-05, | |
| "loss": 0.6202, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.5397854614587334e-05, | |
| "loss": 0.6334, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.5954, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 2.460214538541267e-05, | |
| "loss": 0.4963, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 2.4204391538371207e-05, | |
| "loss": 0.487, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.3806839200899377e-05, | |
| "loss": 0.489, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 2.3409589063983117e-05, | |
| "loss": 0.4805, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 2.3012741742067838e-05, | |
| "loss": 0.4907, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 2.261639774757503e-05, | |
| "loss": 0.4719, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 2.2220657465444782e-05, | |
| "loss": 0.4914, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.182562112771056e-05, | |
| "loss": 0.4775, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.143138878811265e-05, | |
| "loss": 0.4935, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 2.1038060296756883e-05, | |
| "loss": 0.5082, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 2.064573527482482e-05, | |
| "loss": 0.4868, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 2.025451308934201e-05, | |
| "loss": 0.4988, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 1.9864492828010526e-05, | |
| "loss": 0.4653, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 1.9475773274112354e-05, | |
| "loss": 0.4915, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 1.9088452881489787e-05, | |
| "loss": 0.4763, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.8702629749609324e-05, | |
| "loss": 0.4807, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.8318401598715284e-05, | |
| "loss": 0.4653, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 1.793586574507951e-05, | |
| "loss": 0.4778, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.7555119076353338e-05, | |
| "loss": 0.4839, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.7176258027028152e-05, | |
| "loss": 0.4718, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.6799378554010773e-05, | |
| "loss": 0.4793, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.6424576112319672e-05, | |
| "loss": 0.4825, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.6051945630908426e-05, | |
| "loss": 0.4857, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.5681581488622367e-05, | |
| "loss": 0.4802, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 1.5313577490294538e-05, | |
| "loss": 0.4812, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.4948026842987084e-05, | |
| "loss": 0.4682, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.4585022132384008e-05, | |
| "loss": 0.4974, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.4224655299341304e-05, | |
| "loss": 0.4737, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.3867017616600456e-05, | |
| "loss": 0.4877, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.3512199665671094e-05, | |
| "loss": 0.4753, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.316029131388878e-05, | |
| "loss": 0.4638, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.2811381691653607e-05, | |
| "loss": 0.4626, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.2465559169855535e-05, | |
| "loss": 0.4786, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.212291133749206e-05, | |
| "loss": 0.4717, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.178352497948384e-05, | |
| "loss": 0.4803, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.1447486054694112e-05, | |
| "loss": 0.4803, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.1114879674157233e-05, | |
| "loss": 0.4739, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.0785790079522001e-05, | |
| "loss": 0.471, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.046030062171512e-05, | |
| "loss": 0.4799, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.0138493739830352e-05, | |
| "loss": 0.4689, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 9.820450940248544e-06, | |
| "loss": 0.4599, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 9.506252775993882e-06, | |
| "loss": 0.5019, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 9.195978826331697e-06, | |
| "loss": 0.4764, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 8.889707676612791e-06, | |
| "loss": 0.4579, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 8.587516898369589e-06, | |
| "loss": 0.4592, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 8.289483029668972e-06, | |
| "loss": 0.4861, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 7.99568155572701e-06, | |
| "loss": 0.4753, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 7.706186889790209e-06, | |
| "loss": 0.4929, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 7.421072354288302e-06, | |
| "loss": 0.4594, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 7.140410162263414e-06, | |
| "loss": 0.4912, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 6.86427139908008e-06, | |
| "loss": 0.4681, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 6.5927260044209655e-06, | |
| "loss": 0.4816, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 6.3258427545727e-06, | |
| "loss": 0.4723, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 6.063689245006443e-06, | |
| "loss": 0.4856, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 5.806331873257462e-06, | |
| "loss": 0.4829, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 5.553835822108152e-06, | |
| "loss": 0.4741, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 5.306265043078693e-06, | |
| "loss": 0.4654, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 5.0636822402296165e-06, | |
| "loss": 0.4668, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.826148854280277e-06, | |
| "loss": 0.4723, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 4.593725047047293e-06, | |
| "loss": 0.4639, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 4.3664696862069505e-06, | |
| "loss": 0.4777, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 4.144440330385347e-06, | |
| "loss": 0.4546, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.927693214580075e-06, | |
| "loss": 0.4543, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.71628323591722e-06, | |
| "loss": 0.4543, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.5102639397471214e-06, | |
| "loss": 0.4659, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.3096875060825845e-06, | |
| "loss": 0.485, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.11460473638282e-06, | |
| "loss": 0.4768, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.925065040686642e-06, | |
| "loss": 0.4635, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.741116425097995e-06, | |
| "loss": 0.4681, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.5628054796271063e-06, | |
| "loss": 0.4492, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.390177366390273e-06, | |
| "loss": 0.4664, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.22327580817136e-06, | |
| "loss": 0.48, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.0621430773477947e-06, | |
| "loss": 0.4616, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 1.906819985183908e-06, | |
| "loss": 0.4854, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 1.7573458714944063e-06, | |
| "loss": 0.4846, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 1.6137585946804674e-06, | |
| "loss": 0.4552, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 1.4760945221410638e-06, | |
| "loss": 0.4615, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 1.3443885210619428e-06, | |
| "loss": 0.4735, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.2186739495845477e-06, | |
| "loss": 0.4705, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.0989826483571552e-06, | |
| "loss": 0.4653, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 9.85344932470364e-07, | |
| "loss": 0.4632, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 8.77789583778979e-07, | |
| "loss": 0.481, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 7.763438436122122e-07, | |
| "loss": 0.4773, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 6.810334058740736e-07, | |
| "loss": 0.4791, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 5.918824105356797e-07, | |
| "loss": 0.4793, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 5.08913437521169e-07, | |
| "loss": 0.4647, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 4.3214750098869995e-07, | |
| "loss": 0.4765, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.616040440080432e-07, | |
| "loss": 0.4787, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.973009336361021e-07, | |
| "loss": 0.4723, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.392544563915883e-07, | |
| "loss": 0.453, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.8747931413001795e-07, | |
| "loss": 0.4561, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.4198862032005488e-07, | |
| "loss": 0.4612, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.0279389672218365e-07, | |
| "loss": 0.4774, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 6.990507047049676e-08, | |
| "loss": 0.4635, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 4.3330471558378213e-08, | |
| "loss": 0.4761, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 2.3076830728713252e-08, | |
| "loss": 0.4593, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 9.149277769132658e-09, | |
| "loss": 0.4736, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.551340212760377e-09, | |
| "loss": 0.4486, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 1974, | |
| "total_flos": 3.437359013145084e+19, | |
| "train_loss": 0.5708327819994277, | |
| "train_runtime": 105327.89, | |
| "train_samples_per_second": 4.797, | |
| "train_steps_per_second": 0.019 | |
| } | |
| ], | |
| "max_steps": 1974, | |
| "num_train_epochs": 2, | |
| "total_flos": 3.437359013145084e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |