| { | |
| "best_metric": 2.318600654602051, | |
| "best_model_checkpoint": "/data4/share_nlp/data/luannd/78.52.project/weight_saving/PoetGPT_vietnamese_with_deepspeed_v0/checkpoint-7744", | |
| "epoch": 11.0, | |
| "global_step": 7744, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.3499999999999998e-05, | |
| "loss": 5.9125, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.8499999999999998e-05, | |
| "loss": 5.6567, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.3499999999999993e-05, | |
| "loss": 5.3131, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 5.6999999999999996e-05, | |
| "loss": 4.8718, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 7.199999999999999e-05, | |
| "loss": 4.3081, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 8.699999999999999e-05, | |
| "loss": 3.6688, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000102, | |
| "loss": 3.3306, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.000117, | |
| "loss": 3.2425, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00013199999999999998, | |
| "loss": 3.1231, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.000147, | |
| "loss": 3.1498, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000162, | |
| "loss": 3.0767, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00017699999999999997, | |
| "loss": 3.0715, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00019199999999999998, | |
| "loss": 3.0144, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00020699999999999996, | |
| "loss": 3.0438, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00022199999999999998, | |
| "loss": 2.9902, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.000237, | |
| "loss": 2.9502, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00025199999999999995, | |
| "loss": 2.9778, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.000267, | |
| "loss": 2.9055, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00028199999999999997, | |
| "loss": 2.8714, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00029699999999999996, | |
| "loss": 2.8467, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0002998135381828383, | |
| "loss": 2.846, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0002990568314839864, | |
| "loss": 2.8177, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002977211629518312, | |
| "loss": 2.8653, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00029581172054786616, | |
| "loss": 2.8117, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00029333592086792107, | |
| "loss": 2.8291, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0002903033803348551, | |
| "loss": 2.797, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00028672587784675096, | |
| "loss": 2.7837, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00028261730902569146, | |
| "loss": 2.7499, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0002779936322448233, | |
| "loss": 2.7736, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00027287280664334875, | |
| "loss": 2.7529, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00026727472237020447, | |
| "loss": 2.7289, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.000261221123327374, | |
| "loss": 2.7221, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0002547355227129109, | |
| "loss": 2.7534, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00024784311169171814, | |
| "loss": 2.7273, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0002405706615488216, | |
| "loss": 2.705, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0002329464197051909, | |
| "loss": 2.6387, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.000225, | |
| "loss": 2.736, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00021676226766548882, | |
| "loss": 2.688, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0002082652194412042, | |
| "loss": 2.7384, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00019954185929327506, | |
| "loss": 2.6948, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00019062607022145078, | |
| "loss": 2.7189, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00018155248265182435, | |
| "loss": 2.6816, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00017235633992642615, | |
| "loss": 2.7302, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00016307336141214873, | |
| "loss": 2.6641, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00015373960376071093, | |
| "loss": 2.7036, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00014439132085855116, | |
| "loss": 2.6852, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0001350648230106275, | |
| "loss": 2.6867, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.000125796335905079, | |
| "loss": 2.7016, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00011662185990655284, | |
| "loss": 2.7191, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00010757703022472587, | |
| "loss": 2.7024, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 9.869697850114969e-05, | |
| "loss": 2.6439, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 9.001619635203888e-05, | |
| "loss": 2.7095, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 8.156840139702554e-05, | |
| "loss": 2.684, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.33864062942472e-05, | |
| "loss": 2.6504, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.550199129045668e-05, | |
| "loss": 2.6889, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.794578078119291e-05, | |
| "loss": 2.6694, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.074712436047112e-05, | |
| "loss": 2.669, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.3933982822017876e-05, | |
| "loss": 2.661, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.753281955483985e-05, | |
| "loss": 2.6576, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.15684977549647e-05, | |
| "loss": 2.665, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.6064183852600797e-05, | |
| "loss": 2.6302, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.1041257529821453e-05, | |
| "loss": 2.7063, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.6519228678279718e-05, | |
| "loss": 2.6988, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.251566161950357e-05, | |
| "loss": 2.6588, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 9.046106882113751e-06, | |
| "loss": 2.6728, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 6.1240408009518346e-06, | |
| "loss": 2.6478, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.760813172726457e-06, | |
| "loss": 2.6409, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.9656031714918365e-06, | |
| "loss": 2.73, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 7.453836951897885e-07, | |
| "loss": 2.6736, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.0489428174020875e-07, | |
| "loss": 2.6633, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 2.6718015670776367, | |
| "eval_runtime": 11.0736, | |
| "eval_samples_per_second": 904.038, | |
| "eval_steps_per_second": 7.134, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.662269987756317e-08, | |
| "loss": 2.6824, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 5.707952862381681e-07, | |
| "loss": 2.6342, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.6753760662307215e-06, | |
| "loss": 2.6547, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.356074662104319e-06, | |
| "loss": 2.6604, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 5.606362957498179e-06, | |
| "loss": 2.6865, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 8.417500453744864e-06, | |
| "loss": 2.6392, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 1.1778568219438839e-05, | |
| "loss": 2.6564, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 1.567651130140481e-05, | |
| "loss": 2.6074, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 2.0096189432334208e-05, | |
| "loss": 2.6392, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 2.5020435838132658e-05, | |
| "loss": 2.6144, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.0430123916561604e-05, | |
| "loss": 2.6486, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.630424152818204e-05, | |
| "loss": 2.6122, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.2619972611042214e-05, | |
| "loss": 2.6399, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.935278580210442e-05, | |
| "loss": 2.6323, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 5.647652972118994e-05, | |
| "loss": 2.6635, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 6.396353454734303e-05, | |
| "loss": 2.6061, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 7.178471949307521e-05, | |
| "loss": 2.5807, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 7.990970575904072e-05, | |
| "loss": 2.6347, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 8.830693453040826e-05, | |
| "loss": 2.6889, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 9.694378955661275e-05, | |
| "loss": 2.6108, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00010578672383836428, | |
| "loss": 2.6338, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00011480138992984273, | |
| "loss": 2.596, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00012395277334996044, | |
| "loss": 2.6379, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00013320532858450377, | |
| "loss": 2.6312, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00014252311715089535, | |
| "loss": 2.6165, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00015186994718931215, | |
| "loss": 2.6148, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.0001612095140379635, | |
| "loss": 2.6115, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00017050554124651096, | |
| "loss": 2.6476, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00017972192147990958, | |
| "loss": 2.6233, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00018882285676537802, | |
| "loss": 2.6507, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00019777299753775265, | |
| "loss": 2.6474, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00020653757994315076, | |
| "loss": 2.6162, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00021508256086763368, | |
| "loss": 2.6377, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00022337475016639342, | |
| "loss": 2.6132, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00023138193957986393, | |
| "loss": 2.6239, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00023907302783602514, | |
| "loss": 2.6212, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.0002464181414529809, | |
| "loss": 2.599, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.000253388750772592, | |
| "loss": 2.6263, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.0002599577807744739, | |
| "loss": 2.5971, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.0002660997162399341, | |
| "loss": 2.5784, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.0002717907008573785, | |
| "loss": 2.6068, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.0002770086298842426, | |
| "loss": 2.6139, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00028173323600553423, | |
| "loss": 2.6228, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.0002859461680554975, | |
| "loss": 2.5909, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00028963106229663063, | |
| "loss": 2.5944, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.0002927736059791983, | |
| "loss": 2.6474, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.0002953615929343616, | |
| "loss": 2.6134, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.0002973849709849932, | |
| "loss": 2.5762, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.0002988358809900258, | |
| "loss": 2.5956, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.0002997086873706798, | |
| "loss": 2.5878, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.0003, | |
| "loss": 2.6209, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.0002997086873706798, | |
| "loss": 2.6205, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.0002988358809900258, | |
| "loss": 2.5864, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00029738497098499324, | |
| "loss": 2.5826, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00029536159293436166, | |
| "loss": 2.5817, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.0002927736059791983, | |
| "loss": 2.5889, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00028963106229663063, | |
| "loss": 2.5749, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.0002859461680554975, | |
| "loss": 2.6131, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00028173323600553434, | |
| "loss": 2.5982, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.0002770086298842427, | |
| "loss": 2.5702, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.0002717907008573784, | |
| "loss": 2.5854, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00026609971623993406, | |
| "loss": 2.554, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.0002599577807744739, | |
| "loss": 2.5928, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.00025338875077259205, | |
| "loss": 2.5768, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.00024641814145298093, | |
| "loss": 2.5395, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.00023907302783602525, | |
| "loss": 2.5265, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.00023138193957986385, | |
| "loss": 2.5535, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.0002233747501663934, | |
| "loss": 2.5308, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 0.00021508256086763376, | |
| "loss": 2.5611, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 0.00020653757994315084, | |
| "loss": 2.5567, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 2.5756430625915527, | |
| "eval_runtime": 11.0382, | |
| "eval_samples_per_second": 906.942, | |
| "eval_steps_per_second": 7.157, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.00019777299753775273, | |
| "loss": 2.5977, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.00018882285676537824, | |
| "loss": 2.5147, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 0.0001797219214799098, | |
| "loss": 2.5313, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 0.0001705055412465109, | |
| "loss": 2.5303, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 0.00016120951403796358, | |
| "loss": 2.5039, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 0.00015186994718931223, | |
| "loss": 2.5199, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 0.00014252311715089543, | |
| "loss": 2.4787, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 0.00013320532858450385, | |
| "loss": 2.5371, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 0.00012395277334996052, | |
| "loss": 2.5247, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 0.00011480138992984267, | |
| "loss": 2.488, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 0.00010578672383836424, | |
| "loss": 2.5005, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 9.694378955661282e-05, | |
| "loss": 2.4841, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 8.830693453040844e-05, | |
| "loss": 2.5214, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 7.990970575904079e-05, | |
| "loss": 2.4842, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 7.178471949307551e-05, | |
| "loss": 2.5118, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 6.396353454734299e-05, | |
| "loss": 2.468, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 5.647652972119001e-05, | |
| "loss": 2.5586, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 4.935278580210444e-05, | |
| "loss": 2.4862, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 4.2619972611042316e-05, | |
| "loss": 2.4773, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 3.630424152818206e-05, | |
| "loss": 2.4787, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.0430123916561723e-05, | |
| "loss": 2.4914, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 2.5020435838132692e-05, | |
| "loss": 2.5184, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 2.0096189432334177e-05, | |
| "loss": 2.5503, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.5676511301404892e-05, | |
| "loss": 2.4975, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.1778568219438856e-05, | |
| "loss": 2.4797, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 8.41750045374493e-06, | |
| "loss": 2.5437, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 5.606362957498212e-06, | |
| "loss": 2.5227, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 3.356074662104369e-06, | |
| "loss": 2.4699, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.6753760662307048e-06, | |
| "loss": 2.5275, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 5.707952862381681e-07, | |
| "loss": 2.5028, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 4.662269987756317e-08, | |
| "loss": 2.4967, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0489428174020875e-07, | |
| "loss": 2.5104, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 7.453836951897885e-07, | |
| "loss": 2.4775, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 1.965603171491803e-06, | |
| "loss": 2.4989, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 3.76081317272644e-06, | |
| "loss": 2.5113, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 6.1240408009518185e-06, | |
| "loss": 2.5008, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 9.046106882113702e-06, | |
| "loss": 2.5256, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 1.251566161950357e-05, | |
| "loss": 2.5026, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.6519228678279633e-05, | |
| "loss": 2.4909, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 2.104125752982142e-05, | |
| "loss": 2.5, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 2.606418385260078e-05, | |
| "loss": 2.5199, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 3.156849775496477e-05, | |
| "loss": 2.4495, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.753281955483985e-05, | |
| "loss": 2.497, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 4.3933982822017924e-05, | |
| "loss": 2.4602, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 5.074712436047102e-05, | |
| "loss": 2.4893, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 5.794578078119269e-05, | |
| "loss": 2.4704, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.550199129045656e-05, | |
| "loss": 2.487, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 7.338640629424713e-05, | |
| "loss": 2.5259, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 8.15684013970256e-05, | |
| "loss": 2.5138, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 9.001619635203874e-05, | |
| "loss": 2.4926, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 9.869697850114967e-05, | |
| "loss": 2.5033, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 0.00010757703022472571, | |
| "loss": 2.4881, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.00011662185990655278, | |
| "loss": 2.5102, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 0.00012579633590507902, | |
| "loss": 2.5005, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 0.00013506482301062737, | |
| "loss": 2.5107, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 0.00014439132085855113, | |
| "loss": 2.5019, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 0.00015373960376071074, | |
| "loss": 2.5012, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 0.00016307336141214865, | |
| "loss": 2.5013, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 0.0001723563399264259, | |
| "loss": 2.4734, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 0.00018155248265182446, | |
| "loss": 2.4939, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 0.00019062607022145073, | |
| "loss": 2.4969, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 0.0001995418592932751, | |
| "loss": 2.4625, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.0002082652194412041, | |
| "loss": 2.5128, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 0.00021676226766548885, | |
| "loss": 2.4877, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 0.00022499999999999986, | |
| "loss": 2.4856, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 0.0002329464197051909, | |
| "loss": 2.4225, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 0.00024057066154882162, | |
| "loss": 2.5323, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 0.0002478431116917181, | |
| "loss": 2.4751, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 0.0002547355227129109, | |
| "loss": 2.4638, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 0.0002612211233273739, | |
| "loss": 2.4553, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0002672747223702044, | |
| "loss": 2.4885, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 2.5283169746398926, | |
| "eval_runtime": 11.0137, | |
| "eval_samples_per_second": 908.962, | |
| "eval_steps_per_second": 7.173, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 0.00027287280664334865, | |
| "loss": 2.4728, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 0.00027799363224482337, | |
| "loss": 2.4908, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 0.00028261730902569146, | |
| "loss": 2.4329, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 0.00028672587784675096, | |
| "loss": 2.4558, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 0.0002903033803348551, | |
| "loss": 2.4522, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 0.000293335920867921, | |
| "loss": 2.4269, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 0.00029581172054786616, | |
| "loss": 2.4584, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 0.0002977211629518312, | |
| "loss": 2.4243, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 0.0002990568314839864, | |
| "loss": 2.4672, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 0.0002998135381828383, | |
| "loss": 2.4333, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 0.0002999883438721462, | |
| "loss": 2.3994, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 0.000299580569577177, | |
| "loss": 2.4639, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 0.00029859179916195787, | |
| "loss": 2.4175, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 0.0002970258731772816, | |
| "loss": 2.4103, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 0.00029488887394336027, | |
| "loss": 2.4623, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 0.00029218910192506983, | |
| "loss": 2.4217, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 0.0002889370434915463, | |
| "loss": 2.4537, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 0.00028514533018536277, | |
| "loss": 2.4471, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 0.00028082868965949076, | |
| "loss": 2.3821, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 0.0002760038884726156, | |
| "loss": 2.406, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 0.0002706896669650002, | |
| "loss": 2.4075, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 0.00026490666646784665, | |
| "loss": 2.4283, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 0.00025867734912889096, | |
| "loss": 2.3801, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 0.00025202591066563786, | |
| "loss": 2.4049, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 0.00024497818638512107, | |
| "loss": 2.3783, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 0.00023756155083521851, | |
| "loss": 2.4167, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 0.00022980481147730062, | |
| "loss": 2.3844, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 0.00022173809679319783, | |
| "loss": 2.4114, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 0.00021339273926110515, | |
| "loss": 2.4202, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 0.00020480115365495915, | |
| "loss": 2.3722, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 0.00019599671113999995, | |
| "loss": 2.4075, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 0.00018701360965354402, | |
| "loss": 2.3886, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 0.00017788674107443704, | |
| "loss": 2.3833, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 0.00016865155569712278, | |
| "loss": 2.3775, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 0.00015934392453672772, | |
| "loss": 2.413, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 0.00015000000000000004, | |
| "loss": 2.3845, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 0.00014065607546327242, | |
| "loss": 2.3666, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 0.00013134844430287736, | |
| "loss": 2.4041, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 0.0001221132589255631, | |
| "loss": 2.4064, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 0.00011298639034645613, | |
| "loss": 2.4397, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 0.00010400328886000018, | |
| "loss": 2.4145, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 9.519884634504099e-05, | |
| "loss": 2.3853, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 8.660726073889497e-05, | |
| "loss": 2.4271, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 7.82619032068023e-05, | |
| "loss": 2.3364, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 7.019518852269947e-05, | |
| "loss": 2.3847, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 6.24384491647816e-05, | |
| "loss": 2.4106, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 5.502181361487904e-05, | |
| "loss": 2.3956, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 4.7974089334362206e-05, | |
| "loss": 2.3839, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 4.132265087110915e-05, | |
| "loss": 2.3884, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 3.5093333532153445e-05, | |
| "loss": 2.396, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 2.9310333034999828e-05, | |
| "loss": 2.3785, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 2.3996111527384437e-05, | |
| "loss": 2.4004, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 1.91713103405093e-05, | |
| "loss": 2.3918, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 1.4854669814637276e-05, | |
| "loss": 2.3066, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 1.1062956508453685e-05, | |
| "loss": 2.3492, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 7.810898074930194e-06, | |
| "loss": 2.3618, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 5.11112605663977e-06, | |
| "loss": 2.3854, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 2.974126822718409e-06, | |
| "loss": 2.3978, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 1.408200838042095e-06, | |
| "loss": 2.3669, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 4.194304228229806e-07, | |
| "loss": 2.3758, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 1.1656127853787445e-08, | |
| "loss": 2.4071, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 1.8646181716164831e-07, | |
| "loss": 2.3569, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 9.431685160135927e-07, | |
| "loss": 2.3831, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 2.2788370481687635e-06, | |
| "loss": 2.3951, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 4.188279452133741e-06, | |
| "loss": 2.4046, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 6.664079132078764e-06, | |
| "loss": 2.3911, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 9.696619665144767e-06, | |
| "loss": 2.3894, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 1.3274122153249145e-05, | |
| "loss": 2.4069, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 1.7382690974308465e-05, | |
| "loss": 2.4119, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 2.2006367755176625e-05, | |
| "loss": 2.3552, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 2.4395244121551514, | |
| "eval_runtime": 11.0583, | |
| "eval_samples_per_second": 905.292, | |
| "eval_steps_per_second": 7.144, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 2.712719335665126e-05, | |
| "loss": 2.3469, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 3.272527762979563e-05, | |
| "loss": 2.3646, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 3.877887667262582e-05, | |
| "loss": 2.3076, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 4.526447728708897e-05, | |
| "loss": 2.3542, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 5.2156888308281784e-05, | |
| "loss": 2.3176, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 5.9429338451178436e-05, | |
| "loss": 2.3324, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 6.705358029480876e-05, | |
| "loss": 2.3942, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 7.499999999999976e-05, | |
| "loss": 2.3256, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 8.323773233451096e-05, | |
| "loss": 2.3241, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 9.173478055879573e-05, | |
| "loss": 2.3697, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 0.00010045814070672496, | |
| "loss": 2.3174, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 0.00010937392977854936, | |
| "loss": 2.3787, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 0.00011844751734817587, | |
| "loss": 2.3298, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 0.00012764366007357364, | |
| "loss": 2.2992, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 0.00013692663858785116, | |
| "loss": 2.3446, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 0.00014626039623928907, | |
| "loss": 2.3374, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 0.00015560867914144898, | |
| "loss": 2.345, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 0.00016493517698937217, | |
| "loss": 2.3703, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 0.0001742036640949208, | |
| "loss": 2.3734, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 0.00018337814009344703, | |
| "loss": 2.3125, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 0.00019242296977527412, | |
| "loss": 2.3347, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 0.00020130302149884988, | |
| "loss": 2.406, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 0.00020998380364796131, | |
| "loss": 2.3703, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 0.0002184315986029747, | |
| "loss": 2.3179, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 0.0002266135937057527, | |
| "loss": 2.3096, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 0.00023449800870954326, | |
| "loss": 2.372, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 0.00024205421921880715, | |
| "loss": 2.3321, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 0.00024925287563952903, | |
| "loss": 2.3247, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 0.0002560660171779819, | |
| "loss": 2.3729, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 0.00026246718044516, | |
| "loss": 2.3266, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 0.0002684315022450353, | |
| "loss": 2.3244, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 0.00027393581614739896, | |
| "loss": 2.3222, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 0.0002789587424701784, | |
| "loss": 2.3556, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 0.00028348077132172016, | |
| "loss": 2.3062, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 0.0002874843383804963, | |
| "loss": 2.3373, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 0.0002909538931178862, | |
| "loss": 2.3501, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 0.0002938759591990482, | |
| "loss": 2.3569, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 0.00029623918682727355, | |
| "loss": 2.3147, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 0.0002980343968285081, | |
| "loss": 2.2977, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 0.0002992546163048102, | |
| "loss": 2.3241, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 0.0002998951057182598, | |
| "loss": 2.3418, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 0.0002999533773001224, | |
| "loss": 2.3403, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 0.0002994292047137618, | |
| "loss": 2.306, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 0.00029832462393376933, | |
| "loss": 2.2761, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 0.0002966439253378957, | |
| "loss": 2.3357, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 0.0002943936370425018, | |
| "loss": 2.3251, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 0.0002915824995462553, | |
| "loss": 2.3676, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 0.00028822143178056103, | |
| "loss": 2.3547, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 0.00028432348869859505, | |
| "loss": 2.3155, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 0.00027990381056766585, | |
| "loss": 2.3275, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 0.00027497956416186735, | |
| "loss": 2.3019, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 0.0002695698760834384, | |
| "loss": 2.3236, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 0.00026369575847181784, | |
| "loss": 2.2911, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 0.000257380027388958, | |
| "loss": 2.3297, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 0.0002506472141978957, | |
| "loss": 2.3119, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 0.0002435234702788101, | |
| "loss": 2.3075, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 0.00023603646545265687, | |
| "loss": 2.3113, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 0.00022821528050692507, | |
| "loss": 2.3155, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 0.00022009029424095958, | |
| "loss": 2.315, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 0.00021169306546959193, | |
| "loss": 2.281, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 0.00020305621044338731, | |
| "loss": 2.2756, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 0.00019421327616163563, | |
| "loss": 2.2932, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 0.0001851986100701572, | |
| "loss": 2.2784, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 0.00017604722665003937, | |
| "loss": 2.3281, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 0.00016679467141549642, | |
| "loss": 2.3306, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 0.00015747688284910473, | |
| "loss": 2.311, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 0.0001481300528106878, | |
| "loss": 2.3097, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 0.00013879048596203628, | |
| "loss": 2.3104, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 0.00012949445875348934, | |
| "loss": 2.2894, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 0.00012027807852009062, | |
| "loss": 2.3139, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.00011117714323462205, | |
| "loss": 2.3084, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 2.3810958862304688, | |
| "eval_runtime": 11.048, | |
| "eval_samples_per_second": 906.136, | |
| "eval_steps_per_second": 7.151, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 0.00010222700246224737, | |
| "loss": 2.2791, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 9.346242005684964e-05, | |
| "loss": 2.2723, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 8.491743913236614e-05, | |
| "loss": 2.2202, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 7.662524983360638e-05, | |
| "loss": 2.2527, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 6.861806042013623e-05, | |
| "loss": 2.2603, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 6.092697216397482e-05, | |
| "loss": 2.2643, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 5.3581858547019076e-05, | |
| "loss": 2.3002, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 4.661124922740784e-05, | |
| "loss": 2.2654, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 4.004221922552624e-05, | |
| "loss": 2.2413, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 3.3900283760066006e-05, | |
| "loss": 2.2112, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 2.8209299142621573e-05, | |
| "loss": 2.2311, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 2.2991370115757362e-05, | |
| "loss": 2.258, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 1.8266763994465914e-05, | |
| "loss": 2.2304, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 1.4053831944502642e-05, | |
| "loss": 2.2356, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 1.0368937703369245e-05, | |
| "loss": 2.2375, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 7.226394020801679e-06, | |
| "loss": 2.2039, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 4.6384070656383054e-06, | |
| "loss": 2.232, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 2.6150290150067422e-06, | |
| "loss": 2.2633, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 1.1641190099741572e-06, | |
| "loss": 2.2459, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 2.913126293202395e-07, | |
| "loss": 2.2498, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 0.0, | |
| "loss": 2.228, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 2.913126293202228e-07, | |
| "loss": 2.2358, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 1.1641190099741237e-06, | |
| "loss": 2.2618, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 2.6150290150066923e-06, | |
| "loss": 2.2322, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 4.6384070656382385e-06, | |
| "loss": 2.2604, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 7.2263940208015954e-06, | |
| "loss": 2.2499, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 1.0368937703369145e-05, | |
| "loss": 2.2434, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 1.4053831944502525e-05, | |
| "loss": 2.2428, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 1.82667639944658e-05, | |
| "loss": 2.2561, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 2.299137011575723e-05, | |
| "loss": 2.2548, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 2.820929914262142e-05, | |
| "loss": 2.2404, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 3.3900283760065837e-05, | |
| "loss": 2.2243, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 4.004221922552608e-05, | |
| "loss": 2.2692, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 4.6611249227407644e-05, | |
| "loss": 2.2215, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 5.358185854701887e-05, | |
| "loss": 2.2461, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 6.0926972163974606e-05, | |
| "loss": 2.2281, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 6.861806042013602e-05, | |
| "loss": 2.2252, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 7.662524983360616e-05, | |
| "loss": 2.2318, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 8.491743913236591e-05, | |
| "loss": 2.2259, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 9.34624200568494e-05, | |
| "loss": 2.195, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 0.00010222700246224714, | |
| "loss": 2.297, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 0.00011117714323462178, | |
| "loss": 2.245, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 0.00012027807852009036, | |
| "loss": 2.2401, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 0.0001294944587534891, | |
| "loss": 2.2179, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 0.00013879048596203604, | |
| "loss": 2.2755, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 0.00014813005281068752, | |
| "loss": 2.2392, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 0.00015747688284910446, | |
| "loss": 2.2611, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 0.00016679467141549617, | |
| "loss": 2.2239, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 0.0001760472266500391, | |
| "loss": 2.2209, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 0.00018519861007015696, | |
| "loss": 2.2581, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 0.00019421327616163538, | |
| "loss": 2.2747, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 0.0002030562104433871, | |
| "loss": 2.2458, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 0.00021169306546959168, | |
| "loss": 2.2509, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 0.00022009029424095936, | |
| "loss": 2.2503, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 0.00022821528050692485, | |
| "loss": 2.2346, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 0.00023603646545265668, | |
| "loss": 2.2528, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 0.00024352347027880992, | |
| "loss": 2.2423, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 0.0002506472141978955, | |
| "loss": 2.2623, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 0.0002573800273889578, | |
| "loss": 2.2391, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 0.0002636957584718177, | |
| "loss": 2.252, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 0.0002695698760834382, | |
| "loss": 2.2544, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 0.0002749795641618672, | |
| "loss": 2.2513, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 0.00027990381056766574, | |
| "loss": 2.2127, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 0.0002843234886985949, | |
| "loss": 2.2516, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 0.000288221431780561, | |
| "loss": 2.2518, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 0.0002915824995462552, | |
| "loss": 2.2243, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 0.00029439363704250176, | |
| "loss": 2.2356, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 0.00029664392533789563, | |
| "loss": 2.2513, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 0.0002983246239337692, | |
| "loss": 2.2249, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 0.0002994292047137618, | |
| "loss": 2.2587, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 2.3698606491088867, | |
| "eval_runtime": 11.0095, | |
| "eval_samples_per_second": 909.309, | |
| "eval_steps_per_second": 7.176, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 0.0002999533773001224, | |
| "loss": 2.2167, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 0.0002998951057182598, | |
| "loss": 2.1821, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 0.0002992546163048102, | |
| "loss": 2.2571, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 0.0002980343968285081, | |
| "loss": 2.213, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 0.0002962391868272736, | |
| "loss": 2.1956, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 0.0002938759591990481, | |
| "loss": 2.2039, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 0.0002909538931178863, | |
| "loss": 2.2088, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 0.00028748433838049643, | |
| "loss": 2.1783, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 0.0002834807713217205, | |
| "loss": 2.2402, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 0.0002789587424701785, | |
| "loss": 2.1845, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 0.0002739358161473994, | |
| "loss": 2.2124, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 0.0002684315022450354, | |
| "loss": 2.1913, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 0.00026246718044516056, | |
| "loss": 2.2663, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 0.00025606601717798207, | |
| "loss": 2.2365, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 0.0002492528756395288, | |
| "loss": 2.2329, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 0.00024205421921880737, | |
| "loss": 2.1998, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 0.00023449800870954305, | |
| "loss": 2.2371, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 0.00022661359370575293, | |
| "loss": 2.2358, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 0.00021843159860297448, | |
| "loss": 2.1672, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 0.00020998380364796156, | |
| "loss": 2.1938, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 0.00020130302149885012, | |
| "loss": 2.1829, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 0.00019242296977527433, | |
| "loss": 2.2059, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 0.0001833781400934473, | |
| "loss": 2.2004, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 0.00017420366409492155, | |
| "loss": 2.2182, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 0.00016493517698937244, | |
| "loss": 2.2017, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 0.0001556086791414492, | |
| "loss": 2.173, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 0.00014626039623928932, | |
| "loss": 2.1953, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 0.0001369266385878509, | |
| "loss": 2.2158, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 0.00012764366007357388, | |
| "loss": 2.1664, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 0.0001184475173481756, | |
| "loss": 2.1875, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 0.0001093739297785496, | |
| "loss": 2.2206, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 0.0001004581407067247, | |
| "loss": 2.2095, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 9.173478055879596e-05, | |
| "loss": 2.218, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 8.323773233451119e-05, | |
| "loss": 2.1932, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 7.500000000000044e-05, | |
| "loss": 2.2209, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 6.705358029480897e-05, | |
| "loss": 2.1806, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 5.942933845117864e-05, | |
| "loss": 2.2136, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 5.215688830828199e-05, | |
| "loss": 2.2195, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 4.5264477287089516e-05, | |
| "loss": 2.1989, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 3.877887667262599e-05, | |
| "loss": 2.1715, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 3.272527762979546e-05, | |
| "loss": 2.1788, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 2.7127193356651412e-05, | |
| "loss": 2.1751, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 2.200636775517649e-05, | |
| "loss": 2.2413, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 1.7382690974308584e-05, | |
| "loss": 2.1537, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 1.3274122153249028e-05, | |
| "loss": 2.2157, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 9.696619665145034e-06, | |
| "loss": 2.2003, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 6.664079132078831e-06, | |
| "loss": 2.1929, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 4.188279452133875e-06, | |
| "loss": 2.2119, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 2.2788370481688135e-06, | |
| "loss": 2.1549, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 9.431685160136759e-07, | |
| "loss": 2.1555, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 1.8646181716164831e-07, | |
| "loss": 2.1977, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 1.1656127853787445e-08, | |
| "loss": 2.1932, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 4.194304228229639e-07, | |
| "loss": 2.1683, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 1.408200838042145e-06, | |
| "loss": 2.1984, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 2.974126822718409e-06, | |
| "loss": 2.1759, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 5.11112605663977e-06, | |
| "loss": 2.1944, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 7.810898074930111e-06, | |
| "loss": 2.1653, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 1.1062956508453785e-05, | |
| "loss": 2.1588, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 1.485466981463706e-05, | |
| "loss": 2.2177, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 1.9171310340509167e-05, | |
| "loss": 2.1766, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 2.399611152738402e-05, | |
| "loss": 2.2032, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 2.9310333034999828e-05, | |
| "loss": 2.1885, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 3.509333353215311e-05, | |
| "loss": 2.1923, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 4.1322650871108964e-05, | |
| "loss": 2.2324, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 4.797408933436242e-05, | |
| "loss": 2.2365, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 5.5021813614879056e-05, | |
| "loss": 2.2018, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 6.243844916478162e-05, | |
| "loss": 2.2164, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 7.019518852269926e-05, | |
| "loss": 2.1815, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 7.826190320680255e-05, | |
| "loss": 2.1761, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 8.660726073889499e-05, | |
| "loss": 2.1938, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 2.346975326538086, | |
| "eval_runtime": 11.0292, | |
| "eval_samples_per_second": 907.68, | |
| "eval_steps_per_second": 7.163, | |
| "step": 4928 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 9.519884634504074e-05, | |
| "loss": 2.157, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 0.00010400328885999944, | |
| "loss": 2.1342, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 0.00011298639034645615, | |
| "loss": 2.169, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 0.00012211325892556206, | |
| "loss": 2.1329, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 0.0001313484443028771, | |
| "loss": 2.1549, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 0.00014065607546327163, | |
| "loss": 2.1629, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 0.00014999999999999955, | |
| "loss": 2.1504, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 0.000159343924536728, | |
| "loss": 2.1788, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 0.0001686515556971225, | |
| "loss": 2.1536, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 0.00017788674107443756, | |
| "loss": 2.1729, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 0.00018701360965354348, | |
| "loss": 2.1063, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 0.0001959967111400002, | |
| "loss": 2.1522, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 0.0002048011536549589, | |
| "loss": 2.1394, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 0.00021339273926110466, | |
| "loss": 2.1487, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 0.00022173809679319713, | |
| "loss": 2.1357, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 0.00022980481147730043, | |
| "loss": 2.1608, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 0.00023756155083521808, | |
| "loss": 2.1628, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 0.00024497818638512063, | |
| "loss": 2.1579, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 0.0002520259106656373, | |
| "loss": 2.1437, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 0.0002586773491288908, | |
| "loss": 2.173, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 0.00026490666646784665, | |
| "loss": 2.1681, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 0.00027068966696499995, | |
| "loss": 2.168, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 0.00027600388847261577, | |
| "loss": 2.1713, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 0.00028082868965949065, | |
| "loss": 2.1775, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 0.00028514533018536277, | |
| "loss": 2.1691, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 0.00028893704349154605, | |
| "loss": 2.201, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 0.0002921891019250697, | |
| "loss": 2.1381, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 0.0002948888739433601, | |
| "loss": 2.1945, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 0.0002970258731772815, | |
| "loss": 2.212, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 0.00029859179916195776, | |
| "loss": 2.1935, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 0.00029958056957717696, | |
| "loss": 2.1762, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 0.0002999883438721462, | |
| "loss": 2.1784, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 0.00029981353818283837, | |
| "loss": 2.156, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 0.00029905683148398634, | |
| "loss": 2.1444, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 0.0002977211629518312, | |
| "loss": 2.1508, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 0.00029581172054786616, | |
| "loss": 2.1578, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 0.00029333592086792123, | |
| "loss": 2.1284, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 0.0002903033803348551, | |
| "loss": 2.1465, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 0.0002867258778467511, | |
| "loss": 2.1643, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 0.0002826173090256916, | |
| "loss": 2.2208, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 0.00027799363224482364, | |
| "loss": 2.1873, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 0.00027287280664334875, | |
| "loss": 2.1881, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 0.00026727472237020506, | |
| "loss": 2.1576, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 0.0002612211233273738, | |
| "loss": 2.1402, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 0.0002547355227129107, | |
| "loss": 2.1339, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 0.0002478431116917187, | |
| "loss": 2.1533, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 0.00024057066154882119, | |
| "loss": 2.185, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 0.00023294641970519127, | |
| "loss": 2.1806, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 0.00022500000000000032, | |
| "loss": 2.134, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 0.00021676226766548907, | |
| "loss": 2.1749, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 0.00020826521944120434, | |
| "loss": 2.1698, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 0.0001995418592932751, | |
| "loss": 2.1687, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 0.0001906260702214507, | |
| "loss": 2.1888, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 0.00018155248265182522, | |
| "loss": 2.145, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 0.0001723563399264259, | |
| "loss": 2.1814, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 0.0001630733614121494, | |
| "loss": 2.1604, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 0.00015373960376071152, | |
| "loss": 2.1584, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 0.00014439132085855056, | |
| "loss": 2.146, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 0.0001350648230106279, | |
| "loss": 2.1995, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 0.0001257963359050793, | |
| "loss": 2.1522, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 0.00011662185990655302, | |
| "loss": 2.1457, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "learning_rate": 0.00010757703022472596, | |
| "loss": 2.1699, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 9.869697850114966e-05, | |
| "loss": 2.1149, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 9.001619635203874e-05, | |
| "loss": 2.1681, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 8.156840139702631e-05, | |
| "loss": 2.1431, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 7.33864062942469e-05, | |
| "loss": 2.1279, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 6.550199129045721e-05, | |
| "loss": 2.1389, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 5.794578078119331e-05, | |
| "loss": 2.1316, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 5.0747124360471404e-05, | |
| "loss": 2.0931, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 4.393398282201809e-05, | |
| "loss": 2.1771, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 3.7532819554840014e-05, | |
| "loss": 2.1491, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 2.3224949836730957, | |
| "eval_runtime": 11.0019, | |
| "eval_samples_per_second": 909.934, | |
| "eval_steps_per_second": 7.181, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "learning_rate": 3.156849775496477e-05, | |
| "loss": 2.0926, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 8.03, | |
| "learning_rate": 2.606418385260078e-05, | |
| "loss": 2.077, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 2.104125752982142e-05, | |
| "loss": 2.0516, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "learning_rate": 1.6519228678279616e-05, | |
| "loss": 2.0906, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 1.2515661619503886e-05, | |
| "loss": 2.095, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 9.046106882113602e-06, | |
| "loss": 2.0801, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 6.124040800951968e-06, | |
| "loss": 2.0714, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 8.11, | |
| "learning_rate": 3.760813172726557e-06, | |
| "loss": 2.1109, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 1.9656031714918865e-06, | |
| "loss": 2.12, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 7.453836951898218e-07, | |
| "loss": 2.0788, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 1.048942817402254e-07, | |
| "loss": 2.0993, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 4.662269987756317e-08, | |
| "loss": 2.0666, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 5.707952862381681e-07, | |
| "loss": 2.1035, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 1.675376066230738e-06, | |
| "loss": 2.0755, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "learning_rate": 3.356074662104369e-06, | |
| "loss": 2.0395, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 5.606362957497995e-06, | |
| "loss": 2.049, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "learning_rate": 8.417500453745013e-06, | |
| "loss": 2.1038, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "learning_rate": 1.1778568219438656e-05, | |
| "loss": 2.0996, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 1.5676511301404658e-05, | |
| "loss": 2.055, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "learning_rate": 2.009618943233406e-05, | |
| "loss": 2.0607, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 2.5020435838132556e-05, | |
| "loss": 2.0802, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 3.043012391656157e-05, | |
| "loss": 2.0957, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 3.630424152818206e-05, | |
| "loss": 2.0716, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 4.2619972611041564e-05, | |
| "loss": 2.1055, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "learning_rate": 4.935278580210464e-05, | |
| "loss": 2.0589, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 5.6476529721190224e-05, | |
| "loss": 2.0697, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 6.396353454734256e-05, | |
| "loss": 2.0682, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 7.178471949307574e-05, | |
| "loss": 2.0672, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 7.990970575904032e-05, | |
| "loss": 2.0863, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 8.830693453040795e-05, | |
| "loss": 2.089, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 9.694378955661256e-05, | |
| "loss": 2.0583, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 0.00010578672383836425, | |
| "loss": 2.0616, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 0.00011480138992984269, | |
| "loss": 2.0936, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 0.00012395277334996052, | |
| "loss": 2.0863, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "learning_rate": 0.00013320532858450296, | |
| "loss": 2.1113, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "learning_rate": 0.0001425231171508957, | |
| "loss": 2.04, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "learning_rate": 0.0001518699471893126, | |
| "loss": 2.0697, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 0.00016120951403796307, | |
| "loss": 2.0468, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "learning_rate": 0.0001705055412465116, | |
| "loss": 2.0831, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 0.00017972192147990926, | |
| "loss": 2.1102, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 0.00018882285676537786, | |
| "loss": 2.0944, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "learning_rate": 0.0001977729975377525, | |
| "loss": 2.108, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 0.00020653757994315073, | |
| "loss": 2.0661, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "learning_rate": 0.00021508256086763376, | |
| "loss": 2.078, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 0.00022337475016639353, | |
| "loss": 2.1152, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 0.0002313819395798632, | |
| "loss": 2.1097, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 0.00023907302783602552, | |
| "loss": 2.0812, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "learning_rate": 0.0002464181414529804, | |
| "loss": 2.1115, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 0.00025338875077259167, | |
| "loss": 2.0616, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 8.71, | |
| "learning_rate": 0.0002599577807744744, | |
| "loss": 2.1264, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 0.0002655054352858896, | |
| "loss": 2.0978, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 0.0002712425392983004, | |
| "loss": 2.1099, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 0.00027650871687193255, | |
| "loss": 2.0902, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "learning_rate": 0.0002812835132863128, | |
| "loss": 2.102, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "learning_rate": 0.0002855483824281353, | |
| "loss": 2.082, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "learning_rate": 0.0002892867588274879, | |
| "loss": 2.1016, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 0.00029248412200092686, | |
| "loss": 2.0823, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "learning_rate": 0.0002951280528514794, | |
| "loss": 2.1198, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "learning_rate": 0.00029720828190650815, | |
| "loss": 2.0912, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 0.00029871672920607153, | |
| "loss": 2.0547, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 0.00029964753568684926, | |
| "loss": 2.1026, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 0.0002999970859397307, | |
| "loss": 2.106, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 0.00029976402225267247, | |
| "loss": 2.1424, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "learning_rate": 0.0002989492498842809, | |
| "loss": 2.1021, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 0.00029755593354763527, | |
| "loss": 2.1026, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 0.00029558948511800866, | |
| "loss": 2.0828, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 0.00029305754261223406, | |
| "loss": 2.1417, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "learning_rate": 0.00028996994052135996, | |
| "loss": 2.0579, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "learning_rate": 0.00028633867161183155, | |
| "loss": 2.1051, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "learning_rate": 0.00028217784034356626, | |
| "loss": 2.0623, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 2.327604293823242, | |
| "eval_runtime": 11.0081, | |
| "eval_samples_per_second": 909.418, | |
| "eval_steps_per_second": 7.177, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 0.0002775036080858562, | |
| "loss": 2.091, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 0.0002723341303438894, | |
| "loss": 2.0147, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 0.00026668948623970694, | |
| "loss": 2.0584, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 9.05, | |
| "learning_rate": 0.0002605916005215189, | |
| "loss": 2.0441, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "learning_rate": 0.00025406415840428147, | |
| "loss": 2.0037, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 0.00024713251357234075, | |
| "loss": 2.0342, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "learning_rate": 0.00023982358970145017, | |
| "loss": 2.0517, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "learning_rate": 0.00023216577588268072, | |
| "loss": 2.0496, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 0.00022418881635441105, | |
| "loss": 2.0683, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 0.00021592369497069755, | |
| "loss": 2.0416, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 9.15, | |
| "learning_rate": 0.00020740251485476324, | |
| "loss": 2.0329, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 0.00019865837370507073, | |
| "loss": 2.0409, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 0.00018972523523827966, | |
| "loss": 2.0436, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "learning_rate": 0.00018063779726845152, | |
| "loss": 2.0631, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 0.0001714313569349074, | |
| "loss": 2.0563, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 0.0001621416736021808, | |
| "loss": 2.0395, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 0.00015280482996463552, | |
| "loss": 2.0422, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 0.0001434570918951997, | |
| "loss": 2.038, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 9.26, | |
| "learning_rate": 0.00013413476758260932, | |
| "loss": 2.0441, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "learning_rate": 0.00012487406650428943, | |
| "loss": 2.036, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 0.00011571095878264738, | |
| "loss": 2.074, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 0.00010668103547105523, | |
| "loss": 2.063, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 9.781937031221653e-05, | |
| "loss": 2.0583, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "learning_rate": 8.916038350582922e-05, | |
| "loss": 2.0469, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 8.073770801474436e-05, | |
| "loss": 2.0674, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 7.25840589288743e-05, | |
| "loss": 2.0586, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 9.38, | |
| "learning_rate": 6.473110639426635e-05, | |
| "loss": 2.0402, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 5.72093526009319e-05, | |
| "loss": 2.0753, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 5.0048013307199424e-05, | |
| "loss": 1.9955, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "learning_rate": 4.3274904360790423e-05, | |
| "loss": 2.0728, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 3.691633365738289e-05, | |
| "loss": 2.0395, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "learning_rate": 3.09969989563152e-05, | |
| "loss": 2.0321, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 2.5539891950326658e-05, | |
| "loss": 2.0234, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "learning_rate": 2.056620896195836e-05, | |
| "loss": 2.0023, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 9.49, | |
| "learning_rate": 1.6095268613458483e-05, | |
| "loss": 2.0256, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 1.2144436790007034e-05, | |
| "loss": 2.005, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 8.729059187690579e-06, | |
| "loss": 2.0098, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 5.862401708235109e-06, | |
| "loss": 2.0488, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "learning_rate": 3.555598932010012e-06, | |
| "loss": 2.0012, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "learning_rate": 1.8176108694427927e-06, | |
| "loss": 2.0404, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "learning_rate": 6.551881588300112e-07, | |
| "loss": 2.0141, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 7.284584572083696e-08, | |
| "loss": 2.0593, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 7.284584572082031e-08, | |
| "loss": 2.0341, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "learning_rate": 6.551881588299612e-07, | |
| "loss": 2.037, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 1.817610869442726e-06, | |
| "loss": 2.0146, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "learning_rate": 3.5555989320099122e-06, | |
| "loss": 2.0295, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "learning_rate": 5.862401708234976e-06, | |
| "loss": 2.0359, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 8.729059187690413e-06, | |
| "loss": 2.0138, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "learning_rate": 1.214443679000685e-05, | |
| "loss": 2.028, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 9.7, | |
| "learning_rate": 1.6095268613458266e-05, | |
| "loss": 2.0097, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "learning_rate": 2.0566208961958125e-05, | |
| "loss": 2.0522, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 9.73, | |
| "learning_rate": 2.553989195032639e-05, | |
| "loss": 2.0516, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 9.74, | |
| "learning_rate": 3.0996998956314905e-05, | |
| "loss": 2.022, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 3.691633365738259e-05, | |
| "loss": 2.0524, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 4.327490436079009e-05, | |
| "loss": 2.0464, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "learning_rate": 5.004801330719908e-05, | |
| "loss": 2.0572, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "learning_rate": 5.720935260093152e-05, | |
| "loss": 2.0273, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 6.473110639426594e-05, | |
| "loss": 2.03, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "learning_rate": 7.258405892887389e-05, | |
| "loss": 2.027, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "learning_rate": 8.073770801474394e-05, | |
| "loss": 2.0345, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 8.916038350582879e-05, | |
| "loss": 2.0404, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 9.781937031221607e-05, | |
| "loss": 2.0137, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "learning_rate": 0.00010668103547105477, | |
| "loss": 2.0181, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 0.0001157109587826469, | |
| "loss": 2.0438, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 0.00012487406650428897, | |
| "loss": 2.0424, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 9.93, | |
| "learning_rate": 0.00013413476758260883, | |
| "loss": 2.0435, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "learning_rate": 0.0001434570918951992, | |
| "loss": 2.0117, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "learning_rate": 0.00015280482996463506, | |
| "loss": 2.0098, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "learning_rate": 0.00016214167360218033, | |
| "loss": 2.0263, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "learning_rate": 0.00017143135693490692, | |
| "loss": 2.0697, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.00018063779726845106, | |
| "loss": 2.0672, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 2.330075263977051, | |
| "eval_runtime": 11.0756, | |
| "eval_samples_per_second": 903.883, | |
| "eval_steps_per_second": 7.133, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 10.01, | |
| "learning_rate": 0.0001897252352382792, | |
| "loss": 2.0053, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "learning_rate": 0.0001986583737050703, | |
| "loss": 2.0154, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "learning_rate": 0.0002074025148547628, | |
| "loss": 1.998, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 10.06, | |
| "learning_rate": 0.00021592369497069712, | |
| "loss": 2.0056, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 10.07, | |
| "learning_rate": 0.00022418881635441067, | |
| "loss": 1.9888, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "learning_rate": 0.00023216577588268034, | |
| "loss": 2.0266, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "learning_rate": 0.00023982358970144976, | |
| "loss": 1.9969, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "learning_rate": 0.0002471325135723404, | |
| "loss": 2.0036, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 10.13, | |
| "learning_rate": 0.00025406415840428115, | |
| "loss": 2.0173, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "learning_rate": 0.0002605916005215186, | |
| "loss": 2.0406, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "learning_rate": 0.0002666894862397066, | |
| "loss": 2.0007, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 10.17, | |
| "learning_rate": 0.0002723341303438891, | |
| "loss": 2.0359, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 10.18, | |
| "learning_rate": 0.000277503608085856, | |
| "loss": 2.0217, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 10.2, | |
| "learning_rate": 0.00028217784034356605, | |
| "loss": 2.0029, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 10.21, | |
| "learning_rate": 0.0002863386716118314, | |
| "loss": 2.0112, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "learning_rate": 0.0002899699405213598, | |
| "loss": 2.0221, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 10.24, | |
| "learning_rate": 0.00029305754261223395, | |
| "loss": 2.0064, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 10.26, | |
| "learning_rate": 0.00029558948511800855, | |
| "loss": 2.019, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 10.27, | |
| "learning_rate": 0.00029755593354763516, | |
| "loss": 2.0286, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "learning_rate": 0.00029894924988428087, | |
| "loss": 2.0046, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 10.3, | |
| "learning_rate": 0.00029976402225267247, | |
| "loss": 2.0264, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "learning_rate": 0.0002999970859397307, | |
| "loss": 2.0141, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "learning_rate": 0.00029964753568684926, | |
| "loss": 2.0015, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 10.34, | |
| "learning_rate": 0.0002987167292060716, | |
| "loss": 2.0057, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 10.36, | |
| "learning_rate": 0.00029720828190650826, | |
| "loss": 2.0256, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 10.37, | |
| "learning_rate": 0.00029512805285147956, | |
| "loss": 2.0477, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "learning_rate": 0.00029248412200092697, | |
| "loss": 2.0311, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "learning_rate": 0.00028928675882748813, | |
| "loss": 1.9869, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "learning_rate": 0.00028554838242813554, | |
| "loss": 2.0306, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 10.43, | |
| "learning_rate": 0.00028128351328631304, | |
| "loss": 2.0188, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 10.44, | |
| "learning_rate": 0.0002765087168719328, | |
| "loss": 2.0359, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 10.45, | |
| "learning_rate": 0.00027124253929830067, | |
| "loss": 2.008, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "learning_rate": 0.00026550543528588993, | |
| "loss": 1.9957, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 10.48, | |
| "learning_rate": 0.000259319688712759, | |
| "loss": 2.0123, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 10.5, | |
| "learning_rate": 0.0002527093260602245, | |
| "loss": 2.0441, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "learning_rate": 0.00024570002308995167, | |
| "loss": 2.0213, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 10.53, | |
| "learning_rate": 0.00023831900511498102, | |
| "loss": 2.0308, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "learning_rate": 0.00023059494125202381, | |
| "loss": 2.0259, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 10.55, | |
| "learning_rate": 0.00022255783306578613, | |
| "loss": 2.0105, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 10.57, | |
| "learning_rate": 0.0002142388980378395, | |
| "loss": 2.0307, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 10.58, | |
| "learning_rate": 0.00020567044831266566, | |
| "loss": 2.0057, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 10.6, | |
| "learning_rate": 0.00019688576519184654, | |
| "loss": 1.9967, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 10.61, | |
| "learning_rate": 0.0001879189698638844, | |
| "loss": 2.0092, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "learning_rate": 0.00017880489087176112, | |
| "loss": 1.9987, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 10.64, | |
| "learning_rate": 0.00016957892883300732, | |
| "loss": 2.0128, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "learning_rate": 0.000160276918937754, | |
| "loss": 2.0124, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "learning_rate": 0.00015093499175880545, | |
| "loss": 2.0014, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 10.68, | |
| "learning_rate": 0.00014158943291442156, | |
| "loss": 2.0083, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "learning_rate": 0.0001322765421288613, | |
| "loss": 2.0226, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "learning_rate": 0.0001230324922381422, | |
| "loss": 2.0203, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 10.72, | |
| "learning_rate": 0.00011389318868865406, | |
| "loss": 2.0343, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 10.74, | |
| "learning_rate": 0.00010489413007435998, | |
| "loss": 2.0033, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "learning_rate": 9.60702702542747e-05, | |
| "loss": 2.0476, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "learning_rate": 8.745588258580053e-05, | |
| "loss": 1.9919, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 10.78, | |
| "learning_rate": 7.908442680122653e-05, | |
| "loss": 1.999, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "learning_rate": 7.098841904449448e-05, | |
| "loss": 2.0013, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 10.81, | |
| "learning_rate": 6.319930557302952e-05, | |
| "loss": 1.9996, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 10.82, | |
| "learning_rate": 5.57473406151682e-05, | |
| "loss": 1.9714, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "learning_rate": 4.8661468857651156e-05, | |
| "loss": 2.0055, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 10.85, | |
| "learning_rate": 4.196921301958112e-05, | |
| "loss": 2.0256, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "learning_rate": 3.569656694954841e-05, | |
| "loss": 2.0128, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "learning_rate": 2.9867894661145786e-05, | |
| "loss": 1.9936, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "learning_rate": 2.4505835699037535e-05, | |
| "loss": 1.9781, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 10.91, | |
| "learning_rate": 1.9631217203152903e-05, | |
| "loss": 2.0075, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "learning_rate": 1.5262973012573674e-05, | |
| "loss": 1.9988, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "learning_rate": 1.141807012330722e-05, | |
| "loss": 2.0045, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 10.95, | |
| "learning_rate": 8.111442785622413e-06, | |
| "loss": 1.9608, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 10.97, | |
| "learning_rate": 5.3559344969059505e-06, | |
| "loss": 1.9888, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "learning_rate": 3.1622481153527944e-06, | |
| "loss": 1.9767, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 1.5389042882661184e-06, | |
| "loss": 2.0293, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 2.318600654602051, | |
| "eval_runtime": 11.0328, | |
| "eval_samples_per_second": 907.383, | |
| "eval_steps_per_second": 7.16, | |
| "step": 7744 | |
| } | |
| ], | |
| "max_steps": 21120, | |
| "num_train_epochs": 30, | |
| "total_flos": 7.58623645990912e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |