| { | |
| "best_metric": 0.6366316676139832, | |
| "best_model_checkpoint": "./exper3_mesum5/checkpoint-2800", | |
| "epoch": 8.0, | |
| "global_step": 3440, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0001994186046511628, | |
| "loss": 4.954, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0001988372093023256, | |
| "loss": 4.8641, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019825581395348837, | |
| "loss": 4.6647, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019767441860465116, | |
| "loss": 4.5255, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00019709302325581396, | |
| "loss": 4.4509, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00019651162790697676, | |
| "loss": 4.253, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00019593023255813952, | |
| "loss": 4.2104, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019534883720930232, | |
| "loss": 4.0778, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00019476744186046511, | |
| "loss": 3.9537, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001941860465116279, | |
| "loss": 3.895, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.19349112426035503, | |
| "eval_loss": 3.8276302814483643, | |
| "eval_runtime": 19.0606, | |
| "eval_samples_per_second": 88.665, | |
| "eval_steps_per_second": 11.122, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001936046511627907, | |
| "loss": 3.825, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001930232558139535, | |
| "loss": 3.6952, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0001924418604651163, | |
| "loss": 3.4767, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0001918604651162791, | |
| "loss": 3.5417, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001912790697674419, | |
| "loss": 3.4797, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019069767441860466, | |
| "loss": 3.3749, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00019011627906976745, | |
| "loss": 3.4024, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00018953488372093025, | |
| "loss": 3.2742, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00018895348837209304, | |
| "loss": 3.3841, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00018837209302325584, | |
| "loss": 3.1174, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.3106508875739645, | |
| "eval_loss": 3.1216797828674316, | |
| "eval_runtime": 21.0487, | |
| "eval_samples_per_second": 80.29, | |
| "eval_steps_per_second": 10.072, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001877906976744186, | |
| "loss": 3.012, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001872093023255814, | |
| "loss": 3.0221, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0001866279069767442, | |
| "loss": 2.9971, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.000186046511627907, | |
| "loss": 3.0478, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00018546511627906976, | |
| "loss": 2.9971, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00018488372093023256, | |
| "loss": 2.7655, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00018430232558139535, | |
| "loss": 2.703, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00018372093023255815, | |
| "loss": 2.8271, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00018313953488372094, | |
| "loss": 2.6716, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001825581395348837, | |
| "loss": 2.6, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.42071005917159765, | |
| "eval_loss": 2.5399255752563477, | |
| "eval_runtime": 20.8035, | |
| "eval_samples_per_second": 81.236, | |
| "eval_steps_per_second": 10.191, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0001819767441860465, | |
| "loss": 2.5875, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001813953488372093, | |
| "loss": 2.6057, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00018081395348837212, | |
| "loss": 2.5459, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0001802325581395349, | |
| "loss": 2.4955, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0001796511627906977, | |
| "loss": 2.3718, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00017906976744186048, | |
| "loss": 2.3314, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00017848837209302328, | |
| "loss": 2.3855, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00017790697674418605, | |
| "loss": 2.313, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00017732558139534884, | |
| "loss": 2.1767, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00017674418604651164, | |
| "loss": 2.256, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.5159763313609468, | |
| "eval_loss": 2.176730155944824, | |
| "eval_runtime": 21.1447, | |
| "eval_samples_per_second": 79.925, | |
| "eval_steps_per_second": 10.026, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00017616279069767443, | |
| "loss": 2.2881, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00017558139534883723, | |
| "loss": 2.0996, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000175, | |
| "loss": 2.0865, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.0001744186046511628, | |
| "loss": 1.84, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0001738372093023256, | |
| "loss": 1.7042, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00017325581395348838, | |
| "loss": 1.6687, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00017267441860465118, | |
| "loss": 1.8394, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00017209302325581395, | |
| "loss": 1.6962, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00017151162790697674, | |
| "loss": 1.7329, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00017093023255813954, | |
| "loss": 1.5441, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_accuracy": 0.585207100591716, | |
| "eval_loss": 1.8085863590240479, | |
| "eval_runtime": 21.5882, | |
| "eval_samples_per_second": 78.284, | |
| "eval_steps_per_second": 9.82, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00017034883720930233, | |
| "loss": 1.6099, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.0001697674418604651, | |
| "loss": 1.5833, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.0001691860465116279, | |
| "loss": 1.5696, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00016860465116279072, | |
| "loss": 1.3757, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00016802325581395352, | |
| "loss": 1.4537, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00016744186046511629, | |
| "loss": 1.445, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00016686046511627908, | |
| "loss": 1.3975, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00016627906976744188, | |
| "loss": 1.4086, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.00016569767441860467, | |
| "loss": 1.3367, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00016511627906976747, | |
| "loss": 1.3834, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_accuracy": 0.6325443786982249, | |
| "eval_loss": 1.556496500968933, | |
| "eval_runtime": 21.486, | |
| "eval_samples_per_second": 78.656, | |
| "eval_steps_per_second": 9.867, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00016453488372093024, | |
| "loss": 1.2953, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00016395348837209303, | |
| "loss": 1.2843, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.00016337209302325583, | |
| "loss": 1.1906, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00016279069767441862, | |
| "loss": 1.3458, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.0001622093023255814, | |
| "loss": 1.1714, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00016162790697674419, | |
| "loss": 1.191, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00016104651162790698, | |
| "loss": 1.159, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00016046511627906978, | |
| "loss": 1.2594, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00015988372093023257, | |
| "loss": 1.1533, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00015930232558139534, | |
| "loss": 1.1995, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_accuracy": 0.6763313609467455, | |
| "eval_loss": 1.3339420557022095, | |
| "eval_runtime": 19.734, | |
| "eval_samples_per_second": 85.639, | |
| "eval_steps_per_second": 10.743, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00015872093023255814, | |
| "loss": 1.0989, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00015813953488372093, | |
| "loss": 1.2864, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00015755813953488373, | |
| "loss": 1.2124, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00015697674418604652, | |
| "loss": 1.1752, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.0001563953488372093, | |
| "loss": 1.2127, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.0001558139534883721, | |
| "loss": 1.1634, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.0001552325581395349, | |
| "loss": 1.0915, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00015465116279069768, | |
| "loss": 1.103, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00015406976744186047, | |
| "loss": 1.0951, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.00015348837209302327, | |
| "loss": 1.0845, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_accuracy": 0.6532544378698225, | |
| "eval_loss": 1.3299002647399902, | |
| "eval_runtime": 19.6334, | |
| "eval_samples_per_second": 86.078, | |
| "eval_steps_per_second": 10.798, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00015290697674418606, | |
| "loss": 1.03, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.00015232558139534886, | |
| "loss": 0.9288, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.00015174418604651163, | |
| "loss": 0.9963, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.00015116279069767442, | |
| "loss": 1.0363, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.00015058139534883722, | |
| "loss": 0.9671, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 0.8207, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.0001494186046511628, | |
| "loss": 0.6051, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 0.00014883720930232558, | |
| "loss": 0.6698, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 0.00014825581395348837, | |
| "loss": 0.6969, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 0.00014767441860465117, | |
| "loss": 0.6472, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_accuracy": 0.7218934911242604, | |
| "eval_loss": 1.0679467916488647, | |
| "eval_runtime": 19.5979, | |
| "eval_samples_per_second": 86.234, | |
| "eval_steps_per_second": 10.818, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 0.00014709302325581396, | |
| "loss": 0.6366, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 0.00014651162790697673, | |
| "loss": 0.6353, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 0.00014593023255813953, | |
| "loss": 0.4765, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 0.00014534883720930232, | |
| "loss": 0.453, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 0.00014476744186046512, | |
| "loss": 0.5234, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 0.00014418604651162791, | |
| "loss": 0.5019, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.0001436046511627907, | |
| "loss": 0.6719, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 0.0001430232558139535, | |
| "loss": 0.5294, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 0.0001424418604651163, | |
| "loss": 0.6135, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 0.0001418604651162791, | |
| "loss": 0.5948, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_accuracy": 0.7124260355029586, | |
| "eval_loss": 1.0286362171173096, | |
| "eval_runtime": 19.4904, | |
| "eval_samples_per_second": 86.709, | |
| "eval_steps_per_second": 10.877, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 0.00014127906976744186, | |
| "loss": 0.6138, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 0.00014069767441860466, | |
| "loss": 0.6543, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 0.00014011627906976746, | |
| "loss": 0.5534, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 0.00013953488372093025, | |
| "loss": 0.6408, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.00013895348837209302, | |
| "loss": 0.4687, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 0.00013837209302325582, | |
| "loss": 0.4635, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 0.0001377906976744186, | |
| "loss": 0.4466, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 0.0001372093023255814, | |
| "loss": 0.4255, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 0.0001366279069767442, | |
| "loss": 0.5848, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 0.00013604651162790697, | |
| "loss": 0.5565, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_accuracy": 0.7284023668639054, | |
| "eval_loss": 0.9595437049865723, | |
| "eval_runtime": 19.5222, | |
| "eval_samples_per_second": 86.568, | |
| "eval_steps_per_second": 10.859, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 0.00013546511627906977, | |
| "loss": 0.4625, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 0.00013488372093023256, | |
| "loss": 0.7103, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 0.00013430232558139536, | |
| "loss": 0.5923, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 0.00013372093023255815, | |
| "loss": 0.4913, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 0.00013313953488372092, | |
| "loss": 0.4915, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 0.00013255813953488372, | |
| "loss": 0.401, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.0001319767441860465, | |
| "loss": 0.4169, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.0001313953488372093, | |
| "loss": 0.52, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 0.0001308139534883721, | |
| "loss": 0.4018, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 0.0001302325581395349, | |
| "loss": 0.4879, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_accuracy": 0.7420118343195267, | |
| "eval_loss": 0.8915188312530518, | |
| "eval_runtime": 19.4697, | |
| "eval_samples_per_second": 86.801, | |
| "eval_steps_per_second": 10.889, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 0.0001296511627906977, | |
| "loss": 0.527, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 0.0001290697674418605, | |
| "loss": 0.4114, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 0.00012848837209302326, | |
| "loss": 0.5728, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.00012790697674418605, | |
| "loss": 0.347, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 0.00012732558139534885, | |
| "loss": 0.3652, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 0.00012674418604651164, | |
| "loss": 0.5574, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 0.00012616279069767444, | |
| "loss": 0.4363, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 0.0001255813953488372, | |
| "loss": 0.4769, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.000125, | |
| "loss": 0.5541, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 0.0001244186046511628, | |
| "loss": 0.2816, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "eval_accuracy": 0.7763313609467456, | |
| "eval_loss": 0.8158556818962097, | |
| "eval_runtime": 19.4244, | |
| "eval_samples_per_second": 87.004, | |
| "eval_steps_per_second": 10.914, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 0.0001238372093023256, | |
| "loss": 0.2724, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 0.00012325581395348836, | |
| "loss": 0.234, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 0.00012267441860465116, | |
| "loss": 0.2116, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 0.00012209302325581395, | |
| "loss": 0.2968, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 0.00012151162790697675, | |
| "loss": 0.2539, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 0.00012093023255813953, | |
| "loss": 0.2837, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 0.00012034883720930233, | |
| "loss": 0.2571, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 0.00011976744186046511, | |
| "loss": 0.2502, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 0.0001191860465116279, | |
| "loss": 0.2629, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 0.00011860465116279071, | |
| "loss": 0.2412, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "eval_accuracy": 0.7911242603550296, | |
| "eval_loss": 0.776643693447113, | |
| "eval_runtime": 19.7719, | |
| "eval_samples_per_second": 85.475, | |
| "eval_steps_per_second": 10.722, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 0.00011802325581395351, | |
| "loss": 0.2721, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 0.00011744186046511629, | |
| "loss": 0.1876, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 0.00011686046511627909, | |
| "loss": 0.2417, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 0.00011627906976744187, | |
| "loss": 0.2029, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 0.00011569767441860466, | |
| "loss": 0.1991, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 0.00011511627906976746, | |
| "loss": 0.2763, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 0.00011453488372093024, | |
| "loss": 0.1539, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 0.00011395348837209304, | |
| "loss": 0.2287, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 0.00011337209302325582, | |
| "loss": 0.2572, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 0.00011279069767441861, | |
| "loss": 0.2015, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "eval_accuracy": 0.7828402366863906, | |
| "eval_loss": 0.784956157207489, | |
| "eval_runtime": 19.4284, | |
| "eval_samples_per_second": 86.986, | |
| "eval_steps_per_second": 10.912, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 0.0001122093023255814, | |
| "loss": 0.2263, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 0.00011162790697674419, | |
| "loss": 0.2843, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 0.00011104651162790699, | |
| "loss": 0.2371, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 0.00011046511627906977, | |
| "loss": 0.1717, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 0.00010988372093023256, | |
| "loss": 0.311, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 0.00010930232558139534, | |
| "loss": 0.2567, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 0.00010872093023255814, | |
| "loss": 0.1739, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 0.00010813953488372092, | |
| "loss": 0.2185, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 0.00010755813953488372, | |
| "loss": 0.2158, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 0.00010697674418604651, | |
| "loss": 0.274, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "eval_accuracy": 0.7934911242603551, | |
| "eval_loss": 0.7361425757408142, | |
| "eval_runtime": 19.2623, | |
| "eval_samples_per_second": 87.736, | |
| "eval_steps_per_second": 11.006, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 0.0001063953488372093, | |
| "loss": 0.1164, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 0.0001058139534883721, | |
| "loss": 0.1515, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 0.0001052325581395349, | |
| "loss": 0.2399, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 0.00010465116279069768, | |
| "loss": 0.2516, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 0.00010406976744186048, | |
| "loss": 0.2054, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 0.00010348837209302327, | |
| "loss": 0.1758, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 0.00010290697674418605, | |
| "loss": 0.1903, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 0.00010232558139534885, | |
| "loss": 0.1922, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 0.00010174418604651163, | |
| "loss": 0.2019, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 0.00010116279069767443, | |
| "loss": 0.1244, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "eval_accuracy": 0.7911242603550296, | |
| "eval_loss": 0.7299075126647949, | |
| "eval_runtime": 19.238, | |
| "eval_samples_per_second": 87.847, | |
| "eval_steps_per_second": 11.02, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 0.00010058139534883721, | |
| "loss": 0.2361, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1389, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 9.94186046511628e-05, | |
| "loss": 0.0844, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 9.883720930232558e-05, | |
| "loss": 0.1463, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 9.825581395348838e-05, | |
| "loss": 0.09, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 9.767441860465116e-05, | |
| "loss": 0.1106, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 9.709302325581396e-05, | |
| "loss": 0.1387, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 9.651162790697675e-05, | |
| "loss": 0.1231, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 9.593023255813955e-05, | |
| "loss": 0.0734, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 9.534883720930233e-05, | |
| "loss": 0.0794, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "eval_accuracy": 0.7846153846153846, | |
| "eval_loss": 0.7440704107284546, | |
| "eval_runtime": 19.1417, | |
| "eval_samples_per_second": 88.289, | |
| "eval_steps_per_second": 11.075, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 9.476744186046512e-05, | |
| "loss": 0.0885, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 9.418604651162792e-05, | |
| "loss": 0.0781, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 9.36046511627907e-05, | |
| "loss": 0.0842, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 9.30232558139535e-05, | |
| "loss": 0.0957, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 9.244186046511628e-05, | |
| "loss": 0.0561, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 9.186046511627907e-05, | |
| "loss": 0.0905, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 9.127906976744186e-05, | |
| "loss": 0.075, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 9.069767441860465e-05, | |
| "loss": 0.1165, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 9.011627906976745e-05, | |
| "loss": 0.0809, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 8.953488372093024e-05, | |
| "loss": 0.0915, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "eval_accuracy": 0.7940828402366864, | |
| "eval_loss": 0.7614301443099976, | |
| "eval_runtime": 19.2799, | |
| "eval_samples_per_second": 87.656, | |
| "eval_steps_per_second": 10.996, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 8.895348837209302e-05, | |
| "loss": 0.1187, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 8.837209302325582e-05, | |
| "loss": 0.1082, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 8.779069767441861e-05, | |
| "loss": 0.0698, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 8.72093023255814e-05, | |
| "loss": 0.1073, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 8.662790697674419e-05, | |
| "loss": 0.0638, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 8.604651162790697e-05, | |
| "loss": 0.157, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 8.546511627906977e-05, | |
| "loss": 0.0641, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 8.488372093023255e-05, | |
| "loss": 0.0536, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 8.430232558139536e-05, | |
| "loss": 0.0921, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 8.372093023255814e-05, | |
| "loss": 0.0817, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "eval_accuracy": 0.8011834319526627, | |
| "eval_loss": 0.7310301065444946, | |
| "eval_runtime": 19.2052, | |
| "eval_samples_per_second": 87.997, | |
| "eval_steps_per_second": 11.039, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 8.313953488372094e-05, | |
| "loss": 0.0876, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 8.255813953488373e-05, | |
| "loss": 0.0959, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 8.197674418604652e-05, | |
| "loss": 0.0945, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 8.139534883720931e-05, | |
| "loss": 0.0375, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 8.081395348837209e-05, | |
| "loss": 0.0877, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 8.023255813953489e-05, | |
| "loss": 0.053, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 7.965116279069767e-05, | |
| "loss": 0.1233, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 7.906976744186047e-05, | |
| "loss": 0.1089, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 7.848837209302326e-05, | |
| "loss": 0.1326, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 7.790697674418606e-05, | |
| "loss": 0.0561, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "eval_accuracy": 0.806508875739645, | |
| "eval_loss": 0.722186267375946, | |
| "eval_runtime": 19.2931, | |
| "eval_samples_per_second": 87.596, | |
| "eval_steps_per_second": 10.988, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 7.732558139534884e-05, | |
| "loss": 0.1245, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 7.674418604651163e-05, | |
| "loss": 0.0414, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 7.616279069767443e-05, | |
| "loss": 0.0901, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 7.558139534883721e-05, | |
| "loss": 0.0751, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.1456, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 7.441860465116279e-05, | |
| "loss": 0.0245, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 7.383720930232558e-05, | |
| "loss": 0.0377, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 7.325581395348837e-05, | |
| "loss": 0.0435, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 7.267441860465116e-05, | |
| "loss": 0.0639, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 7.209302325581396e-05, | |
| "loss": 0.0165, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "eval_accuracy": 0.8059171597633136, | |
| "eval_loss": 0.7515397667884827, | |
| "eval_runtime": 19.4193, | |
| "eval_samples_per_second": 87.027, | |
| "eval_steps_per_second": 10.917, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 7.151162790697675e-05, | |
| "loss": 0.0338, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 7.093023255813955e-05, | |
| "loss": 0.0586, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 7.034883720930233e-05, | |
| "loss": 0.0787, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 6.976744186046513e-05, | |
| "loss": 0.023, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 6.918604651162791e-05, | |
| "loss": 0.0681, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 6.86046511627907e-05, | |
| "loss": 0.0569, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 6.802325581395348e-05, | |
| "loss": 0.0206, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 6.744186046511628e-05, | |
| "loss": 0.0369, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 6.686046511627908e-05, | |
| "loss": 0.0526, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 6.627906976744186e-05, | |
| "loss": 0.0168, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "eval_accuracy": 0.821301775147929, | |
| "eval_loss": 0.6687235832214355, | |
| "eval_runtime": 19.3189, | |
| "eval_samples_per_second": 87.479, | |
| "eval_steps_per_second": 10.974, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 6.569767441860465e-05, | |
| "loss": 0.0185, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 6.511627906976745e-05, | |
| "loss": 0.018, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 6.453488372093024e-05, | |
| "loss": 0.0503, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 6.395348837209303e-05, | |
| "loss": 0.0145, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 6.337209302325582e-05, | |
| "loss": 0.0139, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 6.27906976744186e-05, | |
| "loss": 0.0134, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 6.22093023255814e-05, | |
| "loss": 0.0168, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 6.162790697674418e-05, | |
| "loss": 0.0234, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 6.104651162790698e-05, | |
| "loss": 0.046, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 6.0465116279069765e-05, | |
| "loss": 0.0212, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "eval_accuracy": 0.8248520710059172, | |
| "eval_loss": 0.6671048402786255, | |
| "eval_runtime": 19.6081, | |
| "eval_samples_per_second": 86.189, | |
| "eval_steps_per_second": 10.812, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 5.9883720930232554e-05, | |
| "loss": 0.0182, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 5.9302325581395356e-05, | |
| "loss": 0.0101, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 5.8720930232558145e-05, | |
| "loss": 0.0131, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 5.8139534883720933e-05, | |
| "loss": 0.0592, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 5.755813953488373e-05, | |
| "loss": 0.0317, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 5.697674418604652e-05, | |
| "loss": 0.0136, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 5.6395348837209306e-05, | |
| "loss": 0.024, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 5.5813953488372095e-05, | |
| "loss": 0.0233, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 5.5232558139534884e-05, | |
| "loss": 0.034, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 5.465116279069767e-05, | |
| "loss": 0.0389, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "eval_accuracy": 0.827810650887574, | |
| "eval_loss": 0.6893125176429749, | |
| "eval_runtime": 19.5009, | |
| "eval_samples_per_second": 86.663, | |
| "eval_steps_per_second": 10.871, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 5.406976744186046e-05, | |
| "loss": 0.0227, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 5.348837209302326e-05, | |
| "loss": 0.0186, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 5.290697674418605e-05, | |
| "loss": 0.022, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 5.232558139534884e-05, | |
| "loss": 0.0373, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 5.1744186046511636e-05, | |
| "loss": 0.0144, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 5.1162790697674425e-05, | |
| "loss": 0.0112, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 5.0581395348837214e-05, | |
| "loss": 0.0331, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0139, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 4.941860465116279e-05, | |
| "loss": 0.0142, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 4.883720930232558e-05, | |
| "loss": 0.0087, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "eval_accuracy": 0.8260355029585799, | |
| "eval_loss": 0.6839348077774048, | |
| "eval_runtime": 19.5465, | |
| "eval_samples_per_second": 86.461, | |
| "eval_steps_per_second": 10.846, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 4.8255813953488375e-05, | |
| "loss": 0.0089, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 4.7674418604651164e-05, | |
| "loss": 0.0168, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 4.709302325581396e-05, | |
| "loss": 0.009, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 4.651162790697675e-05, | |
| "loss": 0.0079, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 4.593023255813954e-05, | |
| "loss": 0.0092, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 4.5348837209302326e-05, | |
| "loss": 0.01, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 4.476744186046512e-05, | |
| "loss": 0.0134, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 4.418604651162791e-05, | |
| "loss": 0.0265, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 4.36046511627907e-05, | |
| "loss": 0.0079, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 4.302325581395349e-05, | |
| "loss": 0.0087, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "eval_accuracy": 0.8319526627218935, | |
| "eval_loss": 0.6412006616592407, | |
| "eval_runtime": 19.4572, | |
| "eval_samples_per_second": 86.857, | |
| "eval_steps_per_second": 10.896, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 4.2441860465116276e-05, | |
| "loss": 0.0242, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 4.186046511627907e-05, | |
| "loss": 0.0087, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 4.127906976744187e-05, | |
| "loss": 0.0097, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 4.0697674418604655e-05, | |
| "loss": 0.0073, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 4.0116279069767444e-05, | |
| "loss": 0.0077, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 3.953488372093023e-05, | |
| "loss": 0.0115, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 3.895348837209303e-05, | |
| "loss": 0.0076, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 3.837209302325582e-05, | |
| "loss": 0.0092, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 3.7790697674418606e-05, | |
| "loss": 0.0071, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 3.7209302325581394e-05, | |
| "loss": 0.0077, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "eval_accuracy": 0.8366863905325443, | |
| "eval_loss": 0.6366316676139832, | |
| "eval_runtime": 19.6422, | |
| "eval_samples_per_second": 86.039, | |
| "eval_steps_per_second": 10.793, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 3.662790697674418e-05, | |
| "loss": 0.0081, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 3.604651162790698e-05, | |
| "loss": 0.0077, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 3.5465116279069774e-05, | |
| "loss": 0.0068, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 3.488372093023256e-05, | |
| "loss": 0.0069, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 3.430232558139535e-05, | |
| "loss": 0.0078, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 3.372093023255814e-05, | |
| "loss": 0.0069, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 3.313953488372093e-05, | |
| "loss": 0.0075, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 3.2558139534883724e-05, | |
| "loss": 0.0088, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 3.197674418604651e-05, | |
| "loss": 0.007, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 3.13953488372093e-05, | |
| "loss": 0.0065, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "eval_accuracy": 0.8272189349112427, | |
| "eval_loss": 0.6696515679359436, | |
| "eval_runtime": 19.4791, | |
| "eval_samples_per_second": 86.76, | |
| "eval_steps_per_second": 10.883, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 3.081395348837209e-05, | |
| "loss": 0.0276, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 3.0232558139534883e-05, | |
| "loss": 0.0064, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 2.9651162790697678e-05, | |
| "loss": 0.0064, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 2.9069767441860467e-05, | |
| "loss": 0.0271, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 2.848837209302326e-05, | |
| "loss": 0.0062, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 2.7906976744186048e-05, | |
| "loss": 0.0073, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 2.7325581395348836e-05, | |
| "loss": 0.0074, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 2.674418604651163e-05, | |
| "loss": 0.0068, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 2.616279069767442e-05, | |
| "loss": 0.007, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 2.5581395348837212e-05, | |
| "loss": 0.0061, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "eval_accuracy": 0.8349112426035503, | |
| "eval_loss": 0.6509989500045776, | |
| "eval_runtime": 19.534, | |
| "eval_samples_per_second": 86.516, | |
| "eval_steps_per_second": 10.853, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0411, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 2.441860465116279e-05, | |
| "loss": 0.006, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 2.3837209302325582e-05, | |
| "loss": 0.0066, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 2.3255813953488374e-05, | |
| "loss": 0.0058, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 2.2674418604651163e-05, | |
| "loss": 0.0064, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 2.2093023255813955e-05, | |
| "loss": 0.006, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 2.1511627906976744e-05, | |
| "loss": 0.0064, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 2.0930232558139536e-05, | |
| "loss": 0.0063, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 2.0348837209302328e-05, | |
| "loss": 0.0243, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 1.9767441860465116e-05, | |
| "loss": 0.0185, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "eval_accuracy": 0.8366863905325443, | |
| "eval_loss": 0.6451619267463684, | |
| "eval_runtime": 19.7485, | |
| "eval_samples_per_second": 85.576, | |
| "eval_steps_per_second": 10.735, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 1.918604651162791e-05, | |
| "loss": 0.0111, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 1.8604651162790697e-05, | |
| "loss": 0.0063, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.802325581395349e-05, | |
| "loss": 0.007, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 1.744186046511628e-05, | |
| "loss": 0.006, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 1.686046511627907e-05, | |
| "loss": 0.0064, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.6279069767441862e-05, | |
| "loss": 0.006, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 1.569767441860465e-05, | |
| "loss": 0.0059, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 1.5116279069767441e-05, | |
| "loss": 0.0053, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 1.4534883720930233e-05, | |
| "loss": 0.006, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 1.3953488372093024e-05, | |
| "loss": 0.0059, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "eval_accuracy": 0.8378698224852071, | |
| "eval_loss": 0.6426283717155457, | |
| "eval_runtime": 19.368, | |
| "eval_samples_per_second": 87.257, | |
| "eval_steps_per_second": 10.946, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 1.3372093023255814e-05, | |
| "loss": 0.0059, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 1.2790697674418606e-05, | |
| "loss": 0.0057, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 1.2209302325581395e-05, | |
| "loss": 0.006, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 1.1627906976744187e-05, | |
| "loss": 0.0053, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 1.1046511627906977e-05, | |
| "loss": 0.0053, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 1.0465116279069768e-05, | |
| "loss": 0.0062, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 9.883720930232558e-06, | |
| "loss": 0.0061, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 9.302325581395349e-06, | |
| "loss": 0.0061, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 8.72093023255814e-06, | |
| "loss": 0.0053, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 8.139534883720931e-06, | |
| "loss": 0.0062, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "eval_accuracy": 0.8378698224852071, | |
| "eval_loss": 0.6398439407348633, | |
| "eval_runtime": 20.4188, | |
| "eval_samples_per_second": 82.767, | |
| "eval_steps_per_second": 10.383, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 7.558139534883721e-06, | |
| "loss": 0.0056, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 6.976744186046512e-06, | |
| "loss": 0.0058, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 6.395348837209303e-06, | |
| "loss": 0.0058, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "learning_rate": 5.8139534883720935e-06, | |
| "loss": 0.0058, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 5.232558139534884e-06, | |
| "loss": 0.0062, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 4.651162790697674e-06, | |
| "loss": 0.0055, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 4.0697674418604655e-06, | |
| "loss": 0.0053, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 3.488372093023256e-06, | |
| "loss": 0.0057, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 2.9069767441860468e-06, | |
| "loss": 0.006, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "learning_rate": 2.325581395348837e-06, | |
| "loss": 0.0315, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "eval_accuracy": 0.8384615384615385, | |
| "eval_loss": 0.6396650075912476, | |
| "eval_runtime": 19.5469, | |
| "eval_samples_per_second": 86.459, | |
| "eval_steps_per_second": 10.846, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 1.744186046511628e-06, | |
| "loss": 0.0074, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 1.1627906976744186e-06, | |
| "loss": 0.0057, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 5.813953488372093e-07, | |
| "loss": 0.0058, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0057, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 3440, | |
| "total_flos": 4.2707785173722726e+18, | |
| "train_loss": 0.6754590564342432, | |
| "train_runtime": 2008.5618, | |
| "train_samples_per_second": 27.403, | |
| "train_steps_per_second": 1.713 | |
| } | |
| ], | |
| "max_steps": 3440, | |
| "num_train_epochs": 8, | |
| "total_flos": 4.2707785173722726e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |