| { | |
| "best_metric": 0.6827040314674377, | |
| "best_model_checkpoint": "checkpoints/1a_52k/checkpoint-6200", | |
| "epoch": 1.9838412926965843, | |
| "eval_steps": 200, | |
| "global_step": 6200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.6999999999999996e-05, | |
| "loss": 2.0256, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5.399999999999999e-05, | |
| "loss": 1.9236, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 7.8e-05, | |
| "loss": 1.712, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00010799999999999998, | |
| "loss": 1.2747, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.000138, | |
| "loss": 1.0657, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.000168, | |
| "loss": 0.973, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.000198, | |
| "loss": 0.8178, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00022799999999999999, | |
| "loss": 0.7901, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.000258, | |
| "loss": 0.7289, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00028799999999999995, | |
| "loss": 0.7429, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002998840579710145, | |
| "loss": 0.7631, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002996908212560386, | |
| "loss": 0.7339, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029949758454106277, | |
| "loss": 0.7435, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029930434782608696, | |
| "loss": 0.7333, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002991111111111111, | |
| "loss": 0.769, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029891787439613525, | |
| "loss": 0.7644, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002987246376811594, | |
| "loss": 0.7517, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002985314009661836, | |
| "loss": 0.7212, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029833816425120773, | |
| "loss": 0.745, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029814492753623187, | |
| "loss": 0.7023, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 0.7244767546653748, | |
| "eval_runtime": 150.1014, | |
| "eval_samples_per_second": 13.324, | |
| "eval_steps_per_second": 1.666, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.000297951690821256, | |
| "loss": 0.6783, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029775845410628016, | |
| "loss": 0.7327, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029756521739130435, | |
| "loss": 0.69, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002973719806763285, | |
| "loss": 0.7069, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029717874396135264, | |
| "loss": 0.7276, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002969855072463768, | |
| "loss": 0.7356, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.000296792270531401, | |
| "loss": 0.7103, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002965990338164251, | |
| "loss": 0.7224, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029640579710144926, | |
| "loss": 0.6898, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002962125603864734, | |
| "loss": 0.7222, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029601932367149755, | |
| "loss": 0.685, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029582608695652175, | |
| "loss": 0.7389, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002956328502415459, | |
| "loss": 0.6956, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029543961352657003, | |
| "loss": 0.7191, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002952463768115942, | |
| "loss": 0.6938, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029505314009661837, | |
| "loss": 0.695, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002948599033816425, | |
| "loss": 0.7169, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029466666666666666, | |
| "loss": 0.7313, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002944734299516908, | |
| "loss": 0.7016, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029428019323671494, | |
| "loss": 0.7149, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 0.7104699611663818, | |
| "eval_runtime": 150.0015, | |
| "eval_samples_per_second": 13.333, | |
| "eval_steps_per_second": 1.667, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029408695652173914, | |
| "loss": 0.7133, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002938937198067633, | |
| "loss": 0.7568, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002937004830917874, | |
| "loss": 0.7159, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029350724637681156, | |
| "loss": 0.7356, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029331400966183576, | |
| "loss": 0.665, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002931207729468599, | |
| "loss": 0.7017, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029292753623188405, | |
| "loss": 0.6979, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002927342995169082, | |
| "loss": 0.7104, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029254106280193233, | |
| "loss": 0.6907, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029234782608695653, | |
| "loss": 0.7407, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029215458937198067, | |
| "loss": 0.7028, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002919613526570048, | |
| "loss": 0.7102, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029176811594202896, | |
| "loss": 0.6956, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029157487922705315, | |
| "loss": 0.6926, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002913816425120773, | |
| "loss": 0.7114, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00029118840579710144, | |
| "loss": 0.7066, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002909951690821256, | |
| "loss": 0.6853, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002908019323671497, | |
| "loss": 0.7063, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002906086956521739, | |
| "loss": 0.7064, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00029041545893719806, | |
| "loss": 0.729, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 0.7055845856666565, | |
| "eval_runtime": 149.8446, | |
| "eval_samples_per_second": 13.347, | |
| "eval_steps_per_second": 1.668, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002902222222222222, | |
| "loss": 0.7098, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00029002898550724635, | |
| "loss": 0.6363, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002898357487922705, | |
| "loss": 0.7128, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002896425120772947, | |
| "loss": 0.692, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00028944927536231883, | |
| "loss": 0.7068, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00028925603864734297, | |
| "loss": 0.7191, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002890628019323671, | |
| "loss": 0.6865, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002888695652173913, | |
| "loss": 0.7331, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00028867632850241545, | |
| "loss": 0.652, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002884830917874396, | |
| "loss": 0.7188, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00028828985507246374, | |
| "loss": 0.6834, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002880966183574879, | |
| "loss": 0.693, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002879033816425121, | |
| "loss": 0.7445, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002877101449275362, | |
| "loss": 0.6904, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00028751690821256036, | |
| "loss": 0.7547, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002873236714975845, | |
| "loss": 0.7068, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0002871304347826087, | |
| "loss": 0.6677, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00028693719806763285, | |
| "loss": 0.6808, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.000286743961352657, | |
| "loss": 0.7142, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00028655072463768113, | |
| "loss": 0.7126, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 0.7015364766120911, | |
| "eval_runtime": 149.6299, | |
| "eval_samples_per_second": 13.366, | |
| "eval_steps_per_second": 1.671, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002863574879227053, | |
| "loss": 0.69, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00028616425120772947, | |
| "loss": 0.6772, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002859710144927536, | |
| "loss": 0.6881, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00028577777777777776, | |
| "loss": 0.6509, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002855845410628019, | |
| "loss": 0.6924, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00028539130434782604, | |
| "loss": 0.7116, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00028519806763285024, | |
| "loss": 0.711, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002850048309178744, | |
| "loss": 0.6897, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002848115942028985, | |
| "loss": 0.6689, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00028461835748792266, | |
| "loss": 0.6891, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00028442512077294686, | |
| "loss": 0.6985, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.000284231884057971, | |
| "loss": 0.6643, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00028403864734299515, | |
| "loss": 0.7277, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0002838454106280193, | |
| "loss": 0.703, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00028365217391304343, | |
| "loss": 0.6678, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00028345893719806763, | |
| "loss": 0.6836, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00028326570048309177, | |
| "loss": 0.7164, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0002830724637681159, | |
| "loss": 0.6382, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00028287922705314006, | |
| "loss": 0.7, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002826859903381642, | |
| "loss": 0.6974, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 0.6978325843811035, | |
| "eval_runtime": 149.6863, | |
| "eval_samples_per_second": 13.361, | |
| "eval_steps_per_second": 1.67, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002824927536231884, | |
| "loss": 0.6644, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00028229951690821254, | |
| "loss": 0.672, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002821062801932367, | |
| "loss": 0.7036, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002819130434782608, | |
| "loss": 0.7049, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.000281719806763285, | |
| "loss": 0.7033, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00028152657004830916, | |
| "loss": 0.7059, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0002813333333333333, | |
| "loss": 0.6915, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00028114009661835745, | |
| "loss": 0.7609, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0002809468599033816, | |
| "loss": 0.6756, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0002807536231884058, | |
| "loss": 0.7127, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00028056038647342993, | |
| "loss": 0.7212, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00028036714975845407, | |
| "loss": 0.6974, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0002801739130434782, | |
| "loss": 0.6948, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0002799806763285024, | |
| "loss": 0.73, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00027978743961352655, | |
| "loss": 0.7154, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0002795942028985507, | |
| "loss": 0.7007, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00027940096618357484, | |
| "loss": 0.6854, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.000279207729468599, | |
| "loss": 0.7075, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0002790144927536232, | |
| "loss": 0.6942, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0002788212560386473, | |
| "loss": 0.7389, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 0.6969788670539856, | |
| "eval_runtime": 149.7859, | |
| "eval_samples_per_second": 13.352, | |
| "eval_steps_per_second": 1.669, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00027862801932367146, | |
| "loss": 0.6633, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0002784347826086956, | |
| "loss": 0.6741, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00027824154589371975, | |
| "loss": 0.702, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00027804830917874395, | |
| "loss": 0.691, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0002778550724637681, | |
| "loss": 0.7049, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00027766183574879223, | |
| "loss": 0.7254, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0002774685990338164, | |
| "loss": 0.6732, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00027727536231884057, | |
| "loss": 0.6995, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0002770821256038647, | |
| "loss": 0.687, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00027688888888888885, | |
| "loss": 0.6831, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.000276695652173913, | |
| "loss": 0.7189, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00027650241545893714, | |
| "loss": 0.6996, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00027630917874396134, | |
| "loss": 0.696, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0002761159420289855, | |
| "loss": 0.7237, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0002759227053140096, | |
| "loss": 0.7266, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00027572946859903376, | |
| "loss": 0.6745, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0002755362318840579, | |
| "loss": 0.7102, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0002753429951690821, | |
| "loss": 0.6922, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00027514975845410625, | |
| "loss": 0.7059, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0002749565217391304, | |
| "loss": 0.7198, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 0.6949622631072998, | |
| "eval_runtime": 150.1806, | |
| "eval_samples_per_second": 13.317, | |
| "eval_steps_per_second": 1.665, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00027476328502415453, | |
| "loss": 0.6608, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00027457004830917873, | |
| "loss": 0.6699, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00027437681159420287, | |
| "loss": 0.6817, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.000274183574879227, | |
| "loss": 0.7019, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00027399033816425116, | |
| "loss": 0.6839, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0002737971014492753, | |
| "loss": 0.6725, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0002736038647342995, | |
| "loss": 0.7065, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00027341062801932364, | |
| "loss": 0.6728, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0002732173913043478, | |
| "loss": 0.6449, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0002730241545893719, | |
| "loss": 0.7094, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0002728309178743961, | |
| "loss": 0.6881, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00027263768115942026, | |
| "loss": 0.6804, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0002724444444444444, | |
| "loss": 0.6822, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00027225120772946855, | |
| "loss": 0.6816, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0002720579710144927, | |
| "loss": 0.6615, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0002718647342995169, | |
| "loss": 0.6945, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00027167149758454103, | |
| "loss": 0.7249, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00027147826086956517, | |
| "loss": 0.7061, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0002712850241545893, | |
| "loss": 0.6909, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0002710917874396135, | |
| "loss": 0.7214, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 0.6923746466636658, | |
| "eval_runtime": 150.1003, | |
| "eval_samples_per_second": 13.324, | |
| "eval_steps_per_second": 1.666, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00027089855072463765, | |
| "loss": 0.7448, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0002707053140096618, | |
| "loss": 0.6746, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00027051207729468594, | |
| "loss": 0.6952, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0002703188405797101, | |
| "loss": 0.6985, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0002701256038647343, | |
| "loss": 0.706, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0002699323671497584, | |
| "loss": 0.6838, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00026973913043478256, | |
| "loss": 0.6809, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0002695458937198067, | |
| "loss": 0.7066, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0002693526570048309, | |
| "loss": 0.6828, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00026915942028985505, | |
| "loss": 0.6653, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0002689661835748792, | |
| "loss": 0.6772, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00026877294685990333, | |
| "loss": 0.6798, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00026857971014492753, | |
| "loss": 0.6838, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00026838647342995167, | |
| "loss": 0.7115, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0002681932367149758, | |
| "loss": 0.6907, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00026799999999999995, | |
| "loss": 0.6587, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0002678067632850241, | |
| "loss": 0.7089, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0002676135265700483, | |
| "loss": 0.6947, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00026742028985507244, | |
| "loss": 0.698, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0002672270531400966, | |
| "loss": 0.7183, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 0.6911550164222717, | |
| "eval_runtime": 368.3262, | |
| "eval_samples_per_second": 5.43, | |
| "eval_steps_per_second": 0.679, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0002670338164251207, | |
| "loss": 0.7501, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0002668405797101449, | |
| "loss": 0.6472, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00026664734299516906, | |
| "loss": 0.6773, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0002664541062801932, | |
| "loss": 0.7195, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00026626086956521735, | |
| "loss": 0.7111, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0002660676328502415, | |
| "loss": 0.6921, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0002658743961352657, | |
| "loss": 0.6776, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00026568115942028983, | |
| "loss": 0.6911, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00026548792270531397, | |
| "loss": 0.7253, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.0002652946859903381, | |
| "loss": 0.672, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.0002651014492753623, | |
| "loss": 0.7156, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00026490821256038645, | |
| "loss": 0.6929, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0002647149758454106, | |
| "loss": 0.714, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00026452173913043474, | |
| "loss": 0.6834, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0002643285024154589, | |
| "loss": 0.7413, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0002641352657004831, | |
| "loss": 0.6369, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0002639420289855072, | |
| "loss": 0.6581, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00026374879227053136, | |
| "loss": 0.644, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0002635555555555555, | |
| "loss": 0.675, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0002633623188405797, | |
| "loss": 0.6935, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 0.6894997954368591, | |
| "eval_runtime": 384.4995, | |
| "eval_samples_per_second": 5.202, | |
| "eval_steps_per_second": 0.65, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00026316908212560384, | |
| "loss": 0.6739, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.000262975845410628, | |
| "loss": 0.719, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00026278260869565213, | |
| "loss": 0.6724, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0002625893719806763, | |
| "loss": 0.6736, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00026239613526570047, | |
| "loss": 0.675, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0002622028985507246, | |
| "loss": 0.7289, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00026200966183574875, | |
| "loss": 0.7081, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0002618164251207729, | |
| "loss": 0.6529, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0002616231884057971, | |
| "loss": 0.6659, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00026142995169082124, | |
| "loss": 0.663, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0002612367149758454, | |
| "loss": 0.7021, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0002610434782608695, | |
| "loss": 0.6856, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0002608502415458937, | |
| "loss": 0.6762, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00026065700483091786, | |
| "loss": 0.6754, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.000260463768115942, | |
| "loss": 0.7019, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00026027053140096615, | |
| "loss": 0.6847, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0002600772946859903, | |
| "loss": 0.6971, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0002598840579710145, | |
| "loss": 0.6819, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0002596908212560386, | |
| "loss": 0.6898, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00025949758454106277, | |
| "loss": 0.698, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 0.6893799304962158, | |
| "eval_runtime": 386.4657, | |
| "eval_samples_per_second": 5.175, | |
| "eval_steps_per_second": 0.647, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0002593043478260869, | |
| "loss": 0.6876, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0002591111111111111, | |
| "loss": 0.6798, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00025891787439613525, | |
| "loss": 0.706, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0002587246376811594, | |
| "loss": 0.6495, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00025853140096618354, | |
| "loss": 0.6646, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0002583381642512077, | |
| "loss": 0.6417, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0002581449275362319, | |
| "loss": 0.6681, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.000257951690821256, | |
| "loss": 0.6689, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00025775845410628016, | |
| "loss": 0.6837, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0002575652173913043, | |
| "loss": 0.7031, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0002573719806763285, | |
| "loss": 0.6746, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00025717874396135264, | |
| "loss": 0.6951, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0002569855072463768, | |
| "loss": 0.6988, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00025679227053140093, | |
| "loss": 0.6541, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0002565990338164251, | |
| "loss": 0.6366, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00025640579710144927, | |
| "loss": 0.7011, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0002562125603864734, | |
| "loss": 0.6935, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00025601932367149755, | |
| "loss": 0.6931, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0002558260869565217, | |
| "loss": 0.7004, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0002556328502415459, | |
| "loss": 0.6556, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 0.6892030239105225, | |
| "eval_runtime": 391.4843, | |
| "eval_samples_per_second": 5.109, | |
| "eval_steps_per_second": 0.639, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00025543961352657003, | |
| "loss": 0.664, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0002552463768115942, | |
| "loss": 0.7162, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0002550531400966183, | |
| "loss": 0.646, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0002548599033816425, | |
| "loss": 0.6515, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00025466666666666666, | |
| "loss": 0.6953, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0002544734299516908, | |
| "loss": 0.6887, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00025428019323671494, | |
| "loss": 0.6739, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0002540869565217391, | |
| "loss": 0.6923, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0002538937198067633, | |
| "loss": 0.6877, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0002537004830917874, | |
| "loss": 0.6865, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00025350724637681157, | |
| "loss": 0.6337, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0002533140096618357, | |
| "loss": 0.7073, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0002531207729468599, | |
| "loss": 0.6973, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00025292753623188405, | |
| "loss": 0.6719, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0002527342995169082, | |
| "loss": 0.6674, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00025254106280193234, | |
| "loss": 0.6745, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0002523478260869565, | |
| "loss": 0.6914, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.0002521545893719807, | |
| "loss": 0.6382, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.0002519613526570048, | |
| "loss": 0.6644, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00025176811594202896, | |
| "loss": 0.6892, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_loss": 0.6873727440834045, | |
| "eval_runtime": 390.6367, | |
| "eval_samples_per_second": 5.12, | |
| "eval_steps_per_second": 0.64, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0002515748792270531, | |
| "loss": 0.6592, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0002513816425120773, | |
| "loss": 0.6827, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00025118840579710144, | |
| "loss": 0.6436, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0002509951690821256, | |
| "loss": 0.6969, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0002508019323671497, | |
| "loss": 0.6747, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0002506086956521739, | |
| "loss": 0.697, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00025041545893719807, | |
| "loss": 0.7146, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0002502222222222222, | |
| "loss": 0.689, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00025002898550724635, | |
| "loss": 0.6957, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0002498357487922705, | |
| "loss": 0.708, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0002496425120772947, | |
| "loss": 0.7306, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00024944927536231883, | |
| "loss": 0.6418, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.000249256038647343, | |
| "loss": 0.6888, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0002490628019323671, | |
| "loss": 0.6573, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0002488695652173913, | |
| "loss": 0.7008, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00024867632850241546, | |
| "loss": 0.689, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0002484830917874396, | |
| "loss": 0.6739, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00024828985507246374, | |
| "loss": 0.6867, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0002480966183574879, | |
| "loss": 0.6874, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0002479033816425121, | |
| "loss": 0.6858, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 0.686144232749939, | |
| "eval_runtime": 396.2716, | |
| "eval_samples_per_second": 5.047, | |
| "eval_steps_per_second": 0.631, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0002477101449275362, | |
| "loss": 0.659, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00024751690821256037, | |
| "loss": 0.6537, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0002473236714975845, | |
| "loss": 0.7331, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0002471304347826087, | |
| "loss": 0.6855, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00024693719806763285, | |
| "loss": 0.7252, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.000246743961352657, | |
| "loss": 0.7, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00024655072463768113, | |
| "loss": 0.6917, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0002463574879227053, | |
| "loss": 0.6828, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0002461642512077295, | |
| "loss": 0.6747, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0002459710144927536, | |
| "loss": 0.696, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00024577777777777776, | |
| "loss": 0.6573, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0002455845410628019, | |
| "loss": 0.6811, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0002453913043478261, | |
| "loss": 0.6992, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00024519806763285024, | |
| "loss": 0.6972, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0002450048309178744, | |
| "loss": 0.6685, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0002448115942028985, | |
| "loss": 0.6531, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0002446183574879227, | |
| "loss": 0.6749, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00024442512077294686, | |
| "loss": 0.6811, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.000244231884057971, | |
| "loss": 0.6904, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00024403864734299515, | |
| "loss": 0.6819, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 0.6858677864074707, | |
| "eval_runtime": 418.7569, | |
| "eval_samples_per_second": 4.776, | |
| "eval_steps_per_second": 0.597, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0002438454106280193, | |
| "loss": 0.6773, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00024365217391304346, | |
| "loss": 0.7034, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0002434589371980676, | |
| "loss": 0.6804, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00024326570048309177, | |
| "loss": 0.6912, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00024307246376811592, | |
| "loss": 0.683, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0002428792270531401, | |
| "loss": 0.6767, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00024268599033816423, | |
| "loss": 0.6683, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00024249275362318837, | |
| "loss": 0.6777, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00024229951690821254, | |
| "loss": 0.6813, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00024210628019323668, | |
| "loss": 0.6878, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00024191304347826085, | |
| "loss": 0.6885, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000241719806763285, | |
| "loss": 0.6373, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00024152657004830917, | |
| "loss": 0.6982, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0002413333333333333, | |
| "loss": 0.6485, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00024114009661835748, | |
| "loss": 0.6522, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00024094685990338162, | |
| "loss": 0.6092, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00024075362318840576, | |
| "loss": 0.6362, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00024056038647342993, | |
| "loss": 0.6725, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00024036714975845408, | |
| "loss": 0.6718, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00024017391304347825, | |
| "loss": 0.6738, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 0.6860418915748596, | |
| "eval_runtime": 407.1637, | |
| "eval_samples_per_second": 4.912, | |
| "eval_steps_per_second": 0.614, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0002399806763285024, | |
| "loss": 0.658, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00023978743961352656, | |
| "loss": 0.6325, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0002395942028985507, | |
| "loss": 0.6372, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00023940096618357487, | |
| "loss": 0.6075, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.000239207729468599, | |
| "loss": 0.6212, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00023901449275362315, | |
| "loss": 0.6373, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00023882125603864732, | |
| "loss": 0.6799, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0002386280193236715, | |
| "loss": 0.6769, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00023843478260869564, | |
| "loss": 0.6265, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00023824154589371978, | |
| "loss": 0.6627, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00023804830917874392, | |
| "loss": 0.6602, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.0002378550724637681, | |
| "loss": 0.671, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00023766183574879226, | |
| "loss": 0.6297, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0002374685990338164, | |
| "loss": 0.662, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00023727536231884055, | |
| "loss": 0.6224, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00023708212560386472, | |
| "loss": 0.6527, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00023688888888888889, | |
| "loss": 0.6388, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00023669565217391303, | |
| "loss": 0.6626, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00023650241545893717, | |
| "loss": 0.6522, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.0002363091787439613, | |
| "loss": 0.6514, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 0.6873291730880737, | |
| "eval_runtime": 392.2057, | |
| "eval_samples_per_second": 5.099, | |
| "eval_steps_per_second": 0.637, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00023611594202898548, | |
| "loss": 0.6147, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00023592270531400965, | |
| "loss": 0.6282, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0002357294685990338, | |
| "loss": 0.6168, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00023553623188405794, | |
| "loss": 0.6858, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00023534299516908208, | |
| "loss": 0.6219, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00023514975845410628, | |
| "loss": 0.6566, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00023495652173913042, | |
| "loss": 0.6633, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00023476328502415456, | |
| "loss": 0.6526, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.0002345700483091787, | |
| "loss": 0.6544, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00023437681159420287, | |
| "loss": 0.6588, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00023418357487922704, | |
| "loss": 0.6477, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.0002339903381642512, | |
| "loss": 0.6494, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00023379710144927533, | |
| "loss": 0.6219, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00023360386473429947, | |
| "loss": 0.6369, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00023341062801932367, | |
| "loss": 0.6647, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0002332173913043478, | |
| "loss": 0.6477, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00023302415458937195, | |
| "loss": 0.6721, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0002328309178743961, | |
| "loss": 0.6327, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0002326376811594203, | |
| "loss": 0.6668, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00023244444444444444, | |
| "loss": 0.6422, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "eval_loss": 0.6869779229164124, | |
| "eval_runtime": 396.5319, | |
| "eval_samples_per_second": 5.044, | |
| "eval_steps_per_second": 0.63, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00023225120772946858, | |
| "loss": 0.6774, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00023205797101449272, | |
| "loss": 0.6663, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00023186473429951686, | |
| "loss": 0.6903, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00023167149758454106, | |
| "loss": 0.6362, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0002314782608695652, | |
| "loss": 0.6187, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00023128502415458935, | |
| "loss": 0.6256, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0002310917874396135, | |
| "loss": 0.6493, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00023089855072463768, | |
| "loss": 0.635, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00023070531400966183, | |
| "loss": 0.6973, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00023051207729468597, | |
| "loss": 0.6562, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.0002303188405797101, | |
| "loss": 0.6327, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00023012560386473425, | |
| "loss": 0.6736, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00022993236714975845, | |
| "loss": 0.6178, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0002297391304347826, | |
| "loss": 0.6574, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00022954589371980674, | |
| "loss": 0.6848, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00022935265700483088, | |
| "loss": 0.6478, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00022915942028985508, | |
| "loss": 0.6144, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00022896618357487922, | |
| "loss": 0.6609, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00022877294685990336, | |
| "loss": 0.6744, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.0002285797101449275, | |
| "loss": 0.636, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 0.6865532398223877, | |
| "eval_runtime": 393.5432, | |
| "eval_samples_per_second": 5.082, | |
| "eval_steps_per_second": 0.635, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00022838647342995165, | |
| "loss": 0.654, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00022819323671497584, | |
| "loss": 0.6442, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00022799999999999999, | |
| "loss": 0.6835, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00022780676328502413, | |
| "loss": 0.6339, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00022761352657004827, | |
| "loss": 0.6512, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00022742028985507247, | |
| "loss": 0.622, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.0002272270531400966, | |
| "loss": 0.6802, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00022703381642512075, | |
| "loss": 0.6911, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.0002268405797101449, | |
| "loss": 0.6815, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.0002266473429951691, | |
| "loss": 0.6569, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00022645410628019323, | |
| "loss": 0.6363, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00022626086956521738, | |
| "loss": 0.6371, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00022606763285024152, | |
| "loss": 0.6458, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00022587439613526566, | |
| "loss": 0.668, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00022568115942028986, | |
| "loss": 0.6676, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.000225487922705314, | |
| "loss": 0.6692, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.00022529468599033814, | |
| "loss": 0.6502, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.0002251014492753623, | |
| "loss": 0.6387, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00022490821256038646, | |
| "loss": 0.6475, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00022471497584541063, | |
| "loss": 0.6627, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_loss": 0.6856961846351624, | |
| "eval_runtime": 387.1458, | |
| "eval_samples_per_second": 5.166, | |
| "eval_steps_per_second": 0.646, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00022452173913043477, | |
| "loss": 0.6346, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.0002243285024154589, | |
| "loss": 0.6593, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00022413526570048305, | |
| "loss": 0.6397, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00022394202898550725, | |
| "loss": 0.641, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0002237487922705314, | |
| "loss": 0.6766, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00022355555555555554, | |
| "loss": 0.6931, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00022336231884057968, | |
| "loss": 0.6618, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00022316908212560385, | |
| "loss": 0.6655, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00022297584541062802, | |
| "loss": 0.647, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00022278260869565216, | |
| "loss": 0.6337, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.0002225893719806763, | |
| "loss": 0.6957, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00022239613526570044, | |
| "loss": 0.6435, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00022220289855072464, | |
| "loss": 0.6272, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00022200966183574878, | |
| "loss": 0.6502, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00022181642512077293, | |
| "loss": 0.6514, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00022162318840579707, | |
| "loss": 0.6625, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00022142995169082124, | |
| "loss": 0.6284, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.0002212367149758454, | |
| "loss": 0.6503, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00022104347826086955, | |
| "loss": 0.6425, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.0002208502415458937, | |
| "loss": 0.6818, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 0.6859603524208069, | |
| "eval_runtime": 382.589, | |
| "eval_samples_per_second": 5.228, | |
| "eval_steps_per_second": 0.653, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00022065700483091784, | |
| "loss": 0.6299, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.000220463768115942, | |
| "loss": 0.6508, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00022027053140096618, | |
| "loss": 0.6572, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00022007729468599032, | |
| "loss": 0.6781, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00021988405797101446, | |
| "loss": 0.6806, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00021969082125603863, | |
| "loss": 0.632, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.0002194975845410628, | |
| "loss": 0.6631, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.00021930434782608694, | |
| "loss": 0.6729, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.00021911111111111109, | |
| "loss": 0.6618, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00021891787439613525, | |
| "loss": 0.6132, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.0002187246376811594, | |
| "loss": 0.6457, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00021853140096618357, | |
| "loss": 0.6443, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.0002183381642512077, | |
| "loss": 0.653, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00021814492753623185, | |
| "loss": 0.6706, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00021795169082125602, | |
| "loss": 0.6684, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00021775845410628016, | |
| "loss": 0.6597, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00021756521739130433, | |
| "loss": 0.6478, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00021737198067632848, | |
| "loss": 0.6411, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00021717874396135265, | |
| "loss": 0.657, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.0002169855072463768, | |
| "loss": 0.663, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 0.6874400973320007, | |
| "eval_runtime": 387.4088, | |
| "eval_samples_per_second": 5.163, | |
| "eval_steps_per_second": 0.645, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00021679227053140096, | |
| "loss": 0.6833, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.0002165990338164251, | |
| "loss": 0.6827, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00021640579710144924, | |
| "loss": 0.6789, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.0002162125603864734, | |
| "loss": 0.6582, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00021601932367149756, | |
| "loss": 0.6222, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00021582608695652173, | |
| "loss": 0.6314, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00021563285024154587, | |
| "loss": 0.6466, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00021543961352657004, | |
| "loss": 0.6734, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00021524637681159418, | |
| "loss": 0.6552, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00021505314009661835, | |
| "loss": 0.7156, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.0002148599033816425, | |
| "loss": 0.6548, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00021466666666666664, | |
| "loss": 0.7265, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.0002144734299516908, | |
| "loss": 0.6757, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00021428019323671495, | |
| "loss": 0.6914, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00021408695652173912, | |
| "loss": 0.6746, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00021389371980676326, | |
| "loss": 0.7085, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00021370048309178743, | |
| "loss": 0.7058, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.00021350724637681157, | |
| "loss": 0.6599, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.00021331400966183571, | |
| "loss": 0.6653, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.00021312077294685988, | |
| "loss": 0.6757, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_loss": 0.6850671172142029, | |
| "eval_runtime": 410.1023, | |
| "eval_samples_per_second": 4.877, | |
| "eval_steps_per_second": 0.61, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00021292753623188405, | |
| "loss": 0.6644, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0002127342995169082, | |
| "loss": 0.6444, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00021254106280193234, | |
| "loss": 0.6548, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0002123478260869565, | |
| "loss": 0.6361, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00021215458937198065, | |
| "loss": 0.6726, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00021196135265700482, | |
| "loss": 0.6451, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00021176811594202896, | |
| "loss": 0.6939, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.0002115748792270531, | |
| "loss": 0.6569, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.00021138164251207728, | |
| "loss": 0.6405, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.00021118840579710145, | |
| "loss": 0.6261, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.0002109951690821256, | |
| "loss": 0.6544, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00021080193236714973, | |
| "loss": 0.6367, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.0002106086956521739, | |
| "loss": 0.6691, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00021041545893719804, | |
| "loss": 0.6228, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.0002102222222222222, | |
| "loss": 0.6628, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00021002898550724635, | |
| "loss": 0.6678, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.0002098357487922705, | |
| "loss": 0.6699, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00020964251207729467, | |
| "loss": 0.683, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00020944927536231884, | |
| "loss": 0.688, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00020925603864734298, | |
| "loss": 0.6661, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_loss": 0.6855675578117371, | |
| "eval_runtime": 385.7747, | |
| "eval_samples_per_second": 5.184, | |
| "eval_steps_per_second": 0.648, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00020906280193236712, | |
| "loss": 0.6605, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00020886956521739126, | |
| "loss": 0.7119, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00020867632850241543, | |
| "loss": 0.6181, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.0002084830917874396, | |
| "loss": 0.6484, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00020828985507246375, | |
| "loss": 0.6747, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.0002080966183574879, | |
| "loss": 0.6455, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00020790338164251206, | |
| "loss": 0.6591, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00020771014492753623, | |
| "loss": 0.6898, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00020751690821256037, | |
| "loss": 0.6491, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.0002073236714975845, | |
| "loss": 0.66, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.00020713043478260866, | |
| "loss": 0.6423, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.00020693719806763285, | |
| "loss": 0.6102, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.000206743961352657, | |
| "loss": 0.6709, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00020655072463768114, | |
| "loss": 0.6432, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00020635748792270528, | |
| "loss": 0.6388, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00020616425120772942, | |
| "loss": 0.6574, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00020597101449275362, | |
| "loss": 0.6687, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00020577777777777776, | |
| "loss": 0.674, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.0002055845410628019, | |
| "loss": 0.618, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00020539130434782605, | |
| "loss": 0.689, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 0.6850703358650208, | |
| "eval_runtime": 372.4881, | |
| "eval_samples_per_second": 5.369, | |
| "eval_steps_per_second": 0.671, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00020519806763285024, | |
| "loss": 0.6865, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.0002050048309178744, | |
| "loss": 0.6529, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00020481159420289853, | |
| "loss": 0.6495, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00020461835748792267, | |
| "loss": 0.6517, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00020442512077294681, | |
| "loss": 0.6684, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.000204231884057971, | |
| "loss": 0.6412, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00020403864734299515, | |
| "loss": 0.6463, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.0002038454106280193, | |
| "loss": 0.633, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00020365217391304344, | |
| "loss": 0.6473, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00020345893719806764, | |
| "loss": 0.6102, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00020326570048309178, | |
| "loss": 0.6816, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00020307246376811592, | |
| "loss": 0.6743, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00020287922705314006, | |
| "loss": 0.6745, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.0002026859903381642, | |
| "loss": 0.6435, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.0002024927536231884, | |
| "loss": 0.6675, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00020229951690821255, | |
| "loss": 0.6547, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.0002021062801932367, | |
| "loss": 0.6464, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00020191304347826083, | |
| "loss": 0.6894, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00020171980676328503, | |
| "loss": 0.6613, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00020152657004830917, | |
| "loss": 0.6362, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_loss": 0.6850407719612122, | |
| "eval_runtime": 377.0156, | |
| "eval_samples_per_second": 5.305, | |
| "eval_steps_per_second": 0.663, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.0002013333333333333, | |
| "loss": 0.636, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00020114009661835745, | |
| "loss": 0.6731, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00020094685990338165, | |
| "loss": 0.6429, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.0002007536231884058, | |
| "loss": 0.6808, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00020056038647342994, | |
| "loss": 0.6728, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00020036714975845408, | |
| "loss": 0.6377, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00020017391304347822, | |
| "loss": 0.6616, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00019998067632850242, | |
| "loss": 0.6571, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00019978743961352656, | |
| "loss": 0.6765, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.0001995942028985507, | |
| "loss": 0.6636, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00019940096618357485, | |
| "loss": 0.6621, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00019920772946859904, | |
| "loss": 0.6788, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00019901449275362319, | |
| "loss": 0.6491, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00019882125603864733, | |
| "loss": 0.6413, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00019862801932367147, | |
| "loss": 0.6287, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.0001984347826086956, | |
| "loss": 0.6624, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.0001982415458937198, | |
| "loss": 0.6657, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00019804830917874395, | |
| "loss": 0.6617, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.0001978550724637681, | |
| "loss": 0.6791, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00019766183574879224, | |
| "loss": 0.6537, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 0.683772623538971, | |
| "eval_runtime": 380.6447, | |
| "eval_samples_per_second": 5.254, | |
| "eval_steps_per_second": 0.657, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00019746859903381643, | |
| "loss": 0.5705, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00019727536231884058, | |
| "loss": 0.634, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00019708212560386472, | |
| "loss": 0.6558, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00019688888888888886, | |
| "loss": 0.657, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.000196695652173913, | |
| "loss": 0.6615, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.0001965024154589372, | |
| "loss": 0.6656, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00019630917874396134, | |
| "loss": 0.6286, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.0001961159420289855, | |
| "loss": 0.657, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00019592270531400963, | |
| "loss": 0.6891, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.0001957294685990338, | |
| "loss": 0.6539, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00019553623188405797, | |
| "loss": 0.6335, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.0001953429951690821, | |
| "loss": 0.6625, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00019514975845410625, | |
| "loss": 0.6597, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00019495652173913042, | |
| "loss": 0.6753, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.0001947632850241546, | |
| "loss": 0.6644, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00019457004830917874, | |
| "loss": 0.6434, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00019437681159420288, | |
| "loss": 0.6604, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00019418357487922702, | |
| "loss": 0.6538, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.0001939903381642512, | |
| "loss": 0.639, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00019379710144927536, | |
| "loss": 0.6668, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 0.6843588352203369, | |
| "eval_runtime": 378.4765, | |
| "eval_samples_per_second": 5.284, | |
| "eval_steps_per_second": 0.661, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.0001936038647342995, | |
| "loss": 0.6375, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00019341062801932364, | |
| "loss": 0.6455, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00019321739130434781, | |
| "loss": 0.6856, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00019302415458937198, | |
| "loss": 0.648, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00019283091787439613, | |
| "loss": 0.6005, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00019263768115942027, | |
| "loss": 0.6719, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.0001924444444444444, | |
| "loss": 0.6588, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00019225120772946858, | |
| "loss": 0.6517, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00019205797101449275, | |
| "loss": 0.6489, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.0001918647342995169, | |
| "loss": 0.6283, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00019167149758454104, | |
| "loss": 0.6523, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.0001914782608695652, | |
| "loss": 0.5914, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00019128502415458935, | |
| "loss": 0.6403, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00019109178743961352, | |
| "loss": 0.6426, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00019089855072463766, | |
| "loss": 0.6609, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.0001907053140096618, | |
| "loss": 0.6394, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00019051207729468597, | |
| "loss": 0.6546, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00019031884057971014, | |
| "loss": 0.6879, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00019012560386473429, | |
| "loss": 0.6116, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.00018993236714975843, | |
| "loss": 0.6477, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 0.6840969920158386, | |
| "eval_runtime": 385.7288, | |
| "eval_samples_per_second": 5.185, | |
| "eval_steps_per_second": 0.648, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.0001897391304347826, | |
| "loss": 0.6657, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.00018954589371980674, | |
| "loss": 0.6399, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.0001893526570048309, | |
| "loss": 0.6587, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.00018915942028985505, | |
| "loss": 0.6333, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.00018896618357487922, | |
| "loss": 0.6327, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00018877294685990336, | |
| "loss": 0.6788, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.0001885797101449275, | |
| "loss": 0.6765, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00018838647342995168, | |
| "loss": 0.6742, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00018819323671497582, | |
| "loss": 0.6462, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.000188, | |
| "loss": 0.6907, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.00018780676328502413, | |
| "loss": 0.6645, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.0001876135265700483, | |
| "loss": 0.6719, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.00018742028985507244, | |
| "loss": 0.6417, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.0001872270531400966, | |
| "loss": 0.6383, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.00018703381642512076, | |
| "loss": 0.6342, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.0001868405797101449, | |
| "loss": 0.657, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.00018664734299516907, | |
| "loss": 0.6832, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.0001864541062801932, | |
| "loss": 0.6658, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.00018626086956521738, | |
| "loss": 0.6614, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.00018606763285024152, | |
| "loss": 0.6878, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 0.6828343272209167, | |
| "eval_runtime": 392.2472, | |
| "eval_samples_per_second": 5.099, | |
| "eval_steps_per_second": 0.637, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.0001858743961352657, | |
| "loss": 0.6408, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.00018568115942028984, | |
| "loss": 0.6498, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.000185487922705314, | |
| "loss": 0.6598, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 0.00018529468599033815, | |
| "loss": 0.6231, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 0.0001851014492753623, | |
| "loss": 0.6906, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 0.00018490821256038646, | |
| "loss": 0.6467, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 0.0001847149758454106, | |
| "loss": 0.6355, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.00018452173913043477, | |
| "loss": 0.6794, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.00018432850241545891, | |
| "loss": 0.6475, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.00018413526570048306, | |
| "loss": 0.6165, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.00018394202898550723, | |
| "loss": 0.6281, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.0001837487922705314, | |
| "loss": 0.6473, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.00018355555555555554, | |
| "loss": 0.6415, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.00018336231884057968, | |
| "loss": 0.659, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 0.00018316908212560385, | |
| "loss": 0.6821, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 0.00018297584541062802, | |
| "loss": 0.6631, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 0.00018278260869565216, | |
| "loss": 0.6332, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.0001825893719806763, | |
| "loss": 0.6561, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.00018239613526570045, | |
| "loss": 0.6654, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.00018220289855072462, | |
| "loss": 0.6656, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 0.6827040314674377, | |
| "eval_runtime": 411.107, | |
| "eval_samples_per_second": 4.865, | |
| "eval_steps_per_second": 0.608, | |
| "step": 6200 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 15625, | |
| "num_train_epochs": 5, | |
| "save_steps": 200, | |
| "total_flos": 9.076709518992998e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |