| { | |
| "best_metric": 1.0564384460449219, | |
| "best_model_checkpoint": "checkpoints/alpaca_text_52K/checkpoint-6000", | |
| "epoch": 1.9198464122870171, | |
| "eval_steps": 200, | |
| "global_step": 6000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.4999999999999999e-05, | |
| "loss": 2.2432, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.4999999999999996e-05, | |
| "loss": 2.184, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 7.5e-05, | |
| "loss": 2.1422, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.000102, | |
| "loss": 1.7323, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00013199999999999998, | |
| "loss": 1.5829, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.000162, | |
| "loss": 1.5178, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00019199999999999998, | |
| "loss": 1.2123, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00022199999999999998, | |
| "loss": 1.1177, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00025199999999999995, | |
| "loss": 1.1426, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00028199999999999997, | |
| "loss": 1.1881, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002999227053140096, | |
| "loss": 1.1073, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002997294685990338, | |
| "loss": 1.1338, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029953623188405795, | |
| "loss": 1.0979, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002993429951690821, | |
| "loss": 1.1325, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029914975845410623, | |
| "loss": 1.1361, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002989565217391304, | |
| "loss": 1.1412, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00029876328502415457, | |
| "loss": 1.0977, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002985700483091787, | |
| "loss": 1.1252, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00029837681159420286, | |
| "loss": 1.1185, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.000298183574879227, | |
| "loss": 1.0904, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.1103301048278809, | |
| "eval_runtime": 299.3056, | |
| "eval_samples_per_second": 6.682, | |
| "eval_steps_per_second": 0.835, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002979903381642512, | |
| "loss": 1.1097, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00029779710144927534, | |
| "loss": 1.1154, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002976038647342995, | |
| "loss": 1.1053, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002974106280193236, | |
| "loss": 1.1247, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029721739130434777, | |
| "loss": 1.1455, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00029702415458937196, | |
| "loss": 1.1066, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002968309178743961, | |
| "loss": 1.0765, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00029663768115942025, | |
| "loss": 1.0976, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002964444444444444, | |
| "loss": 1.0866, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002962512077294686, | |
| "loss": 1.0844, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00029605797101449273, | |
| "loss": 1.1352, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002958647342995169, | |
| "loss": 1.0878, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.000295671497584541, | |
| "loss": 1.0814, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029547826086956516, | |
| "loss": 1.0676, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00029528502415458936, | |
| "loss": 1.043, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002950917874396135, | |
| "loss": 1.0888, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00029489855072463764, | |
| "loss": 1.0948, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002947053140096618, | |
| "loss": 1.0595, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000294512077294686, | |
| "loss": 1.1063, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002943188405797101, | |
| "loss": 1.0437, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 1.0938334465026855, | |
| "eval_runtime": 298.9118, | |
| "eval_samples_per_second": 6.691, | |
| "eval_steps_per_second": 0.836, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00029412560386473427, | |
| "loss": 1.107, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002939323671497584, | |
| "loss": 1.1255, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002937391304347826, | |
| "loss": 1.1168, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00029354589371980675, | |
| "loss": 1.0599, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002933526570048309, | |
| "loss": 1.0614, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029315942028985503, | |
| "loss": 1.1127, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002929661835748792, | |
| "loss": 1.0629, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00029277294685990337, | |
| "loss": 1.0952, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002925797101449275, | |
| "loss": 1.1008, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00029238647342995166, | |
| "loss": 1.0932, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002921932367149758, | |
| "loss": 1.0604, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.000292, | |
| "loss": 1.0925, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00029180676328502414, | |
| "loss": 1.0489, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002916135265700483, | |
| "loss": 1.1185, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002914202898550724, | |
| "loss": 1.0939, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00029122705314009657, | |
| "loss": 1.1288, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00029103381642512076, | |
| "loss": 1.0368, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002908405797101449, | |
| "loss": 1.0762, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00029064734299516905, | |
| "loss": 1.0966, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002904541062801932, | |
| "loss": 1.0941, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 1.0855311155319214, | |
| "eval_runtime": 299.2543, | |
| "eval_samples_per_second": 6.683, | |
| "eval_steps_per_second": 0.835, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002902608695652174, | |
| "loss": 1.0732, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00029006763285024153, | |
| "loss": 1.0497, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00028987439613526567, | |
| "loss": 1.0601, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002896811594202898, | |
| "loss": 1.0886, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00028948792270531396, | |
| "loss": 1.0681, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00028929468599033815, | |
| "loss": 1.0559, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002891014492753623, | |
| "loss": 1.0879, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00028890821256038644, | |
| "loss": 1.089, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002887149758454106, | |
| "loss": 1.1005, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002885217391304348, | |
| "loss": 1.1494, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002883285024154589, | |
| "loss": 1.0847, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00028813526570048306, | |
| "loss": 1.0514, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002879420289855072, | |
| "loss": 1.1038, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002877487922705314, | |
| "loss": 1.0171, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00028755555555555555, | |
| "loss": 1.1281, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002873623188405797, | |
| "loss": 1.0677, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00028716908212560383, | |
| "loss": 1.0963, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.000286975845410628, | |
| "loss": 1.1041, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00028678260869565217, | |
| "loss": 1.0847, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002865893719806763, | |
| "loss": 1.0871, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 1.0817850828170776, | |
| "eval_runtime": 298.1366, | |
| "eval_samples_per_second": 6.708, | |
| "eval_steps_per_second": 0.839, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00028639613526570046, | |
| "loss": 1.0593, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002862028985507246, | |
| "loss": 1.0254, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002860096618357488, | |
| "loss": 1.0503, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00028581642512077294, | |
| "loss": 1.1017, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002856231884057971, | |
| "loss": 1.0658, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002854299516908212, | |
| "loss": 1.1069, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00028523671497584537, | |
| "loss": 1.0603, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00028504347826086956, | |
| "loss": 1.082, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002848502415458937, | |
| "loss": 1.1248, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00028465700483091785, | |
| "loss": 1.1223, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.000284463768115942, | |
| "loss": 1.0672, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0002842705314009662, | |
| "loss": 1.1095, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00028407729468599033, | |
| "loss": 1.0227, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00028388405797101447, | |
| "loss": 1.0969, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0002836908212560386, | |
| "loss": 1.0638, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00028349758454106276, | |
| "loss": 1.0892, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00028330434782608695, | |
| "loss": 1.0565, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0002831111111111111, | |
| "loss": 1.1078, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00028291787439613524, | |
| "loss": 1.1106, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002827246376811594, | |
| "loss": 1.066, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 1.0781841278076172, | |
| "eval_runtime": 298.6342, | |
| "eval_samples_per_second": 6.697, | |
| "eval_steps_per_second": 0.837, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002825314009661836, | |
| "loss": 1.107, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002823381642512077, | |
| "loss": 1.0654, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00028214492753623186, | |
| "loss": 1.0342, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.000281951690821256, | |
| "loss": 1.088, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0002817584541062802, | |
| "loss": 1.0871, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00028156521739130434, | |
| "loss": 1.1038, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0002813719806763285, | |
| "loss": 1.0912, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00028117874396135263, | |
| "loss": 1.0836, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00028098550724637677, | |
| "loss": 1.0852, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00028079227053140097, | |
| "loss": 1.0757, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0002805990338164251, | |
| "loss": 1.0274, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00028040579710144925, | |
| "loss": 1.0815, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0002802125603864734, | |
| "loss": 1.0917, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0002800193236714976, | |
| "loss": 1.0986, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00027982608695652174, | |
| "loss": 1.0712, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0002796328502415459, | |
| "loss": 1.0725, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00027943961352657, | |
| "loss": 1.0358, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00027924637681159416, | |
| "loss": 1.0775, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00027905314009661836, | |
| "loss": 1.083, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0002788599033816425, | |
| "loss": 1.0618, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 1.0745840072631836, | |
| "eval_runtime": 300.795, | |
| "eval_samples_per_second": 6.649, | |
| "eval_steps_per_second": 0.831, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00027866666666666665, | |
| "loss": 1.0851, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0002784734299516908, | |
| "loss": 1.1221, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.000278280193236715, | |
| "loss": 1.1222, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00027808695652173913, | |
| "loss": 1.0941, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00027789371980676327, | |
| "loss": 1.0994, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0002777004830917874, | |
| "loss": 1.0973, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00027750724637681156, | |
| "loss": 0.9984, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00027731400966183575, | |
| "loss": 1.0531, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0002771207729468599, | |
| "loss": 1.0872, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00027692753623188404, | |
| "loss": 1.0575, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0002767342995169082, | |
| "loss": 1.0515, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0002765410628019324, | |
| "loss": 1.0888, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0002763478260869565, | |
| "loss": 1.058, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00027615458937198066, | |
| "loss": 1.0355, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0002759613526570048, | |
| "loss": 1.0612, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.000275768115942029, | |
| "loss": 1.1059, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00027557487922705314, | |
| "loss": 1.0836, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0002753816425120773, | |
| "loss": 1.0433, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00027518840579710143, | |
| "loss": 1.1255, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00027499516908212557, | |
| "loss": 1.0494, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 1.0709148645401, | |
| "eval_runtime": 299.6045, | |
| "eval_samples_per_second": 6.675, | |
| "eval_steps_per_second": 0.834, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00027480193236714977, | |
| "loss": 1.0646, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0002746086956521739, | |
| "loss": 1.0492, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00027441545893719805, | |
| "loss": 1.1201, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0002742222222222222, | |
| "loss": 1.1151, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0002740289855072464, | |
| "loss": 1.0785, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00027383574879227054, | |
| "loss": 1.1167, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0002736425120772947, | |
| "loss": 1.0623, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0002734492753623188, | |
| "loss": 1.084, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00027325603864734296, | |
| "loss": 1.0931, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00027306280193236716, | |
| "loss": 1.0783, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0002728695652173913, | |
| "loss": 1.0801, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00027267632850241544, | |
| "loss": 1.0362, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0002724830917874396, | |
| "loss": 1.1147, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0002722898550724638, | |
| "loss": 1.0659, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0002720966183574879, | |
| "loss": 1.0629, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00027190338164251207, | |
| "loss": 1.0563, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0002717101449275362, | |
| "loss": 1.0661, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00027151690821256035, | |
| "loss": 1.0722, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00027132367149758455, | |
| "loss": 1.0175, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0002711304347826087, | |
| "loss": 1.0829, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 1.0700575113296509, | |
| "eval_runtime": 301.3359, | |
| "eval_samples_per_second": 6.637, | |
| "eval_steps_per_second": 0.83, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00027093719806763284, | |
| "loss": 1.0935, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.000270743961352657, | |
| "loss": 1.094, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0002705507246376812, | |
| "loss": 1.0603, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0002703574879227053, | |
| "loss": 1.0813, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00027016425120772946, | |
| "loss": 1.0836, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0002699710144927536, | |
| "loss": 1.0538, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00026977777777777775, | |
| "loss": 1.0892, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00026958454106280194, | |
| "loss": 1.0814, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0002693913043478261, | |
| "loss": 1.0407, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00026919806763285023, | |
| "loss": 1.0628, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00026900483091787437, | |
| "loss": 1.0678, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0002688115942028985, | |
| "loss": 1.0832, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0002686183574879227, | |
| "loss": 1.1199, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00026842512077294685, | |
| "loss": 1.0674, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.000268231884057971, | |
| "loss": 1.0719, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00026803864734299514, | |
| "loss": 1.0754, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00026784541062801933, | |
| "loss": 1.0248, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0002676521739130435, | |
| "loss": 1.0659, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0002674589371980676, | |
| "loss": 1.0341, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00026726570048309176, | |
| "loss": 1.1131, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 1.0687577724456787, | |
| "eval_runtime": 298.631, | |
| "eval_samples_per_second": 6.697, | |
| "eval_steps_per_second": 0.837, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0002670724637681159, | |
| "loss": 1.0585, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0002668792270531401, | |
| "loss": 1.0657, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00026668599033816424, | |
| "loss": 1.0435, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0002664927536231884, | |
| "loss": 1.0697, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00026629951690821253, | |
| "loss": 1.086, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0002661062801932367, | |
| "loss": 1.0853, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00026591304347826087, | |
| "loss": 1.0214, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.000265719806763285, | |
| "loss": 1.069, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00026552657004830915, | |
| "loss": 1.0793, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.0002653333333333333, | |
| "loss": 1.0631, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.0002651400966183575, | |
| "loss": 1.0605, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00026494685990338163, | |
| "loss": 1.0322, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0002647536231884058, | |
| "loss": 1.0644, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0002645603864734299, | |
| "loss": 1.0665, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00026436714975845406, | |
| "loss": 1.1168, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00026417391304347826, | |
| "loss": 1.0518, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0002639806763285024, | |
| "loss": 1.0634, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00026378743961352654, | |
| "loss": 1.0483, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0002635942028985507, | |
| "loss": 1.0808, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0002634009661835749, | |
| "loss": 1.0519, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 1.0664184093475342, | |
| "eval_runtime": 297.6732, | |
| "eval_samples_per_second": 6.719, | |
| "eval_steps_per_second": 0.84, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.000263207729468599, | |
| "loss": 1.0351, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00026301449275362317, | |
| "loss": 1.0342, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0002628212560386473, | |
| "loss": 1.029, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00026262801932367145, | |
| "loss": 1.0726, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00026243478260869565, | |
| "loss": 1.0187, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0002622415458937198, | |
| "loss": 1.0743, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00026204830917874394, | |
| "loss": 1.0544, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0002618550724637681, | |
| "loss": 1.0619, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0002616618357487922, | |
| "loss": 1.0642, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0002614685990338164, | |
| "loss": 1.0353, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00026127536231884056, | |
| "loss": 1.0435, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0002610821256038647, | |
| "loss": 1.0759, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00026088888888888885, | |
| "loss": 1.1161, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00026069565217391304, | |
| "loss": 1.0283, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0002605024154589372, | |
| "loss": 1.0889, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00026030917874396133, | |
| "loss": 1.0649, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00026011594202898547, | |
| "loss": 1.0541, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0002599227053140096, | |
| "loss": 1.086, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0002597294685990338, | |
| "loss": 1.0836, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00025953623188405795, | |
| "loss": 1.0128, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 1.0654979944229126, | |
| "eval_runtime": 298.984, | |
| "eval_samples_per_second": 6.689, | |
| "eval_steps_per_second": 0.836, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0002593429951690821, | |
| "loss": 1.0502, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00025914975845410624, | |
| "loss": 1.0275, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00025895652173913043, | |
| "loss": 1.0567, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0002587632850241546, | |
| "loss": 1.1105, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0002585700483091787, | |
| "loss": 1.0271, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00025837681159420286, | |
| "loss": 1.0471, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.000258183574879227, | |
| "loss": 1.0862, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0002579903381642512, | |
| "loss": 1.0829, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00025779710144927534, | |
| "loss": 1.0542, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0002576038647342995, | |
| "loss": 1.0632, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00025741062801932363, | |
| "loss": 1.0714, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00025721739130434777, | |
| "loss": 0.9918, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00025702415458937197, | |
| "loss": 1.0651, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0002568309178743961, | |
| "loss": 1.0521, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00025663768115942025, | |
| "loss": 1.057, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0002564444444444444, | |
| "loss": 1.0675, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0002562512077294686, | |
| "loss": 1.1029, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00025605797101449273, | |
| "loss": 1.0636, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0002558647342995169, | |
| "loss": 1.0586, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.000255671497584541, | |
| "loss": 1.0517, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 1.0638346672058105, | |
| "eval_runtime": 305.5987, | |
| "eval_samples_per_second": 6.545, | |
| "eval_steps_per_second": 0.818, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00025547826086956516, | |
| "loss": 1.0955, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00025528502415458936, | |
| "loss": 1.0724, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0002550917874396135, | |
| "loss": 1.0682, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00025489855072463764, | |
| "loss": 1.0906, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0002547053140096618, | |
| "loss": 1.0637, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00025451207729468593, | |
| "loss": 1.0794, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0002543188405797101, | |
| "loss": 1.0554, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00025412560386473427, | |
| "loss": 1.1048, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0002539323671497584, | |
| "loss": 1.0526, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00025373913043478255, | |
| "loss": 1.1208, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00025354589371980675, | |
| "loss": 1.058, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0002533526570048309, | |
| "loss": 1.0374, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00025315942028985504, | |
| "loss": 1.0768, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0002529661835748792, | |
| "loss": 1.0313, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0002527729468599033, | |
| "loss": 1.0322, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0002525797101449275, | |
| "loss": 1.0753, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00025238647342995166, | |
| "loss": 1.0702, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.0002521932367149758, | |
| "loss": 1.0889, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00025199999999999995, | |
| "loss": 1.0907, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00025180676328502414, | |
| "loss": 1.0813, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_loss": 1.06317138671875, | |
| "eval_runtime": 301.103, | |
| "eval_samples_per_second": 6.642, | |
| "eval_steps_per_second": 0.83, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0002516135265700483, | |
| "loss": 1.0696, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00025142028985507243, | |
| "loss": 1.05, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00025122705314009657, | |
| "loss": 1.049, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0002510338164251207, | |
| "loss": 1.0756, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0002508405797101449, | |
| "loss": 1.0656, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00025064734299516905, | |
| "loss": 1.0376, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0002504541062801932, | |
| "loss": 1.0677, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00025026086956521734, | |
| "loss": 1.0488, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00025006763285024153, | |
| "loss": 1.0751, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0002498743961352657, | |
| "loss": 1.054, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0002496811594202898, | |
| "loss": 1.0478, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00024948792270531396, | |
| "loss": 1.0872, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0002492946859903381, | |
| "loss": 1.0308, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0002491014492753623, | |
| "loss": 1.0349, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00024890821256038644, | |
| "loss": 1.0775, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0002487149758454106, | |
| "loss": 1.1327, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00024852173913043473, | |
| "loss": 1.0442, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0002483285024154589, | |
| "loss": 1.0428, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00024813526570048307, | |
| "loss": 1.0661, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0002479420289855072, | |
| "loss": 1.1036, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 1.0611621141433716, | |
| "eval_runtime": 303.5211, | |
| "eval_samples_per_second": 6.589, | |
| "eval_steps_per_second": 0.824, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00024774879227053135, | |
| "loss": 1.0631, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0002475555555555555, | |
| "loss": 1.0571, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0002473623188405797, | |
| "loss": 1.0389, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00024716908212560383, | |
| "loss": 1.0354, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.000246975845410628, | |
| "loss": 1.0385, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0002467826086956521, | |
| "loss": 1.0689, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0002465893719806763, | |
| "loss": 1.0414, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00024639613526570046, | |
| "loss": 1.069, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0002462028985507246, | |
| "loss": 1.0096, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00024600966183574874, | |
| "loss": 1.0859, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00024581642512077294, | |
| "loss": 1.0518, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0002456231884057971, | |
| "loss": 1.0696, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0002454299516908212, | |
| "loss": 1.0559, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00024523671497584537, | |
| "loss": 1.0453, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0002450434782608695, | |
| "loss": 1.0325, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0002448502415458937, | |
| "loss": 1.1118, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00024465700483091785, | |
| "loss": 1.0666, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.000244463768115942, | |
| "loss": 1.0467, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00024427053140096614, | |
| "loss": 1.0605, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00024407729468599033, | |
| "loss": 1.0787, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 1.0607348680496216, | |
| "eval_runtime": 301.6471, | |
| "eval_samples_per_second": 6.63, | |
| "eval_steps_per_second": 0.829, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00024388405797101448, | |
| "loss": 1.0407, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00024369082125603862, | |
| "loss": 1.0713, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00024349758454106276, | |
| "loss": 1.0489, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00024330434782608693, | |
| "loss": 1.0443, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0002431111111111111, | |
| "loss": 1.0606, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00024291787439613524, | |
| "loss": 1.0929, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00024272463768115938, | |
| "loss": 1.0656, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00024253140096618355, | |
| "loss": 1.0528, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00024233816425120772, | |
| "loss": 1.1073, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00024214492753623187, | |
| "loss": 1.0326, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000241951690821256, | |
| "loss": 1.0446, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00024175845410628015, | |
| "loss": 1.0253, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00024156521739130432, | |
| "loss": 1.0186, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0002413719806763285, | |
| "loss": 1.0226, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00024117874396135263, | |
| "loss": 0.9921, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00024098550724637678, | |
| "loss": 0.9769, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00024079227053140095, | |
| "loss": 1.0191, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00024059903381642512, | |
| "loss": 1.0193, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00024040579710144926, | |
| "loss": 1.0023, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.0002402125603864734, | |
| "loss": 1.0317, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 1.0621122121810913, | |
| "eval_runtime": 301.2241, | |
| "eval_samples_per_second": 6.64, | |
| "eval_steps_per_second": 0.83, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00024001932367149754, | |
| "loss": 1.0001, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00023982608695652174, | |
| "loss": 0.9865, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00023963285024154588, | |
| "loss": 0.9851, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00023943961352657002, | |
| "loss": 1.017, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00023924637681159417, | |
| "loss": 1.0149, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0002390531400966183, | |
| "loss": 0.9958, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0002388599033816425, | |
| "loss": 1.0346, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00023866666666666665, | |
| "loss": 1.0612, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0002384734299516908, | |
| "loss": 0.9862, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00023828019323671493, | |
| "loss": 0.9982, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00023808695652173913, | |
| "loss": 1.046, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00023789371980676327, | |
| "loss": 1.0176, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00023770048309178742, | |
| "loss": 0.9974, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00023750724637681156, | |
| "loss": 1.0194, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0002373140096618357, | |
| "loss": 1.0272, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.0002371207729468599, | |
| "loss": 1.0707, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00023692753623188404, | |
| "loss": 0.9941, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00023673429951690818, | |
| "loss": 1.0298, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00023654106280193233, | |
| "loss": 1.0192, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00023634782608695652, | |
| "loss": 0.99, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 1.0633840560913086, | |
| "eval_runtime": 301.1449, | |
| "eval_samples_per_second": 6.641, | |
| "eval_steps_per_second": 0.83, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00023615458937198067, | |
| "loss": 1.0316, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.0002359613526570048, | |
| "loss": 0.9605, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00023576811594202895, | |
| "loss": 0.9902, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0002355748792270531, | |
| "loss": 0.99, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0002353816425120773, | |
| "loss": 0.9923, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00023518840579710143, | |
| "loss": 0.993, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00023499516908212557, | |
| "loss": 1.0103, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00023480193236714972, | |
| "loss": 1.0536, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00023460869565217391, | |
| "loss": 0.9961, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00023441545893719806, | |
| "loss": 1.0461, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.0002342222222222222, | |
| "loss": 0.9959, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00023402898550724634, | |
| "loss": 0.9932, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00023383574879227054, | |
| "loss": 1.0258, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00023364251207729468, | |
| "loss": 1.0591, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00023344927536231882, | |
| "loss": 1.0672, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00023325603864734297, | |
| "loss": 0.9972, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0002330628019323671, | |
| "loss": 1.0063, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0002328695652173913, | |
| "loss": 1.0153, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00023267632850241545, | |
| "loss": 1.0611, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0002324830917874396, | |
| "loss": 1.0215, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "eval_loss": 1.0626596212387085, | |
| "eval_runtime": 302.4484, | |
| "eval_samples_per_second": 6.613, | |
| "eval_steps_per_second": 0.827, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00023228985507246373, | |
| "loss": 1.0118, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00023209661835748793, | |
| "loss": 1.0626, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00023190338164251207, | |
| "loss": 1.0051, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00023171014492753622, | |
| "loss": 0.9824, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00023151690821256036, | |
| "loss": 1.0567, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0002313236714975845, | |
| "loss": 1.015, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0002311304347826087, | |
| "loss": 1.0129, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00023093719806763284, | |
| "loss": 0.9826, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00023074396135265698, | |
| "loss": 1.0257, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00023055072463768112, | |
| "loss": 1.0426, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00023035748792270532, | |
| "loss": 1.0054, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00023016425120772946, | |
| "loss": 1.0168, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.0002299710144927536, | |
| "loss": 1.0, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00022977777777777775, | |
| "loss": 1.0138, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0002295845410628019, | |
| "loss": 1.0339, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0002293913043478261, | |
| "loss": 0.9986, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00022919806763285023, | |
| "loss": 1.0171, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00022900483091787437, | |
| "loss": 0.9846, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00022881159420289852, | |
| "loss": 1.0338, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00022861835748792269, | |
| "loss": 1.0161, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 1.0625114440917969, | |
| "eval_runtime": 301.9731, | |
| "eval_samples_per_second": 6.623, | |
| "eval_steps_per_second": 0.828, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00022842512077294686, | |
| "loss": 1.0253, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.000228231884057971, | |
| "loss": 1.0187, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00022803864734299514, | |
| "loss": 0.9766, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.0002278454106280193, | |
| "loss": 1.0336, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00022765217391304348, | |
| "loss": 1.0084, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00022745893719806762, | |
| "loss": 1.0152, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00022726570048309177, | |
| "loss": 0.9628, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.0002270724637681159, | |
| "loss": 0.9494, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00022687922705314008, | |
| "loss": 1.0688, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00022668599033816425, | |
| "loss": 1.0242, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.0002264927536231884, | |
| "loss": 1.0518, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00022629951690821253, | |
| "loss": 0.9777, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.0002261062801932367, | |
| "loss": 0.9929, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00022591304347826084, | |
| "loss": 1.0192, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00022571980676328501, | |
| "loss": 0.9951, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.00022552657004830916, | |
| "loss": 1.0014, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.0002253333333333333, | |
| "loss": 1.035, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.00022514009661835747, | |
| "loss": 0.9909, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00022494685990338164, | |
| "loss": 1.0372, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00022475362318840578, | |
| "loss": 0.992, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_loss": 1.0622905492782593, | |
| "eval_runtime": 303.6073, | |
| "eval_samples_per_second": 6.587, | |
| "eval_steps_per_second": 0.823, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00022456038647342992, | |
| "loss": 1.0224, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.0002243671497584541, | |
| "loss": 1.0159, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00022417391304347824, | |
| "loss": 1.0257, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.0002239806763285024, | |
| "loss": 1.0391, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00022378743961352655, | |
| "loss": 0.9996, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0002235942028985507, | |
| "loss": 1.0807, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00022340096618357486, | |
| "loss": 0.975, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00022320772946859903, | |
| "loss": 1.0069, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00022301449275362317, | |
| "loss": 0.9844, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00022282125603864732, | |
| "loss": 1.0336, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00022262801932367148, | |
| "loss": 1.0124, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00022243478260869563, | |
| "loss": 0.9849, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.0002222415458937198, | |
| "loss": 1.0065, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00022204830917874394, | |
| "loss": 1.0319, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00022185507246376808, | |
| "loss": 1.0118, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00022166183574879225, | |
| "loss": 1.0255, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.0002214685990338164, | |
| "loss": 0.9649, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00022127536231884056, | |
| "loss": 0.9756, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.0002210821256038647, | |
| "loss": 0.9827, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00022088888888888888, | |
| "loss": 1.0245, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 1.0623623132705688, | |
| "eval_runtime": 303.2092, | |
| "eval_samples_per_second": 6.596, | |
| "eval_steps_per_second": 0.825, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00022069565217391302, | |
| "loss": 1.0284, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.0002205024154589372, | |
| "loss": 1.0258, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00022030917874396133, | |
| "loss": 1.0665, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.0002201159420289855, | |
| "loss": 0.9961, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00021992270531400964, | |
| "loss": 1.0137, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00021972946859903379, | |
| "loss": 0.9965, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.00021953623188405796, | |
| "loss": 1.0181, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.0002193429951690821, | |
| "loss": 1.0047, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.00021914975845410627, | |
| "loss": 1.0193, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.0002189565217391304, | |
| "loss": 1.011, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00021876328502415458, | |
| "loss": 1.0327, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00021857004830917872, | |
| "loss": 1.0434, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.0002183768115942029, | |
| "loss": 1.0138, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00021818357487922703, | |
| "loss": 0.9737, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00021799033816425118, | |
| "loss": 0.9977, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00021779710144927535, | |
| "loss": 1.0181, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.0002176038647342995, | |
| "loss": 1.0434, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00021741062801932366, | |
| "loss": 1.0143, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.0002172173913043478, | |
| "loss": 0.9786, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00021702415458937194, | |
| "loss": 0.9861, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 1.0632884502410889, | |
| "eval_runtime": 302.3081, | |
| "eval_samples_per_second": 6.616, | |
| "eval_steps_per_second": 0.827, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00021683091787439611, | |
| "loss": 1.0298, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00021663768115942028, | |
| "loss": 1.0203, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00021644444444444443, | |
| "loss": 1.0476, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00021625120772946857, | |
| "loss": 1.0071, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00021605797101449274, | |
| "loss": 1.0125, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00021586473429951688, | |
| "loss": 1.0706, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00021567149758454105, | |
| "loss": 0.9954, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.0002154782608695652, | |
| "loss": 1.0229, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00021528502415458934, | |
| "loss": 0.9828, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.0002150917874396135, | |
| "loss": 1.0033, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00021489855072463767, | |
| "loss": 1.0132, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00021470531400966182, | |
| "loss": 1.0256, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00021451207729468596, | |
| "loss": 1.0227, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.0002143188405797101, | |
| "loss": 1.0434, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.0002141256038647343, | |
| "loss": 1.0596, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00021393236714975844, | |
| "loss": 0.9893, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00021373913043478258, | |
| "loss": 1.0225, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.00021354589371980673, | |
| "loss": 0.981, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.0002133526570048309, | |
| "loss": 1.0348, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.00021315942028985507, | |
| "loss": 1.0208, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_loss": 1.0610480308532715, | |
| "eval_runtime": 302.534, | |
| "eval_samples_per_second": 6.611, | |
| "eval_steps_per_second": 0.826, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0002129661835748792, | |
| "loss": 1.0074, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00021277294685990335, | |
| "loss": 1.0173, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0002125797101449275, | |
| "loss": 0.9835, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0002123864734299517, | |
| "loss": 0.9819, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00021219323671497583, | |
| "loss": 1.0399, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00021199999999999998, | |
| "loss": 1.0076, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00021180676328502412, | |
| "loss": 1.0235, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.0002116135265700483, | |
| "loss": 1.0269, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.00021142028985507246, | |
| "loss": 1.0428, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.0002112270531400966, | |
| "loss": 1.0019, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00021103381642512074, | |
| "loss": 0.9701, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00021084057971014489, | |
| "loss": 0.977, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00021064734299516908, | |
| "loss": 0.9753, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00021045410628019322, | |
| "loss": 1.0432, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00021026086956521737, | |
| "loss": 1.051, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.0002100676328502415, | |
| "loss": 1.0301, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00020987439613526565, | |
| "loss": 0.9793, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00020968115942028985, | |
| "loss": 1.0151, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.000209487922705314, | |
| "loss": 0.9979, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00020929468599033813, | |
| "loss": 1.0126, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_loss": 1.0597844123840332, | |
| "eval_runtime": 303.6517, | |
| "eval_samples_per_second": 6.586, | |
| "eval_steps_per_second": 0.823, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00020910144927536228, | |
| "loss": 1.0428, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00020890821256038647, | |
| "loss": 1.0049, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00020871497584541062, | |
| "loss": 1.0006, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00020852173913043476, | |
| "loss": 1.022, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.0002083285024154589, | |
| "loss": 1.0342, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.0002081352657004831, | |
| "loss": 1.0146, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00020794202898550724, | |
| "loss": 1.029, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00020774879227053138, | |
| "loss": 1.0381, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00020755555555555553, | |
| "loss": 1.0372, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.00020736231884057967, | |
| "loss": 1.0119, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.00020716908212560387, | |
| "loss": 1.0797, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.000206975845410628, | |
| "loss": 0.95, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00020678260869565215, | |
| "loss": 1.0012, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.0002065893719806763, | |
| "loss": 1.0194, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.0002063961352657005, | |
| "loss": 1.0135, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00020620289855072463, | |
| "loss": 1.0294, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00020600966183574877, | |
| "loss": 0.9954, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00020581642512077292, | |
| "loss": 1.0082, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00020562318840579706, | |
| "loss": 1.0343, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00020542995169082126, | |
| "loss": 0.9967, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 1.060504674911499, | |
| "eval_runtime": 303.6571, | |
| "eval_samples_per_second": 6.586, | |
| "eval_steps_per_second": 0.823, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.0002052367149758454, | |
| "loss": 1.0436, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00020504347826086954, | |
| "loss": 1.037, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00020485024154589368, | |
| "loss": 1.0613, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00020465700483091788, | |
| "loss": 1.0305, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00020446376811594202, | |
| "loss": 1.0585, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00020427053140096617, | |
| "loss": 1.0071, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.0002040772946859903, | |
| "loss": 0.9929, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00020388405797101445, | |
| "loss": 1.051, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00020369082125603865, | |
| "loss": 0.9953, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.0002034975845410628, | |
| "loss": 1.0176, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00020330434782608693, | |
| "loss": 1.0107, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00020311111111111108, | |
| "loss": 1.0618, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00020291787439613527, | |
| "loss": 1.0147, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00020272463768115942, | |
| "loss": 0.9754, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00020253140096618356, | |
| "loss": 1.0381, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.0002023381642512077, | |
| "loss": 0.9645, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00020214492753623187, | |
| "loss": 0.9645, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00020195169082125604, | |
| "loss": 1.0371, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00020175845410628018, | |
| "loss": 1.0058, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00020156521739130432, | |
| "loss": 1.02, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_loss": 1.0592375993728638, | |
| "eval_runtime": 303.4125, | |
| "eval_samples_per_second": 6.592, | |
| "eval_steps_per_second": 0.824, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00020137198067632847, | |
| "loss": 1.0409, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00020117874396135266, | |
| "loss": 1.0193, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.0002009855072463768, | |
| "loss": 1.0169, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00020079227053140095, | |
| "loss": 1.0484, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.0002005990338164251, | |
| "loss": 0.9895, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00020040579710144926, | |
| "loss": 0.9858, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00020021256038647343, | |
| "loss": 0.9993, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00020001932367149757, | |
| "loss": 0.9755, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00019982608695652172, | |
| "loss": 1.0392, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00019963285024154586, | |
| "loss": 1.0123, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00019943961352657003, | |
| "loss": 1.0386, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.0001992463768115942, | |
| "loss": 1.0359, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00019905314009661834, | |
| "loss": 1.0003, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00019885990338164248, | |
| "loss": 0.9989, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00019866666666666665, | |
| "loss": 0.9515, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00019847342995169082, | |
| "loss": 1.0035, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00019828019323671497, | |
| "loss": 1.0219, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.0001980869565217391, | |
| "loss": 1.0096, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00019789371980676325, | |
| "loss": 1.0077, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00019770048309178742, | |
| "loss": 1.0298, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 1.0586228370666504, | |
| "eval_runtime": 302.8462, | |
| "eval_samples_per_second": 6.604, | |
| "eval_steps_per_second": 0.826, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.0001975072463768116, | |
| "loss": 1.0001, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00019731400966183573, | |
| "loss": 1.0465, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00019712077294685987, | |
| "loss": 1.0265, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00019692753623188404, | |
| "loss": 1.0148, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.0001967342995169082, | |
| "loss": 0.9763, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00019654106280193236, | |
| "loss": 0.9861, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.0001963478260869565, | |
| "loss": 0.9877, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00019615458937198067, | |
| "loss": 0.9489, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.0001959613526570048, | |
| "loss": 1.0694, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00019576811594202898, | |
| "loss": 1.0311, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00019557487922705312, | |
| "loss": 1.0494, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00019538164251207727, | |
| "loss": 0.9967, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00019518840579710144, | |
| "loss": 1.0278, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00019499516908212558, | |
| "loss": 1.0384, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00019480193236714975, | |
| "loss": 1.0051, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.0001946086956521739, | |
| "loss": 1.0387, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00019441545893719806, | |
| "loss": 1.0094, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.0001942222222222222, | |
| "loss": 1.0155, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00019402898550724637, | |
| "loss": 1.0104, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00019383574879227052, | |
| "loss": 1.0264, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 1.058441162109375, | |
| "eval_runtime": 301.1664, | |
| "eval_samples_per_second": 6.641, | |
| "eval_steps_per_second": 0.83, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00019364251207729466, | |
| "loss": 0.9823, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00019344927536231883, | |
| "loss": 0.9871, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00019325603864734297, | |
| "loss": 1.022, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00019306280193236714, | |
| "loss": 1.0128, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00019286956521739128, | |
| "loss": 1.0502, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00019267632850241545, | |
| "loss": 1.0427, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.0001924830917874396, | |
| "loss": 1.0088, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00019228985507246374, | |
| "loss": 1.0319, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.0001920966183574879, | |
| "loss": 1.02, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00019190338164251205, | |
| "loss": 1.0376, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00019171014492753622, | |
| "loss": 1.0233, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00019151690821256036, | |
| "loss": 1.0285, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00019132367149758453, | |
| "loss": 1.0579, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00019113043478260867, | |
| "loss": 1.0542, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00019093719806763284, | |
| "loss": 1.0427, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00019074396135265699, | |
| "loss": 0.9983, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00019055072463768113, | |
| "loss": 1.0145, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.0001903574879227053, | |
| "loss": 1.0471, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 0.00019016425120772947, | |
| "loss": 1.0436, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.0001899710144927536, | |
| "loss": 1.0312, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 1.056977391242981, | |
| "eval_runtime": 105.8988, | |
| "eval_samples_per_second": 18.886, | |
| "eval_steps_per_second": 2.361, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.00018977777777777775, | |
| "loss": 0.9989, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.00018958454106280192, | |
| "loss": 1.0022, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.00018939130434782606, | |
| "loss": 1.0295, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.00018919806763285023, | |
| "loss": 1.0644, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.00018900483091787438, | |
| "loss": 1.0238, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00018881159420289852, | |
| "loss": 1.06, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.0001886183574879227, | |
| "loss": 0.9779, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.00018842512077294686, | |
| "loss": 1.0072, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.000188231884057971, | |
| "loss": 1.0261, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.00018803864734299514, | |
| "loss": 1.0632, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.0001878454106280193, | |
| "loss": 0.9929, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 0.00018765217391304346, | |
| "loss": 0.9593, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.00018745893719806763, | |
| "loss": 1.051, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.00018726570048309177, | |
| "loss": 1.0117, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.0001870724637681159, | |
| "loss": 1.041, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.00018687922705314008, | |
| "loss": 0.9918, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.00018668599033816425, | |
| "loss": 1.0187, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.0001864927536231884, | |
| "loss": 1.0205, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.00018629951690821254, | |
| "loss": 1.0267, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.00018610628019323668, | |
| "loss": 1.0381, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 1.0564384460449219, | |
| "eval_runtime": 105.9283, | |
| "eval_samples_per_second": 18.881, | |
| "eval_steps_per_second": 2.36, | |
| "step": 6000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 15625, | |
| "num_train_epochs": 5, | |
| "save_steps": 200, | |
| "total_flos": 6.618622281779773e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |