| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.976312925460403, | |
| "global_step": 1870000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.973325366509464e-05, | |
| "loss": 5.9346, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.946650733018929e-05, | |
| "loss": 4.8171, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.919976099528393e-05, | |
| "loss": 4.378, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.893301466037857e-05, | |
| "loss": 4.0841, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.866626832547321e-05, | |
| "loss": 3.8807, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.839952199056785e-05, | |
| "loss": 3.7164, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.8132775655662495e-05, | |
| "loss": 3.584, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.786602932075713e-05, | |
| "loss": 3.4784, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.759928298585178e-05, | |
| "loss": 3.3842, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.733253665094642e-05, | |
| "loss": 3.3004, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.7065790316041056e-05, | |
| "loss": 3.2337, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.6799043981135706e-05, | |
| "loss": 3.1757, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.653229764623034e-05, | |
| "loss": 3.1198, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.626555131132499e-05, | |
| "loss": 3.0722, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.5998804976419624e-05, | |
| "loss": 3.0281, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.573205864151427e-05, | |
| "loss": 2.9954, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.546531230660891e-05, | |
| "loss": 2.9605, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.519856597170355e-05, | |
| "loss": 2.9286, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 4.493181963679819e-05, | |
| "loss": 2.8946, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 4.4665073301892835e-05, | |
| "loss": 2.8688, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 4.439832696698747e-05, | |
| "loss": 2.8424, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 4.4131580632082116e-05, | |
| "loss": 2.8178, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 4.386483429717676e-05, | |
| "loss": 2.7994, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.35980879622714e-05, | |
| "loss": 2.784, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.333134162736604e-05, | |
| "loss": 2.7612, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 4.3064595292460684e-05, | |
| "loss": 2.7435, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.279784895755533e-05, | |
| "loss": 2.7275, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 4.2531102622649964e-05, | |
| "loss": 2.7117, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 4.226435628774461e-05, | |
| "loss": 2.6908, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.199760995283925e-05, | |
| "loss": 2.6787, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 4.173086361793389e-05, | |
| "loss": 2.6626, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 4.146411728302853e-05, | |
| "loss": 2.6431, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 4.1197370948123176e-05, | |
| "loss": 2.6329, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 4.093062461321781e-05, | |
| "loss": 2.6195, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 4.066387827831246e-05, | |
| "loss": 2.6042, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 4.03971319434071e-05, | |
| "loss": 2.5967, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 4.0130385608501744e-05, | |
| "loss": 2.5843, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 3.986363927359638e-05, | |
| "loss": 2.5683, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.9596892938691025e-05, | |
| "loss": 2.5605, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.933014660378567e-05, | |
| "loss": 2.5515, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 3.9063400268880305e-05, | |
| "loss": 2.5406, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.879665393397495e-05, | |
| "loss": 2.5315, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 3.852990759906959e-05, | |
| "loss": 2.5272, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 3.826316126416423e-05, | |
| "loss": 2.5199, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.799641492925887e-05, | |
| "loss": 2.5061, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 3.772966859435352e-05, | |
| "loss": 2.5007, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 3.746292225944816e-05, | |
| "loss": 2.4908, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.71961759245428e-05, | |
| "loss": 2.4817, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.6929429589637434e-05, | |
| "loss": 2.4731, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.6662683254732085e-05, | |
| "loss": 2.4659, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 3.639593691982672e-05, | |
| "loss": 2.4596, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.6129190584921365e-05, | |
| "loss": 2.4506, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.586244425001601e-05, | |
| "loss": 2.4425, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 3.5595697915110646e-05, | |
| "loss": 2.4378, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 3.532895158020529e-05, | |
| "loss": 2.4301, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 3.506220524529993e-05, | |
| "loss": 2.4195, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 3.479545891039458e-05, | |
| "loss": 2.4108, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 3.4528712575489214e-05, | |
| "loss": 2.4003, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 3.426196624058385e-05, | |
| "loss": 2.3948, | |
| "step": 590000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 3.39952199056785e-05, | |
| "loss": 2.3911, | |
| "step": 600000 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 3.372847357077314e-05, | |
| "loss": 2.3805, | |
| "step": 610000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 3.346172723586778e-05, | |
| "loss": 2.3719, | |
| "step": 620000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 3.3194980900962426e-05, | |
| "loss": 2.3685, | |
| "step": 630000 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 3.292823456605706e-05, | |
| "loss": 2.3668, | |
| "step": 640000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 3.2661488231151706e-05, | |
| "loss": 2.3584, | |
| "step": 650000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 3.239474189624634e-05, | |
| "loss": 2.3538, | |
| "step": 660000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 3.212799556134099e-05, | |
| "loss": 2.3481, | |
| "step": 670000 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 3.186124922643563e-05, | |
| "loss": 2.3495, | |
| "step": 680000 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 3.159450289153027e-05, | |
| "loss": 2.3413, | |
| "step": 690000 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 3.132775655662492e-05, | |
| "loss": 2.3325, | |
| "step": 700000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 3.1061010221719555e-05, | |
| "loss": 2.3269, | |
| "step": 710000 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 3.07942638868142e-05, | |
| "loss": 2.3255, | |
| "step": 720000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 3.052751755190884e-05, | |
| "loss": 2.3168, | |
| "step": 730000 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 3.0260771217003482e-05, | |
| "loss": 2.3179, | |
| "step": 740000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 2.999402488209812e-05, | |
| "loss": 2.3098, | |
| "step": 750000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 2.972727854719276e-05, | |
| "loss": 2.2928, | |
| "step": 760000 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 2.9460532212287407e-05, | |
| "loss": 2.2911, | |
| "step": 770000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 2.9193785877382047e-05, | |
| "loss": 2.2906, | |
| "step": 780000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 2.8927039542476687e-05, | |
| "loss": 2.2824, | |
| "step": 790000 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 2.866029320757133e-05, | |
| "loss": 2.2766, | |
| "step": 800000 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 2.839354687266597e-05, | |
| "loss": 2.2722, | |
| "step": 810000 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 2.812680053776061e-05, | |
| "loss": 2.2665, | |
| "step": 820000 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 2.786005420285525e-05, | |
| "loss": 2.259, | |
| "step": 830000 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 2.7593307867949895e-05, | |
| "loss": 2.2584, | |
| "step": 840000 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 2.7326561533044536e-05, | |
| "loss": 2.2524, | |
| "step": 850000 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 2.7059815198139176e-05, | |
| "loss": 2.2536, | |
| "step": 860000 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 2.6793068863233823e-05, | |
| "loss": 2.2446, | |
| "step": 870000 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 2.6526322528328463e-05, | |
| "loss": 2.2439, | |
| "step": 880000 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 2.6259576193423104e-05, | |
| "loss": 2.2389, | |
| "step": 890000 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 2.5992829858517747e-05, | |
| "loss": 2.2362, | |
| "step": 900000 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 2.5726083523612388e-05, | |
| "loss": 2.2313, | |
| "step": 910000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 2.5459337188707028e-05, | |
| "loss": 2.2283, | |
| "step": 920000 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 2.5192590853801668e-05, | |
| "loss": 2.2224, | |
| "step": 930000 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 2.4925844518896312e-05, | |
| "loss": 2.2175, | |
| "step": 940000 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.4659098183990952e-05, | |
| "loss": 2.2067, | |
| "step": 950000 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 2.4392351849085596e-05, | |
| "loss": 2.2029, | |
| "step": 960000 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 2.4125605514180236e-05, | |
| "loss": 2.2014, | |
| "step": 970000 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 2.385885917927488e-05, | |
| "loss": 2.2002, | |
| "step": 980000 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 2.3592112844369517e-05, | |
| "loss": 2.195, | |
| "step": 990000 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 2.332536650946416e-05, | |
| "loss": 2.1898, | |
| "step": 1000000 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 2.3058620174558804e-05, | |
| "loss": 2.1864, | |
| "step": 1010000 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 2.2791873839653444e-05, | |
| "loss": 2.1833, | |
| "step": 1020000 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 2.2525127504748088e-05, | |
| "loss": 2.1799, | |
| "step": 1030000 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 2.2258381169842725e-05, | |
| "loss": 2.1785, | |
| "step": 1040000 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 2.199163483493737e-05, | |
| "loss": 2.1756, | |
| "step": 1050000 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 2.1724888500032012e-05, | |
| "loss": 2.1726, | |
| "step": 1060000 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 2.1458142165126653e-05, | |
| "loss": 2.1664, | |
| "step": 1070000 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 2.1191395830221293e-05, | |
| "loss": 2.1651, | |
| "step": 1080000 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.0924649495315933e-05, | |
| "loss": 2.1577, | |
| "step": 1090000 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 2.0657903160410577e-05, | |
| "loss": 2.1472, | |
| "step": 1100000 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 2.039115682550522e-05, | |
| "loss": 2.1458, | |
| "step": 1110000 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 2.012441049059986e-05, | |
| "loss": 2.1432, | |
| "step": 1120000 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 1.98576641556945e-05, | |
| "loss": 2.134, | |
| "step": 1130000 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 1.959091782078914e-05, | |
| "loss": 2.1337, | |
| "step": 1140000 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 1.9324171485883785e-05, | |
| "loss": 2.1337, | |
| "step": 1150000 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 1.9057425150978425e-05, | |
| "loss": 2.128, | |
| "step": 1160000 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 1.879067881607307e-05, | |
| "loss": 2.1262, | |
| "step": 1170000 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 1.852393248116771e-05, | |
| "loss": 2.1209, | |
| "step": 1180000 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 1.825718614626235e-05, | |
| "loss": 2.1178, | |
| "step": 1190000 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 1.7990439811356993e-05, | |
| "loss": 2.1179, | |
| "step": 1200000 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 1.7723693476451634e-05, | |
| "loss": 2.1123, | |
| "step": 1210000 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 1.7456947141546277e-05, | |
| "loss": 2.1098, | |
| "step": 1220000 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 1.7190200806640918e-05, | |
| "loss": 2.1115, | |
| "step": 1230000 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 1.6923454471735558e-05, | |
| "loss": 2.1068, | |
| "step": 1240000 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 1.66567081368302e-05, | |
| "loss": 2.1002, | |
| "step": 1250000 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 1.6389961801924842e-05, | |
| "loss": 2.0962, | |
| "step": 1260000 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 1.6123215467019486e-05, | |
| "loss": 2.0961, | |
| "step": 1270000 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 1.5856469132114126e-05, | |
| "loss": 2.0911, | |
| "step": 1280000 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 1.5589722797208766e-05, | |
| "loss": 2.0916, | |
| "step": 1290000 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 1.532297646230341e-05, | |
| "loss": 2.0854, | |
| "step": 1300000 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 1.505623012739805e-05, | |
| "loss": 2.0846, | |
| "step": 1310000 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 1.4789483792492692e-05, | |
| "loss": 2.072, | |
| "step": 1320000 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 1.4522737457587332e-05, | |
| "loss": 2.0691, | |
| "step": 1330000 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 1.4255991122681974e-05, | |
| "loss": 2.072, | |
| "step": 1340000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.3989244787776618e-05, | |
| "loss": 2.0669, | |
| "step": 1350000 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 1.3722498452871257e-05, | |
| "loss": 2.0646, | |
| "step": 1360000 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.34557521179659e-05, | |
| "loss": 2.0573, | |
| "step": 1370000 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 1.318900578306054e-05, | |
| "loss": 2.0487, | |
| "step": 1380000 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 1.2922259448155183e-05, | |
| "loss": 2.0529, | |
| "step": 1390000 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 1.2655513113249826e-05, | |
| "loss": 2.0468, | |
| "step": 1400000 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 1.2388766778344465e-05, | |
| "loss": 2.0455, | |
| "step": 1410000 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 1.2122020443439109e-05, | |
| "loss": 2.046, | |
| "step": 1420000 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 1.1855274108533749e-05, | |
| "loss": 2.0398, | |
| "step": 1430000 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 1.158852777362839e-05, | |
| "loss": 2.0391, | |
| "step": 1440000 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 1.1321781438723033e-05, | |
| "loss": 2.0345, | |
| "step": 1450000 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 1.1055035103817673e-05, | |
| "loss": 2.038, | |
| "step": 1460000 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 1.0788288768912317e-05, | |
| "loss": 2.032, | |
| "step": 1470000 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 1.0521542434006957e-05, | |
| "loss": 2.0271, | |
| "step": 1480000 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "learning_rate": 1.0254796099101599e-05, | |
| "loss": 2.0261, | |
| "step": 1490000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 9.988049764196241e-06, | |
| "loss": 2.0235, | |
| "step": 1500000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 9.721303429290881e-06, | |
| "loss": 2.0173, | |
| "step": 1510000 | |
| }, | |
| { | |
| "epoch": 8.11, | |
| "learning_rate": 9.454557094385523e-06, | |
| "loss": 2.0178, | |
| "step": 1520000 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 9.187810759480165e-06, | |
| "loss": 2.0116, | |
| "step": 1530000 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 8.921064424574807e-06, | |
| "loss": 2.0099, | |
| "step": 1540000 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 8.654318089669448e-06, | |
| "loss": 2.0105, | |
| "step": 1550000 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 8.38757175476409e-06, | |
| "loss": 2.0033, | |
| "step": 1560000 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 8.120825419858732e-06, | |
| "loss": 2.0033, | |
| "step": 1570000 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "learning_rate": 7.854079084953374e-06, | |
| "loss": 2.0, | |
| "step": 1580000 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 7.5873327500480155e-06, | |
| "loss": 1.9954, | |
| "step": 1590000 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 7.320586415142657e-06, | |
| "loss": 1.9964, | |
| "step": 1600000 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "learning_rate": 7.053840080237298e-06, | |
| "loss": 1.9995, | |
| "step": 1610000 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 6.787093745331939e-06, | |
| "loss": 1.9878, | |
| "step": 1620000 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 6.52034741042658e-06, | |
| "loss": 1.988, | |
| "step": 1630000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 6.253601075521223e-06, | |
| "loss": 1.9861, | |
| "step": 1640000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 5.986854740615865e-06, | |
| "loss": 1.9828, | |
| "step": 1650000 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 5.720108405710506e-06, | |
| "loss": 1.9787, | |
| "step": 1660000 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "learning_rate": 5.453362070805147e-06, | |
| "loss": 1.9794, | |
| "step": 1670000 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "learning_rate": 5.186615735899789e-06, | |
| "loss": 1.9756, | |
| "step": 1680000 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 4.91986940099443e-06, | |
| "loss": 1.9756, | |
| "step": 1690000 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 4.653123066089072e-06, | |
| "loss": 1.9676, | |
| "step": 1700000 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 4.386376731183714e-06, | |
| "loss": 1.967, | |
| "step": 1710000 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 4.119630396278355e-06, | |
| "loss": 1.9656, | |
| "step": 1720000 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 3.8528840613729966e-06, | |
| "loss": 1.9627, | |
| "step": 1730000 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "learning_rate": 3.5861377264676385e-06, | |
| "loss": 1.964, | |
| "step": 1740000 | |
| }, | |
| { | |
| "epoch": 9.34, | |
| "learning_rate": 3.31939139156228e-06, | |
| "loss": 1.9646, | |
| "step": 1750000 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 3.0526450566569217e-06, | |
| "loss": 1.9591, | |
| "step": 1760000 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "learning_rate": 2.7858987217515632e-06, | |
| "loss": 1.9623, | |
| "step": 1770000 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 2.519152386846205e-06, | |
| "loss": 1.9544, | |
| "step": 1780000 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "learning_rate": 2.2524060519408464e-06, | |
| "loss": 1.9572, | |
| "step": 1790000 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 1.985659717035488e-06, | |
| "loss": 1.9545, | |
| "step": 1800000 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "learning_rate": 1.7189133821301297e-06, | |
| "loss": 1.953, | |
| "step": 1810000 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 1.4521670472247713e-06, | |
| "loss": 1.9523, | |
| "step": 1820000 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "learning_rate": 1.1854207123194128e-06, | |
| "loss": 1.9528, | |
| "step": 1830000 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "learning_rate": 9.186743774140545e-07, | |
| "loss": 1.9482, | |
| "step": 1840000 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 6.519280425086959e-07, | |
| "loss": 1.9511, | |
| "step": 1850000 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "learning_rate": 3.8518170760333756e-07, | |
| "loss": 1.947, | |
| "step": 1860000 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "learning_rate": 1.1843537269797913e-07, | |
| "loss": 1.9437, | |
| "step": 1870000 | |
| } | |
| ], | |
| "max_steps": 1874440, | |
| "num_train_epochs": 10, | |
| "total_flos": 9.135177187546945e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |