| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.5460003882669427, | |
| "global_step": 22500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 6.06550748079256e-07, | |
| "loss": 7.5852, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.213101496158512e-06, | |
| "loss": 7.1855, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.819652244237768e-06, | |
| "loss": 6.5439, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.426202992317024e-06, | |
| "loss": 6.1467, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.03275374039628e-06, | |
| "loss": 5.905, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 3.639304488475536e-06, | |
| "loss": 5.3489, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.2458552365547915e-06, | |
| "loss": 4.6596, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.852405984634048e-06, | |
| "loss": 4.455, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 5.458956732713303e-06, | |
| "loss": 4.2972, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6.06550748079256e-06, | |
| "loss": 4.199, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6.672058228871817e-06, | |
| "loss": 4.121, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 7.278608976951072e-06, | |
| "loss": 4.0656, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 7.885159725030328e-06, | |
| "loss": 4.0166, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.491710473109583e-06, | |
| "loss": 3.9316, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.09826122118884e-06, | |
| "loss": 3.9286, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 9.704811969268096e-06, | |
| "loss": 3.8572, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.0311362717347352e-05, | |
| "loss": 3.8367, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.0917913465426607e-05, | |
| "loss": 3.8374, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.1524464213505865e-05, | |
| "loss": 3.7934, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.213101496158512e-05, | |
| "loss": 3.7839, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.2737565709664375e-05, | |
| "loss": 3.7421, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.3344116457743634e-05, | |
| "loss": 3.728, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.3950667205822887e-05, | |
| "loss": 3.7362, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.4557217953902144e-05, | |
| "loss": 3.6775, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.51637687019814e-05, | |
| "loss": 3.6752, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.5770319450060656e-05, | |
| "loss": 3.667, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.6376870198139912e-05, | |
| "loss": 3.6275, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.6983420946219166e-05, | |
| "loss": 3.628, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.7589971694298423e-05, | |
| "loss": 3.61, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.819652244237768e-05, | |
| "loss": 3.5868, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.8803073190456936e-05, | |
| "loss": 3.5831, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.9409623938536193e-05, | |
| "loss": 3.5523, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.0016174686615446e-05, | |
| "loss": 3.5191, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.0622725434694703e-05, | |
| "loss": 3.556, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.122927618277396e-05, | |
| "loss": 3.5289, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.1835826930853213e-05, | |
| "loss": 3.4989, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.2442377678932473e-05, | |
| "loss": 3.5054, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.304892842701173e-05, | |
| "loss": 3.4637, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.3655479175090983e-05, | |
| "loss": 3.4803, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.426202992317024e-05, | |
| "loss": 3.4702, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.4868580671249494e-05, | |
| "loss": 3.4505, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.547513141932875e-05, | |
| "loss": 3.4357, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.6081682167408007e-05, | |
| "loss": 3.4247, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.6688232915487267e-05, | |
| "loss": 3.3992, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.729478366356652e-05, | |
| "loss": 3.393, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.7901334411645774e-05, | |
| "loss": 3.3824, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.8507885159725034e-05, | |
| "loss": 3.3798, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.9114435907804288e-05, | |
| "loss": 3.3817, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.972098665588354e-05, | |
| "loss": 3.3661, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.03275374039628e-05, | |
| "loss": 3.3417, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.0934088152042055e-05, | |
| "loss": 3.3432, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.154063890012131e-05, | |
| "loss": 3.3277, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.214718964820057e-05, | |
| "loss": 3.3205, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.2753740396279825e-05, | |
| "loss": 3.3078, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.336029114435908e-05, | |
| "loss": 3.2985, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.396684189243833e-05, | |
| "loss": 3.2822, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.4573392640517595e-05, | |
| "loss": 3.2815, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.5179943388596845e-05, | |
| "loss": 3.2864, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.57864941366761e-05, | |
| "loss": 3.2787, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.639304488475536e-05, | |
| "loss": 3.266, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 3.6999595632834615e-05, | |
| "loss": 3.2375, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.760614638091387e-05, | |
| "loss": 3.2608, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.821269712899313e-05, | |
| "loss": 3.2287, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.8819247877072386e-05, | |
| "loss": 3.224, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 3.9425798625151636e-05, | |
| "loss": 3.2322, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.003234937323089e-05, | |
| "loss": 3.217, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.0638900121310156e-05, | |
| "loss": 3.2323, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.1245450869389406e-05, | |
| "loss": 3.2, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.185200161746866e-05, | |
| "loss": 3.1984, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.245855236554792e-05, | |
| "loss": 3.1852, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.3065103113627176e-05, | |
| "loss": 3.1697, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.3671653861706426e-05, | |
| "loss": 3.1615, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.427820460978569e-05, | |
| "loss": 3.1719, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.488475535786495e-05, | |
| "loss": 3.1612, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.54913061059442e-05, | |
| "loss": 3.1602, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.609785685402346e-05, | |
| "loss": 3.1535, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.670440760210271e-05, | |
| "loss": 3.1247, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.731095835018197e-05, | |
| "loss": 3.1389, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.7917509098261224e-05, | |
| "loss": 3.1345, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.852405984634048e-05, | |
| "loss": 3.1319, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.913061059441974e-05, | |
| "loss": 3.1201, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.973716134249899e-05, | |
| "loss": 3.1161, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9978056021685813e-05, | |
| "loss": 3.117, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.993933135407255e-05, | |
| "loss": 3.101, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9900606686459276e-05, | |
| "loss": 3.1061, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9861882018846004e-05, | |
| "loss": 3.1054, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.982315735123274e-05, | |
| "loss": 3.0931, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9784432683619466e-05, | |
| "loss": 3.089, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.97457080160062e-05, | |
| "loss": 3.0828, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.970698334839293e-05, | |
| "loss": 3.0736, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9668258680779664e-05, | |
| "loss": 3.0454, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.962953401316639e-05, | |
| "loss": 3.0586, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.959080934555312e-05, | |
| "loss": 3.0508, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.955208467793985e-05, | |
| "loss": 3.0555, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.951336001032658e-05, | |
| "loss": 3.0389, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.947463534271331e-05, | |
| "loss": 3.036, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.943591067510004e-05, | |
| "loss": 3.0312, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.939718600748677e-05, | |
| "loss": 3.021, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.93584613398735e-05, | |
| "loss": 3.0294, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.931973667226023e-05, | |
| "loss": 3.0288, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.928101200464696e-05, | |
| "loss": 3.0096, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.924228733703369e-05, | |
| "loss": 3.0038, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9203562669420425e-05, | |
| "loss": 3.0196, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.916483800180715e-05, | |
| "loss": 2.9956, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.912611333419389e-05, | |
| "loss": 2.9993, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9087388666580615e-05, | |
| "loss": 3.0124, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.904866399896734e-05, | |
| "loss": 2.9948, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.900993933135408e-05, | |
| "loss": 2.986, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.8971214663740806e-05, | |
| "loss": 2.9872, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.8932489996127534e-05, | |
| "loss": 2.9754, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.889376532851427e-05, | |
| "loss": 2.9838, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.8855040660900996e-05, | |
| "loss": 2.973, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.8816315993287724e-05, | |
| "loss": 2.9608, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.877759132567445e-05, | |
| "loss": 2.9573, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.8738866658061186e-05, | |
| "loss": 2.9465, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.8700141990447914e-05, | |
| "loss": 2.9434, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.866141732283465e-05, | |
| "loss": 2.9662, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.862269265522138e-05, | |
| "loss": 2.9296, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.858396798760811e-05, | |
| "loss": 2.9414, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.854524331999484e-05, | |
| "loss": 2.942, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.8506518652381574e-05, | |
| "loss": 2.9477, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.84677939847683e-05, | |
| "loss": 2.9388, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.842906931715503e-05, | |
| "loss": 2.9418, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.839034464954176e-05, | |
| "loss": 2.9214, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.835161998192849e-05, | |
| "loss": 2.9278, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.831289531431522e-05, | |
| "loss": 2.9257, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.827417064670195e-05, | |
| "loss": 2.9263, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.823544597908868e-05, | |
| "loss": 2.9098, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.819672131147541e-05, | |
| "loss": 2.9309, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.815799664386214e-05, | |
| "loss": 2.8921, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.811927197624887e-05, | |
| "loss": 2.9042, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.80805473086356e-05, | |
| "loss": 2.9063, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.8041822641022335e-05, | |
| "loss": 2.9021, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.800309797340906e-05, | |
| "loss": 2.8911, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.79643733057958e-05, | |
| "loss": 2.8967, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.7925648638182526e-05, | |
| "loss": 2.8968, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.7886923970569254e-05, | |
| "loss": 2.8985, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.784819930295599e-05, | |
| "loss": 2.887, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.7809474635342716e-05, | |
| "loss": 2.8874, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.7770749967729444e-05, | |
| "loss": 2.8896, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.773202530011618e-05, | |
| "loss": 2.8767, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.7693300632502906e-05, | |
| "loss": 2.8806, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.7654575964889634e-05, | |
| "loss": 2.8712, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.761585129727636e-05, | |
| "loss": 2.8749, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.75771266296631e-05, | |
| "loss": 2.865, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.7538401962049825e-05, | |
| "loss": 2.8752, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.749967729443656e-05, | |
| "loss": 2.8796, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.7460952626823294e-05, | |
| "loss": 2.8637, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.742222795921002e-05, | |
| "loss": 2.8427, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.738350329159675e-05, | |
| "loss": 2.8585, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.7344778623983484e-05, | |
| "loss": 2.8607, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.730605395637021e-05, | |
| "loss": 2.8537, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.726732928875694e-05, | |
| "loss": 2.8622, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.722860462114367e-05, | |
| "loss": 2.8422, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.71898799535304e-05, | |
| "loss": 2.8539, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.715115528591713e-05, | |
| "loss": 2.8447, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.711243061830386e-05, | |
| "loss": 2.8497, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.707370595069059e-05, | |
| "loss": 2.8325, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.703498128307732e-05, | |
| "loss": 2.8413, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.699625661546405e-05, | |
| "loss": 2.8426, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.695753194785078e-05, | |
| "loss": 2.8336, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.691880728023751e-05, | |
| "loss": 2.8403, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.6880082612624246e-05, | |
| "loss": 2.8278, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.6841357945010974e-05, | |
| "loss": 2.8372, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.680263327739771e-05, | |
| "loss": 2.8275, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.6763908609784436e-05, | |
| "loss": 2.8341, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.6725183942171164e-05, | |
| "loss": 2.8186, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.66864592745579e-05, | |
| "loss": 2.8227, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.6647734606944627e-05, | |
| "loss": 2.8248, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.6609009939331354e-05, | |
| "loss": 2.8243, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.657028527171809e-05, | |
| "loss": 2.8275, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.653156060410482e-05, | |
| "loss": 2.8164, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.6492835936491545e-05, | |
| "loss": 2.8165, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.645411126887827e-05, | |
| "loss": 2.8175, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.641538660126501e-05, | |
| "loss": 2.8092, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.6376661933651735e-05, | |
| "loss": 2.8247, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.633793726603847e-05, | |
| "loss": 2.8027, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.6299212598425204e-05, | |
| "loss": 2.8081, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.626048793081193e-05, | |
| "loss": 2.8027, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.622176326319866e-05, | |
| "loss": 2.7845, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.6183038595585395e-05, | |
| "loss": 2.7918, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.614431392797212e-05, | |
| "loss": 2.7942, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.610558926035885e-05, | |
| "loss": 2.7948, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.606686459274558e-05, | |
| "loss": 2.7853, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.602813992513231e-05, | |
| "loss": 2.7906, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.598941525751904e-05, | |
| "loss": 2.7894, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.595069058990577e-05, | |
| "loss": 2.8041, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.59119659222925e-05, | |
| "loss": 2.7877, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.587324125467923e-05, | |
| "loss": 2.7811, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.583451658706596e-05, | |
| "loss": 2.7871, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.5795791919452694e-05, | |
| "loss": 2.7673, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.575706725183942e-05, | |
| "loss": 2.7578, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.5718342584226156e-05, | |
| "loss": 2.7671, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.5679617916612884e-05, | |
| "loss": 2.7746, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.564089324899962e-05, | |
| "loss": 2.7802, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.560216858138635e-05, | |
| "loss": 2.771, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.5563443913773074e-05, | |
| "loss": 2.769, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.552471924615981e-05, | |
| "loss": 2.7674, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.548599457854654e-05, | |
| "loss": 2.7661, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.5447269910933265e-05, | |
| "loss": 2.7681, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.540854524331999e-05, | |
| "loss": 2.764, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.536982057570673e-05, | |
| "loss": 2.7641, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.5331095908093455e-05, | |
| "loss": 2.7679, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.529237124048018e-05, | |
| "loss": 2.7547, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.525364657286692e-05, | |
| "loss": 2.7507, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.5214921905253646e-05, | |
| "loss": 2.7622, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.517619723764038e-05, | |
| "loss": 2.7546, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.5137472570027115e-05, | |
| "loss": 2.7469, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.509874790241384e-05, | |
| "loss": 2.7413, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.506002323480057e-05, | |
| "loss": 2.7509, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.50212985671873e-05, | |
| "loss": 2.752, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.498257389957403e-05, | |
| "loss": 2.733, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.494384923196076e-05, | |
| "loss": 2.7405, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.490512456434749e-05, | |
| "loss": 2.744, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.4866399896734223e-05, | |
| "loss": 2.733, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.482767522912095e-05, | |
| "loss": 2.7521, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.478895056150768e-05, | |
| "loss": 2.7394, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.4750225893894414e-05, | |
| "loss": 2.7476, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.471150122628114e-05, | |
| "loss": 2.7389, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.467277655866787e-05, | |
| "loss": 2.7369, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.4634051891054604e-05, | |
| "loss": 2.7177, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.459532722344134e-05, | |
| "loss": 2.7319, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.455660255582807e-05, | |
| "loss": 2.7276, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.4517877888214795e-05, | |
| "loss": 2.7168, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.447915322060153e-05, | |
| "loss": 2.7201, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.444042855298826e-05, | |
| "loss": 2.7164, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.4401703885374985e-05, | |
| "loss": 2.7289, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.436297921776172e-05, | |
| "loss": 2.7345, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.432425455014845e-05, | |
| "loss": 2.729, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.4285529882535175e-05, | |
| "loss": 2.7202, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.42468052149219e-05, | |
| "loss": 2.7348, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.420808054730864e-05, | |
| "loss": 2.7134, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.4169355879695366e-05, | |
| "loss": 2.7259, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.4130631212082094e-05, | |
| "loss": 2.7068, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.409190654446883e-05, | |
| "loss": 2.7211, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.4053181876855556e-05, | |
| "loss": 2.707, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.401445720924229e-05, | |
| "loss": 2.7249, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.3975732541629025e-05, | |
| "loss": 2.7232, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.393700787401575e-05, | |
| "loss": 2.7047, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.389828320640248e-05, | |
| "loss": 2.6984, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.385955853878921e-05, | |
| "loss": 2.7221, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.3820833871175944e-05, | |
| "loss": 2.6858, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.378210920356267e-05, | |
| "loss": 2.7026, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.37433845359494e-05, | |
| "loss": 2.7008, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.3704659868336134e-05, | |
| "loss": 2.7112, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.366593520072286e-05, | |
| "loss": 2.6968, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.362721053310959e-05, | |
| "loss": 2.7087, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.3588485865496324e-05, | |
| "loss": 2.7107, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.354976119788305e-05, | |
| "loss": 2.7, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.351103653026978e-05, | |
| "loss": 2.6956, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.3472311862656515e-05, | |
| "loss": 2.6889, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.343358719504325e-05, | |
| "loss": 2.6964, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.339486252742998e-05, | |
| "loss": 2.6893, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.3356137859816705e-05, | |
| "loss": 2.6989, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.331741319220344e-05, | |
| "loss": 2.6747, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.327868852459017e-05, | |
| "loss": 2.7004, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.3239963856976895e-05, | |
| "loss": 2.6988, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.320123918936363e-05, | |
| "loss": 2.6916, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.316251452175036e-05, | |
| "loss": 2.6973, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.3123789854137086e-05, | |
| "loss": 2.6985, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.3085065186523814e-05, | |
| "loss": 2.6873, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.304634051891055e-05, | |
| "loss": 2.6937, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.3007615851297276e-05, | |
| "loss": 2.6973, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.2968891183684004e-05, | |
| "loss": 2.6816, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.293016651607074e-05, | |
| "loss": 2.6817, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.289144184845747e-05, | |
| "loss": 2.6886, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.28527171808442e-05, | |
| "loss": 2.6819, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.2813992513230936e-05, | |
| "loss": 2.662, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.2775267845617664e-05, | |
| "loss": 2.668, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.273654317800439e-05, | |
| "loss": 2.6783, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.269781851039112e-05, | |
| "loss": 2.6977, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.2659093842777854e-05, | |
| "loss": 2.6692, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.262036917516458e-05, | |
| "loss": 2.6784, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.258164450755131e-05, | |
| "loss": 2.6846, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.2542919839938044e-05, | |
| "loss": 2.6878, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.250419517232477e-05, | |
| "loss": 2.6694, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.24654705047115e-05, | |
| "loss": 2.6651, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.2426745837098235e-05, | |
| "loss": 2.6706, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.238802116948496e-05, | |
| "loss": 2.6724, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.234929650187169e-05, | |
| "loss": 2.6831, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.2310571834258425e-05, | |
| "loss": 2.676, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.227184716664516e-05, | |
| "loss": 2.6632, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.223312249903189e-05, | |
| "loss": 2.6607, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.2194397831418615e-05, | |
| "loss": 2.6798, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.215567316380535e-05, | |
| "loss": 2.6583, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.211694849619208e-05, | |
| "loss": 2.6464, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.2078223828578806e-05, | |
| "loss": 2.661, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.203949916096554e-05, | |
| "loss": 2.6619, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.200077449335227e-05, | |
| "loss": 2.6633, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.1962049825738996e-05, | |
| "loss": 2.6683, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.1923325158125724e-05, | |
| "loss": 2.6621, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.188460049051246e-05, | |
| "loss": 2.65, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.1845875822899186e-05, | |
| "loss": 2.6415, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.1807151155285914e-05, | |
| "loss": 2.6554, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.176842648767265e-05, | |
| "loss": 2.6508, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.1729701820059384e-05, | |
| "loss": 2.65, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.169097715244611e-05, | |
| "loss": 2.6507, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.1652252484832846e-05, | |
| "loss": 2.654, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.1613527817219574e-05, | |
| "loss": 2.6414, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.15748031496063e-05, | |
| "loss": 2.6386, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.153607848199303e-05, | |
| "loss": 2.6563, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.1497353814379764e-05, | |
| "loss": 2.6429, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.145862914676649e-05, | |
| "loss": 2.6567, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.141990447915322e-05, | |
| "loss": 2.6396, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.1381179811539955e-05, | |
| "loss": 2.6459, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.134245514392668e-05, | |
| "loss": 2.6453, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.130373047631341e-05, | |
| "loss": 2.6317, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.126500580870014e-05, | |
| "loss": 2.6345, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.122628114108687e-05, | |
| "loss": 2.6435, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.11875564734736e-05, | |
| "loss": 2.6366, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.1148831805860335e-05, | |
| "loss": 2.6281, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.111010713824707e-05, | |
| "loss": 2.6359, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.10713824706338e-05, | |
| "loss": 2.6382, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.1032657803020526e-05, | |
| "loss": 2.6411, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.099393313540726e-05, | |
| "loss": 2.6449, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.095520846779399e-05, | |
| "loss": 2.6463, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.0916483800180716e-05, | |
| "loss": 2.6345, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.0877759132567444e-05, | |
| "loss": 2.6435, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.083903446495418e-05, | |
| "loss": 2.625, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.0800309797340907e-05, | |
| "loss": 2.6263, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.0761585129727634e-05, | |
| "loss": 2.6299, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.072286046211437e-05, | |
| "loss": 2.6401, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.06841357945011e-05, | |
| "loss": 2.6187, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.0645411126887825e-05, | |
| "loss": 2.6353, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.060668645927456e-05, | |
| "loss": 2.6237, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.0567961791661294e-05, | |
| "loss": 2.63, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.052923712404802e-05, | |
| "loss": 2.628, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.049051245643475e-05, | |
| "loss": 2.6154, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.0451787788821484e-05, | |
| "loss": 2.6295, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.041306312120821e-05, | |
| "loss": 2.6272, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.037433845359494e-05, | |
| "loss": 2.6073, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.0335613785981675e-05, | |
| "loss": 2.6157, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.02968891183684e-05, | |
| "loss": 2.618, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.025816445075513e-05, | |
| "loss": 2.6201, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.0219439783141865e-05, | |
| "loss": 2.6273, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.018071511552859e-05, | |
| "loss": 2.6223, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.014199044791532e-05, | |
| "loss": 2.6214, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.010326578030205e-05, | |
| "loss": 2.625, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.0064541112688783e-05, | |
| "loss": 2.6146, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.002581644507552e-05, | |
| "loss": 2.6207, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.9987091777462246e-05, | |
| "loss": 2.6252, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.994836710984898e-05, | |
| "loss": 2.6106, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.990964244223571e-05, | |
| "loss": 2.6055, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.9870917774622436e-05, | |
| "loss": 2.5988, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.983219310700917e-05, | |
| "loss": 2.6214, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.97934684393959e-05, | |
| "loss": 2.6146, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.975474377178263e-05, | |
| "loss": 2.5985, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.9716019104169354e-05, | |
| "loss": 2.5994, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.967729443655609e-05, | |
| "loss": 2.5999, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.963856976894282e-05, | |
| "loss": 2.6035, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.9599845101329545e-05, | |
| "loss": 2.5996, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.956112043371628e-05, | |
| "loss": 2.6093, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.952239576610301e-05, | |
| "loss": 2.615, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.9483671098489735e-05, | |
| "loss": 2.6139, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.944494643087647e-05, | |
| "loss": 2.6011, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.9406221763263204e-05, | |
| "loss": 2.6035, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.936749709564993e-05, | |
| "loss": 2.6115, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.932877242803666e-05, | |
| "loss": 2.6012, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.9290047760423395e-05, | |
| "loss": 2.6059, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.925132309281012e-05, | |
| "loss": 2.6058, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.921259842519685e-05, | |
| "loss": 2.6077, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.9173873757583585e-05, | |
| "loss": 2.5925, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.913514908997031e-05, | |
| "loss": 2.605, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.909642442235704e-05, | |
| "loss": 2.5989, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.9057699754743776e-05, | |
| "loss": 2.5972, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.9018975087130503e-05, | |
| "loss": 2.6013, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.898025041951723e-05, | |
| "loss": 2.5948, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.894152575190396e-05, | |
| "loss": 2.5913, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.8902801084290694e-05, | |
| "loss": 2.5851, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.886407641667743e-05, | |
| "loss": 2.6033, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.8825351749064156e-05, | |
| "loss": 2.5987, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.878662708145089e-05, | |
| "loss": 2.6092, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.874790241383762e-05, | |
| "loss": 2.5909, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.870917774622435e-05, | |
| "loss": 2.606, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.867045307861108e-05, | |
| "loss": 2.5958, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.863172841099781e-05, | |
| "loss": 2.5806, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.859300374338454e-05, | |
| "loss": 2.5957, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.8554279075771265e-05, | |
| "loss": 2.595, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.8515554408158e-05, | |
| "loss": 2.5892, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.847682974054473e-05, | |
| "loss": 2.6007, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.8438105072931455e-05, | |
| "loss": 2.5844, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.839938040531819e-05, | |
| "loss": 2.5834, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.836065573770492e-05, | |
| "loss": 2.5869, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.8321931070091646e-05, | |
| "loss": 2.583, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.828320640247838e-05, | |
| "loss": 2.5799, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.8244481734865115e-05, | |
| "loss": 2.5773, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.820575706725184e-05, | |
| "loss": 2.5865, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.816703239963857e-05, | |
| "loss": 2.5678, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.8128307732025305e-05, | |
| "loss": 2.5954, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.808958306441203e-05, | |
| "loss": 2.5796, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.805085839679876e-05, | |
| "loss": 2.5767, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.8012133729185496e-05, | |
| "loss": 2.5763, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.7973409061572224e-05, | |
| "loss": 2.587, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.793468439395895e-05, | |
| "loss": 2.5754, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.7895959726345686e-05, | |
| "loss": 2.5904, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.7857235058732414e-05, | |
| "loss": 2.5823, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.781851039111914e-05, | |
| "loss": 2.592, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.777978572350587e-05, | |
| "loss": 2.5739, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.7741061055892604e-05, | |
| "loss": 2.5726, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.770233638827934e-05, | |
| "loss": 2.5922, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.766361172066607e-05, | |
| "loss": 2.5587, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.76248870530528e-05, | |
| "loss": 2.5723, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.758616238543953e-05, | |
| "loss": 2.5676, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.754743771782626e-05, | |
| "loss": 2.5887, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.750871305021299e-05, | |
| "loss": 2.5664, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.746998838259972e-05, | |
| "loss": 2.5782, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.743126371498645e-05, | |
| "loss": 2.5689, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.7392539047373175e-05, | |
| "loss": 2.564, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.735381437975991e-05, | |
| "loss": 2.5686, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.731508971214664e-05, | |
| "loss": 2.566, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.7276365044533366e-05, | |
| "loss": 2.5771, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.72376403769201e-05, | |
| "loss": 2.569, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.719891570930683e-05, | |
| "loss": 2.5695, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.716019104169356e-05, | |
| "loss": 2.5705, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.712146637408029e-05, | |
| "loss": 2.5529, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.7082741706467025e-05, | |
| "loss": 2.5758, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.704401703885375e-05, | |
| "loss": 2.5682, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.700529237124048e-05, | |
| "loss": 2.5726, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.6966567703627216e-05, | |
| "loss": 2.5614, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.6927843036013944e-05, | |
| "loss": 2.5694, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.688911836840067e-05, | |
| "loss": 2.5645, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.6850393700787406e-05, | |
| "loss": 2.5693, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.6811669033174134e-05, | |
| "loss": 2.5516, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.677294436556086e-05, | |
| "loss": 2.5659, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.673421969794759e-05, | |
| "loss": 2.5459, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.6695495030334324e-05, | |
| "loss": 2.5625, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.665677036272105e-05, | |
| "loss": 2.5741, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.661804569510778e-05, | |
| "loss": 2.5722, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.6579321027494515e-05, | |
| "loss": 2.5583, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.654059635988125e-05, | |
| "loss": 2.5604, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.650187169226798e-05, | |
| "loss": 2.5576, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.646314702465471e-05, | |
| "loss": 2.5615, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.642442235704144e-05, | |
| "loss": 2.5517, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.638569768942817e-05, | |
| "loss": 2.5781, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.6346973021814895e-05, | |
| "loss": 2.5499, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.630824835420163e-05, | |
| "loss": 2.5494, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.626952368658836e-05, | |
| "loss": 2.5514, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.6230799018975086e-05, | |
| "loss": 2.5568, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.619207435136182e-05, | |
| "loss": 2.5518, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.615334968374855e-05, | |
| "loss": 2.551, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.6114625016135276e-05, | |
| "loss": 2.5534, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.607590034852201e-05, | |
| "loss": 2.5473, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.603717568090874e-05, | |
| "loss": 2.55, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.599845101329547e-05, | |
| "loss": 2.5653, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.59597263456822e-05, | |
| "loss": 2.5436, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.5921001678068936e-05, | |
| "loss": 2.5629, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.5882277010455664e-05, | |
| "loss": 2.5548, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.584355234284239e-05, | |
| "loss": 2.5542, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.5804827675229126e-05, | |
| "loss": 2.5478, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.5766103007615854e-05, | |
| "loss": 2.5392, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.572737834000258e-05, | |
| "loss": 2.5561, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.5688653672389317e-05, | |
| "loss": 2.5443, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.5649929004776044e-05, | |
| "loss": 2.5452, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.561120433716277e-05, | |
| "loss": 2.5483, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.55724796695495e-05, | |
| "loss": 2.5589, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.5533755001936235e-05, | |
| "loss": 2.5542, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.549503033432296e-05, | |
| "loss": 2.5461, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.545630566670969e-05, | |
| "loss": 2.5703, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.5417580999096425e-05, | |
| "loss": 2.5444, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.537885633148316e-05, | |
| "loss": 2.5537, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.534013166386989e-05, | |
| "loss": 2.5437, | |
| "step": 13830 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.530140699625662e-05, | |
| "loss": 2.5531, | |
| "step": 13860 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.526268232864335e-05, | |
| "loss": 2.5576, | |
| "step": 13890 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.522395766103008e-05, | |
| "loss": 2.5519, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.5185232993416806e-05, | |
| "loss": 2.536, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.514650832580354e-05, | |
| "loss": 2.5326, | |
| "step": 13980 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.510778365819027e-05, | |
| "loss": 2.5423, | |
| "step": 14010 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.5069058990576996e-05, | |
| "loss": 2.5351, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.503033432296373e-05, | |
| "loss": 2.5225, | |
| "step": 14070 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.499160965535046e-05, | |
| "loss": 2.5281, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.4952884987737187e-05, | |
| "loss": 2.5311, | |
| "step": 14130 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.491416032012392e-05, | |
| "loss": 2.5253, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.487543565251065e-05, | |
| "loss": 2.5437, | |
| "step": 14190 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.4836710984897384e-05, | |
| "loss": 2.5281, | |
| "step": 14220 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.479798631728411e-05, | |
| "loss": 2.5257, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.4759261649670846e-05, | |
| "loss": 2.5512, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.4720536982057574e-05, | |
| "loss": 2.5513, | |
| "step": 14310 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.46818123144443e-05, | |
| "loss": 2.5242, | |
| "step": 14340 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.4643087646831037e-05, | |
| "loss": 2.5308, | |
| "step": 14370 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.4604362979217764e-05, | |
| "loss": 2.5271, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.456563831160449e-05, | |
| "loss": 2.525, | |
| "step": 14430 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.452691364399123e-05, | |
| "loss": 2.5377, | |
| "step": 14460 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.4488188976377955e-05, | |
| "loss": 2.5507, | |
| "step": 14490 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.444946430876468e-05, | |
| "loss": 2.5425, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.441073964115141e-05, | |
| "loss": 2.5231, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.4372014973538145e-05, | |
| "loss": 2.5268, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.433329030592487e-05, | |
| "loss": 2.5277, | |
| "step": 14610 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.429456563831161e-05, | |
| "loss": 2.5204, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.4255840970698336e-05, | |
| "loss": 2.5237, | |
| "step": 14670 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.421711630308507e-05, | |
| "loss": 2.5271, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.41783916354718e-05, | |
| "loss": 2.5228, | |
| "step": 14730 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.413966696785853e-05, | |
| "loss": 2.5301, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.410094230024526e-05, | |
| "loss": 2.5386, | |
| "step": 14790 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.406221763263199e-05, | |
| "loss": 2.5238, | |
| "step": 14820 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.4023492965018716e-05, | |
| "loss": 2.528, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.398476829740545e-05, | |
| "loss": 2.5345, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.394604362979218e-05, | |
| "loss": 2.5175, | |
| "step": 14910 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.390731896217891e-05, | |
| "loss": 2.527, | |
| "step": 14940 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.386859429456564e-05, | |
| "loss": 2.524, | |
| "step": 14970 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.382986962695237e-05, | |
| "loss": 2.5156, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.37911449593391e-05, | |
| "loss": 2.5283, | |
| "step": 15030 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.375242029172583e-05, | |
| "loss": 2.5451, | |
| "step": 15060 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.371369562411256e-05, | |
| "loss": 2.5244, | |
| "step": 15090 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.3674970956499294e-05, | |
| "loss": 2.502, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.363624628888602e-05, | |
| "loss": 2.5264, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.359752162127276e-05, | |
| "loss": 2.5317, | |
| "step": 15180 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.3558796953659485e-05, | |
| "loss": 2.5168, | |
| "step": 15210 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.352007228604621e-05, | |
| "loss": 2.5147, | |
| "step": 15240 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.348134761843295e-05, | |
| "loss": 2.508, | |
| "step": 15270 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.3442622950819675e-05, | |
| "loss": 2.5237, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.34038982832064e-05, | |
| "loss": 2.5216, | |
| "step": 15330 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.336517361559314e-05, | |
| "loss": 2.5181, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.3326448947979865e-05, | |
| "loss": 2.5175, | |
| "step": 15390 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.328772428036659e-05, | |
| "loss": 2.5169, | |
| "step": 15420 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.324899961275332e-05, | |
| "loss": 2.5267, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.3210274945140056e-05, | |
| "loss": 2.511, | |
| "step": 15480 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.3171550277526783e-05, | |
| "loss": 2.5161, | |
| "step": 15510 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.313282560991352e-05, | |
| "loss": 2.5144, | |
| "step": 15540 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.3094100942300246e-05, | |
| "loss": 2.5314, | |
| "step": 15570 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.305537627468698e-05, | |
| "loss": 2.5182, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.301665160707371e-05, | |
| "loss": 2.5198, | |
| "step": 15630 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.297792693946044e-05, | |
| "loss": 2.5043, | |
| "step": 15660 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.293920227184717e-05, | |
| "loss": 2.501, | |
| "step": 15690 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.29004776042339e-05, | |
| "loss": 2.5098, | |
| "step": 15720 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.286175293662063e-05, | |
| "loss": 2.5178, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.282302826900736e-05, | |
| "loss": 2.5196, | |
| "step": 15780 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.278430360139409e-05, | |
| "loss": 2.5128, | |
| "step": 15810 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.274557893378082e-05, | |
| "loss": 2.5156, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.270685426616755e-05, | |
| "loss": 2.5126, | |
| "step": 15870 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.266812959855428e-05, | |
| "loss": 2.5094, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.262940493094101e-05, | |
| "loss": 2.5179, | |
| "step": 15930 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.2590680263327735e-05, | |
| "loss": 2.494, | |
| "step": 15960 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.255195559571447e-05, | |
| "loss": 2.5254, | |
| "step": 15990 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.2513230928101205e-05, | |
| "loss": 2.512, | |
| "step": 16020 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.247450626048793e-05, | |
| "loss": 2.5086, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.243578159287467e-05, | |
| "loss": 2.5188, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.2397056925261395e-05, | |
| "loss": 2.5194, | |
| "step": 16110 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.235833225764812e-05, | |
| "loss": 2.506, | |
| "step": 16140 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.231960759003486e-05, | |
| "loss": 2.4998, | |
| "step": 16170 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.2280882922421585e-05, | |
| "loss": 2.5227, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.224215825480831e-05, | |
| "loss": 2.5252, | |
| "step": 16230 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.220343358719504e-05, | |
| "loss": 2.5154, | |
| "step": 16260 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.2164708919581776e-05, | |
| "loss": 2.5199, | |
| "step": 16290 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.2125984251968504e-05, | |
| "loss": 2.5159, | |
| "step": 16320 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.208725958435523e-05, | |
| "loss": 2.5132, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.2048534916741966e-05, | |
| "loss": 2.5071, | |
| "step": 16380 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.2009810249128694e-05, | |
| "loss": 2.503, | |
| "step": 16410 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.197108558151543e-05, | |
| "loss": 2.5039, | |
| "step": 16440 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.1932360913902156e-05, | |
| "loss": 2.5008, | |
| "step": 16470 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.189363624628889e-05, | |
| "loss": 2.5124, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.185491157867562e-05, | |
| "loss": 2.4988, | |
| "step": 16530 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.181618691106235e-05, | |
| "loss": 2.4936, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.177746224344908e-05, | |
| "loss": 2.5016, | |
| "step": 16590 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.173873757583581e-05, | |
| "loss": 2.4925, | |
| "step": 16620 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.170001290822254e-05, | |
| "loss": 2.5011, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.166128824060927e-05, | |
| "loss": 2.498, | |
| "step": 16680 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.1622563572996e-05, | |
| "loss": 2.4951, | |
| "step": 16710 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.158383890538273e-05, | |
| "loss": 2.4971, | |
| "step": 16740 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.154511423776946e-05, | |
| "loss": 2.4985, | |
| "step": 16770 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.150638957015619e-05, | |
| "loss": 2.4987, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.146766490254292e-05, | |
| "loss": 2.4951, | |
| "step": 16830 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.142894023492965e-05, | |
| "loss": 2.49, | |
| "step": 16860 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.139021556731638e-05, | |
| "loss": 2.503, | |
| "step": 16890 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.1351490899703115e-05, | |
| "loss": 2.5191, | |
| "step": 16920 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.131276623208984e-05, | |
| "loss": 2.499, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.127404156447658e-05, | |
| "loss": 2.5019, | |
| "step": 16980 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.1235316896863305e-05, | |
| "loss": 2.4959, | |
| "step": 17010 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.119659222925003e-05, | |
| "loss": 2.5014, | |
| "step": 17040 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.115786756163677e-05, | |
| "loss": 2.4765, | |
| "step": 17070 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.1119142894023496e-05, | |
| "loss": 2.504, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.1080418226410224e-05, | |
| "loss": 2.4888, | |
| "step": 17130 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.104169355879695e-05, | |
| "loss": 2.4964, | |
| "step": 17160 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.1002968891183686e-05, | |
| "loss": 2.5023, | |
| "step": 17190 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.0964244223570414e-05, | |
| "loss": 2.4929, | |
| "step": 17220 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.092551955595714e-05, | |
| "loss": 2.4945, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.0886794888343876e-05, | |
| "loss": 2.473, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.0848070220730604e-05, | |
| "loss": 2.5037, | |
| "step": 17310 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.080934555311734e-05, | |
| "loss": 2.4862, | |
| "step": 17340 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.077062088550407e-05, | |
| "loss": 2.4972, | |
| "step": 17370 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.07318962178908e-05, | |
| "loss": 2.4686, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.069317155027753e-05, | |
| "loss": 2.4916, | |
| "step": 17430 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.065444688266426e-05, | |
| "loss": 2.4837, | |
| "step": 17460 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.061572221505099e-05, | |
| "loss": 2.5114, | |
| "step": 17490 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.057699754743772e-05, | |
| "loss": 2.4902, | |
| "step": 17520 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.053827287982445e-05, | |
| "loss": 2.4912, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0499548212211182e-05, | |
| "loss": 2.4925, | |
| "step": 17580 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.046082354459791e-05, | |
| "loss": 2.4813, | |
| "step": 17610 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0422098876984638e-05, | |
| "loss": 2.5024, | |
| "step": 17640 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0383374209371373e-05, | |
| "loss": 2.4885, | |
| "step": 17670 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0344649541758104e-05, | |
| "loss": 2.4792, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.030592487414483e-05, | |
| "loss": 2.4909, | |
| "step": 17730 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.026720020653156e-05, | |
| "loss": 2.4834, | |
| "step": 17760 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0228475538918294e-05, | |
| "loss": 2.4686, | |
| "step": 17790 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0189750871305022e-05, | |
| "loss": 2.4849, | |
| "step": 17820 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.015102620369175e-05, | |
| "loss": 2.4959, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0112301536078485e-05, | |
| "loss": 2.5005, | |
| "step": 17880 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.0073576868465216e-05, | |
| "loss": 2.4912, | |
| "step": 17910 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.0034852200851944e-05, | |
| "loss": 2.498, | |
| "step": 17940 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9996127533238678e-05, | |
| "loss": 2.4895, | |
| "step": 17970 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9957402865625406e-05, | |
| "loss": 2.4801, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9918678198012134e-05, | |
| "loss": 2.4798, | |
| "step": 18030 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9879953530398862e-05, | |
| "loss": 2.49, | |
| "step": 18060 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9841228862785597e-05, | |
| "loss": 2.4618, | |
| "step": 18090 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9802504195172324e-05, | |
| "loss": 2.4889, | |
| "step": 18120 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9763779527559056e-05, | |
| "loss": 2.4918, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.972505485994579e-05, | |
| "loss": 2.4864, | |
| "step": 18180 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9686330192332518e-05, | |
| "loss": 2.4822, | |
| "step": 18210 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9647605524719246e-05, | |
| "loss": 2.4844, | |
| "step": 18240 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.960888085710598e-05, | |
| "loss": 2.485, | |
| "step": 18270 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.957015618949271e-05, | |
| "loss": 2.4729, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.9531431521879436e-05, | |
| "loss": 2.4768, | |
| "step": 18330 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.9492706854266168e-05, | |
| "loss": 2.4913, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.9453982186652902e-05, | |
| "loss": 2.4764, | |
| "step": 18390 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.941525751903963e-05, | |
| "loss": 2.4882, | |
| "step": 18420 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.9376532851426358e-05, | |
| "loss": 2.4748, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.9337808183813093e-05, | |
| "loss": 2.4778, | |
| "step": 18480 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.929908351619982e-05, | |
| "loss": 2.4816, | |
| "step": 18510 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.926035884858655e-05, | |
| "loss": 2.4636, | |
| "step": 18540 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.9221634180973283e-05, | |
| "loss": 2.484, | |
| "step": 18570 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.9182909513360014e-05, | |
| "loss": 2.4816, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.9144184845746742e-05, | |
| "loss": 2.4718, | |
| "step": 18630 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.910546017813347e-05, | |
| "loss": 2.4792, | |
| "step": 18660 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.9066735510520205e-05, | |
| "loss": 2.4792, | |
| "step": 18690 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.9028010842906932e-05, | |
| "loss": 2.4719, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.898928617529366e-05, | |
| "loss": 2.4699, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8950561507680395e-05, | |
| "loss": 2.4768, | |
| "step": 18780 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8911836840067126e-05, | |
| "loss": 2.4836, | |
| "step": 18810 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8873112172453854e-05, | |
| "loss": 2.4699, | |
| "step": 18840 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.883438750484059e-05, | |
| "loss": 2.4592, | |
| "step": 18870 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8795662837227317e-05, | |
| "loss": 2.4676, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8756938169614044e-05, | |
| "loss": 2.4808, | |
| "step": 18930 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8718213502000772e-05, | |
| "loss": 2.4709, | |
| "step": 18960 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8679488834387507e-05, | |
| "loss": 2.4792, | |
| "step": 18990 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8640764166774238e-05, | |
| "loss": 2.4764, | |
| "step": 19020 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8602039499160966e-05, | |
| "loss": 2.4613, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.85633148315477e-05, | |
| "loss": 2.4641, | |
| "step": 19080 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.852459016393443e-05, | |
| "loss": 2.4856, | |
| "step": 19110 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.8485865496321156e-05, | |
| "loss": 2.4732, | |
| "step": 19140 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.8447140828707884e-05, | |
| "loss": 2.488, | |
| "step": 19170 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.840841616109462e-05, | |
| "loss": 2.4762, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.8369691493481347e-05, | |
| "loss": 2.4831, | |
| "step": 19230 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.8330966825868078e-05, | |
| "loss": 2.471, | |
| "step": 19260 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.8292242158254813e-05, | |
| "loss": 2.4727, | |
| "step": 19290 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.825351749064154e-05, | |
| "loss": 2.4848, | |
| "step": 19320 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.821479282302827e-05, | |
| "loss": 2.4742, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.8176068155415003e-05, | |
| "loss": 2.4701, | |
| "step": 19380 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.813734348780173e-05, | |
| "loss": 2.4682, | |
| "step": 19410 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.809861882018846e-05, | |
| "loss": 2.4643, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.805989415257519e-05, | |
| "loss": 2.4695, | |
| "step": 19470 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.8021169484961925e-05, | |
| "loss": 2.4901, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.7982444817348653e-05, | |
| "loss": 2.4891, | |
| "step": 19530 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.794372014973538e-05, | |
| "loss": 2.4629, | |
| "step": 19560 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7904995482122115e-05, | |
| "loss": 2.4786, | |
| "step": 19590 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7866270814508843e-05, | |
| "loss": 2.4626, | |
| "step": 19620 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.782754614689557e-05, | |
| "loss": 2.4802, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7788821479282305e-05, | |
| "loss": 2.4609, | |
| "step": 19680 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7750096811669037e-05, | |
| "loss": 2.465, | |
| "step": 19710 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7711372144055765e-05, | |
| "loss": 2.4721, | |
| "step": 19740 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7672647476442492e-05, | |
| "loss": 2.4796, | |
| "step": 19770 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7633922808829227e-05, | |
| "loss": 2.4547, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7595198141215955e-05, | |
| "loss": 2.4621, | |
| "step": 19830 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7556473473602683e-05, | |
| "loss": 2.467, | |
| "step": 19860 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7517748805989417e-05, | |
| "loss": 2.4748, | |
| "step": 19890 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.747902413837615e-05, | |
| "loss": 2.4638, | |
| "step": 19920 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.7440299470762876e-05, | |
| "loss": 2.463, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.740157480314961e-05, | |
| "loss": 2.4597, | |
| "step": 19980 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 2.3166391849517822, | |
| "eval_runtime": 11245.8663, | |
| "eval_samples_per_second": 177.843, | |
| "eval_steps_per_second": 1.71, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.736285013553634e-05, | |
| "loss": 2.4547, | |
| "step": 20010 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.7324125467923067e-05, | |
| "loss": 2.4594, | |
| "step": 20040 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.7285400800309795e-05, | |
| "loss": 2.4535, | |
| "step": 20070 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.724667613269653e-05, | |
| "loss": 2.4665, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.720795146508326e-05, | |
| "loss": 2.4703, | |
| "step": 20130 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.716922679746999e-05, | |
| "loss": 2.4784, | |
| "step": 20160 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.7130502129856723e-05, | |
| "loss": 2.4762, | |
| "step": 20190 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.709177746224345e-05, | |
| "loss": 2.4685, | |
| "step": 20220 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.705305279463018e-05, | |
| "loss": 2.4536, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.7014328127016913e-05, | |
| "loss": 2.4801, | |
| "step": 20280 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.697560345940364e-05, | |
| "loss": 2.4487, | |
| "step": 20310 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.693687879179037e-05, | |
| "loss": 2.4652, | |
| "step": 20340 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.68981541241771e-05, | |
| "loss": 2.467, | |
| "step": 20370 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6859429456563835e-05, | |
| "loss": 2.4546, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6820704788950563e-05, | |
| "loss": 2.4607, | |
| "step": 20430 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.678198012133729e-05, | |
| "loss": 2.447, | |
| "step": 20460 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6743255453724025e-05, | |
| "loss": 2.4564, | |
| "step": 20490 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6704530786110753e-05, | |
| "loss": 2.4761, | |
| "step": 20520 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.666580611849748e-05, | |
| "loss": 2.4661, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6627081450884216e-05, | |
| "loss": 2.463, | |
| "step": 20580 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6588356783270947e-05, | |
| "loss": 2.4645, | |
| "step": 20610 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6549632115657675e-05, | |
| "loss": 2.4625, | |
| "step": 20640 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6510907448044403e-05, | |
| "loss": 2.4632, | |
| "step": 20670 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6472182780431137e-05, | |
| "loss": 2.4489, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6433458112817865e-05, | |
| "loss": 2.4472, | |
| "step": 20730 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6394733445204593e-05, | |
| "loss": 2.4406, | |
| "step": 20760 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.6356008777591328e-05, | |
| "loss": 2.4519, | |
| "step": 20790 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.631728410997806e-05, | |
| "loss": 2.4558, | |
| "step": 20820 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.6278559442364787e-05, | |
| "loss": 2.4594, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.623983477475152e-05, | |
| "loss": 2.4452, | |
| "step": 20880 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.620111010713825e-05, | |
| "loss": 2.4495, | |
| "step": 20910 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.6162385439524977e-05, | |
| "loss": 2.4643, | |
| "step": 20940 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.6123660771911705e-05, | |
| "loss": 2.4523, | |
| "step": 20970 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.608493610429844e-05, | |
| "loss": 2.4489, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.604621143668517e-05, | |
| "loss": 2.4369, | |
| "step": 21030 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.60074867690719e-05, | |
| "loss": 2.4612, | |
| "step": 21060 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.5968762101458634e-05, | |
| "loss": 2.4532, | |
| "step": 21090 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.593003743384536e-05, | |
| "loss": 2.4474, | |
| "step": 21120 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.589131276623209e-05, | |
| "loss": 2.4528, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.5852588098618824e-05, | |
| "loss": 2.4537, | |
| "step": 21180 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.5813863431005552e-05, | |
| "loss": 2.4598, | |
| "step": 21210 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5775138763392283e-05, | |
| "loss": 2.4648, | |
| "step": 21240 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.573641409577901e-05, | |
| "loss": 2.4513, | |
| "step": 21270 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5697689428165746e-05, | |
| "loss": 2.4592, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5658964760552473e-05, | |
| "loss": 2.4362, | |
| "step": 21330 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.56202400929392e-05, | |
| "loss": 2.4434, | |
| "step": 21360 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5581515425325936e-05, | |
| "loss": 2.4606, | |
| "step": 21390 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5542790757712664e-05, | |
| "loss": 2.4475, | |
| "step": 21420 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.550406609009939e-05, | |
| "loss": 2.445, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5465341422486126e-05, | |
| "loss": 2.4463, | |
| "step": 21480 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5426616754872858e-05, | |
| "loss": 2.4447, | |
| "step": 21510 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5387892087259585e-05, | |
| "loss": 2.4369, | |
| "step": 21540 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5349167419646313e-05, | |
| "loss": 2.4462, | |
| "step": 21570 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5310442752033048e-05, | |
| "loss": 2.4498, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.5271718084419776e-05, | |
| "loss": 2.4576, | |
| "step": 21630 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.5232993416806504e-05, | |
| "loss": 2.4525, | |
| "step": 21660 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.5194268749193238e-05, | |
| "loss": 2.4472, | |
| "step": 21690 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.515554408157997e-05, | |
| "loss": 2.4342, | |
| "step": 21720 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.5116819413966697e-05, | |
| "loss": 2.4542, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.5078094746353432e-05, | |
| "loss": 2.4521, | |
| "step": 21780 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.503937007874016e-05, | |
| "loss": 2.4607, | |
| "step": 21810 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.5000645411126888e-05, | |
| "loss": 2.443, | |
| "step": 21840 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.496192074351362e-05, | |
| "loss": 2.4537, | |
| "step": 21870 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.492319607590035e-05, | |
| "loss": 2.4412, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.488447140828708e-05, | |
| "loss": 2.4293, | |
| "step": 21930 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.4845746740673813e-05, | |
| "loss": 2.4659, | |
| "step": 21960 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.480702207306054e-05, | |
| "loss": 2.4499, | |
| "step": 21990 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.4768297405447272e-05, | |
| "loss": 2.4421, | |
| "step": 22020 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4729572737834e-05, | |
| "loss": 2.44, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.469084807022073e-05, | |
| "loss": 2.4487, | |
| "step": 22080 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4652123402607462e-05, | |
| "loss": 2.4454, | |
| "step": 22110 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4613398734994193e-05, | |
| "loss": 2.434, | |
| "step": 22140 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4574674067380925e-05, | |
| "loss": 2.4248, | |
| "step": 22170 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4535949399767653e-05, | |
| "loss": 2.4449, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4497224732154384e-05, | |
| "loss": 2.4379, | |
| "step": 22230 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4458500064541115e-05, | |
| "loss": 2.4457, | |
| "step": 22260 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4419775396927843e-05, | |
| "loss": 2.4384, | |
| "step": 22290 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4381050729314574e-05, | |
| "loss": 2.442, | |
| "step": 22320 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4342326061701305e-05, | |
| "loss": 2.4434, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4303601394088037e-05, | |
| "loss": 2.4303, | |
| "step": 22380 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4264876726474768e-05, | |
| "loss": 2.4432, | |
| "step": 22410 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.4226152058861496e-05, | |
| "loss": 2.4266, | |
| "step": 22440 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.4187427391248227e-05, | |
| "loss": 2.4169, | |
| "step": 22470 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.4148702723634955e-05, | |
| "loss": 2.4334, | |
| "step": 22500 | |
| } | |
| ], | |
| "max_steps": 41208, | |
| "num_train_epochs": 1, | |
| "total_flos": 3.2342062910976e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |