| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.3572704537334763, | |
| "global_step": 100000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.9999999999999997e-06, | |
| "loss": 1.0412, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.999999999999999e-06, | |
| "loss": 0.835, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 8.999999999999999e-06, | |
| "loss": 0.7822, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.1999999999999999e-05, | |
| "loss": 0.7718, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.4999999999999999e-05, | |
| "loss": 0.7707, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.7999999999999997e-05, | |
| "loss": 0.7697, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.1e-05, | |
| "loss": 0.769, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.3999999999999997e-05, | |
| "loss": 0.7682, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.6999999999999996e-05, | |
| "loss": 0.7674, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "loss": 0.767, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_runtime": 45.7675, | |
| "eval_samples_per_second": 235.975, | |
| "eval_steps_per_second": 7.385, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.2999999999999996e-05, | |
| "loss": 0.7665, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.5999999999999994e-05, | |
| "loss": 0.7662, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 3.9e-05, | |
| "loss": 0.7661, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.766, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.4999999999999996e-05, | |
| "loss": 0.7659, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.7999999999999994e-05, | |
| "loss": 0.7656, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 5.1e-05, | |
| "loss": 0.7655, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 5.399999999999999e-05, | |
| "loss": 0.7655, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 5.6999999999999996e-05, | |
| "loss": 0.7653, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 5.9999999999999995e-05, | |
| "loss": 0.7655, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_runtime": 45.5917, | |
| "eval_samples_per_second": 236.885, | |
| "eval_steps_per_second": 7.414, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 6.299999999999999e-05, | |
| "loss": 0.7651, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 6.599999999999999e-05, | |
| "loss": 0.7653, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 6.9e-05, | |
| "loss": 0.7654, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 7.199999999999999e-05, | |
| "loss": 0.765, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 7.5e-05, | |
| "loss": 0.7649, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 7.8e-05, | |
| "loss": 0.7648, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 8.1e-05, | |
| "loss": 0.7647, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.4e-05, | |
| "loss": 0.7645, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.699999999999999e-05, | |
| "loss": 0.7645, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.999999999999999e-05, | |
| "loss": 0.7644, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_runtime": 45.7281, | |
| "eval_samples_per_second": 236.179, | |
| "eval_steps_per_second": 7.392, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.3e-05, | |
| "loss": 0.7641, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.599999999999999e-05, | |
| "loss": 0.764, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.9e-05, | |
| "loss": 0.7638, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000102, | |
| "loss": 0.763, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00010499999999999999, | |
| "loss": 0.7665, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00010799999999999998, | |
| "loss": 0.7669, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00011099999999999999, | |
| "loss": 0.7653, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00011399999999999999, | |
| "loss": 0.7535, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.000117, | |
| "loss": 0.7218, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00011999999999999999, | |
| "loss": 0.6956, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_runtime": 45.9121, | |
| "eval_samples_per_second": 235.232, | |
| "eval_steps_per_second": 7.362, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00012299999999999998, | |
| "loss": 0.6758, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00012599999999999997, | |
| "loss": 0.6557, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000129, | |
| "loss": 0.6402, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00013199999999999998, | |
| "loss": 0.6302, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.000135, | |
| "loss": 0.623, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.000138, | |
| "loss": 0.6169, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00014099999999999998, | |
| "loss": 0.6121, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00014399999999999998, | |
| "loss": 0.607, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000147, | |
| "loss": 0.6039, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00015, | |
| "loss": 0.6012, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_runtime": 46.0979, | |
| "eval_samples_per_second": 234.284, | |
| "eval_steps_per_second": 7.332, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0001499996172456075, | |
| "loss": 0.5981, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00014999846898661572, | |
| "loss": 0.5954, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00014999655523558183, | |
| "loss": 0.5935, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00014999387601343436, | |
| "loss": 0.5911, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00014999043134947282, | |
| "loss": 0.5895, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00014998622128136748, | |
| "loss": 0.5877, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.000149981245855159, | |
| "loss": 0.5866, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00014997550512525784, | |
| "loss": 0.5845, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001499689991544437, | |
| "loss": 0.5784, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00014996172801386482, | |
| "loss": 0.5684, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_runtime": 46.0154, | |
| "eval_samples_per_second": 234.704, | |
| "eval_steps_per_second": 7.345, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00014995369178303722, | |
| "loss": 0.5642, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001499448905498439, | |
| "loss": 0.5625, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00014993532441053364, | |
| "loss": 0.5601, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001499249934697203, | |
| "loss": 0.5581, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001499138978403813, | |
| "loss": 0.554, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00014990203764385677, | |
| "loss": 0.5462, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00014988941300984784, | |
| "loss": 0.5284, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0001498760240764155, | |
| "loss": 0.5032, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000149861870989979, | |
| "loss": 0.4751, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0001498469539053142, | |
| "loss": 0.4574, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_runtime": 45.9402, | |
| "eval_samples_per_second": 235.088, | |
| "eval_steps_per_second": 7.357, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00014983127298555198, | |
| "loss": 0.4453, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00014981482840217632, | |
| "loss": 0.437, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00014979762033502262, | |
| "loss": 0.4306, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00014977964897227547, | |
| "loss": 0.4254, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00014976091451046687, | |
| "loss": 0.4204, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00014974141715447386, | |
| "loss": 0.4178, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00014972115711751644, | |
| "loss": 0.4135, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00014970013462115505, | |
| "loss": 0.4099, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00014967834989528843, | |
| "loss": 0.4077, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00014965580317815078, | |
| "loss": 0.405, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_runtime": 45.7648, | |
| "eval_samples_per_second": 235.989, | |
| "eval_steps_per_second": 7.386, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00014963249471630944, | |
| "loss": 0.4017, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.000149608424764662, | |
| "loss": 0.4006, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001495835935864336, | |
| "loss": 0.3977, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00014955800145317397, | |
| "loss": 0.3964, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00014953164864475466, | |
| "loss": 0.3949, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0001495045354493657, | |
| "loss": 0.3961, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00014947666216351272, | |
| "loss": 0.398, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00014944802909201344, | |
| "loss": 0.3924, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00014941863654799456, | |
| "loss": 0.3938, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00014938848485288825, | |
| "loss": 0.3885, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_runtime": 45.9868, | |
| "eval_samples_per_second": 234.85, | |
| "eval_steps_per_second": 7.35, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001493575743364286, | |
| "loss": 0.391, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00014932590533664808, | |
| "loss": 0.3884, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001492934781998738, | |
| "loss": 0.3856, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001492602932807237, | |
| "loss": 0.3843, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00014922635094210277, | |
| "loss": 0.3848, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.000149191651555199, | |
| "loss": 0.3795, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0001491561954994793, | |
| "loss": 0.3735, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00014911998316268537, | |
| "loss": 0.3658, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00014908301494082963, | |
| "loss": 0.362, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00014904529123819054, | |
| "loss": 0.3595, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_runtime": 46.3224, | |
| "eval_samples_per_second": 233.148, | |
| "eval_steps_per_second": 7.297, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00014900681246730852, | |
| "loss": 0.3585, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00014896757904898125, | |
| "loss": 0.3578, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014892759141225904, | |
| "loss": 0.3568, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014888684999444035, | |
| "loss": 0.355, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014884535524106675, | |
| "loss": 0.3537, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00014880310760591824, | |
| "loss": 0.3523, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0001487601075510082, | |
| "loss": 0.3524, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001487163555465783, | |
| "loss": 0.3515, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0001486718520710935, | |
| "loss": 0.3508, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00014862659761123663, | |
| "loss": 0.3493, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_runtime": 46.1625, | |
| "eval_samples_per_second": 233.956, | |
| "eval_steps_per_second": 7.322, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00014858059266190327, | |
| "loss": 0.3472, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00014853383772619612, | |
| "loss": 0.3463, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00014848633331541967, | |
| "loss": 0.3363, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0001484380799490746, | |
| "loss": 0.3265, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00014838907815485194, | |
| "loss": 0.3235, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00014833932846862748, | |
| "loss": 0.3218, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00014828883143445582, | |
| "loss": 0.3203, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001482375876045644, | |
| "loss": 0.3204, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001481855975393476, | |
| "loss": 0.3184, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0001481328618073604, | |
| "loss": 0.318, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_runtime": 46.1354, | |
| "eval_samples_per_second": 234.094, | |
| "eval_steps_per_second": 7.326, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0001480793809853123, | |
| "loss": 0.3163, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00014802515565806107, | |
| "loss": 0.3155, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00014797018641860612, | |
| "loss": 0.314, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0001479144738680823, | |
| "loss": 0.3136, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00014785801861575312, | |
| "loss": 0.3117, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00014780082127900416, | |
| "loss": 0.3086, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00014774288248333635, | |
| "loss": 0.3074, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00014768420286235908, | |
| "loss": 0.3074, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00014762478305778328, | |
| "loss": 0.3064, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0001475646237194144, | |
| "loss": 0.3057, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_runtime": 46.1242, | |
| "eval_samples_per_second": 234.15, | |
| "eval_steps_per_second": 7.328, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00014750372550514533, | |
| "loss": 0.3048, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0001474420890809492, | |
| "loss": 0.3037, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00014737971512087202, | |
| "loss": 0.3029, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00014731660430702552, | |
| "loss": 0.3024, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00014725275732957937, | |
| "loss": 0.3011, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00014718817488675387, | |
| "loss": 0.3006, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00014712285768481235, | |
| "loss": 0.3009, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00014705680643805323, | |
| "loss": 0.2991, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00014699002186880232, | |
| "loss": 0.2991, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00014692250470740503, | |
| "loss": 0.2979, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_runtime": 46.2531, | |
| "eval_samples_per_second": 233.498, | |
| "eval_steps_per_second": 7.308, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00014685425569221819, | |
| "loss": 0.2975, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00014678527556960207, | |
| "loss": 0.2955, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001467155650939123, | |
| "loss": 0.295, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00014664512502749141, | |
| "loss": 0.2941, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00014657395614066075, | |
| "loss": 0.2931, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001465020592117118, | |
| "loss": 0.2921, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001464294350268979, | |
| "loss": 0.2918, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00014635608438042546, | |
| "loss": 0.2907, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00014628200807444543, | |
| "loss": 0.2899, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001462072069190444, | |
| "loss": 0.2898, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_runtime": 46.2774, | |
| "eval_samples_per_second": 233.375, | |
| "eval_steps_per_second": 7.304, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00014613168173223585, | |
| "loss": 0.2885, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00014605543333995113, | |
| "loss": 0.288, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00014597846257603038, | |
| "loss": 0.2875, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001459007702822136, | |
| "loss": 0.2876, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00014582235730813128, | |
| "loss": 0.2862, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00014574322451129507, | |
| "loss": 0.2849, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00014566337275708863, | |
| "loss": 0.2852, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001455828029187579, | |
| "loss": 0.2833, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00014550151587740178, | |
| "loss": 0.2836, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00014541951252196225, | |
| "loss": 0.2817, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_runtime": 46.1169, | |
| "eval_samples_per_second": 234.187, | |
| "eval_steps_per_second": 7.329, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00014533679374921493, | |
| "loss": 0.2824, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00014525336046375905, | |
| "loss": 0.2817, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00014516921357800766, | |
| "loss": 0.2812, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00014508435401217759, | |
| "loss": 0.2812, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00014499878269427948, | |
| "loss": 0.2795, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014491250056010758, | |
| "loss": 0.2788, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014482550855322943, | |
| "loss": 0.2775, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001447378076249757, | |
| "loss": 0.2773, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014464939873442973, | |
| "loss": 0.2769, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00014456028284841693, | |
| "loss": 0.2765, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_runtime": 46.3516, | |
| "eval_samples_per_second": 233.002, | |
| "eval_steps_per_second": 7.292, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014447046094149437, | |
| "loss": 0.2752, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014437993399594003, | |
| "loss": 0.2765, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001442887030017421, | |
| "loss": 0.2752, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00014419676895658807, | |
| "loss": 0.2748, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.000144104132865854, | |
| "loss": 0.2739, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001440107957425933, | |
| "loss": 0.2729, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001439167586075258, | |
| "loss": 0.2722, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001438220224890265, | |
| "loss": 0.2725, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014372658842311449, | |
| "loss": 0.2726, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014363045745344137, | |
| "loss": 0.2715, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_runtime": 46.2247, | |
| "eval_samples_per_second": 233.641, | |
| "eval_steps_per_second": 7.312, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00014353363063128005, | |
| "loss": 0.2705, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001434361090155131, | |
| "loss": 0.2706, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014333789367262136, | |
| "loss": 0.2701, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014323898567667202, | |
| "loss": 0.2693, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014313938610930712, | |
| "loss": 0.2693, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00014303909605973154, | |
| "loss": 0.2691, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001429381166247012, | |
| "loss": 0.2681, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00014283644890851103, | |
| "loss": 0.2672, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00014273409402298291, | |
| "loss": 0.2671, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00014263105308745343, | |
| "loss": 0.2676, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_runtime": 46.3331, | |
| "eval_samples_per_second": 233.095, | |
| "eval_steps_per_second": 7.295, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00014252732722876176, | |
| "loss": 0.2654, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001424229175812373, | |
| "loss": 0.2649, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00014231782528668717, | |
| "loss": 0.2647, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00014221205149438394, | |
| "loss": 0.2649, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001421055973610528, | |
| "loss": 0.264, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014199846405085913, | |
| "loss": 0.2647, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014189065273539564, | |
| "loss": 0.2635, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014178216459366958, | |
| "loss": 0.2623, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00014167300081208988, | |
| "loss": 0.2627, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00014156316258445421, | |
| "loss": 0.2932, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_runtime": 46.169, | |
| "eval_samples_per_second": 233.923, | |
| "eval_steps_per_second": 7.321, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00014145265111193583, | |
| "loss": 0.2645, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00014134146760307043, | |
| "loss": 0.2625, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00014122961327374313, | |
| "loss": 0.2615, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001411170893471749, | |
| "loss": 0.2605, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00014100389705390938, | |
| "loss": 0.26, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001408900376317994, | |
| "loss": 0.2583, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001407755123259933, | |
| "loss": 0.258, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014066032238892152, | |
| "loss": 0.2569, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014054446908028272, | |
| "loss": 0.2568, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014042795366703018, | |
| "loss": 0.2563, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_runtime": 46.2726, | |
| "eval_samples_per_second": 233.4, | |
| "eval_steps_per_second": 7.305, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001403107774233577, | |
| "loss": 0.256, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00014019294163068597, | |
| "loss": 0.2548, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014007444757764835, | |
| "loss": 0.2543, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001399552965600768, | |
| "loss": 0.2537, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001398354898809877, | |
| "loss": 0.2531, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001397150288505678, | |
| "loss": 0.2531, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00013959391478615959, | |
| "loss": 0.2526, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00013947214901224706, | |
| "loss": 0.2522, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001393497328604412, | |
| "loss": 0.2515, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00013922666766946545, | |
| "loss": 0.2513, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_runtime": 46.224, | |
| "eval_samples_per_second": 233.645, | |
| "eval_steps_per_second": 7.312, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00013910295478514106, | |
| "loss": 0.2504, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0001389785955603722, | |
| "loss": 0.2503, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00013885359135513154, | |
| "loss": 0.2501, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.000138727943536445, | |
| "loss": 0.2488, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013860165347837698, | |
| "loss": 0.2492, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013847472256201535, | |
| "loss": 0.2483, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013834715217545625, | |
| "loss": 0.248, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.000138218943713789, | |
| "loss": 0.2479, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0001380900985790808, | |
| "loss": 0.2485, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013796061818036138, | |
| "loss": 0.2467, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_runtime": 46.1546, | |
| "eval_samples_per_second": 233.996, | |
| "eval_steps_per_second": 7.323, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013783050393360768, | |
| "loss": 0.2468, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0001376997572617282, | |
| "loss": 0.2463, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013756837959454766, | |
| "loss": 0.2456, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001374363723687911, | |
| "loss": 0.2459, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013730373702806846, | |
| "loss": 0.2447, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013717047502285855, | |
| "loss": 0.245, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001370365878104933, | |
| "loss": 0.2446, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00013690207685514185, | |
| "loss": 0.2442, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001367669436277944, | |
| "loss": 0.2439, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001366311896062463, | |
| "loss": 0.2438, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_runtime": 46.5558, | |
| "eval_samples_per_second": 231.98, | |
| "eval_steps_per_second": 7.26, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00013649481627508181, | |
| "loss": 0.2436, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0001363578251256578, | |
| "loss": 0.2429, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00013622021765608754, | |
| "loss": 0.2424, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00013608199537122425, | |
| "loss": 0.242, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0001359431597826447, | |
| "loss": 0.2422, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0001358037124086327, | |
| "loss": 0.2418, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00013566365477416233, | |
| "loss": 0.2407, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00013552298841088144, | |
| "loss": 0.2416, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00013538171485709486, | |
| "loss": 0.2411, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013523983565774753, | |
| "loss": 0.2401, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_runtime": 46.0773, | |
| "eval_samples_per_second": 234.389, | |
| "eval_steps_per_second": 7.336, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013509735236440766, | |
| "loss": 0.2401, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013495426653524972, | |
| "loss": 0.2402, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013481057973503742, | |
| "loss": 0.24, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00013466629353510651, | |
| "loss": 0.239, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00013452140951334787, | |
| "loss": 0.239, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00013437592925418985, | |
| "loss": 0.2388, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00013422985434858133, | |
| "loss": 0.238, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00013408318639397405, | |
| "loss": 0.2387, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00013393592699430525, | |
| "loss": 0.2372, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00013378807775998012, | |
| "loss": 0.2377, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_runtime": 46.2501, | |
| "eval_samples_per_second": 233.513, | |
| "eval_steps_per_second": 7.308, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00013363964030785422, | |
| "loss": 0.2373, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00013349061626121578, | |
| "loss": 0.238, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00013334100724976783, | |
| "loss": 0.2367, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0001331908149096106, | |
| "loss": 0.2367, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00013304004088322342, | |
| "loss": 0.2356, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00013288868681944692, | |
| "loss": 0.2365, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00013273675437346487, | |
| "loss": 0.236, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00013258424520678618, | |
| "loss": 0.2356, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00013243116098722663, | |
| "loss": 0.2363, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00013227750338889077, | |
| "loss": 0.2345, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_runtime": 46.2738, | |
| "eval_samples_per_second": 233.394, | |
| "eval_steps_per_second": 7.304, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00013212327409215343, | |
| "loss": 0.2351, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001319684747836415, | |
| "loss": 0.2351, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001318131071562154, | |
| "loss": 0.2342, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00013165717290895067, | |
| "loss": 0.2338, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001315006737471192, | |
| "loss": 0.234, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001313436113821708, | |
| "loss": 0.233, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00013118598753171425, | |
| "loss": 0.2331, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001310278039194988, | |
| "loss": 0.2329, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00013086906227539506, | |
| "loss": 0.2332, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00013070976433537623, | |
| "loss": 0.2338, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_runtime": 46.2625, | |
| "eval_samples_per_second": 233.45, | |
| "eval_steps_per_second": 7.306, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00013054991184149905, | |
| "loss": 0.2325, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00013038950654188476, | |
| "loss": 0.2312, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00013022855019070005, | |
| "loss": 0.2323, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001300670445481378, | |
| "loss": 0.2319, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001299049913803978, | |
| "loss": 0.2324, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00012974239245966754, | |
| "loss": 0.2313, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0001295792495641028, | |
| "loss": 0.2318, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00012941556447780813, | |
| "loss": 0.2309, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0001292513389908174, | |
| "loss": 0.231, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0001290865748990742, | |
| "loss": 0.2298, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_runtime": 46.1555, | |
| "eval_samples_per_second": 233.992, | |
| "eval_steps_per_second": 7.323, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00012892127400441228, | |
| "loss": 0.2302, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00012875543811453576, | |
| "loss": 0.2305, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001285890690429993, | |
| "loss": 0.2293, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00012842216860918846, | |
| "loss": 0.2298, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0001282547386382996, | |
| "loss": 0.2296, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0001280867809613201, | |
| "loss": 0.2291, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0001279182974150082, | |
| "loss": 0.2279, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00012774928984187297, | |
| "loss": 0.2278, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00012757976009015413, | |
| "loss": 0.228, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0001274097100138019, | |
| "loss": 0.2282, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_runtime": 46.6895, | |
| "eval_samples_per_second": 231.315, | |
| "eval_steps_per_second": 7.239, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00012723914147245663, | |
| "loss": 0.2276, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00012706805633142863, | |
| "loss": 0.2276, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00012689645646167755, | |
| "loss": 0.2281, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00012672434373979207, | |
| "loss": 0.2265, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00012655172004796936, | |
| "loss": 0.2286, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00012637858727399448, | |
| "loss": 0.227, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00012620494731121966, | |
| "loss": 0.2267, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00012603080205854372, | |
| "loss": 0.2266, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00012585615342039126, | |
| "loss": 0.2258, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001256810033066918, | |
| "loss": 0.226, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_runtime": 47.0689, | |
| "eval_samples_per_second": 229.451, | |
| "eval_steps_per_second": 7.181, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001255053536328589, | |
| "loss": 0.2257, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001253292063197693, | |
| "loss": 0.2256, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0001251525632937418, | |
| "loss": 0.2257, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00012497542648651615, | |
| "loss": 0.2248, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00012479779783523216, | |
| "loss": 0.225, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00012461967928240828, | |
| "loss": 0.2246, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00012444107277592047, | |
| "loss": 0.2247, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0001242619802689809, | |
| "loss": 0.2246, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00012408240372011647, | |
| "loss": 0.2238, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0001239023450931476, | |
| "loss": 0.2243, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_runtime": 47.1954, | |
| "eval_samples_per_second": 228.836, | |
| "eval_steps_per_second": 7.162, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00012372180635716656, | |
| "loss": 0.2235, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00012354078948651604, | |
| "loss": 0.2239, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00012335929646076758, | |
| "loss": 0.2231, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00012317732926469976, | |
| "loss": 0.2225, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00012299488988827675, | |
| "loss": 0.2233, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0001228119803266263, | |
| "loss": 0.223, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0001226286025800181, | |
| "loss": 0.2229, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00012244475865384177, | |
| "loss": 0.222, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00012226045055858505, | |
| "loss": 0.2217, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00012207568030981174, | |
| "loss": 0.2222, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_runtime": 47.0101, | |
| "eval_samples_per_second": 229.738, | |
| "eval_steps_per_second": 7.19, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00012189044992813972, | |
| "loss": 0.2213, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0001217047614392187, | |
| "loss": 0.2206, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00012151861687370828, | |
| "loss": 0.2221, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012133201826725558, | |
| "loss": 0.2209, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0001211449676604731, | |
| "loss": 0.2211, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012095746709891632, | |
| "loss": 0.2205, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012076951863306127, | |
| "loss": 0.2203, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0001205811243182823, | |
| "loss": 0.22, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012039228621482949, | |
| "loss": 0.2192, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012020300638780604, | |
| "loss": 0.219, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_runtime": 47.0946, | |
| "eval_samples_per_second": 229.325, | |
| "eval_steps_per_second": 7.177, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00012001328690714582, | |
| "loss": 0.2194, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00011982312984759068, | |
| "loss": 0.2194, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00011963253728866778, | |
| "loss": 0.2189, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00011944151131466675, | |
| "loss": 0.219, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00011925005401461709, | |
| "loss": 0.2184, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00011905816748226513, | |
| "loss": 0.2182, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00011886585381605125, | |
| "loss": 0.2188, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00011867311511908693, | |
| "loss": 0.2179, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00011847995349913162, | |
| "loss": 0.218, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00011828637106856989, | |
| "loss": 0.2173, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_runtime": 46.7598, | |
| "eval_samples_per_second": 230.968, | |
| "eval_steps_per_second": 7.228, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011809236994438816, | |
| "loss": 0.2171, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011789795224815164, | |
| "loss": 0.2175, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011770312010598116, | |
| "loss": 0.2167, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00011750787564852973, | |
| "loss": 0.2167, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011731222101095955, | |
| "loss": 0.2171, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011711615833291833, | |
| "loss": 0.2161, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0001169196897585161, | |
| "loss": 0.2168, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011672281743630175, | |
| "loss": 0.2162, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0001165255435192394, | |
| "loss": 0.2152, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011632787016468506, | |
| "loss": 0.216, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_runtime": 47.0992, | |
| "eval_samples_per_second": 229.303, | |
| "eval_steps_per_second": 7.176, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0001161297995343628, | |
| "loss": 0.2157, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011593133379434138, | |
| "loss": 0.215, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011573247511501028, | |
| "loss": 0.2154, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011553322567105619, | |
| "loss": 0.2155, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011533358764143905, | |
| "loss": 0.2149, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011513356320936841, | |
| "loss": 0.2144, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011493315456227943, | |
| "loss": 0.2147, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011473236389180894, | |
| "loss": 0.2145, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011453119339377154, | |
| "loss": 0.2146, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011432964526813558, | |
| "loss": 0.2145, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_runtime": 46.8321, | |
| "eval_samples_per_second": 230.611, | |
| "eval_steps_per_second": 7.217, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00011412772171899904, | |
| "loss": 0.2132, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011392542495456556, | |
| "loss": 0.2133, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011372275718712006, | |
| "loss": 0.2125, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011351972063300484, | |
| "loss": 0.2135, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011331631751259515, | |
| "loss": 0.213, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00011311255005027487, | |
| "loss": 0.2132, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00011290842047441232, | |
| "loss": 0.2125, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00011270393101733585, | |
| "loss": 0.2122, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00011249908391530946, | |
| "loss": 0.2113, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011229388140850814, | |
| "loss": 0.2119, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_runtime": 46.8036, | |
| "eval_samples_per_second": 230.751, | |
| "eval_steps_per_second": 7.222, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011208832574099368, | |
| "loss": 0.2113, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011188241916068993, | |
| "loss": 0.2111, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011167616391935826, | |
| "loss": 0.2111, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00011146956227257293, | |
| "loss": 0.2119, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00011126261647969645, | |
| "loss": 0.2115, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00011105532880385487, | |
| "loss": 0.2104, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00011084770151191299, | |
| "loss": 0.2107, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00011063973687444962, | |
| "loss": 0.2097, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00011043143716573272, | |
| "loss": 0.2107, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00011022280466369448, | |
| "loss": 0.2113, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_runtime": 47.0898, | |
| "eval_samples_per_second": 229.349, | |
| "eval_steps_per_second": 7.178, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00011001384164990662, | |
| "loss": 0.2099, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00010980455040955506, | |
| "loss": 0.21, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010959493323141538, | |
| "loss": 0.2091, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010938499240782739, | |
| "loss": 0.2098, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010917473023467032, | |
| "loss": 0.2096, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010896414901133761, | |
| "loss": 0.2085, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010875325104071177, | |
| "loss": 0.2093, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010854203862913927, | |
| "loss": 0.2084, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010833051408640509, | |
| "loss": 0.2083, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010811867972570786, | |
| "loss": 0.2084, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_runtime": 46.8854, | |
| "eval_samples_per_second": 230.349, | |
| "eval_steps_per_second": 7.209, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010790653786363416, | |
| "loss": 0.2082, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010769409082013337, | |
| "loss": 0.2081, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010748134091849238, | |
| "loss": 0.2077, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010726829048531, | |
| "loss": 0.2078, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00010705494185047165, | |
| "loss": 0.2077, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001068412973471238, | |
| "loss": 0.2073, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00010662735931164853, | |
| "loss": 0.2076, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001064131300836379, | |
| "loss": 0.2069, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001061986120058684, | |
| "loss": 0.2067, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010598380742427543, | |
| "loss": 0.206, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_runtime": 46.6481, | |
| "eval_samples_per_second": 231.521, | |
| "eval_steps_per_second": 7.246, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010576871868792746, | |
| "loss": 0.206, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0001055533481490004, | |
| "loss": 0.2058, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.000105337698162752, | |
| "loss": 0.206, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010512177108749594, | |
| "loss": 0.2057, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010490556928457616, | |
| "loss": 0.2039, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010468909511834088, | |
| "loss": 0.205, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010447235095611692, | |
| "loss": 0.2045, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00010425533916818376, | |
| "loss": 0.2047, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00010403806212774747, | |
| "loss": 0.205, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000103820522210915, | |
| "loss": 0.2042, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_runtime": 46.7967, | |
| "eval_samples_per_second": 230.786, | |
| "eval_steps_per_second": 7.223, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00010360272179666802, | |
| "loss": 0.204, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010338466326683697, | |
| "loss": 0.2037, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010316634900607497, | |
| "loss": 0.2033, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010294778140183182, | |
| "loss": 0.2035, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010272896284432785, | |
| "loss": 0.2037, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00010250989572652766, | |
| "loss": 0.2028, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00010229058244411427, | |
| "loss": 0.2019, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00010207102539546251, | |
| "loss": 0.2032, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00010185122698161311, | |
| "loss": 0.2026, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00010163118960624632, | |
| "loss": 0.2024, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_runtime": 46.9319, | |
| "eval_samples_per_second": 230.121, | |
| "eval_steps_per_second": 7.202, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00010141091567565561, | |
| "loss": 0.2028, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00010119040759872142, | |
| "loss": 0.2018, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00010096966778688472, | |
| "loss": 0.2016, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00010074869865412074, | |
| "loss": 0.2024, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00010052750261691254, | |
| "loss": 0.2017, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0001003060820942245, | |
| "loss": 0.2015, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00010008443950747599, | |
| "loss": 0.2014, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 9.986257728051483e-05, | |
| "loss": 0.2014, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.964049783959082e-05, | |
| "loss": 0.2012, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.94182036133291e-05, | |
| "loss": 0.201, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_runtime": 47.2136, | |
| "eval_samples_per_second": 228.748, | |
| "eval_steps_per_second": 7.159, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.919569703270376e-05, | |
| "loss": 0.1998, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.89729805310111e-05, | |
| "loss": 0.2004, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.875005654384307e-05, | |
| "loss": 0.2009, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.852692750906071e-05, | |
| "loss": 0.1999, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.830359586676737e-05, | |
| "loss": 0.1997, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.808006405928215e-05, | |
| "loss": 0.2006, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.785633453111306e-05, | |
| "loss": 0.1999, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 9.763240972893037e-05, | |
| "loss": 0.1992, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 9.740829210153984e-05, | |
| "loss": 0.1991, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 9.718398409985593e-05, | |
| "loss": 0.199, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_runtime": 46.9221, | |
| "eval_samples_per_second": 230.169, | |
| "eval_steps_per_second": 7.203, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 9.695948817687504e-05, | |
| "loss": 0.1987, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.673480678764858e-05, | |
| "loss": 0.1982, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.650994238925626e-05, | |
| "loss": 0.1989, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.628489744077911e-05, | |
| "loss": 0.1985, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.60596744032726e-05, | |
| "loss": 0.1981, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.583427573973982e-05, | |
| "loss": 0.1976, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.560870391510441e-05, | |
| "loss": 0.1981, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.538296139618371e-05, | |
| "loss": 0.1978, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.515705065166178e-05, | |
| "loss": 0.1977, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.493097415206228e-05, | |
| "loss": 0.1974, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_runtime": 47.1161, | |
| "eval_samples_per_second": 229.221, | |
| "eval_steps_per_second": 7.174, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.47047343697216e-05, | |
| "loss": 0.1978, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.447833377876176e-05, | |
| "loss": 0.1974, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.425177485506336e-05, | |
| "loss": 0.1971, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.402506007623848e-05, | |
| "loss": 0.1968, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.379819192160362e-05, | |
| "loss": 0.1969, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.357117287215258e-05, | |
| "loss": 0.1966, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.334400541052928e-05, | |
| "loss": 0.1971, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 9.311669202100073e-05, | |
| "loss": 0.1962, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 9.288923518942968e-05, | |
| "loss": 0.1959, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 9.26616374032477e-05, | |
| "loss": 0.1964, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_runtime": 46.7963, | |
| "eval_samples_per_second": 230.788, | |
| "eval_steps_per_second": 7.223, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 9.243390115142761e-05, | |
| "loss": 0.196, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 9.220602892445661e-05, | |
| "loss": 0.1955, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.197802321430889e-05, | |
| "loss": 0.1958, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.174988651441833e-05, | |
| "loss": 0.1951, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.152162131965137e-05, | |
| "loss": 0.1954, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 9.129323012627956e-05, | |
| "loss": 0.1948, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 9.106471543195244e-05, | |
| "loss": 0.1954, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 9.08360797356701e-05, | |
| "loss": 0.1953, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 9.060732553775582e-05, | |
| "loss": 0.1949, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 9.037845533982892e-05, | |
| "loss": 0.1947, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_runtime": 46.9646, | |
| "eval_samples_per_second": 229.96, | |
| "eval_steps_per_second": 7.197, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 9.014947164477721e-05, | |
| "loss": 0.1946, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 8.992037695672967e-05, | |
| "loss": 0.1938, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 8.969117378102912e-05, | |
| "loss": 0.1946, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 8.946186462420478e-05, | |
| "loss": 0.1942, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.923245199394482e-05, | |
| "loss": 0.1934, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.900293839906903e-05, | |
| "loss": 0.194, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.87733263495013e-05, | |
| "loss": 0.1936, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.85436183562422e-05, | |
| "loss": 0.1933, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.83138169313416e-05, | |
| "loss": 0.1933, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.808392458787103e-05, | |
| "loss": 0.1931, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_runtime": 46.9712, | |
| "eval_samples_per_second": 229.928, | |
| "eval_steps_per_second": 7.196, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.78539438398963e-05, | |
| "loss": 0.1922, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.762387720245008e-05, | |
| "loss": 0.1922, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.73937271915042e-05, | |
| "loss": 0.1926, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.716349632394235e-05, | |
| "loss": 0.1924, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.69331871175324e-05, | |
| "loss": 0.1927, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.67028020908989e-05, | |
| "loss": 0.1924, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.647234376349565e-05, | |
| "loss": 0.1921, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.624181465557794e-05, | |
| "loss": 0.1914, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.601121728817519e-05, | |
| "loss": 0.1917, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.578055418306327e-05, | |
| "loss": 0.1918, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_runtime": 47.0452, | |
| "eval_samples_per_second": 229.566, | |
| "eval_steps_per_second": 7.185, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 8.55498278627369e-05, | |
| "loss": 0.1915, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 8.531904085038221e-05, | |
| "loss": 0.1912, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 8.508819566984897e-05, | |
| "loss": 0.1907, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 8.485729484562307e-05, | |
| "loss": 0.1912, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 8.462634090279895e-05, | |
| "loss": 0.1907, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 8.439533636705194e-05, | |
| "loss": 0.1912, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 8.416428376461061e-05, | |
| "loss": 0.19, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 8.393318562222916e-05, | |
| "loss": 0.1904, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 8.370204446715997e-05, | |
| "loss": 0.1902, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 8.347086282712556e-05, | |
| "loss": 0.191, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_runtime": 46.8635, | |
| "eval_samples_per_second": 230.456, | |
| "eval_steps_per_second": 7.212, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 8.323964323029136e-05, | |
| "loss": 0.1896, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 8.300838820523784e-05, | |
| "loss": 0.1903, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 8.277710028093289e-05, | |
| "loss": 0.1895, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 8.254578198670421e-05, | |
| "loss": 0.1897, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 8.231443585221157e-05, | |
| "loss": 0.1895, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 8.208306440741926e-05, | |
| "loss": 0.1898, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 8.185167018256834e-05, | |
| "loss": 0.1899, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 8.162025570814896e-05, | |
| "loss": 0.1894, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 8.138882351487275e-05, | |
| "loss": 0.1895, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 8.115737613364511e-05, | |
| "loss": 0.1895, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_runtime": 47.2771, | |
| "eval_samples_per_second": 228.441, | |
| "eval_steps_per_second": 7.149, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 8.092591609553747e-05, | |
| "loss": 0.1894, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 8.069444593175975e-05, | |
| "loss": 0.1897, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 8.046296817363259e-05, | |
| "loss": 0.1887, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 8.023148535255965e-05, | |
| "loss": 0.1886, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 7.999999999999999e-05, | |
| "loss": 0.1885, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 7.976851464744033e-05, | |
| "loss": 0.1888, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 7.953703182636741e-05, | |
| "loss": 0.1882, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 7.930555406824026e-05, | |
| "loss": 0.1879, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 7.907408390446254e-05, | |
| "loss": 0.1887, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 7.884262386635489e-05, | |
| "loss": 0.1876, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_runtime": 47.1816, | |
| "eval_samples_per_second": 228.903, | |
| "eval_steps_per_second": 7.164, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 7.861117648512725e-05, | |
| "loss": 0.1875, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 7.837974429185103e-05, | |
| "loss": 0.1875, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 7.814832981743164e-05, | |
| "loss": 0.1873, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 7.791693559258072e-05, | |
| "loss": 0.1871, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 7.768556414778842e-05, | |
| "loss": 0.1876, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 7.74542180132958e-05, | |
| "loss": 0.1871, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 7.72228997190671e-05, | |
| "loss": 0.1877, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 7.699161179476217e-05, | |
| "loss": 0.1864, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 7.676035676970863e-05, | |
| "loss": 0.1864, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 7.652913717287443e-05, | |
| "loss": 0.1862, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_runtime": 46.9744, | |
| "eval_samples_per_second": 229.912, | |
| "eval_steps_per_second": 7.195, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 7.629795553284005e-05, | |
| "loss": 0.1868, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 7.606681437777081e-05, | |
| "loss": 0.1867, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 7.583571623538939e-05, | |
| "loss": 0.1858, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 7.560466363294806e-05, | |
| "loss": 0.1865, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 7.537365909720104e-05, | |
| "loss": 0.1859, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 7.514270515437691e-05, | |
| "loss": 0.1862, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 7.491180433015101e-05, | |
| "loss": 0.1852, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 7.468095914961777e-05, | |
| "loss": 0.1864, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 7.445017213726307e-05, | |
| "loss": 0.1856, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 7.421944581693674e-05, | |
| "loss": 0.1852, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_runtime": 75.8452, | |
| "eval_samples_per_second": 142.395, | |
| "eval_steps_per_second": 4.456, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 7.39887827118248e-05, | |
| "loss": 0.1855, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 7.375818534442207e-05, | |
| "loss": 0.1852, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 7.352765623650435e-05, | |
| "loss": 0.1858, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 7.329719790910108e-05, | |
| "loss": 0.1842, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 7.30668128824676e-05, | |
| "loss": 0.185, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 7.283650367605764e-05, | |
| "loss": 0.1851, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.260627280849581e-05, | |
| "loss": 0.1842, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.23761227975499e-05, | |
| "loss": 0.1847, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.21460561601037e-05, | |
| "loss": 0.1849, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 7.191607541212897e-05, | |
| "loss": 0.1848, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_runtime": 47.6864, | |
| "eval_samples_per_second": 226.48, | |
| "eval_steps_per_second": 7.088, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.168618306865838e-05, | |
| "loss": 0.1848, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.145638164375779e-05, | |
| "loss": 0.1842, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.122667365049869e-05, | |
| "loss": 0.1846, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 7.099706160093098e-05, | |
| "loss": 0.1834, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.076754800605516e-05, | |
| "loss": 0.1837, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.053813537579523e-05, | |
| "loss": 0.1835, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.030882621897088e-05, | |
| "loss": 0.1833, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 7.00796230432703e-05, | |
| "loss": 0.1837, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 6.985052835522279e-05, | |
| "loss": 0.1833, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 6.962154466017105e-05, | |
| "loss": 0.1827, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_runtime": 47.1326, | |
| "eval_samples_per_second": 229.141, | |
| "eval_steps_per_second": 7.171, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 6.939267446224418e-05, | |
| "loss": 0.1819, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 6.91639202643299e-05, | |
| "loss": 0.1834, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 6.893528456804756e-05, | |
| "loss": 0.1836, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.870676987372044e-05, | |
| "loss": 0.1832, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.847837868034861e-05, | |
| "loss": 0.1833, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.825011348558167e-05, | |
| "loss": 0.1826, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 6.802197678569109e-05, | |
| "loss": 0.1826, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.779397107554339e-05, | |
| "loss": 0.1821, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.756609884857239e-05, | |
| "loss": 0.1826, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.733836259675233e-05, | |
| "loss": 0.1822, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_runtime": 47.2136, | |
| "eval_samples_per_second": 228.748, | |
| "eval_steps_per_second": 7.159, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 6.71107648105703e-05, | |
| "loss": 0.1814, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.688330797899925e-05, | |
| "loss": 0.1825, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.665599458947072e-05, | |
| "loss": 0.182, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.642882712784742e-05, | |
| "loss": 0.1821, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 6.620180807839639e-05, | |
| "loss": 0.1819, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.597493992376152e-05, | |
| "loss": 0.1824, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.574822514493664e-05, | |
| "loss": 0.1821, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.552166622123824e-05, | |
| "loss": 0.1817, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 6.52952656302784e-05, | |
| "loss": 0.181, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 6.506902584793773e-05, | |
| "loss": 0.1814, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_runtime": 47.2104, | |
| "eval_samples_per_second": 228.763, | |
| "eval_steps_per_second": 7.159, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 6.484294934833822e-05, | |
| "loss": 0.182, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 6.461703860381628e-05, | |
| "loss": 0.1811, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 6.439129608489559e-05, | |
| "loss": 0.1801, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 6.41657242602602e-05, | |
| "loss": 0.1811, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 6.39403255967274e-05, | |
| "loss": 0.1811, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 6.371510255922088e-05, | |
| "loss": 0.1811, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 6.349005761074372e-05, | |
| "loss": 0.1808, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 6.326519321235139e-05, | |
| "loss": 0.1803, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 6.304051182312496e-05, | |
| "loss": 0.1809, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 6.281601590014407e-05, | |
| "loss": 0.1807, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_runtime": 47.3036, | |
| "eval_samples_per_second": 228.313, | |
| "eval_steps_per_second": 7.145, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 6.259170789846017e-05, | |
| "loss": 0.1806, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 6.236759027106965e-05, | |
| "loss": 0.1803, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 6.214366546888694e-05, | |
| "loss": 0.1805, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 6.191993594071785e-05, | |
| "loss": 0.1798, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 6.169640413323262e-05, | |
| "loss": 0.1791, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 6.147307249093929e-05, | |
| "loss": 0.1793, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 6.124994345615693e-05, | |
| "loss": 0.18, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 6.102701946898891e-05, | |
| "loss": 0.1795, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 6.0804302967296225e-05, | |
| "loss": 0.1791, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 6.058179638667089e-05, | |
| "loss": 0.1798, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_runtime": 47.2093, | |
| "eval_samples_per_second": 228.768, | |
| "eval_steps_per_second": 7.16, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 6.035950216040917e-05, | |
| "loss": 0.1793, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 6.0137422719485145e-05, | |
| "loss": 0.1797, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 5.991556049252401e-05, | |
| "loss": 0.1789, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 5.969391790577551e-05, | |
| "loss": 0.1793, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 5.947249738308747e-05, | |
| "loss": 0.179, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 5.925130134587924e-05, | |
| "loss": 0.1785, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 5.903033221311528e-05, | |
| "loss": 0.1787, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 5.880959240127858e-05, | |
| "loss": 0.179, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 5.858908432434438e-05, | |
| "loss": 0.1784, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 5.8368810393753684e-05, | |
| "loss": 0.1789, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_runtime": 47.3247, | |
| "eval_samples_per_second": 228.211, | |
| "eval_steps_per_second": 7.142, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 5.814877301838688e-05, | |
| "loss": 0.1783, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 5.7928974604537494e-05, | |
| "loss": 0.1783, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 5.770941755588573e-05, | |
| "loss": 0.1785, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 5.749010427347233e-05, | |
| "loss": 0.1784, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 5.7271037155672156e-05, | |
| "loss": 0.1777, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 5.7052218598168154e-05, | |
| "loss": 0.1786, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 5.6833650993925016e-05, | |
| "loss": 0.1782, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 5.661533673316303e-05, | |
| "loss": 0.1776, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 5.639727820333198e-05, | |
| "loss": 0.178, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 5.617947778908498e-05, | |
| "loss": 0.1782, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_runtime": 47.2795, | |
| "eval_samples_per_second": 228.429, | |
| "eval_steps_per_second": 7.149, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 5.596193787225254e-05, | |
| "loss": 0.1771, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 5.574466083181624e-05, | |
| "loss": 0.1777, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 5.552764904388305e-05, | |
| "loss": 0.1773, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 5.5310904881659116e-05, | |
| "loss": 0.177, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 5.5094430715423835e-05, | |
| "loss": 0.1766, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 5.487822891250406e-05, | |
| "loss": 0.1771, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 5.4662301837247985e-05, | |
| "loss": 0.177, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 5.4446651850999604e-05, | |
| "loss": 0.1765, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 5.4231281312072544e-05, | |
| "loss": 0.1774, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 5.401619257572453e-05, | |
| "loss": 0.1766, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_runtime": 47.4102, | |
| "eval_samples_per_second": 227.799, | |
| "eval_steps_per_second": 7.129, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 5.3801387994131576e-05, | |
| "loss": 0.1769, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 5.358686991636209e-05, | |
| "loss": 0.1768, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 5.3372640688351476e-05, | |
| "loss": 0.1767, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 5.315870265287618e-05, | |
| "loss": 0.1762, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 5.294505814952835e-05, | |
| "loss": 0.1771, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 5.2731709514689995e-05, | |
| "loss": 0.1759, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 5.25186590815076e-05, | |
| "loss": 0.1759, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 5.2305909179866635e-05, | |
| "loss": 0.1765, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 5.209346213636584e-05, | |
| "loss": 0.1763, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 5.188132027429215e-05, | |
| "loss": 0.1757, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_runtime": 47.3383, | |
| "eval_samples_per_second": 228.145, | |
| "eval_steps_per_second": 7.14, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 5.166948591359489e-05, | |
| "loss": 0.1757, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 5.145796137086076e-05, | |
| "loss": 0.176, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 5.124674895928823e-05, | |
| "loss": 0.1759, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 5.103585098866237e-05, | |
| "loss": 0.1758, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 5.082526976532968e-05, | |
| "loss": 0.1754, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 5.061500759217261e-05, | |
| "loss": 0.1751, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 5.04050667685846e-05, | |
| "loss": 0.1759, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 5.01954495904449e-05, | |
| "loss": 0.1761, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.998615835009339e-05, | |
| "loss": 0.1757, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.97771953363055e-05, | |
| "loss": 0.1751, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_runtime": 47.6107, | |
| "eval_samples_per_second": 226.84, | |
| "eval_steps_per_second": 7.099, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.956856283426728e-05, | |
| "loss": 0.1747, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.936026312555037e-05, | |
| "loss": 0.1746, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.915229848808698e-05, | |
| "loss": 0.1747, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.8944671196145136e-05, | |
| "loss": 0.1744, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.8737383520303546e-05, | |
| "loss": 0.1748, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.853043772742709e-05, | |
| "loss": 0.1748, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.832383608064172e-05, | |
| "loss": 0.1746, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.811758083931005e-05, | |
| "loss": 0.1754, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.791167425900632e-05, | |
| "loss": 0.1744, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.770611859149185e-05, | |
| "loss": 0.1742, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_runtime": 47.4501, | |
| "eval_samples_per_second": 227.608, | |
| "eval_steps_per_second": 7.123, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.7500916084690564e-05, | |
| "loss": 0.174, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.729606898266411e-05, | |
| "loss": 0.1742, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.709157952558768e-05, | |
| "loss": 0.1743, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.688744994972514e-05, | |
| "loss": 0.175, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.668368248740485e-05, | |
| "loss": 0.1748, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.6480279366995116e-05, | |
| "loss": 0.1734, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.6277242812879914e-05, | |
| "loss": 0.1736, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.607457504543447e-05, | |
| "loss": 0.1739, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.5872278281000955e-05, | |
| "loss": 0.1739, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.567035473186444e-05, | |
| "loss": 0.1743, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_runtime": 47.6447, | |
| "eval_samples_per_second": 226.678, | |
| "eval_steps_per_second": 7.094, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.546880660622845e-05, | |
| "loss": 0.1737, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 4.5267636108191036e-05, | |
| "loss": 0.174, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.5066845437720555e-05, | |
| "loss": 0.1735, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.4866436790631564e-05, | |
| "loss": 0.1733, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.4666412358560955e-05, | |
| "loss": 0.1733, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.4466774328943796e-05, | |
| "loss": 0.1729, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.426752488498972e-05, | |
| "loss": 0.1735, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.406866620565862e-05, | |
| "loss": 0.173, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.3870200465637164e-05, | |
| "loss": 0.1732, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 4.3672129835314955e-05, | |
| "loss": 0.1727, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_runtime": 47.5417, | |
| "eval_samples_per_second": 227.169, | |
| "eval_steps_per_second": 7.11, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.347445648076057e-05, | |
| "loss": 0.1738, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.327718256369826e-05, | |
| "loss": 0.1725, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.3080310241483885e-05, | |
| "loss": 0.1731, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.2883841667081675e-05, | |
| "loss": 0.1731, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.268777898904044e-05, | |
| "loss": 0.1726, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.2492124351470214e-05, | |
| "loss": 0.1723, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.2296879894018835e-05, | |
| "loss": 0.1727, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.210204775184834e-05, | |
| "loss": 0.1723, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.190763005561186e-05, | |
| "loss": 0.172, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.171362893143013e-05, | |
| "loss": 0.1724, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_runtime": 47.6533, | |
| "eval_samples_per_second": 226.637, | |
| "eval_steps_per_second": 7.093, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.1520046500868384e-05, | |
| "loss": 0.1724, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.1326884880913074e-05, | |
| "loss": 0.1721, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.1134146183948724e-05, | |
| "loss": 0.1723, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.0941832517734885e-05, | |
| "loss": 0.1717, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.0749945985382915e-05, | |
| "loss": 0.1717, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.0558488685333235e-05, | |
| "loss": 0.1713, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 4.036746271133223e-05, | |
| "loss": 0.1724, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.0176870152409324e-05, | |
| "loss": 0.1708, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.998671309285417e-05, | |
| "loss": 0.1717, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.979699361219395e-05, | |
| "loss": 0.1706, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_runtime": 47.7292, | |
| "eval_samples_per_second": 226.277, | |
| "eval_steps_per_second": 7.082, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.960771378517049e-05, | |
| "loss": 0.171, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 3.941887568171766e-05, | |
| "loss": 0.1708, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 3.923048136693873e-05, | |
| "loss": 0.1717, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 3.904253290108369e-05, | |
| "loss": 0.1716, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 3.885503233952689e-05, | |
| "loss": 0.1706, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.86679817327444e-05, | |
| "loss": 0.171, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.848138312629171e-05, | |
| "loss": 0.1702, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.8295238560781317e-05, | |
| "loss": 0.1706, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.810955007186029e-05, | |
| "loss": 0.1708, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.792431969018824e-05, | |
| "loss": 0.1709, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_runtime": 47.6465, | |
| "eval_samples_per_second": 226.669, | |
| "eval_steps_per_second": 7.094, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.7739549441414945e-05, | |
| "loss": 0.1703, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.755524134615825e-05, | |
| "loss": 0.171, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.7371397419981925e-05, | |
| "loss": 0.1706, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.7188019673373706e-05, | |
| "loss": 0.1707, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.700511011172325e-05, | |
| "loss": 0.1706, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.682267073530023e-05, | |
| "loss": 0.1703, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.664070353923245e-05, | |
| "loss": 0.1698, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.645921051348396e-05, | |
| "loss": 0.1705, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.627819364283345e-05, | |
| "loss": 0.1708, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.6097654906852405e-05, | |
| "loss": 0.1706, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_runtime": 47.2921, | |
| "eval_samples_per_second": 228.368, | |
| "eval_steps_per_second": 7.147, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.591759627988353e-05, | |
| "loss": 0.17, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.573801973101913e-05, | |
| "loss": 0.1702, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.5558927224079534e-05, | |
| "loss": 0.1702, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.5380320717591716e-05, | |
| "loss": 0.17, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.5202202164767836e-05, | |
| "loss": 0.17, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.5024573513483864e-05, | |
| "loss": 0.1706, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.484743670625822e-05, | |
| "loss": 0.1701, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.467079368023068e-05, | |
| "loss": 0.1691, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.449464636714107e-05, | |
| "loss": 0.1698, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.431899669330819e-05, | |
| "loss": 0.1703, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_runtime": 47.7148, | |
| "eval_samples_per_second": 226.345, | |
| "eval_steps_per_second": 7.084, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.4143846579608744e-05, | |
| "loss": 0.1688, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.396919794145629e-05, | |
| "loss": 0.169, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.3795052688780345e-05, | |
| "loss": 0.1691, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.362141272600552e-05, | |
| "loss": 0.1695, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.3448279952030615e-05, | |
| "loss": 0.1692, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.327565626020793e-05, | |
| "loss": 0.1697, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.3103543538322455e-05, | |
| "loss": 0.1694, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.293194366857137e-05, | |
| "loss": 0.1686, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.276085852754336e-05, | |
| "loss": 0.1686, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.259028998619814e-05, | |
| "loss": 0.1688, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_runtime": 47.4078, | |
| "eval_samples_per_second": 227.811, | |
| "eval_steps_per_second": 7.13, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.2420239909845894e-05, | |
| "loss": 0.1688, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.2250710158127045e-05, | |
| "loss": 0.1692, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.2081702584991786e-05, | |
| "loss": 0.1692, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.191321903867988e-05, | |
| "loss": 0.1689, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.174526136170039e-05, | |
| "loss": 0.1691, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.157783139081155e-05, | |
| "loss": 0.1686, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.141093095700072e-05, | |
| "loss": 0.1687, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.1244561885464244e-05, | |
| "loss": 0.1683, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.107872599558769e-05, | |
| "loss": 0.1687, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.0913425100925795e-05, | |
| "loss": 0.1685, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_runtime": 47.5367, | |
| "eval_samples_per_second": 227.193, | |
| "eval_steps_per_second": 7.11, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.0748661009182616e-05, | |
| "loss": 0.1684, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.0584435522191896e-05, | |
| "loss": 0.1684, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.0420750435897183e-05, | |
| "loss": 0.1684, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.025760754033246e-05, | |
| "loss": 0.1679, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.0095008619602206e-05, | |
| "loss": 0.1676, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.993295545186223e-05, | |
| "loss": 0.1685, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.977144980929996e-05, | |
| "loss": 0.1681, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.961049345811523e-05, | |
| "loss": 0.1685, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.945008815850097e-05, | |
| "loss": 0.1679, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.929023566462377e-05, | |
| "loss": 0.1682, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_runtime": 47.5934, | |
| "eval_samples_per_second": 226.922, | |
| "eval_steps_per_second": 7.102, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.9130937724604947e-05, | |
| "loss": 0.1678, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.8972196080501208e-05, | |
| "loss": 0.1678, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.8814012468285748e-05, | |
| "loss": 0.1682, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.865638861782922e-05, | |
| "loss": 0.1678, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.849932625288079e-05, | |
| "loss": 0.1681, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.8342827091049336e-05, | |
| "loss": 0.1678, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.8186892843784587e-05, | |
| "loss": 0.1677, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.803152521635851e-05, | |
| "loss": 0.1679, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.7876725907846578e-05, | |
| "loss": 0.1676, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.7722496611109243e-05, | |
| "loss": 0.167, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_runtime": 47.8007, | |
| "eval_samples_per_second": 225.938, | |
| "eval_steps_per_second": 7.071, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.7568839012773365e-05, | |
| "loss": 0.1673, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.7415754793213826e-05, | |
| "loss": 0.1676, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.7263245626535116e-05, | |
| "loss": 0.1673, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.7111313180553077e-05, | |
| "loss": 0.1673, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6959959116776587e-05, | |
| "loss": 0.1663, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6809185090389406e-05, | |
| "loss": 0.1674, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6658992750232167e-05, | |
| "loss": 0.1666, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.6509383738784218e-05, | |
| "loss": 0.1671, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.6360359692145757e-05, | |
| "loss": 0.1669, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.6211922240019883e-05, | |
| "loss": 0.1671, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_runtime": 47.5708, | |
| "eval_samples_per_second": 227.03, | |
| "eval_steps_per_second": 7.105, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.6064073005694758e-05, | |
| "loss": 0.1669, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.591681360602595e-05, | |
| "loss": 0.1665, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.577014565141866e-05, | |
| "loss": 0.1666, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.562407074581014e-05, | |
| "loss": 0.1663, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.5478590486652137e-05, | |
| "loss": 0.1667, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.533370646489347e-05, | |
| "loss": 0.1665, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.5189420264962586e-05, | |
| "loss": 0.1658, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.504573346475026e-05, | |
| "loss": 0.1665, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.4902647635592324e-05, | |
| "loss": 0.1666, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.476016434225246e-05, | |
| "loss": 0.1662, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_runtime": 47.6449, | |
| "eval_samples_per_second": 226.677, | |
| "eval_steps_per_second": 7.094, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.461828514290513e-05, | |
| "loss": 0.1663, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.447701158911855e-05, | |
| "loss": 0.1664, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.4336345225837658e-05, | |
| "loss": 0.1664, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.4196287591367296e-05, | |
| "loss": 0.1656, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.405684021735527e-05, | |
| "loss": 0.166, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.3918004628775736e-05, | |
| "loss": 0.166, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.3779782343912463e-05, | |
| "loss": 0.1656, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.364217487434221e-05, | |
| "loss": 0.1665, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.3505183724918196e-05, | |
| "loss": 0.1653, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.3368810393753687e-05, | |
| "loss": 0.166, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_runtime": 47.4693, | |
| "eval_samples_per_second": 227.516, | |
| "eval_steps_per_second": 7.12, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.32330563722056e-05, | |
| "loss": 0.1653, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.309792314485815e-05, | |
| "loss": 0.1659, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2963412189506695e-05, | |
| "loss": 0.1652, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.282952497714145e-05, | |
| "loss": 0.1658, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2696262971931538e-05, | |
| "loss": 0.1649, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2563627631208887e-05, | |
| "loss": 0.1657, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.2431620405452336e-05, | |
| "loss": 0.1656, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.230024273827179e-05, | |
| "loss": 0.1655, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.216949606639231e-05, | |
| "loss": 0.1659, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.2039381819638596e-05, | |
| "loss": 0.1649, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_runtime": 47.7941, | |
| "eval_samples_per_second": 225.969, | |
| "eval_steps_per_second": 7.072, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.1909901420919184e-05, | |
| "loss": 0.1655, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.1781056286210997e-05, | |
| "loss": 0.1653, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.1652847824543744e-05, | |
| "loss": 0.1651, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.1525277437984636e-05, | |
| "loss": 0.165, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.1398346521623e-05, | |
| "loss": 0.1652, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.1272056463554978e-05, | |
| "loss": 0.1645, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.114640864486845e-05, | |
| "loss": 0.165, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.1021404439627775e-05, | |
| "loss": 0.165, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.089704521485896e-05, | |
| "loss": 0.1652, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.0773332330534513e-05, | |
| "loss": 0.1655, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_runtime": 47.9606, | |
| "eval_samples_per_second": 225.185, | |
| "eval_steps_per_second": 7.047, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.0650267139558772e-05, | |
| "loss": 0.1651, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.052785098775293e-05, | |
| "loss": 0.1637, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.04060852138404e-05, | |
| "loss": 0.1651, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.028497114943219e-05, | |
| "loss": 0.1646, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.0164510119012263e-05, | |
| "loss": 0.1652, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.0044703439923217e-05, | |
| "loss": 0.1649, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 1.9925552422351654e-05, | |
| "loss": 0.1652, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 1.9807058369314016e-05, | |
| "loss": 0.1644, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 1.968922257664231e-05, | |
| "loss": 0.1647, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 1.9572046332969825e-05, | |
| "loss": 0.1638, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_runtime": 47.6904, | |
| "eval_samples_per_second": 226.46, | |
| "eval_steps_per_second": 7.087, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 1.945553091971727e-05, | |
| "loss": 0.1646, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 1.933967761107847e-05, | |
| "loss": 0.165, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 1.9224487674006694e-05, | |
| "loss": 0.164, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 1.9109962368200602e-05, | |
| "loss": 0.1646, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 1.8996102946090586e-05, | |
| "loss": 0.1647, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.888291065282509e-05, | |
| "loss": 0.1642, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.8770386726256865e-05, | |
| "loss": 0.1634, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.8658532396929565e-05, | |
| "loss": 0.1638, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.8547348888064178e-05, | |
| "loss": 0.1642, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.8436837415545772e-05, | |
| "loss": 0.1646, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_runtime": 47.7791, | |
| "eval_samples_per_second": 226.04, | |
| "eval_steps_per_second": 7.074, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8326999187910095e-05, | |
| "loss": 0.1625, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8217835406330415e-05, | |
| "loss": 0.1622, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.810934726460436e-05, | |
| "loss": 0.1628, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.800153594914084e-05, | |
| "loss": 0.1613, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.7894402638947176e-05, | |
| "loss": 0.1622, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.778794850561604e-05, | |
| "loss": 0.1622, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.7682174713312805e-05, | |
| "loss": 0.1615, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.75770824187627e-05, | |
| "loss": 0.1621, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.747267277123821e-05, | |
| "loss": 0.1623, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.7368946912546556e-05, | |
| "loss": 0.1622, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_runtime": 45.2924, | |
| "eval_samples_per_second": 238.45, | |
| "eval_steps_per_second": 7.463, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.726590597701708e-05, | |
| "loss": 0.1623, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.7163551091488952e-05, | |
| "loss": 0.1619, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.7061883375298788e-05, | |
| "loss": 0.1622, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.6960903940268456e-05, | |
| "loss": 0.1613, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.6860613890692876e-05, | |
| "loss": 0.1615, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.6761014323327962e-05, | |
| "loss": 0.1613, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.6662106327378645e-05, | |
| "loss": 0.1612, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.6563890984486884e-05, | |
| "loss": 0.1617, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.6466369368719955e-05, | |
| "loss": 0.1614, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.6369542546558626e-05, | |
| "loss": 0.1608, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_runtime": 45.3963, | |
| "eval_samples_per_second": 237.905, | |
| "eval_steps_per_second": 7.446, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.6273411576885517e-05, | |
| "loss": 0.1601, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.617797751097349e-05, | |
| "loss": 0.1617, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.608324139247421e-05, | |
| "loss": 0.1618, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5989204257406693e-05, | |
| "loss": 0.1616, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5895867134145974e-05, | |
| "loss": 0.1618, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5803231043411912e-05, | |
| "loss": 0.1611, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5711296998257902e-05, | |
| "loss": 0.1611, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.562006600405996e-05, | |
| "loss": 0.1608, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.5529539058505624e-05, | |
| "loss": 0.1612, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.543971715158307e-05, | |
| "loss": 0.1611, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_runtime": 45.4783, | |
| "eval_samples_per_second": 237.476, | |
| "eval_steps_per_second": 7.432, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.535060126557028e-05, | |
| "loss": 0.1601, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5262192375024284e-05, | |
| "loss": 0.1615, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5174491446770566e-05, | |
| "loss": 0.161, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.508749943989242e-05, | |
| "loss": 0.1612, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.500121730572051e-05, | |
| "loss": 0.161, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.4915645987822406e-05, | |
| "loss": 0.1613, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.4830786421992347e-05, | |
| "loss": 0.1611, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.4746639536240942e-05, | |
| "loss": 0.161, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.4663206250785055e-05, | |
| "loss": 0.1605, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4580487478037748e-05, | |
| "loss": 0.1609, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_runtime": 45.5774, | |
| "eval_samples_per_second": 236.96, | |
| "eval_steps_per_second": 7.416, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4498484122598232e-05, | |
| "loss": 0.1615, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4417197081242083e-05, | |
| "loss": 0.1605, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.433662724291136e-05, | |
| "loss": 0.1596, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4256775488704904e-05, | |
| "loss": 0.1608, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4177642691868717e-05, | |
| "loss": 0.1608, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4099229717786368e-05, | |
| "loss": 0.1609, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4021537423969588e-05, | |
| "loss": 0.1607, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.3944566660048863e-05, | |
| "loss": 0.1603, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.3868318267764128e-05, | |
| "loss": 0.1608, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.3792793080955574e-05, | |
| "loss": 0.1607, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_runtime": 45.4935, | |
| "eval_samples_per_second": 237.397, | |
| "eval_steps_per_second": 7.43, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.3717991925554562e-05, | |
| "loss": 0.1608, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.3643915619574529e-05, | |
| "loss": 0.1604, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.35705649731021e-05, | |
| "loss": 0.1607, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.3497940788288195e-05, | |
| "loss": 0.1602, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.3426043859339253e-05, | |
| "loss": 0.1595, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.3354874972508582e-05, | |
| "loss": 0.1598, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.3284434906087695e-05, | |
| "loss": 0.1602, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.3214724430397915e-05, | |
| "loss": 0.1599, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.314574430778182e-05, | |
| "loss": 0.1598, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.3077495292594966e-05, | |
| "loss": 0.1604, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_runtime": 45.6099, | |
| "eval_samples_per_second": 236.791, | |
| "eval_steps_per_second": 7.411, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.3009978131197669e-05, | |
| "loss": 0.16, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.2943193561946762e-05, | |
| "loss": 0.1604, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.2877142315187628e-05, | |
| "loss": 0.1597, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.28118251132461e-05, | |
| "loss": 0.1601, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.274724267042063e-05, | |
| "loss": 0.1599, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.2683395692974472e-05, | |
| "loss": 0.1595, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.2620284879127947e-05, | |
| "loss": 0.1598, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.2557910919050803e-05, | |
| "loss": 0.1602, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.2496274494854666e-05, | |
| "loss": 0.1596, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.24353762805856e-05, | |
| "loss": 0.1601, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_runtime": 45.5979, | |
| "eval_samples_per_second": 236.853, | |
| "eval_steps_per_second": 7.413, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.2375216942216713e-05, | |
| "loss": 0.1596, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.2315797137640906e-05, | |
| "loss": 0.1598, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.225711751666363e-05, | |
| "loss": 0.16, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.2199178720995825e-05, | |
| "loss": 0.1598, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.2141981384246874e-05, | |
| "loss": 0.1593, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.2085526131917685e-05, | |
| "loss": 0.1602, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.2029813581393866e-05, | |
| "loss": 0.1598, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.197484434193893e-05, | |
| "loss": 0.1593, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.192061901468768e-05, | |
| "loss": 0.1597, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.1867138192639601e-05, | |
| "loss": 0.16, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_runtime": 45.5858, | |
| "eval_samples_per_second": 236.916, | |
| "eval_steps_per_second": 7.415, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.1814402460652382e-05, | |
| "loss": 0.159, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.176241239543558e-05, | |
| "loss": 0.1597, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.171116856554418e-05, | |
| "loss": 0.1594, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.1660671531372517e-05, | |
| "loss": 0.1591, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.1610921845148052e-05, | |
| "loss": 0.1587, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.156192005092539e-05, | |
| "loss": 0.1593, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.1513666684580308e-05, | |
| "loss": 0.1593, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.1466162273803876e-05, | |
| "loss": 0.1587, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.1419407338096732e-05, | |
| "loss": 0.1598, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.1373402388763346e-05, | |
| "loss": 0.159, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_runtime": 45.6844, | |
| "eval_samples_per_second": 236.404, | |
| "eval_steps_per_second": 7.399, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.1328147928906494e-05, | |
| "loss": 0.1594, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.1283644453421678e-05, | |
| "loss": 0.1593, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.1239892448991798e-05, | |
| "loss": 0.1593, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.1196892394081743e-05, | |
| "loss": 0.1588, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.1154644758933235e-05, | |
| "loss": 0.1598, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.1113150005559644e-05, | |
| "loss": 0.1587, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.1072408587740942e-05, | |
| "loss": 0.1589, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.1032420951018755e-05, | |
| "loss": 0.1594, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.0993187532691458e-05, | |
| "loss": 0.1593, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.0954708761809438e-05, | |
| "loss": 0.1588, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_runtime": 45.7912, | |
| "eval_samples_per_second": 235.853, | |
| "eval_steps_per_second": 7.381, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.091698505917036e-05, | |
| "loss": 0.1587, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.0880016837314599e-05, | |
| "loss": 0.1592, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.084380450052071e-05, | |
| "loss": 0.159, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.0808348444801e-05, | |
| "loss": 0.1592, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.0773649057897206e-05, | |
| "loss": 0.1588, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.073970671927628e-05, | |
| "loss": 0.1585, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.0706521800126198e-05, | |
| "loss": 0.1593, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.0674094663351906e-05, | |
| "loss": 0.1595, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.0642425663571383e-05, | |
| "loss": 0.1593, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.0611515147111736e-05, | |
| "loss": 0.1587, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_runtime": 45.5789, | |
| "eval_samples_per_second": 236.952, | |
| "eval_steps_per_second": 7.416, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.0581363452005424e-05, | |
| "loss": 0.1583, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.0551970907986557e-05, | |
| "loss": 0.1584, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.0523337836487271e-05, | |
| "loss": 0.1585, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.0495464550634267e-05, | |
| "loss": 0.1583, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.046835135524533e-05, | |
| "loss": 0.1587, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.044199854682601e-05, | |
| "loss": 0.1588, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.0416406413566414e-05, | |
| "loss": 0.1586, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.0391575235337991e-05, | |
| "loss": 0.1596, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.0367505283690547e-05, | |
| "loss": 0.1585, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.0344196821849202e-05, | |
| "loss": 0.1584, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_runtime": 45.8042, | |
| "eval_samples_per_second": 235.786, | |
| "eval_steps_per_second": 7.379, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.032165010471157e-05, | |
| "loss": 0.1582, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.0299865378844936e-05, | |
| "loss": 0.1586, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.0278842882483569e-05, | |
| "loss": 0.1587, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.025858284552612e-05, | |
| "loss": 0.1594, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.023908548953311e-05, | |
| "loss": 0.1593, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.02203510277245e-05, | |
| "loss": 0.158, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.0202379664977364e-05, | |
| "loss": 0.1582, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.018517159782365e-05, | |
| "loss": 0.1586, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.0168727014448004e-05, | |
| "loss": 0.1586, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.0153046094685783e-05, | |
| "loss": 0.1591, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_runtime": 45.7753, | |
| "eval_samples_per_second": 235.935, | |
| "eval_steps_per_second": 7.384, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.0138129010020992e-05, | |
| "loss": 0.1586, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.0123975923584488e-05, | |
| "loss": 0.1588, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.0110586990152152e-05, | |
| "loss": 0.1585, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.0097962356143219e-05, | |
| "loss": 0.1583, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.0086102159618668e-05, | |
| "loss": 0.1584, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.0075006530279694e-05, | |
| "loss": 0.1579, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.0064675589466339e-05, | |
| "loss": 0.1586, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.0055109450156098e-05, | |
| "loss": 0.1583, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.0046308216962759e-05, | |
| "loss": 0.1585, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.0038271986135177e-05, | |
| "loss": 0.1581, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_runtime": 45.6993, | |
| "eval_samples_per_second": 236.327, | |
| "eval_steps_per_second": 7.396, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0031000845556304e-05, | |
| "loss": 0.1592, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0024494874742152e-05, | |
| "loss": 0.158, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0018754144840986e-05, | |
| "loss": 0.1586, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0013778718632507e-05, | |
| "loss": 0.1585, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.000956865052717e-05, | |
| "loss": 0.1582, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0006123986565623e-05, | |
| "loss": 0.1579, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0003444764418138e-05, | |
| "loss": 0.1585, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.000153101338428e-05, | |
| "loss": 0.1581, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.00003827543925e-05, | |
| "loss": 0.1578, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1e-05, | |
| "loss": 0.1583, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_runtime": 45.8451, | |
| "eval_samples_per_second": 235.576, | |
| "eval_steps_per_second": 7.373, | |
| "step": 100000 | |
| } | |
| ], | |
| "max_steps": 100000, | |
| "num_train_epochs": 3, | |
| "total_flos": 7.009987307584106e+21, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |