| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 10, |
| "global_step": 5300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.2099817351323793e-07, |
| "loss": 4.0153, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 3.2407805919647217, |
| "eval_runtime": 51.2609, |
| "eval_samples_per_second": 1322.879, |
| "eval_steps_per_second": 2.595, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.088952094525112e-07, |
| "loss": 2.4309, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 1.3295739889144897, |
| "eval_runtime": 51.4605, |
| "eval_samples_per_second": 1317.748, |
| "eval_steps_per_second": 2.585, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.998494650312849e-07, |
| "loss": 1.2878, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_loss": 1.2454885244369507, |
| "eval_runtime": 51.5936, |
| "eval_samples_per_second": 1314.349, |
| "eval_steps_per_second": 2.578, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 5.574009434295683e-07, |
| "loss": 1.214, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_loss": 1.176835060119629, |
| "eval_runtime": 51.5326, |
| "eval_samples_per_second": 1315.904, |
| "eval_steps_per_second": 2.581, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 5.99597299894211e-07, |
| "loss": 1.1456, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_loss": 1.135105013847351, |
| "eval_runtime": 51.5256, |
| "eval_samples_per_second": 1316.083, |
| "eval_steps_per_second": 2.581, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 6.329299792437913e-07, |
| "loss": 1.1182, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_loss": 1.1110774278640747, |
| "eval_runtime": 51.7018, |
| "eval_samples_per_second": 1311.599, |
| "eval_steps_per_second": 2.572, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 6.604839728354141e-07, |
| "loss": 1.0933, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_loss": 1.0951799154281616, |
| "eval_runtime": 51.6151, |
| "eval_samples_per_second": 1313.801, |
| "eval_steps_per_second": 2.577, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 6.839699609399337e-07, |
| "loss": 1.0838, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_loss": 1.0831515789031982, |
| "eval_runtime": 51.5722, |
| "eval_samples_per_second": 1314.893, |
| "eval_steps_per_second": 2.579, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 7.044360352012708e-07, |
| "loss": 1.0585, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_loss": 1.0732280015945435, |
| "eval_runtime": 51.672, |
| "eval_samples_per_second": 1312.355, |
| "eval_steps_per_second": 2.574, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 7.225710767839359e-07, |
| "loss": 1.0496, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_loss": 1.0647205114364624, |
| "eval_runtime": 51.7752, |
| "eval_samples_per_second": 1309.74, |
| "eval_steps_per_second": 2.569, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 7.388522018513969e-07, |
| "loss": 1.0627, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_loss": 1.0570907592773438, |
| "eval_runtime": 51.8166, |
| "eval_samples_per_second": 1308.691, |
| "eval_steps_per_second": 2.567, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 7.536235468635834e-07, |
| "loss": 1.0518, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.23, |
| "eval_loss": 1.049896478652954, |
| "eval_runtime": 51.7728, |
| "eval_samples_per_second": 1309.799, |
| "eval_steps_per_second": 2.569, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 7.671415126345663e-07, |
| "loss": 1.0649, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.25, |
| "eval_loss": 1.0434167385101318, |
| "eval_runtime": 51.7035, |
| "eval_samples_per_second": 1311.557, |
| "eval_steps_per_second": 2.572, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 7.796022659328959e-07, |
| "loss": 1.045, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_loss": 1.0371665954589844, |
| "eval_runtime": 51.5545, |
| "eval_samples_per_second": 1315.346, |
| "eval_steps_per_second": 2.58, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 7.911592440089047e-07, |
| "loss": 1.032, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_loss": 1.0311847925186157, |
| "eval_runtime": 51.7951, |
| "eval_samples_per_second": 1309.235, |
| "eval_steps_per_second": 2.568, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 8.019347288561271e-07, |
| "loss": 1.0257, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_loss": 1.0256140232086182, |
| "eval_runtime": 51.7483, |
| "eval_samples_per_second": 1310.42, |
| "eval_steps_per_second": 2.57, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 8.120277498651883e-07, |
| "loss": 1.0171, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_loss": 1.0200444459915161, |
| "eval_runtime": 51.7312, |
| "eval_samples_per_second": 1310.854, |
| "eval_steps_per_second": 2.571, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 8.215196295288388e-07, |
| "loss": 1.0294, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_loss": 1.014703392982483, |
| "eval_runtime": 51.5856, |
| "eval_samples_per_second": 1314.554, |
| "eval_steps_per_second": 2.578, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 8.304779684672557e-07, |
| "loss": 1.0028, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_loss": 1.0094585418701172, |
| "eval_runtime": 51.7818, |
| "eval_samples_per_second": 1309.571, |
| "eval_steps_per_second": 2.568, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 8.389595688569879e-07, |
| "loss": 1.0127, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_loss": 1.0043591260910034, |
| "eval_runtime": 51.636, |
| "eval_samples_per_second": 1313.269, |
| "eval_steps_per_second": 2.576, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 8.470126185316722e-07, |
| "loss": 1.0185, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_loss": 0.9993944764137268, |
| "eval_runtime": 51.5783, |
| "eval_samples_per_second": 1314.739, |
| "eval_steps_per_second": 2.579, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 8.546783493625903e-07, |
| "loss": 1.0023, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_loss": 0.9948075413703918, |
| "eval_runtime": 51.744, |
| "eval_samples_per_second": 1310.529, |
| "eval_steps_per_second": 2.57, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 8.61992314820994e-07, |
| "loss": 0.9993, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.43, |
| "eval_loss": 0.9913944005966187, |
| "eval_runtime": 51.6406, |
| "eval_samples_per_second": 1313.153, |
| "eval_steps_per_second": 2.575, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 8.689853870683541e-07, |
| "loss": 0.9826, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_loss": 0.9888330698013306, |
| "eval_runtime": 51.5703, |
| "eval_samples_per_second": 1314.942, |
| "eval_steps_per_second": 2.579, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 8.75684544365874e-07, |
| "loss": 0.984, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_loss": 0.9866300821304321, |
| "eval_runtime": 51.6072, |
| "eval_samples_per_second": 1314.004, |
| "eval_steps_per_second": 2.577, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 8.821134995876785e-07, |
| "loss": 0.9768, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_loss": 0.9846345782279968, |
| "eval_runtime": 51.7595, |
| "eval_samples_per_second": 1310.136, |
| "eval_steps_per_second": 2.57, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 8.882932068272419e-07, |
| "loss": 0.9652, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_loss": 0.9827404022216797, |
| "eval_runtime": 51.652, |
| "eval_samples_per_second": 1312.864, |
| "eval_steps_per_second": 2.575, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 8.942422734147504e-07, |
| "loss": 0.9891, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_loss": 0.980950117111206, |
| "eval_runtime": 51.6461, |
| "eval_samples_per_second": 1313.013, |
| "eval_steps_per_second": 2.575, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.999772977776921e-07, |
| "loss": 0.9842, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.55, |
| "eval_loss": 0.9792138934135437, |
| "eval_runtime": 51.7586, |
| "eval_samples_per_second": 1310.159, |
| "eval_steps_per_second": 2.57, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 9.05513148606202e-07, |
| "loss": 0.9589, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_loss": 0.9775672554969788, |
| "eval_runtime": 51.5871, |
| "eval_samples_per_second": 1314.513, |
| "eval_steps_per_second": 2.578, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 9.10863197149682e-07, |
| "loss": 0.9691, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_loss": 0.975963294506073, |
| "eval_runtime": 51.6231, |
| "eval_samples_per_second": 1313.598, |
| "eval_steps_per_second": 2.576, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 9.160395117812138e-07, |
| "loss": 0.9865, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_loss": 0.9743648171424866, |
| "eval_runtime": 51.6267, |
| "eval_samples_per_second": 1313.507, |
| "eval_steps_per_second": 2.576, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.21053021953609e-07, |
| "loss": 0.9961, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_loss": 0.9728517532348633, |
| "eval_runtime": 51.7315, |
| "eval_samples_per_second": 1310.844, |
| "eval_steps_per_second": 2.571, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.259136571495437e-07, |
| "loss": 0.9619, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_loss": 0.9713432192802429, |
| "eval_runtime": 51.6068, |
| "eval_samples_per_second": 1314.013, |
| "eval_steps_per_second": 2.577, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.306304652671935e-07, |
| "loss": 0.9845, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.66, |
| "eval_loss": 0.9698730707168579, |
| "eval_runtime": 51.5942, |
| "eval_samples_per_second": 1314.334, |
| "eval_steps_per_second": 2.578, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.352117139888513e-07, |
| "loss": 0.9585, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_loss": 0.9684355854988098, |
| "eval_runtime": 51.6809, |
| "eval_samples_per_second": 1312.128, |
| "eval_steps_per_second": 2.573, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.396649779859618e-07, |
| "loss": 0.9645, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_loss": 0.9670720100402832, |
| "eval_runtime": 51.5257, |
| "eval_samples_per_second": 1316.082, |
| "eval_steps_per_second": 2.581, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.439972142709402e-07, |
| "loss": 0.9727, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_loss": 0.9656769633293152, |
| "eval_runtime": 51.5615, |
| "eval_samples_per_second": 1315.169, |
| "eval_steps_per_second": 2.579, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.48214827578075e-07, |
| "loss": 0.9581, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_loss": 0.9643924236297607, |
| "eval_runtime": 51.4727, |
| "eval_samples_per_second": 1317.436, |
| "eval_steps_per_second": 2.584, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.523237273160609e-07, |
| "loss": 0.9786, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.75, |
| "eval_loss": 0.9630870223045349, |
| "eval_runtime": 51.6713, |
| "eval_samples_per_second": 1312.374, |
| "eval_steps_per_second": 2.574, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.563293773632721e-07, |
| "loss": 0.9654, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 0.9617934823036194, |
| "eval_runtime": 51.6036, |
| "eval_samples_per_second": 1314.093, |
| "eval_steps_per_second": 2.577, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.602368397587203e-07, |
| "loss": 0.9526, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_loss": 0.960529625415802, |
| "eval_runtime": 51.5066, |
| "eval_samples_per_second": 1316.569, |
| "eval_steps_per_second": 2.582, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.640508131652252e-07, |
| "loss": 0.9611, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_loss": 0.9593038558959961, |
| "eval_runtime": 51.676, |
| "eval_samples_per_second": 1312.254, |
| "eval_steps_per_second": 2.574, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.677756668379226e-07, |
| "loss": 0.9744, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_loss": 0.9580965638160706, |
| "eval_runtime": 51.6776, |
| "eval_samples_per_second": 1312.214, |
| "eval_steps_per_second": 2.574, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 9.714154707140102e-07, |
| "loss": 0.9636, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.85, |
| "eval_loss": 0.9569092392921448, |
| "eval_runtime": 51.5523, |
| "eval_samples_per_second": 1315.402, |
| "eval_steps_per_second": 2.58, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.749740221433787e-07, |
| "loss": 0.9291, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.87, |
| "eval_loss": 0.955761194229126, |
| "eval_runtime": 51.6324, |
| "eval_samples_per_second": 1313.362, |
| "eval_steps_per_second": 2.576, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.784548697003153e-07, |
| "loss": 0.9581, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.89, |
| "eval_loss": 0.954668402671814, |
| "eval_runtime": 51.5081, |
| "eval_samples_per_second": 1316.531, |
| "eval_steps_per_second": 2.582, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.818613344506044e-07, |
| "loss": 0.9298, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.91, |
| "eval_loss": 0.9535388946533203, |
| "eval_runtime": 51.7629, |
| "eval_samples_per_second": 1310.049, |
| "eval_steps_per_second": 2.569, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.851965289935241e-07, |
| "loss": 0.9389, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.92, |
| "eval_loss": 0.9524605870246887, |
| "eval_runtime": 51.5702, |
| "eval_samples_per_second": 1314.945, |
| "eval_steps_per_second": 2.579, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.884633745523852e-07, |
| "loss": 0.9664, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.94, |
| "eval_loss": 0.9513717889785767, |
| "eval_runtime": 51.687, |
| "eval_samples_per_second": 1311.975, |
| "eval_steps_per_second": 2.573, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.91664616348788e-07, |
| "loss": 0.9609, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_loss": 0.9503200054168701, |
| "eval_runtime": 51.6496, |
| "eval_samples_per_second": 1312.923, |
| "eval_steps_per_second": 2.575, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.948028374633558e-07, |
| "loss": 0.9619, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_loss": 0.9492778778076172, |
| "eval_runtime": 51.8222, |
| "eval_samples_per_second": 1308.552, |
| "eval_steps_per_second": 2.566, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.978804713582987e-07, |
| "loss": 0.9344, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.9482750296592712, |
| "eval_runtime": 51.4982, |
| "eval_samples_per_second": 1316.785, |
| "eval_steps_per_second": 2.583, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1e-06, |
| "loss": 0.9296, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_loss": 0.947296142578125, |
| "eval_runtime": 51.6009, |
| "eval_samples_per_second": 1314.163, |
| "eval_steps_per_second": 2.577, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1e-06, |
| "loss": 0.9576, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_loss": 0.9463191032409668, |
| "eval_runtime": 51.5571, |
| "eval_samples_per_second": 1315.281, |
| "eval_steps_per_second": 2.58, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1e-06, |
| "loss": 0.9506, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_loss": 0.945310115814209, |
| "eval_runtime": 51.6349, |
| "eval_samples_per_second": 1313.297, |
| "eval_steps_per_second": 2.576, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 1e-06, |
| "loss": 0.936, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.08, |
| "eval_loss": 0.9444094300270081, |
| "eval_runtime": 51.5323, |
| "eval_samples_per_second": 1315.912, |
| "eval_steps_per_second": 2.581, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1e-06, |
| "loss": 0.9286, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_loss": 0.9434741139411926, |
| "eval_runtime": 51.602, |
| "eval_samples_per_second": 1314.134, |
| "eval_steps_per_second": 2.577, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 1e-06, |
| "loss": 0.9528, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_loss": 0.9425981044769287, |
| "eval_runtime": 51.5723, |
| "eval_samples_per_second": 1314.893, |
| "eval_steps_per_second": 2.579, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 1e-06, |
| "loss": 0.9334, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.13, |
| "eval_loss": 0.9417036771774292, |
| "eval_runtime": 51.6826, |
| "eval_samples_per_second": 1312.086, |
| "eval_steps_per_second": 2.573, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 1e-06, |
| "loss": 0.9337, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.15, |
| "eval_loss": 0.9408856630325317, |
| "eval_runtime": 51.611, |
| "eval_samples_per_second": 1313.905, |
| "eval_steps_per_second": 2.577, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 1e-06, |
| "loss": 0.9417, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.17, |
| "eval_loss": 0.9400270581245422, |
| "eval_runtime": 51.7034, |
| "eval_samples_per_second": 1311.558, |
| "eval_steps_per_second": 2.572, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1e-06, |
| "loss": 0.9356, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_loss": 0.9391804933547974, |
| "eval_runtime": 51.5713, |
| "eval_samples_per_second": 1314.918, |
| "eval_steps_per_second": 2.579, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1e-06, |
| "loss": 0.9523, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_loss": 0.9383418560028076, |
| "eval_runtime": 51.7082, |
| "eval_samples_per_second": 1311.436, |
| "eval_steps_per_second": 2.572, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1e-06, |
| "loss": 0.9212, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.23, |
| "eval_loss": 0.9375633597373962, |
| "eval_runtime": 51.6214, |
| "eval_samples_per_second": 1313.64, |
| "eval_steps_per_second": 2.576, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 1e-06, |
| "loss": 0.9317, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_loss": 0.9367907047271729, |
| "eval_runtime": 51.6435, |
| "eval_samples_per_second": 1313.079, |
| "eval_steps_per_second": 2.575, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1e-06, |
| "loss": 0.9242, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.26, |
| "eval_loss": 0.9359526634216309, |
| "eval_runtime": 51.6499, |
| "eval_samples_per_second": 1312.916, |
| "eval_steps_per_second": 2.575, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1e-06, |
| "loss": 0.9316, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_loss": 0.9352383017539978, |
| "eval_runtime": 51.6428, |
| "eval_samples_per_second": 1313.098, |
| "eval_steps_per_second": 2.575, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1e-06, |
| "loss": 0.9391, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_loss": 0.9344274401664734, |
| "eval_runtime": 51.5062, |
| "eval_samples_per_second": 1316.58, |
| "eval_steps_per_second": 2.582, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1e-06, |
| "loss": 0.9437, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_loss": 0.9337021708488464, |
| "eval_runtime": 51.6761, |
| "eval_samples_per_second": 1312.252, |
| "eval_steps_per_second": 2.574, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1e-06, |
| "loss": 0.9347, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_loss": 0.9328891634941101, |
| "eval_runtime": 51.7055, |
| "eval_samples_per_second": 1311.504, |
| "eval_steps_per_second": 2.572, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 1e-06, |
| "loss": 0.9344, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.36, |
| "eval_loss": 0.9321677088737488, |
| "eval_runtime": 51.5434, |
| "eval_samples_per_second": 1315.629, |
| "eval_steps_per_second": 2.58, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1e-06, |
| "loss": 0.9082, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_loss": 0.9314926266670227, |
| "eval_runtime": 51.5975, |
| "eval_samples_per_second": 1314.249, |
| "eval_steps_per_second": 2.578, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1e-06, |
| "loss": 0.9152, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_loss": 0.9307209253311157, |
| "eval_runtime": 51.6257, |
| "eval_samples_per_second": 1313.531, |
| "eval_steps_per_second": 2.576, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1e-06, |
| "loss": 0.9178, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.42, |
| "eval_loss": 0.9300168752670288, |
| "eval_runtime": 51.5343, |
| "eval_samples_per_second": 1315.862, |
| "eval_steps_per_second": 2.581, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1e-06, |
| "loss": 0.9321, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_loss": 0.9293088912963867, |
| "eval_runtime": 51.5854, |
| "eval_samples_per_second": 1314.559, |
| "eval_steps_per_second": 2.578, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1e-06, |
| "loss": 0.9239, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.45, |
| "eval_loss": 0.9286373853683472, |
| "eval_runtime": 51.6726, |
| "eval_samples_per_second": 1312.34, |
| "eval_steps_per_second": 2.574, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1e-06, |
| "loss": 0.9347, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_loss": 0.927922248840332, |
| "eval_runtime": 51.653, |
| "eval_samples_per_second": 1312.837, |
| "eval_steps_per_second": 2.575, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1e-06, |
| "loss": 0.9029, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.49, |
| "eval_loss": 0.9272500276565552, |
| "eval_runtime": 51.8063, |
| "eval_samples_per_second": 1308.953, |
| "eval_steps_per_second": 2.567, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1e-06, |
| "loss": 0.9222, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.51, |
| "eval_loss": 0.9265542030334473, |
| "eval_runtime": 51.6947, |
| "eval_samples_per_second": 1311.778, |
| "eval_steps_per_second": 2.573, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1e-06, |
| "loss": 0.9209, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_loss": 0.9258936643600464, |
| "eval_runtime": 51.8655, |
| "eval_samples_per_second": 1307.459, |
| "eval_steps_per_second": 2.564, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 1e-06, |
| "loss": 0.9287, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_loss": 0.9251777529716492, |
| "eval_runtime": 51.6265, |
| "eval_samples_per_second": 1313.511, |
| "eval_steps_per_second": 2.576, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 1e-06, |
| "loss": 0.9214, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.57, |
| "eval_loss": 0.9245129227638245, |
| "eval_runtime": 51.6754, |
| "eval_samples_per_second": 1312.268, |
| "eval_steps_per_second": 2.574, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1e-06, |
| "loss": 0.9029, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_loss": 0.9238165020942688, |
| "eval_runtime": 51.7762, |
| "eval_samples_per_second": 1309.713, |
| "eval_steps_per_second": 2.569, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 1e-06, |
| "loss": 0.9303, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_loss": 0.9232094883918762, |
| "eval_runtime": 51.813, |
| "eval_samples_per_second": 1308.783, |
| "eval_steps_per_second": 2.567, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 1e-06, |
| "loss": 0.9262, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.62, |
| "eval_loss": 0.9224840998649597, |
| "eval_runtime": 51.6696, |
| "eval_samples_per_second": 1312.415, |
| "eval_steps_per_second": 2.574, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 1e-06, |
| "loss": 0.9187, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.64, |
| "eval_loss": 0.9218891859054565, |
| "eval_runtime": 51.7308, |
| "eval_samples_per_second": 1310.862, |
| "eval_steps_per_second": 2.571, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 1e-06, |
| "loss": 0.9177, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_loss": 0.9212586283683777, |
| "eval_runtime": 51.8721, |
| "eval_samples_per_second": 1307.291, |
| "eval_steps_per_second": 2.564, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 1e-06, |
| "loss": 0.9138, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.68, |
| "eval_loss": 0.9205953478813171, |
| "eval_runtime": 51.7717, |
| "eval_samples_per_second": 1309.827, |
| "eval_steps_per_second": 2.569, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 1e-06, |
| "loss": 0.9097, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.7, |
| "eval_loss": 0.9199565052986145, |
| "eval_runtime": 51.8761, |
| "eval_samples_per_second": 1307.192, |
| "eval_steps_per_second": 2.564, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 1e-06, |
| "loss": 0.9063, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.72, |
| "eval_loss": 0.9193738698959351, |
| "eval_runtime": 51.7714, |
| "eval_samples_per_second": 1309.836, |
| "eval_steps_per_second": 2.569, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 1e-06, |
| "loss": 0.92, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.74, |
| "eval_loss": 0.9187218546867371, |
| "eval_runtime": 51.6954, |
| "eval_samples_per_second": 1311.761, |
| "eval_steps_per_second": 2.573, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 1e-06, |
| "loss": 0.9411, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.75, |
| "eval_loss": 0.9181123971939087, |
| "eval_runtime": 52.3612, |
| "eval_samples_per_second": 1295.082, |
| "eval_steps_per_second": 2.54, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 1e-06, |
| "loss": 0.9235, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_loss": 0.9175041317939758, |
| "eval_runtime": 51.7288, |
| "eval_samples_per_second": 1310.913, |
| "eval_steps_per_second": 2.571, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1e-06, |
| "loss": 0.9, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_loss": 0.9168665409088135, |
| "eval_runtime": 51.7948, |
| "eval_samples_per_second": 1309.242, |
| "eval_steps_per_second": 2.568, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1e-06, |
| "loss": 0.9148, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_loss": 0.9162449240684509, |
| "eval_runtime": 51.6549, |
| "eval_samples_per_second": 1312.789, |
| "eval_steps_per_second": 2.575, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1e-06, |
| "loss": 0.9064, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_loss": 0.9156414270401001, |
| "eval_runtime": 51.9765, |
| "eval_samples_per_second": 1304.667, |
| "eval_steps_per_second": 2.559, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1e-06, |
| "loss": 0.9267, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.85, |
| "eval_loss": 0.9150117635726929, |
| "eval_runtime": 51.8206, |
| "eval_samples_per_second": 1308.593, |
| "eval_steps_per_second": 2.567, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1e-06, |
| "loss": 0.9125, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_loss": 0.9144014716148376, |
| "eval_runtime": 51.936, |
| "eval_samples_per_second": 1305.684, |
| "eval_steps_per_second": 2.561, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1e-06, |
| "loss": 0.9263, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.89, |
| "eval_loss": 0.9138918519020081, |
| "eval_runtime": 51.687, |
| "eval_samples_per_second": 1311.974, |
| "eval_steps_per_second": 2.573, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1e-06, |
| "loss": 0.9124, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_loss": 0.9132450222969055, |
| "eval_runtime": 51.6969, |
| "eval_samples_per_second": 1311.722, |
| "eval_steps_per_second": 2.573, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1e-06, |
| "loss": 0.9025, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_loss": 0.9127308130264282, |
| "eval_runtime": 51.7421, |
| "eval_samples_per_second": 1310.578, |
| "eval_steps_per_second": 2.57, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1e-06, |
| "loss": 0.916, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.94, |
| "eval_loss": 0.9121115803718567, |
| "eval_runtime": 51.779, |
| "eval_samples_per_second": 1309.642, |
| "eval_steps_per_second": 2.569, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1e-06, |
| "loss": 0.8957, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_loss": 0.9115496873855591, |
| "eval_runtime": 51.7016, |
| "eval_samples_per_second": 1311.604, |
| "eval_steps_per_second": 2.572, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1e-06, |
| "loss": 0.9107, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.98, |
| "eval_loss": 0.9109433889389038, |
| "eval_runtime": 51.7718, |
| "eval_samples_per_second": 1309.826, |
| "eval_steps_per_second": 2.569, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1e-06, |
| "loss": 0.9072, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.9104679226875305, |
| "eval_runtime": 51.7958, |
| "eval_samples_per_second": 1309.218, |
| "eval_steps_per_second": 2.568, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1e-06, |
| "loss": 0.9009, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_loss": 0.9099251627922058, |
| "eval_runtime": 51.7577, |
| "eval_samples_per_second": 1310.181, |
| "eval_steps_per_second": 2.57, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1e-06, |
| "loss": 0.9208, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.04, |
| "eval_loss": 0.9094162583351135, |
| "eval_runtime": 51.7631, |
| "eval_samples_per_second": 1310.046, |
| "eval_steps_per_second": 2.569, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1e-06, |
| "loss": 0.9114, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.06, |
| "eval_loss": 0.908789873123169, |
| "eval_runtime": 51.6814, |
| "eval_samples_per_second": 1312.116, |
| "eval_steps_per_second": 2.573, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1e-06, |
| "loss": 0.8926, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.08, |
| "eval_loss": 0.9083024859428406, |
| "eval_runtime": 51.7598, |
| "eval_samples_per_second": 1310.128, |
| "eval_steps_per_second": 2.57, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1e-06, |
| "loss": 0.9126, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.09, |
| "eval_loss": 0.9077491760253906, |
| "eval_runtime": 51.7667, |
| "eval_samples_per_second": 1309.954, |
| "eval_steps_per_second": 2.569, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1e-06, |
| "loss": 0.9111, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.11, |
| "eval_loss": 0.9072958827018738, |
| "eval_runtime": 51.7977, |
| "eval_samples_per_second": 1309.17, |
| "eval_steps_per_second": 2.568, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1e-06, |
| "loss": 0.8893, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.13, |
| "eval_loss": 0.9067880511283875, |
| "eval_runtime": 51.8164, |
| "eval_samples_per_second": 1308.698, |
| "eval_steps_per_second": 2.567, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1e-06, |
| "loss": 0.9091, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.15, |
| "eval_loss": 0.9062515497207642, |
| "eval_runtime": 51.7521, |
| "eval_samples_per_second": 1310.323, |
| "eval_steps_per_second": 2.57, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1e-06, |
| "loss": 0.88, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_loss": 0.9057653546333313, |
| "eval_runtime": 51.8093, |
| "eval_samples_per_second": 1308.876, |
| "eval_steps_per_second": 2.567, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1e-06, |
| "loss": 0.8956, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.19, |
| "eval_loss": 0.9052146077156067, |
| "eval_runtime": 51.6703, |
| "eval_samples_per_second": 1312.399, |
| "eval_steps_per_second": 2.574, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1e-06, |
| "loss": 0.9016, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.21, |
| "eval_loss": 0.904728353023529, |
| "eval_runtime": 51.8128, |
| "eval_samples_per_second": 1308.788, |
| "eval_steps_per_second": 2.567, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1e-06, |
| "loss": 0.8985, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.23, |
| "eval_loss": 0.9042342901229858, |
| "eval_runtime": 51.7265, |
| "eval_samples_per_second": 1310.973, |
| "eval_steps_per_second": 2.571, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1e-06, |
| "loss": 0.888, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.25, |
| "eval_loss": 0.9037690162658691, |
| "eval_runtime": 51.6956, |
| "eval_samples_per_second": 1311.755, |
| "eval_steps_per_second": 2.573, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1e-06, |
| "loss": 0.887, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.26, |
| "eval_loss": 0.9033035039901733, |
| "eval_runtime": 51.861, |
| "eval_samples_per_second": 1307.571, |
| "eval_steps_per_second": 2.565, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1e-06, |
| "loss": 0.8888, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.28, |
| "eval_loss": 0.9027730226516724, |
| "eval_runtime": 51.6531, |
| "eval_samples_per_second": 1312.834, |
| "eval_steps_per_second": 2.575, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1e-06, |
| "loss": 0.8921, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_loss": 0.9023709893226624, |
| "eval_runtime": 51.638, |
| "eval_samples_per_second": 1313.219, |
| "eval_steps_per_second": 2.576, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1e-06, |
| "loss": 0.8952, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.32, |
| "eval_loss": 0.9018809795379639, |
| "eval_runtime": 51.8323, |
| "eval_samples_per_second": 1308.296, |
| "eval_steps_per_second": 2.566, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1e-06, |
| "loss": 0.8897, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.34, |
| "eval_loss": 0.90138179063797, |
| "eval_runtime": 51.672, |
| "eval_samples_per_second": 1312.354, |
| "eval_steps_per_second": 2.574, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1e-06, |
| "loss": 0.897, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.36, |
| "eval_loss": 0.9009556174278259, |
| "eval_runtime": 51.8076, |
| "eval_samples_per_second": 1308.921, |
| "eval_steps_per_second": 2.567, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1e-06, |
| "loss": 0.8928, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_loss": 0.900455117225647, |
| "eval_runtime": 51.7097, |
| "eval_samples_per_second": 1311.397, |
| "eval_steps_per_second": 2.572, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1e-06, |
| "loss": 0.9051, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_loss": 0.9000570774078369, |
| "eval_runtime": 51.76, |
| "eval_samples_per_second": 1310.123, |
| "eval_steps_per_second": 2.57, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1e-06, |
| "loss": 0.8985, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.42, |
| "eval_loss": 0.8996244072914124, |
| "eval_runtime": 51.7876, |
| "eval_samples_per_second": 1309.425, |
| "eval_steps_per_second": 2.568, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1e-06, |
| "loss": 0.9074, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.43, |
| "eval_loss": 0.899140477180481, |
| "eval_runtime": 51.7059, |
| "eval_samples_per_second": 1311.495, |
| "eval_steps_per_second": 2.572, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1e-06, |
| "loss": 0.8832, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.45, |
| "eval_loss": 0.8987085819244385, |
| "eval_runtime": 51.8508, |
| "eval_samples_per_second": 1307.829, |
| "eval_steps_per_second": 2.565, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1e-06, |
| "loss": 0.8708, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.47, |
| "eval_loss": 0.8981704711914062, |
| "eval_runtime": 51.6881, |
| "eval_samples_per_second": 1311.947, |
| "eval_steps_per_second": 2.573, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1e-06, |
| "loss": 0.901, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.49, |
| "eval_loss": 0.8977486491203308, |
| "eval_runtime": 51.6963, |
| "eval_samples_per_second": 1311.739, |
| "eval_steps_per_second": 2.573, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 1e-06, |
| "loss": 0.8835, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.51, |
| "eval_loss": 0.8974390625953674, |
| "eval_runtime": 51.6765, |
| "eval_samples_per_second": 1312.241, |
| "eval_steps_per_second": 2.574, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 1e-06, |
| "loss": 0.8897, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.53, |
| "eval_loss": 0.8968603014945984, |
| "eval_runtime": 51.64, |
| "eval_samples_per_second": 1313.168, |
| "eval_steps_per_second": 2.576, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 1e-06, |
| "loss": 0.8893, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.55, |
| "eval_loss": 0.8964337706565857, |
| "eval_runtime": 51.7386, |
| "eval_samples_per_second": 1310.665, |
| "eval_steps_per_second": 2.571, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 1e-06, |
| "loss": 0.8809, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.57, |
| "eval_loss": 0.8959853649139404, |
| "eval_runtime": 51.7398, |
| "eval_samples_per_second": 1310.634, |
| "eval_steps_per_second": 2.571, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 1e-06, |
| "loss": 0.8977, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_loss": 0.8955875039100647, |
| "eval_runtime": 51.8409, |
| "eval_samples_per_second": 1308.08, |
| "eval_steps_per_second": 2.566, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 1e-06, |
| "loss": 0.8948, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.6, |
| "eval_loss": 0.895256519317627, |
| "eval_runtime": 51.6804, |
| "eval_samples_per_second": 1312.142, |
| "eval_steps_per_second": 2.574, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 1e-06, |
| "loss": 0.8851, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.62, |
| "eval_loss": 0.8946732878684998, |
| "eval_runtime": 51.6288, |
| "eval_samples_per_second": 1313.454, |
| "eval_steps_per_second": 2.576, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 1e-06, |
| "loss": 0.8875, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.64, |
| "eval_loss": 0.8942922353744507, |
| "eval_runtime": 51.8893, |
| "eval_samples_per_second": 1306.859, |
| "eval_steps_per_second": 2.563, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 1e-06, |
| "loss": 0.8953, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.66, |
| "eval_loss": 0.8939051032066345, |
| "eval_runtime": 51.8333, |
| "eval_samples_per_second": 1308.272, |
| "eval_steps_per_second": 2.566, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 1e-06, |
| "loss": 0.8709, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.68, |
| "eval_loss": 0.8934805393218994, |
| "eval_runtime": 51.6417, |
| "eval_samples_per_second": 1313.125, |
| "eval_steps_per_second": 2.575, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 1e-06, |
| "loss": 0.8821, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.7, |
| "eval_loss": 0.8930464386940002, |
| "eval_runtime": 51.8812, |
| "eval_samples_per_second": 1307.063, |
| "eval_steps_per_second": 2.564, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 1e-06, |
| "loss": 0.8925, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.72, |
| "eval_loss": 0.8925924301147461, |
| "eval_runtime": 51.6652, |
| "eval_samples_per_second": 1312.527, |
| "eval_steps_per_second": 2.574, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 1e-06, |
| "loss": 0.8939, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.74, |
| "eval_loss": 0.8922019004821777, |
| "eval_runtime": 51.7786, |
| "eval_samples_per_second": 1309.654, |
| "eval_steps_per_second": 2.569, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 1e-06, |
| "loss": 0.8887, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.75, |
| "eval_loss": 0.8918561339378357, |
| "eval_runtime": 51.7061, |
| "eval_samples_per_second": 1311.489, |
| "eval_steps_per_second": 2.572, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 1e-06, |
| "loss": 0.8842, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.77, |
| "eval_loss": 0.8913816213607788, |
| "eval_runtime": 51.9169, |
| "eval_samples_per_second": 1306.164, |
| "eval_steps_per_second": 2.562, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 1e-06, |
| "loss": 0.8752, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.79, |
| "eval_loss": 0.8909953236579895, |
| "eval_runtime": 51.8089, |
| "eval_samples_per_second": 1308.888, |
| "eval_steps_per_second": 2.567, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 1e-06, |
| "loss": 0.8755, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.81, |
| "eval_loss": 0.8905987739562988, |
| "eval_runtime": 51.6731, |
| "eval_samples_per_second": 1312.328, |
| "eval_steps_per_second": 2.574, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 1e-06, |
| "loss": 0.8804, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.83, |
| "eval_loss": 0.8901605606079102, |
| "eval_runtime": 51.8047, |
| "eval_samples_per_second": 1308.993, |
| "eval_steps_per_second": 2.567, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1e-06, |
| "loss": 0.8881, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.85, |
| "eval_loss": 0.8897448778152466, |
| "eval_runtime": 51.7378, |
| "eval_samples_per_second": 1310.687, |
| "eval_steps_per_second": 2.571, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 1e-06, |
| "loss": 0.8674, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.87, |
| "eval_loss": 0.8893808126449585, |
| "eval_runtime": 51.6928, |
| "eval_samples_per_second": 1311.827, |
| "eval_steps_per_second": 2.573, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1e-06, |
| "loss": 0.8718, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.89, |
| "eval_loss": 0.8889936804771423, |
| "eval_runtime": 51.6709, |
| "eval_samples_per_second": 1312.382, |
| "eval_steps_per_second": 2.574, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1e-06, |
| "loss": 0.8867, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.91, |
| "eval_loss": 0.8885937929153442, |
| "eval_runtime": 52.0485, |
| "eval_samples_per_second": 1302.862, |
| "eval_steps_per_second": 2.555, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1e-06, |
| "loss": 0.8983, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_loss": 0.8881903886795044, |
| "eval_runtime": 51.779, |
| "eval_samples_per_second": 1309.642, |
| "eval_steps_per_second": 2.569, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1e-06, |
| "loss": 0.8769, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.94, |
| "eval_loss": 0.8878265023231506, |
| "eval_runtime": 51.6439, |
| "eval_samples_per_second": 1313.07, |
| "eval_steps_per_second": 2.575, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 1e-06, |
| "loss": 0.8877, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.96, |
| "eval_loss": 0.8874040246009827, |
| "eval_runtime": 51.7344, |
| "eval_samples_per_second": 1310.771, |
| "eval_steps_per_second": 2.571, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 1e-06, |
| "loss": 0.9046, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.98, |
| "eval_loss": 0.8870102763175964, |
| "eval_runtime": 51.7489, |
| "eval_samples_per_second": 1310.404, |
| "eval_steps_per_second": 2.57, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 1e-06, |
| "loss": 0.8814, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.8866777420043945, |
| "eval_runtime": 51.715, |
| "eval_samples_per_second": 1311.264, |
| "eval_steps_per_second": 2.572, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 1e-06, |
| "loss": 0.8832, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.02, |
| "eval_loss": 0.8863227963447571, |
| "eval_runtime": 51.8519, |
| "eval_samples_per_second": 1307.801, |
| "eval_steps_per_second": 2.565, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 1e-06, |
| "loss": 0.8814, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.04, |
| "eval_loss": 0.8860304951667786, |
| "eval_runtime": 51.7087, |
| "eval_samples_per_second": 1311.424, |
| "eval_steps_per_second": 2.572, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 1e-06, |
| "loss": 0.8867, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.06, |
| "eval_loss": 0.8856372237205505, |
| "eval_runtime": 51.6572, |
| "eval_samples_per_second": 1312.73, |
| "eval_steps_per_second": 2.575, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 1e-06, |
| "loss": 0.8873, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.08, |
| "eval_loss": 0.8852881193161011, |
| "eval_runtime": 51.7478, |
| "eval_samples_per_second": 1310.433, |
| "eval_steps_per_second": 2.57, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.09, |
| "learning_rate": 1e-06, |
| "loss": 0.8762, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.09, |
| "eval_loss": 0.8849110007286072, |
| "eval_runtime": 51.7028, |
| "eval_samples_per_second": 1311.572, |
| "eval_steps_per_second": 2.572, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 1e-06, |
| "loss": 0.876, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.11, |
| "eval_loss": 0.884550929069519, |
| "eval_runtime": 51.7551, |
| "eval_samples_per_second": 1310.248, |
| "eval_steps_per_second": 2.57, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 1e-06, |
| "loss": 0.8964, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.13, |
| "eval_loss": 0.8842361569404602, |
| "eval_runtime": 51.5961, |
| "eval_samples_per_second": 1314.285, |
| "eval_steps_per_second": 2.578, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 1e-06, |
| "loss": 0.86, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.15, |
| "eval_loss": 0.8838174343109131, |
| "eval_runtime": 51.7753, |
| "eval_samples_per_second": 1309.737, |
| "eval_steps_per_second": 2.569, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 1e-06, |
| "loss": 0.8848, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.17, |
| "eval_loss": 0.883499026298523, |
| "eval_runtime": 51.8229, |
| "eval_samples_per_second": 1308.534, |
| "eval_steps_per_second": 2.566, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 1e-06, |
| "loss": 0.8529, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.19, |
| "eval_loss": 0.8831055760383606, |
| "eval_runtime": 51.8274, |
| "eval_samples_per_second": 1308.42, |
| "eval_steps_per_second": 2.566, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.21, |
| "learning_rate": 1e-06, |
| "loss": 0.8716, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.21, |
| "eval_loss": 0.8827484250068665, |
| "eval_runtime": 51.6637, |
| "eval_samples_per_second": 1312.566, |
| "eval_steps_per_second": 2.574, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 1e-06, |
| "loss": 0.8513, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.23, |
| "eval_loss": 0.8824192881584167, |
| "eval_runtime": 51.7982, |
| "eval_samples_per_second": 1309.156, |
| "eval_steps_per_second": 2.568, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 1e-06, |
| "loss": 0.8796, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.25, |
| "eval_loss": 0.8820593357086182, |
| "eval_runtime": 51.6857, |
| "eval_samples_per_second": 1312.007, |
| "eval_steps_per_second": 2.573, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 1e-06, |
| "loss": 0.8789, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.26, |
| "eval_loss": 0.8817575573921204, |
| "eval_runtime": 51.6335, |
| "eval_samples_per_second": 1313.333, |
| "eval_steps_per_second": 2.576, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 1e-06, |
| "loss": 0.8728, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.28, |
| "eval_loss": 0.8813122510910034, |
| "eval_runtime": 51.6387, |
| "eval_samples_per_second": 1313.202, |
| "eval_steps_per_second": 2.576, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 1e-06, |
| "loss": 0.8735, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.3, |
| "eval_loss": 0.8810012936592102, |
| "eval_runtime": 51.7373, |
| "eval_samples_per_second": 1310.7, |
| "eval_steps_per_second": 2.571, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.32, |
| "learning_rate": 1e-06, |
| "loss": 0.8682, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.32, |
| "eval_loss": 0.8805950284004211, |
| "eval_runtime": 51.6928, |
| "eval_samples_per_second": 1311.827, |
| "eval_steps_per_second": 2.573, |
| "step": 1760 |
| }, |
| { |
| "epoch": 3.34, |
| "learning_rate": 1e-06, |
| "loss": 0.862, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.34, |
| "eval_loss": 0.8802461624145508, |
| "eval_runtime": 51.7119, |
| "eval_samples_per_second": 1311.341, |
| "eval_steps_per_second": 2.572, |
| "step": 1770 |
| }, |
| { |
| "epoch": 3.36, |
| "learning_rate": 1e-06, |
| "loss": 0.8673, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.36, |
| "eval_loss": 0.8799049258232117, |
| "eval_runtime": 51.8086, |
| "eval_samples_per_second": 1308.894, |
| "eval_steps_per_second": 2.567, |
| "step": 1780 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 1e-06, |
| "loss": 0.8659, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.38, |
| "eval_loss": 0.8796053528785706, |
| "eval_runtime": 51.738, |
| "eval_samples_per_second": 1310.682, |
| "eval_steps_per_second": 2.571, |
| "step": 1790 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 1e-06, |
| "loss": 0.8611, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.4, |
| "eval_loss": 0.8792104125022888, |
| "eval_runtime": 51.8854, |
| "eval_samples_per_second": 1306.957, |
| "eval_steps_per_second": 2.563, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 1e-06, |
| "loss": 0.8703, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.42, |
| "eval_loss": 0.878919780254364, |
| "eval_runtime": 51.7353, |
| "eval_samples_per_second": 1310.75, |
| "eval_steps_per_second": 2.571, |
| "step": 1810 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 1e-06, |
| "loss": 0.8589, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.43, |
| "eval_loss": 0.8785597681999207, |
| "eval_runtime": 51.7103, |
| "eval_samples_per_second": 1311.383, |
| "eval_steps_per_second": 2.572, |
| "step": 1820 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 1e-06, |
| "loss": 0.8612, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.45, |
| "eval_loss": 0.8782058954238892, |
| "eval_runtime": 51.762, |
| "eval_samples_per_second": 1310.072, |
| "eval_steps_per_second": 2.569, |
| "step": 1830 |
| }, |
| { |
| "epoch": 3.47, |
| "learning_rate": 1e-06, |
| "loss": 0.8656, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.47, |
| "eval_loss": 0.877843976020813, |
| "eval_runtime": 51.6306, |
| "eval_samples_per_second": 1313.407, |
| "eval_steps_per_second": 2.576, |
| "step": 1840 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 1e-06, |
| "loss": 0.8642, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.49, |
| "eval_loss": 0.8775426745414734, |
| "eval_runtime": 51.7629, |
| "eval_samples_per_second": 1310.05, |
| "eval_steps_per_second": 2.569, |
| "step": 1850 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 1e-06, |
| "loss": 0.8794, |
| "step": 1860 |
| }, |
| { |
| "epoch": 3.51, |
| "eval_loss": 0.8772388696670532, |
| "eval_runtime": 51.7172, |
| "eval_samples_per_second": 1311.207, |
| "eval_steps_per_second": 2.572, |
| "step": 1860 |
| }, |
| { |
| "epoch": 3.53, |
| "learning_rate": 1e-06, |
| "loss": 0.8536, |
| "step": 1870 |
| }, |
| { |
| "epoch": 3.53, |
| "eval_loss": 0.8769137859344482, |
| "eval_runtime": 51.6913, |
| "eval_samples_per_second": 1311.864, |
| "eval_steps_per_second": 2.573, |
| "step": 1870 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 1e-06, |
| "loss": 0.8522, |
| "step": 1880 |
| }, |
| { |
| "epoch": 3.55, |
| "eval_loss": 0.876491367816925, |
| "eval_runtime": 51.9434, |
| "eval_samples_per_second": 1305.497, |
| "eval_steps_per_second": 2.56, |
| "step": 1880 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 1e-06, |
| "loss": 0.8699, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.57, |
| "eval_loss": 0.8762040734291077, |
| "eval_runtime": 51.7598, |
| "eval_samples_per_second": 1310.129, |
| "eval_steps_per_second": 2.57, |
| "step": 1890 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 1e-06, |
| "loss": 0.8643, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.58, |
| "eval_loss": 0.8758660554885864, |
| "eval_runtime": 51.7094, |
| "eval_samples_per_second": 1311.404, |
| "eval_steps_per_second": 2.572, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 1e-06, |
| "loss": 0.8778, |
| "step": 1910 |
| }, |
| { |
| "epoch": 3.6, |
| "eval_loss": 0.8755151629447937, |
| "eval_runtime": 51.938, |
| "eval_samples_per_second": 1305.634, |
| "eval_steps_per_second": 2.561, |
| "step": 1910 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 1e-06, |
| "loss": 0.8712, |
| "step": 1920 |
| }, |
| { |
| "epoch": 3.62, |
| "eval_loss": 0.8752315044403076, |
| "eval_runtime": 52.1258, |
| "eval_samples_per_second": 1300.929, |
| "eval_steps_per_second": 2.552, |
| "step": 1920 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 1e-06, |
| "loss": 0.864, |
| "step": 1930 |
| }, |
| { |
| "epoch": 3.64, |
| "eval_loss": 0.8748995065689087, |
| "eval_runtime": 51.7657, |
| "eval_samples_per_second": 1309.979, |
| "eval_steps_per_second": 2.569, |
| "step": 1930 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 1e-06, |
| "loss": 0.8651, |
| "step": 1940 |
| }, |
| { |
| "epoch": 3.66, |
| "eval_loss": 0.8745700716972351, |
| "eval_runtime": 51.7789, |
| "eval_samples_per_second": 1309.645, |
| "eval_steps_per_second": 2.569, |
| "step": 1940 |
| }, |
| { |
| "epoch": 3.68, |
| "learning_rate": 1e-06, |
| "loss": 0.8406, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.68, |
| "eval_loss": 0.8742328882217407, |
| "eval_runtime": 51.6777, |
| "eval_samples_per_second": 1312.21, |
| "eval_steps_per_second": 2.574, |
| "step": 1950 |
| }, |
| { |
| "epoch": 3.7, |
| "learning_rate": 1e-06, |
| "loss": 0.8724, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.7, |
| "eval_loss": 0.8738684058189392, |
| "eval_runtime": 51.851, |
| "eval_samples_per_second": 1307.826, |
| "eval_steps_per_second": 2.565, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 1e-06, |
| "loss": 0.861, |
| "step": 1970 |
| }, |
| { |
| "epoch": 3.72, |
| "eval_loss": 0.8735198974609375, |
| "eval_runtime": 51.7532, |
| "eval_samples_per_second": 1310.296, |
| "eval_steps_per_second": 2.57, |
| "step": 1970 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 1e-06, |
| "loss": 0.8836, |
| "step": 1980 |
| }, |
| { |
| "epoch": 3.74, |
| "eval_loss": 0.8732261657714844, |
| "eval_runtime": 51.8862, |
| "eval_samples_per_second": 1306.936, |
| "eval_steps_per_second": 2.563, |
| "step": 1980 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 1e-06, |
| "loss": 0.8668, |
| "step": 1990 |
| }, |
| { |
| "epoch": 3.75, |
| "eval_loss": 0.8728705048561096, |
| "eval_runtime": 51.8038, |
| "eval_samples_per_second": 1309.015, |
| "eval_steps_per_second": 2.567, |
| "step": 1990 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 1e-06, |
| "loss": 0.8557, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.77, |
| "eval_loss": 0.8725922107696533, |
| "eval_runtime": 51.7963, |
| "eval_samples_per_second": 1309.206, |
| "eval_steps_per_second": 2.568, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 1e-06, |
| "loss": 0.8662, |
| "step": 2010 |
| }, |
| { |
| "epoch": 3.79, |
| "eval_loss": 0.872270941734314, |
| "eval_runtime": 51.8085, |
| "eval_samples_per_second": 1308.898, |
| "eval_steps_per_second": 2.567, |
| "step": 2010 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 1e-06, |
| "loss": 0.8672, |
| "step": 2020 |
| }, |
| { |
| "epoch": 3.81, |
| "eval_loss": 0.8719350695610046, |
| "eval_runtime": 51.9143, |
| "eval_samples_per_second": 1306.231, |
| "eval_steps_per_second": 2.562, |
| "step": 2020 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 1e-06, |
| "loss": 0.8549, |
| "step": 2030 |
| }, |
| { |
| "epoch": 3.83, |
| "eval_loss": 0.8716722130775452, |
| "eval_runtime": 51.8011, |
| "eval_samples_per_second": 1309.083, |
| "eval_steps_per_second": 2.568, |
| "step": 2030 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 1e-06, |
| "loss": 0.861, |
| "step": 2040 |
| }, |
| { |
| "epoch": 3.85, |
| "eval_loss": 0.8712872862815857, |
| "eval_runtime": 51.9365, |
| "eval_samples_per_second": 1305.671, |
| "eval_steps_per_second": 2.561, |
| "step": 2040 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 1e-06, |
| "loss": 0.8668, |
| "step": 2050 |
| }, |
| { |
| "epoch": 3.87, |
| "eval_loss": 0.8710207939147949, |
| "eval_runtime": 51.7794, |
| "eval_samples_per_second": 1309.632, |
| "eval_steps_per_second": 2.569, |
| "step": 2050 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 1e-06, |
| "loss": 0.8642, |
| "step": 2060 |
| }, |
| { |
| "epoch": 3.89, |
| "eval_loss": 0.870637834072113, |
| "eval_runtime": 51.8156, |
| "eval_samples_per_second": 1308.717, |
| "eval_steps_per_second": 2.567, |
| "step": 2060 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 1e-06, |
| "loss": 0.8645, |
| "step": 2070 |
| }, |
| { |
| "epoch": 3.91, |
| "eval_loss": 0.87038254737854, |
| "eval_runtime": 51.9756, |
| "eval_samples_per_second": 1304.688, |
| "eval_steps_per_second": 2.559, |
| "step": 2070 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 1e-06, |
| "loss": 0.853, |
| "step": 2080 |
| }, |
| { |
| "epoch": 3.92, |
| "eval_loss": 0.8700686693191528, |
| "eval_runtime": 51.8858, |
| "eval_samples_per_second": 1306.946, |
| "eval_steps_per_second": 2.563, |
| "step": 2080 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 1e-06, |
| "loss": 0.8744, |
| "step": 2090 |
| }, |
| { |
| "epoch": 3.94, |
| "eval_loss": 0.8697250485420227, |
| "eval_runtime": 51.8937, |
| "eval_samples_per_second": 1306.749, |
| "eval_steps_per_second": 2.563, |
| "step": 2090 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 1e-06, |
| "loss": 0.8485, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.96, |
| "eval_loss": 0.8694667220115662, |
| "eval_runtime": 51.8101, |
| "eval_samples_per_second": 1308.858, |
| "eval_steps_per_second": 2.567, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 1e-06, |
| "loss": 0.8708, |
| "step": 2110 |
| }, |
| { |
| "epoch": 3.98, |
| "eval_loss": 0.869096040725708, |
| "eval_runtime": 51.7678, |
| "eval_samples_per_second": 1309.925, |
| "eval_steps_per_second": 2.569, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 1e-06, |
| "loss": 0.8588, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.8688409328460693, |
| "eval_runtime": 51.7659, |
| "eval_samples_per_second": 1309.976, |
| "eval_steps_per_second": 2.569, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.02, |
| "learning_rate": 1e-06, |
| "loss": 0.8497, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.02, |
| "eval_loss": 0.8686378002166748, |
| "eval_runtime": 51.9362, |
| "eval_samples_per_second": 1305.68, |
| "eval_steps_per_second": 2.561, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 1e-06, |
| "loss": 0.8567, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.04, |
| "eval_loss": 0.8682656288146973, |
| "eval_runtime": 51.8823, |
| "eval_samples_per_second": 1307.036, |
| "eval_steps_per_second": 2.563, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 1e-06, |
| "loss": 0.8727, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.06, |
| "eval_loss": 0.8680951595306396, |
| "eval_runtime": 51.8888, |
| "eval_samples_per_second": 1306.872, |
| "eval_steps_per_second": 2.563, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 1e-06, |
| "loss": 0.8497, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.08, |
| "eval_loss": 0.8676822185516357, |
| "eval_runtime": 51.8218, |
| "eval_samples_per_second": 1308.561, |
| "eval_steps_per_second": 2.566, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.09, |
| "learning_rate": 1e-06, |
| "loss": 0.869, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.09, |
| "eval_loss": 0.8674589395523071, |
| "eval_runtime": 51.8359, |
| "eval_samples_per_second": 1308.206, |
| "eval_steps_per_second": 2.566, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.11, |
| "learning_rate": 1e-06, |
| "loss": 0.8496, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.11, |
| "eval_loss": 0.8672137260437012, |
| "eval_runtime": 51.8903, |
| "eval_samples_per_second": 1306.834, |
| "eval_steps_per_second": 2.563, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 1e-06, |
| "loss": 0.8616, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.13, |
| "eval_loss": 0.8669330477714539, |
| "eval_runtime": 51.8142, |
| "eval_samples_per_second": 1308.754, |
| "eval_steps_per_second": 2.567, |
| "step": 2190 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 1e-06, |
| "loss": 0.8529, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.15, |
| "eval_loss": 0.8666622042655945, |
| "eval_runtime": 51.9401, |
| "eval_samples_per_second": 1305.58, |
| "eval_steps_per_second": 2.561, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.17, |
| "learning_rate": 1e-06, |
| "loss": 0.8544, |
| "step": 2210 |
| }, |
| { |
| "epoch": 4.17, |
| "eval_loss": 0.8662629127502441, |
| "eval_runtime": 51.8488, |
| "eval_samples_per_second": 1307.881, |
| "eval_steps_per_second": 2.565, |
| "step": 2210 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 1e-06, |
| "loss": 0.8557, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.19, |
| "eval_loss": 0.8660885095596313, |
| "eval_runtime": 51.9277, |
| "eval_samples_per_second": 1305.892, |
| "eval_steps_per_second": 2.561, |
| "step": 2220 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 1e-06, |
| "loss": 0.8622, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.21, |
| "eval_loss": 0.8657708764076233, |
| "eval_runtime": 51.9531, |
| "eval_samples_per_second": 1305.255, |
| "eval_steps_per_second": 2.56, |
| "step": 2230 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 1e-06, |
| "loss": 0.8579, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.23, |
| "eval_loss": 0.8654367327690125, |
| "eval_runtime": 51.8806, |
| "eval_samples_per_second": 1307.079, |
| "eval_steps_per_second": 2.564, |
| "step": 2240 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 1e-06, |
| "loss": 0.8671, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.25, |
| "eval_loss": 0.8651946783065796, |
| "eval_runtime": 51.8436, |
| "eval_samples_per_second": 1308.011, |
| "eval_steps_per_second": 2.565, |
| "step": 2250 |
| }, |
| { |
| "epoch": 4.26, |
| "learning_rate": 1e-06, |
| "loss": 0.8518, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.26, |
| "eval_loss": 0.8648258447647095, |
| "eval_runtime": 51.8609, |
| "eval_samples_per_second": 1307.574, |
| "eval_steps_per_second": 2.565, |
| "step": 2260 |
| }, |
| { |
| "epoch": 4.28, |
| "learning_rate": 1e-06, |
| "loss": 0.8539, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.28, |
| "eval_loss": 0.8645607233047485, |
| "eval_runtime": 51.9475, |
| "eval_samples_per_second": 1305.396, |
| "eval_steps_per_second": 2.56, |
| "step": 2270 |
| }, |
| { |
| "epoch": 4.3, |
| "learning_rate": 1e-06, |
| "loss": 0.8477, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.3, |
| "eval_loss": 0.8643002510070801, |
| "eval_runtime": 51.8601, |
| "eval_samples_per_second": 1307.596, |
| "eval_steps_per_second": 2.565, |
| "step": 2280 |
| }, |
| { |
| "epoch": 4.32, |
| "learning_rate": 1e-06, |
| "loss": 0.8514, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.32, |
| "eval_loss": 0.8639960885047913, |
| "eval_runtime": 51.9219, |
| "eval_samples_per_second": 1306.038, |
| "eval_steps_per_second": 2.562, |
| "step": 2290 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 1e-06, |
| "loss": 0.8541, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.34, |
| "eval_loss": 0.8637537360191345, |
| "eval_runtime": 51.9296, |
| "eval_samples_per_second": 1305.844, |
| "eval_steps_per_second": 2.561, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 1e-06, |
| "loss": 0.8488, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.36, |
| "eval_loss": 0.8634164333343506, |
| "eval_runtime": 51.9575, |
| "eval_samples_per_second": 1305.145, |
| "eval_steps_per_second": 2.56, |
| "step": 2310 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 1e-06, |
| "loss": 0.8477, |
| "step": 2320 |
| }, |
| { |
| "epoch": 4.38, |
| "eval_loss": 0.8631356954574585, |
| "eval_runtime": 51.9351, |
| "eval_samples_per_second": 1305.706, |
| "eval_steps_per_second": 2.561, |
| "step": 2320 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 1e-06, |
| "loss": 0.8428, |
| "step": 2330 |
| }, |
| { |
| "epoch": 4.4, |
| "eval_loss": 0.8628517389297485, |
| "eval_runtime": 51.8839, |
| "eval_samples_per_second": 1306.995, |
| "eval_steps_per_second": 2.563, |
| "step": 2330 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 1e-06, |
| "loss": 0.8488, |
| "step": 2340 |
| }, |
| { |
| "epoch": 4.42, |
| "eval_loss": 0.8625032305717468, |
| "eval_runtime": 51.8371, |
| "eval_samples_per_second": 1308.174, |
| "eval_steps_per_second": 2.566, |
| "step": 2340 |
| }, |
| { |
| "epoch": 4.43, |
| "learning_rate": 1e-06, |
| "loss": 0.8498, |
| "step": 2350 |
| }, |
| { |
| "epoch": 4.43, |
| "eval_loss": 0.8623167276382446, |
| "eval_runtime": 51.9502, |
| "eval_samples_per_second": 1305.327, |
| "eval_steps_per_second": 2.56, |
| "step": 2350 |
| }, |
| { |
| "epoch": 4.45, |
| "learning_rate": 1e-06, |
| "loss": 0.8593, |
| "step": 2360 |
| }, |
| { |
| "epoch": 4.45, |
| "eval_loss": 0.8619263172149658, |
| "eval_runtime": 51.8749, |
| "eval_samples_per_second": 1307.222, |
| "eval_steps_per_second": 2.564, |
| "step": 2360 |
| }, |
| { |
| "epoch": 4.47, |
| "learning_rate": 1e-06, |
| "loss": 0.8563, |
| "step": 2370 |
| }, |
| { |
| "epoch": 4.47, |
| "eval_loss": 0.8616353869438171, |
| "eval_runtime": 51.9072, |
| "eval_samples_per_second": 1306.407, |
| "eval_steps_per_second": 2.562, |
| "step": 2370 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 1e-06, |
| "loss": 0.8589, |
| "step": 2380 |
| }, |
| { |
| "epoch": 4.49, |
| "eval_loss": 0.8614597916603088, |
| "eval_runtime": 51.9394, |
| "eval_samples_per_second": 1305.597, |
| "eval_steps_per_second": 2.561, |
| "step": 2380 |
| }, |
| { |
| "epoch": 4.51, |
| "learning_rate": 1e-06, |
| "loss": 0.8508, |
| "step": 2390 |
| }, |
| { |
| "epoch": 4.51, |
| "eval_loss": 0.8611072301864624, |
| "eval_runtime": 51.9103, |
| "eval_samples_per_second": 1306.331, |
| "eval_steps_per_second": 2.562, |
| "step": 2390 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 1e-06, |
| "loss": 0.8266, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.53, |
| "eval_loss": 0.8608320951461792, |
| "eval_runtime": 51.9481, |
| "eval_samples_per_second": 1305.38, |
| "eval_steps_per_second": 2.56, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 1e-06, |
| "loss": 0.8546, |
| "step": 2410 |
| }, |
| { |
| "epoch": 4.55, |
| "eval_loss": 0.8605498671531677, |
| "eval_runtime": 51.9432, |
| "eval_samples_per_second": 1305.504, |
| "eval_steps_per_second": 2.56, |
| "step": 2410 |
| }, |
| { |
| "epoch": 4.57, |
| "learning_rate": 1e-06, |
| "loss": 0.853, |
| "step": 2420 |
| }, |
| { |
| "epoch": 4.57, |
| "eval_loss": 0.8603416085243225, |
| "eval_runtime": 52.0432, |
| "eval_samples_per_second": 1302.996, |
| "eval_steps_per_second": 2.556, |
| "step": 2420 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 1e-06, |
| "loss": 0.8484, |
| "step": 2430 |
| }, |
| { |
| "epoch": 4.58, |
| "eval_loss": 0.8599910736083984, |
| "eval_runtime": 51.8688, |
| "eval_samples_per_second": 1307.375, |
| "eval_steps_per_second": 2.564, |
| "step": 2430 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 1e-06, |
| "loss": 0.8328, |
| "step": 2440 |
| }, |
| { |
| "epoch": 4.6, |
| "eval_loss": 0.859768271446228, |
| "eval_runtime": 51.8564, |
| "eval_samples_per_second": 1307.689, |
| "eval_steps_per_second": 2.565, |
| "step": 2440 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 1e-06, |
| "loss": 0.834, |
| "step": 2450 |
| }, |
| { |
| "epoch": 4.62, |
| "eval_loss": 0.8594483733177185, |
| "eval_runtime": 51.951, |
| "eval_samples_per_second": 1305.306, |
| "eval_steps_per_second": 2.56, |
| "step": 2450 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 1e-06, |
| "loss": 0.8383, |
| "step": 2460 |
| }, |
| { |
| "epoch": 4.64, |
| "eval_loss": 0.8591568470001221, |
| "eval_runtime": 51.8704, |
| "eval_samples_per_second": 1307.335, |
| "eval_steps_per_second": 2.564, |
| "step": 2460 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 1e-06, |
| "loss": 0.841, |
| "step": 2470 |
| }, |
| { |
| "epoch": 4.66, |
| "eval_loss": 0.8589540719985962, |
| "eval_runtime": 51.9469, |
| "eval_samples_per_second": 1305.409, |
| "eval_steps_per_second": 2.56, |
| "step": 2470 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 1e-06, |
| "loss": 0.8472, |
| "step": 2480 |
| }, |
| { |
| "epoch": 4.68, |
| "eval_loss": 0.858716607093811, |
| "eval_runtime": 51.8753, |
| "eval_samples_per_second": 1307.213, |
| "eval_steps_per_second": 2.564, |
| "step": 2480 |
| }, |
| { |
| "epoch": 4.7, |
| "learning_rate": 1e-06, |
| "loss": 0.856, |
| "step": 2490 |
| }, |
| { |
| "epoch": 4.7, |
| "eval_loss": 0.8584380745887756, |
| "eval_runtime": 51.7822, |
| "eval_samples_per_second": 1309.561, |
| "eval_steps_per_second": 2.568, |
| "step": 2490 |
| }, |
| { |
| "epoch": 4.72, |
| "learning_rate": 1e-06, |
| "loss": 0.8477, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.72, |
| "eval_loss": 0.8581625819206238, |
| "eval_runtime": 51.8356, |
| "eval_samples_per_second": 1308.214, |
| "eval_steps_per_second": 2.566, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 1e-06, |
| "loss": 0.8391, |
| "step": 2510 |
| }, |
| { |
| "epoch": 4.74, |
| "eval_loss": 0.85783851146698, |
| "eval_runtime": 51.9944, |
| "eval_samples_per_second": 1304.217, |
| "eval_steps_per_second": 2.558, |
| "step": 2510 |
| }, |
| { |
| "epoch": 4.75, |
| "learning_rate": 1e-06, |
| "loss": 0.8428, |
| "step": 2520 |
| }, |
| { |
| "epoch": 4.75, |
| "eval_loss": 0.8575791120529175, |
| "eval_runtime": 51.7668, |
| "eval_samples_per_second": 1309.951, |
| "eval_steps_per_second": 2.569, |
| "step": 2520 |
| }, |
| { |
| "epoch": 4.77, |
| "learning_rate": 1e-06, |
| "loss": 0.8348, |
| "step": 2530 |
| }, |
| { |
| "epoch": 4.77, |
| "eval_loss": 0.8571997284889221, |
| "eval_runtime": 51.7677, |
| "eval_samples_per_second": 1309.93, |
| "eval_steps_per_second": 2.569, |
| "step": 2530 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 1e-06, |
| "loss": 0.8387, |
| "step": 2540 |
| }, |
| { |
| "epoch": 4.79, |
| "eval_loss": 0.857079803943634, |
| "eval_runtime": 51.9747, |
| "eval_samples_per_second": 1304.711, |
| "eval_steps_per_second": 2.559, |
| "step": 2540 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 1e-06, |
| "loss": 0.8382, |
| "step": 2550 |
| }, |
| { |
| "epoch": 4.81, |
| "eval_loss": 0.8567344546318054, |
| "eval_runtime": 51.8152, |
| "eval_samples_per_second": 1308.727, |
| "eval_steps_per_second": 2.567, |
| "step": 2550 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 1e-06, |
| "loss": 0.8352, |
| "step": 2560 |
| }, |
| { |
| "epoch": 4.83, |
| "eval_loss": 0.856507420539856, |
| "eval_runtime": 51.8084, |
| "eval_samples_per_second": 1308.899, |
| "eval_steps_per_second": 2.567, |
| "step": 2560 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 1e-06, |
| "loss": 0.8583, |
| "step": 2570 |
| }, |
| { |
| "epoch": 4.85, |
| "eval_loss": 0.8562394380569458, |
| "eval_runtime": 51.8605, |
| "eval_samples_per_second": 1307.585, |
| "eval_steps_per_second": 2.565, |
| "step": 2570 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 1e-06, |
| "loss": 0.8576, |
| "step": 2580 |
| }, |
| { |
| "epoch": 4.87, |
| "eval_loss": 0.8559291362762451, |
| "eval_runtime": 51.9102, |
| "eval_samples_per_second": 1306.334, |
| "eval_steps_per_second": 2.562, |
| "step": 2580 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 1e-06, |
| "loss": 0.8509, |
| "step": 2590 |
| }, |
| { |
| "epoch": 4.89, |
| "eval_loss": 0.8556599617004395, |
| "eval_runtime": 51.7511, |
| "eval_samples_per_second": 1310.348, |
| "eval_steps_per_second": 2.57, |
| "step": 2590 |
| }, |
| { |
| "epoch": 4.91, |
| "learning_rate": 1e-06, |
| "loss": 0.8348, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.91, |
| "eval_loss": 0.8555102944374084, |
| "eval_runtime": 51.9474, |
| "eval_samples_per_second": 1305.398, |
| "eval_steps_per_second": 2.56, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.92, |
| "learning_rate": 1e-06, |
| "loss": 0.8341, |
| "step": 2610 |
| }, |
| { |
| "epoch": 4.92, |
| "eval_loss": 0.8551704287528992, |
| "eval_runtime": 51.7687, |
| "eval_samples_per_second": 1309.904, |
| "eval_steps_per_second": 2.569, |
| "step": 2610 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 1e-06, |
| "loss": 0.8402, |
| "step": 2620 |
| }, |
| { |
| "epoch": 4.94, |
| "eval_loss": 0.8548431396484375, |
| "eval_runtime": 51.924, |
| "eval_samples_per_second": 1305.985, |
| "eval_steps_per_second": 2.561, |
| "step": 2620 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 1e-06, |
| "loss": 0.8266, |
| "step": 2630 |
| }, |
| { |
| "epoch": 4.96, |
| "eval_loss": 0.8546814322471619, |
| "eval_runtime": 51.8716, |
| "eval_samples_per_second": 1307.306, |
| "eval_steps_per_second": 2.564, |
| "step": 2630 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 1e-06, |
| "loss": 0.8239, |
| "step": 2640 |
| }, |
| { |
| "epoch": 4.98, |
| "eval_loss": 0.8543078303337097, |
| "eval_runtime": 51.8685, |
| "eval_samples_per_second": 1307.382, |
| "eval_steps_per_second": 2.564, |
| "step": 2640 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 1e-06, |
| "loss": 0.8442, |
| "step": 2650 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.8540557622909546, |
| "eval_runtime": 51.7829, |
| "eval_samples_per_second": 1309.543, |
| "eval_steps_per_second": 2.568, |
| "step": 2650 |
| }, |
| { |
| "epoch": 5.02, |
| "learning_rate": 1e-06, |
| "loss": 0.8376, |
| "step": 2660 |
| }, |
| { |
| "epoch": 5.02, |
| "eval_loss": 0.8539601564407349, |
| "eval_runtime": 51.8171, |
| "eval_samples_per_second": 1308.68, |
| "eval_steps_per_second": 2.567, |
| "step": 2660 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 1e-06, |
| "loss": 0.8451, |
| "step": 2670 |
| }, |
| { |
| "epoch": 5.04, |
| "eval_loss": 0.8537167310714722, |
| "eval_runtime": 51.8574, |
| "eval_samples_per_second": 1307.663, |
| "eval_steps_per_second": 2.565, |
| "step": 2670 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 1e-06, |
| "loss": 0.832, |
| "step": 2680 |
| }, |
| { |
| "epoch": 5.06, |
| "eval_loss": 0.8533909320831299, |
| "eval_runtime": 51.8868, |
| "eval_samples_per_second": 1306.922, |
| "eval_steps_per_second": 2.563, |
| "step": 2680 |
| }, |
| { |
| "epoch": 5.08, |
| "learning_rate": 1e-06, |
| "loss": 0.8439, |
| "step": 2690 |
| }, |
| { |
| "epoch": 5.08, |
| "eval_loss": 0.853160560131073, |
| "eval_runtime": 51.8888, |
| "eval_samples_per_second": 1306.873, |
| "eval_steps_per_second": 2.563, |
| "step": 2690 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 1e-06, |
| "loss": 0.8352, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.09, |
| "eval_loss": 0.852979302406311, |
| "eval_runtime": 51.9317, |
| "eval_samples_per_second": 1305.793, |
| "eval_steps_per_second": 2.561, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.11, |
| "learning_rate": 1e-06, |
| "loss": 0.8339, |
| "step": 2710 |
| }, |
| { |
| "epoch": 5.11, |
| "eval_loss": 0.8527371287345886, |
| "eval_runtime": 51.8547, |
| "eval_samples_per_second": 1307.732, |
| "eval_steps_per_second": 2.565, |
| "step": 2710 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 1e-06, |
| "loss": 0.8342, |
| "step": 2720 |
| }, |
| { |
| "epoch": 5.13, |
| "eval_loss": 0.8524041771888733, |
| "eval_runtime": 51.8908, |
| "eval_samples_per_second": 1306.82, |
| "eval_steps_per_second": 2.563, |
| "step": 2720 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 1e-06, |
| "loss": 0.8417, |
| "step": 2730 |
| }, |
| { |
| "epoch": 5.15, |
| "eval_loss": 0.8523533940315247, |
| "eval_runtime": 51.9792, |
| "eval_samples_per_second": 1304.6, |
| "eval_steps_per_second": 2.559, |
| "step": 2730 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 1e-06, |
| "loss": 0.8407, |
| "step": 2740 |
| }, |
| { |
| "epoch": 5.17, |
| "eval_loss": 0.8520050644874573, |
| "eval_runtime": 51.9453, |
| "eval_samples_per_second": 1305.45, |
| "eval_steps_per_second": 2.56, |
| "step": 2740 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 1e-06, |
| "loss": 0.8367, |
| "step": 2750 |
| }, |
| { |
| "epoch": 5.19, |
| "eval_loss": 0.8518672585487366, |
| "eval_runtime": 51.8852, |
| "eval_samples_per_second": 1306.963, |
| "eval_steps_per_second": 2.563, |
| "step": 2750 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 1e-06, |
| "loss": 0.8201, |
| "step": 2760 |
| }, |
| { |
| "epoch": 5.21, |
| "eval_loss": 0.8514959216117859, |
| "eval_runtime": 51.8894, |
| "eval_samples_per_second": 1306.855, |
| "eval_steps_per_second": 2.563, |
| "step": 2760 |
| }, |
| { |
| "epoch": 5.23, |
| "learning_rate": 1e-06, |
| "loss": 0.8278, |
| "step": 2770 |
| }, |
| { |
| "epoch": 5.23, |
| "eval_loss": 0.8512703776359558, |
| "eval_runtime": 51.7905, |
| "eval_samples_per_second": 1309.351, |
| "eval_steps_per_second": 2.568, |
| "step": 2770 |
| }, |
| { |
| "epoch": 5.25, |
| "learning_rate": 1e-06, |
| "loss": 0.8302, |
| "step": 2780 |
| }, |
| { |
| "epoch": 5.25, |
| "eval_loss": 0.8510229587554932, |
| "eval_runtime": 51.8436, |
| "eval_samples_per_second": 1308.01, |
| "eval_steps_per_second": 2.565, |
| "step": 2780 |
| }, |
| { |
| "epoch": 5.26, |
| "learning_rate": 1e-06, |
| "loss": 0.8148, |
| "step": 2790 |
| }, |
| { |
| "epoch": 5.26, |
| "eval_loss": 0.8507676720619202, |
| "eval_runtime": 51.818, |
| "eval_samples_per_second": 1308.657, |
| "eval_steps_per_second": 2.567, |
| "step": 2790 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 1e-06, |
| "loss": 0.84, |
| "step": 2800 |
| }, |
| { |
| "epoch": 5.28, |
| "eval_loss": 0.8504866361618042, |
| "eval_runtime": 51.7628, |
| "eval_samples_per_second": 1310.052, |
| "eval_steps_per_second": 2.569, |
| "step": 2800 |
| }, |
| { |
| "epoch": 5.3, |
| "learning_rate": 1e-06, |
| "loss": 0.8267, |
| "step": 2810 |
| }, |
| { |
| "epoch": 5.3, |
| "eval_loss": 0.8502947688102722, |
| "eval_runtime": 51.7712, |
| "eval_samples_per_second": 1309.839, |
| "eval_steps_per_second": 2.569, |
| "step": 2810 |
| }, |
| { |
| "epoch": 5.32, |
| "learning_rate": 1e-06, |
| "loss": 0.8356, |
| "step": 2820 |
| }, |
| { |
| "epoch": 5.32, |
| "eval_loss": 0.8499984741210938, |
| "eval_runtime": 51.8805, |
| "eval_samples_per_second": 1307.08, |
| "eval_steps_per_second": 2.564, |
| "step": 2820 |
| }, |
| { |
| "epoch": 5.34, |
| "learning_rate": 1e-06, |
| "loss": 0.8375, |
| "step": 2830 |
| }, |
| { |
| "epoch": 5.34, |
| "eval_loss": 0.8497674465179443, |
| "eval_runtime": 52.2672, |
| "eval_samples_per_second": 1297.41, |
| "eval_steps_per_second": 2.545, |
| "step": 2830 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 1e-06, |
| "loss": 0.829, |
| "step": 2840 |
| }, |
| { |
| "epoch": 5.36, |
| "eval_loss": 0.849518895149231, |
| "eval_runtime": 51.816, |
| "eval_samples_per_second": 1308.709, |
| "eval_steps_per_second": 2.567, |
| "step": 2840 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 1e-06, |
| "loss": 0.8212, |
| "step": 2850 |
| }, |
| { |
| "epoch": 5.38, |
| "eval_loss": 0.8492794632911682, |
| "eval_runtime": 51.9648, |
| "eval_samples_per_second": 1304.961, |
| "eval_steps_per_second": 2.559, |
| "step": 2850 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 1e-06, |
| "loss": 0.8353, |
| "step": 2860 |
| }, |
| { |
| "epoch": 5.4, |
| "eval_loss": 0.8490678071975708, |
| "eval_runtime": 51.8767, |
| "eval_samples_per_second": 1307.175, |
| "eval_steps_per_second": 2.564, |
| "step": 2860 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 1e-06, |
| "loss": 0.8322, |
| "step": 2870 |
| }, |
| { |
| "epoch": 5.42, |
| "eval_loss": 0.8488078713417053, |
| "eval_runtime": 51.8479, |
| "eval_samples_per_second": 1307.902, |
| "eval_steps_per_second": 2.565, |
| "step": 2870 |
| }, |
| { |
| "epoch": 5.43, |
| "learning_rate": 1e-06, |
| "loss": 0.826, |
| "step": 2880 |
| }, |
| { |
| "epoch": 5.43, |
| "eval_loss": 0.848633885383606, |
| "eval_runtime": 51.7082, |
| "eval_samples_per_second": 1311.436, |
| "eval_steps_per_second": 2.572, |
| "step": 2880 |
| }, |
| { |
| "epoch": 5.45, |
| "learning_rate": 1e-06, |
| "loss": 0.8237, |
| "step": 2890 |
| }, |
| { |
| "epoch": 5.45, |
| "eval_loss": 0.8483902812004089, |
| "eval_runtime": 52.0509, |
| "eval_samples_per_second": 1302.801, |
| "eval_steps_per_second": 2.555, |
| "step": 2890 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 1e-06, |
| "loss": 0.8499, |
| "step": 2900 |
| }, |
| { |
| "epoch": 5.47, |
| "eval_loss": 0.8481594920158386, |
| "eval_runtime": 51.9139, |
| "eval_samples_per_second": 1306.239, |
| "eval_steps_per_second": 2.562, |
| "step": 2900 |
| }, |
| { |
| "epoch": 5.49, |
| "learning_rate": 1e-06, |
| "loss": 0.8291, |
| "step": 2910 |
| }, |
| { |
| "epoch": 5.49, |
| "eval_loss": 0.8477580547332764, |
| "eval_runtime": 51.9491, |
| "eval_samples_per_second": 1305.354, |
| "eval_steps_per_second": 2.56, |
| "step": 2910 |
| }, |
| { |
| "epoch": 5.51, |
| "learning_rate": 1e-06, |
| "loss": 0.8216, |
| "step": 2920 |
| }, |
| { |
| "epoch": 5.51, |
| "eval_loss": 0.8476288914680481, |
| "eval_runtime": 52.0178, |
| "eval_samples_per_second": 1303.631, |
| "eval_steps_per_second": 2.557, |
| "step": 2920 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 1e-06, |
| "loss": 0.841, |
| "step": 2930 |
| }, |
| { |
| "epoch": 5.53, |
| "eval_loss": 0.8473249673843384, |
| "eval_runtime": 52.0044, |
| "eval_samples_per_second": 1303.967, |
| "eval_steps_per_second": 2.557, |
| "step": 2930 |
| }, |
| { |
| "epoch": 5.55, |
| "learning_rate": 1e-06, |
| "loss": 0.8265, |
| "step": 2940 |
| }, |
| { |
| "epoch": 5.55, |
| "eval_loss": 0.8471129536628723, |
| "eval_runtime": 51.799, |
| "eval_samples_per_second": 1309.137, |
| "eval_steps_per_second": 2.568, |
| "step": 2940 |
| }, |
| { |
| "epoch": 5.57, |
| "learning_rate": 1e-06, |
| "loss": 0.8329, |
| "step": 2950 |
| }, |
| { |
| "epoch": 5.57, |
| "eval_loss": 0.8468539118766785, |
| "eval_runtime": 51.8537, |
| "eval_samples_per_second": 1307.756, |
| "eval_steps_per_second": 2.565, |
| "step": 2950 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 1e-06, |
| "loss": 0.8257, |
| "step": 2960 |
| }, |
| { |
| "epoch": 5.58, |
| "eval_loss": 0.8465786576271057, |
| "eval_runtime": 52.0735, |
| "eval_samples_per_second": 1302.236, |
| "eval_steps_per_second": 2.554, |
| "step": 2960 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 1e-06, |
| "loss": 0.8332, |
| "step": 2970 |
| }, |
| { |
| "epoch": 5.6, |
| "eval_loss": 0.8464725613594055, |
| "eval_runtime": 51.7405, |
| "eval_samples_per_second": 1310.618, |
| "eval_steps_per_second": 2.571, |
| "step": 2970 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 1e-06, |
| "loss": 0.8188, |
| "step": 2980 |
| }, |
| { |
| "epoch": 5.62, |
| "eval_loss": 0.8461154699325562, |
| "eval_runtime": 51.9517, |
| "eval_samples_per_second": 1305.29, |
| "eval_steps_per_second": 2.56, |
| "step": 2980 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 1e-06, |
| "loss": 0.8336, |
| "step": 2990 |
| }, |
| { |
| "epoch": 5.64, |
| "eval_loss": 0.8458889126777649, |
| "eval_runtime": 51.9673, |
| "eval_samples_per_second": 1304.896, |
| "eval_steps_per_second": 2.559, |
| "step": 2990 |
| }, |
| { |
| "epoch": 5.66, |
| "learning_rate": 1e-06, |
| "loss": 0.8423, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.66, |
| "eval_loss": 0.845611035823822, |
| "eval_runtime": 51.8207, |
| "eval_samples_per_second": 1308.59, |
| "eval_steps_per_second": 2.567, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.68, |
| "learning_rate": 1e-06, |
| "loss": 0.8356, |
| "step": 3010 |
| }, |
| { |
| "epoch": 5.68, |
| "eval_loss": 0.8453643918037415, |
| "eval_runtime": 51.8234, |
| "eval_samples_per_second": 1308.52, |
| "eval_steps_per_second": 2.566, |
| "step": 3010 |
| }, |
| { |
| "epoch": 5.7, |
| "learning_rate": 1e-06, |
| "loss": 0.8237, |
| "step": 3020 |
| }, |
| { |
| "epoch": 5.7, |
| "eval_loss": 0.845206081867218, |
| "eval_runtime": 51.8105, |
| "eval_samples_per_second": 1308.846, |
| "eval_steps_per_second": 2.567, |
| "step": 3020 |
| }, |
| { |
| "epoch": 5.72, |
| "learning_rate": 1e-06, |
| "loss": 0.8414, |
| "step": 3030 |
| }, |
| { |
| "epoch": 5.72, |
| "eval_loss": 0.8449116349220276, |
| "eval_runtime": 51.9928, |
| "eval_samples_per_second": 1304.256, |
| "eval_steps_per_second": 2.558, |
| "step": 3030 |
| }, |
| { |
| "epoch": 5.74, |
| "learning_rate": 1e-06, |
| "loss": 0.8276, |
| "step": 3040 |
| }, |
| { |
| "epoch": 5.74, |
| "eval_loss": 0.8447545170783997, |
| "eval_runtime": 51.9148, |
| "eval_samples_per_second": 1306.217, |
| "eval_steps_per_second": 2.562, |
| "step": 3040 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 1e-06, |
| "loss": 0.818, |
| "step": 3050 |
| }, |
| { |
| "epoch": 5.75, |
| "eval_loss": 0.844507098197937, |
| "eval_runtime": 52.0252, |
| "eval_samples_per_second": 1303.446, |
| "eval_steps_per_second": 2.556, |
| "step": 3050 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 1e-06, |
| "loss": 0.8458, |
| "step": 3060 |
| }, |
| { |
| "epoch": 5.77, |
| "eval_loss": 0.8442298173904419, |
| "eval_runtime": 51.9337, |
| "eval_samples_per_second": 1305.742, |
| "eval_steps_per_second": 2.561, |
| "step": 3060 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 1e-06, |
| "loss": 0.8323, |
| "step": 3070 |
| }, |
| { |
| "epoch": 5.79, |
| "eval_loss": 0.843976616859436, |
| "eval_runtime": 51.9921, |
| "eval_samples_per_second": 1304.275, |
| "eval_steps_per_second": 2.558, |
| "step": 3070 |
| }, |
| { |
| "epoch": 5.81, |
| "learning_rate": 1e-06, |
| "loss": 0.8312, |
| "step": 3080 |
| }, |
| { |
| "epoch": 5.81, |
| "eval_loss": 0.843853771686554, |
| "eval_runtime": 51.8976, |
| "eval_samples_per_second": 1306.649, |
| "eval_steps_per_second": 2.563, |
| "step": 3080 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 1e-06, |
| "loss": 0.8272, |
| "step": 3090 |
| }, |
| { |
| "epoch": 5.83, |
| "eval_loss": 0.8435570597648621, |
| "eval_runtime": 51.9376, |
| "eval_samples_per_second": 1305.643, |
| "eval_steps_per_second": 2.561, |
| "step": 3090 |
| }, |
| { |
| "epoch": 5.85, |
| "learning_rate": 1e-06, |
| "loss": 0.8105, |
| "step": 3100 |
| }, |
| { |
| "epoch": 5.85, |
| "eval_loss": 0.8433138132095337, |
| "eval_runtime": 52.0207, |
| "eval_samples_per_second": 1303.559, |
| "eval_steps_per_second": 2.557, |
| "step": 3100 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 1e-06, |
| "loss": 0.827, |
| "step": 3110 |
| }, |
| { |
| "epoch": 5.87, |
| "eval_loss": 0.8431060314178467, |
| "eval_runtime": 52.0407, |
| "eval_samples_per_second": 1303.058, |
| "eval_steps_per_second": 2.556, |
| "step": 3110 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 1e-06, |
| "loss": 0.8338, |
| "step": 3120 |
| }, |
| { |
| "epoch": 5.89, |
| "eval_loss": 0.8428162336349487, |
| "eval_runtime": 52.0503, |
| "eval_samples_per_second": 1302.817, |
| "eval_steps_per_second": 2.555, |
| "step": 3120 |
| }, |
| { |
| "epoch": 5.91, |
| "learning_rate": 1e-06, |
| "loss": 0.8402, |
| "step": 3130 |
| }, |
| { |
| "epoch": 5.91, |
| "eval_loss": 0.8426678776741028, |
| "eval_runtime": 51.9479, |
| "eval_samples_per_second": 1305.384, |
| "eval_steps_per_second": 2.56, |
| "step": 3130 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 1e-06, |
| "loss": 0.8249, |
| "step": 3140 |
| }, |
| { |
| "epoch": 5.92, |
| "eval_loss": 0.8424062728881836, |
| "eval_runtime": 52.0747, |
| "eval_samples_per_second": 1302.206, |
| "eval_steps_per_second": 2.554, |
| "step": 3140 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 1e-06, |
| "loss": 0.8262, |
| "step": 3150 |
| }, |
| { |
| "epoch": 5.94, |
| "eval_loss": 0.8421285152435303, |
| "eval_runtime": 51.9477, |
| "eval_samples_per_second": 1305.39, |
| "eval_steps_per_second": 2.56, |
| "step": 3150 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 1e-06, |
| "loss": 0.8375, |
| "step": 3160 |
| }, |
| { |
| "epoch": 5.96, |
| "eval_loss": 0.8420241475105286, |
| "eval_runtime": 51.9104, |
| "eval_samples_per_second": 1306.329, |
| "eval_steps_per_second": 2.562, |
| "step": 3160 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 1e-06, |
| "loss": 0.8332, |
| "step": 3170 |
| }, |
| { |
| "epoch": 5.98, |
| "eval_loss": 0.8416071534156799, |
| "eval_runtime": 52.0768, |
| "eval_samples_per_second": 1302.154, |
| "eval_steps_per_second": 2.554, |
| "step": 3170 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 1e-06, |
| "loss": 0.8165, |
| "step": 3180 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.8414864540100098, |
| "eval_runtime": 52.0127, |
| "eval_samples_per_second": 1303.76, |
| "eval_steps_per_second": 2.557, |
| "step": 3180 |
| }, |
| { |
| "epoch": 6.02, |
| "learning_rate": 1e-06, |
| "loss": 0.8023, |
| "step": 3190 |
| }, |
| { |
| "epoch": 6.02, |
| "eval_loss": 0.8415040969848633, |
| "eval_runtime": 51.9192, |
| "eval_samples_per_second": 1306.107, |
| "eval_steps_per_second": 2.562, |
| "step": 3190 |
| }, |
| { |
| "epoch": 6.04, |
| "learning_rate": 1e-06, |
| "loss": 0.8223, |
| "step": 3200 |
| }, |
| { |
| "epoch": 6.04, |
| "eval_loss": 0.8412625789642334, |
| "eval_runtime": 52.0405, |
| "eval_samples_per_second": 1303.063, |
| "eval_steps_per_second": 2.556, |
| "step": 3200 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 1e-06, |
| "loss": 0.8273, |
| "step": 3210 |
| }, |
| { |
| "epoch": 6.06, |
| "eval_loss": 0.8409404158592224, |
| "eval_runtime": 51.9332, |
| "eval_samples_per_second": 1305.754, |
| "eval_steps_per_second": 2.561, |
| "step": 3210 |
| }, |
| { |
| "epoch": 6.08, |
| "learning_rate": 1e-06, |
| "loss": 0.8175, |
| "step": 3220 |
| }, |
| { |
| "epoch": 6.08, |
| "eval_loss": 0.840887725353241, |
| "eval_runtime": 52.0538, |
| "eval_samples_per_second": 1302.729, |
| "eval_steps_per_second": 2.555, |
| "step": 3220 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 1e-06, |
| "loss": 0.8332, |
| "step": 3230 |
| }, |
| { |
| "epoch": 6.09, |
| "eval_loss": 0.8405251502990723, |
| "eval_runtime": 51.9819, |
| "eval_samples_per_second": 1304.53, |
| "eval_steps_per_second": 2.559, |
| "step": 3230 |
| }, |
| { |
| "epoch": 6.11, |
| "learning_rate": 1e-06, |
| "loss": 0.825, |
| "step": 3240 |
| }, |
| { |
| "epoch": 6.11, |
| "eval_loss": 0.8403586745262146, |
| "eval_runtime": 51.9603, |
| "eval_samples_per_second": 1305.073, |
| "eval_steps_per_second": 2.56, |
| "step": 3240 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 1e-06, |
| "loss": 0.8214, |
| "step": 3250 |
| }, |
| { |
| "epoch": 6.13, |
| "eval_loss": 0.8403131365776062, |
| "eval_runtime": 51.9874, |
| "eval_samples_per_second": 1304.393, |
| "eval_steps_per_second": 2.558, |
| "step": 3250 |
| }, |
| { |
| "epoch": 6.15, |
| "learning_rate": 1e-06, |
| "loss": 0.8158, |
| "step": 3260 |
| }, |
| { |
| "epoch": 6.15, |
| "eval_loss": 0.8399011492729187, |
| "eval_runtime": 51.9729, |
| "eval_samples_per_second": 1304.757, |
| "eval_steps_per_second": 2.559, |
| "step": 3260 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 1e-06, |
| "loss": 0.8128, |
| "step": 3270 |
| }, |
| { |
| "epoch": 6.17, |
| "eval_loss": 0.8397038578987122, |
| "eval_runtime": 51.9517, |
| "eval_samples_per_second": 1305.29, |
| "eval_steps_per_second": 2.56, |
| "step": 3270 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 1e-06, |
| "loss": 0.8209, |
| "step": 3280 |
| }, |
| { |
| "epoch": 6.19, |
| "eval_loss": 0.8394789695739746, |
| "eval_runtime": 51.9885, |
| "eval_samples_per_second": 1304.366, |
| "eval_steps_per_second": 2.558, |
| "step": 3280 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 1e-06, |
| "loss": 0.807, |
| "step": 3290 |
| }, |
| { |
| "epoch": 6.21, |
| "eval_loss": 0.8392835855484009, |
| "eval_runtime": 52.0478, |
| "eval_samples_per_second": 1302.879, |
| "eval_steps_per_second": 2.555, |
| "step": 3290 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 1e-06, |
| "loss": 0.8187, |
| "step": 3300 |
| }, |
| { |
| "epoch": 6.23, |
| "eval_loss": 0.8390551209449768, |
| "eval_runtime": 52.5827, |
| "eval_samples_per_second": 1289.627, |
| "eval_steps_per_second": 2.529, |
| "step": 3300 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 1e-06, |
| "loss": 0.8299, |
| "step": 3310 |
| }, |
| { |
| "epoch": 6.25, |
| "eval_loss": 0.8387653231620789, |
| "eval_runtime": 52.0284, |
| "eval_samples_per_second": 1303.366, |
| "eval_steps_per_second": 2.556, |
| "step": 3310 |
| }, |
| { |
| "epoch": 6.26, |
| "learning_rate": 1e-06, |
| "loss": 0.816, |
| "step": 3320 |
| }, |
| { |
| "epoch": 6.26, |
| "eval_loss": 0.8385831117630005, |
| "eval_runtime": 52.1251, |
| "eval_samples_per_second": 1300.947, |
| "eval_steps_per_second": 2.552, |
| "step": 3320 |
| }, |
| { |
| "epoch": 6.28, |
| "learning_rate": 1e-06, |
| "loss": 0.7963, |
| "step": 3330 |
| }, |
| { |
| "epoch": 6.28, |
| "eval_loss": 0.8384872674942017, |
| "eval_runtime": 51.9641, |
| "eval_samples_per_second": 1304.977, |
| "eval_steps_per_second": 2.559, |
| "step": 3330 |
| }, |
| { |
| "epoch": 6.3, |
| "learning_rate": 1e-06, |
| "loss": 0.832, |
| "step": 3340 |
| }, |
| { |
| "epoch": 6.3, |
| "eval_loss": 0.8381639719009399, |
| "eval_runtime": 51.9857, |
| "eval_samples_per_second": 1304.436, |
| "eval_steps_per_second": 2.558, |
| "step": 3340 |
| }, |
| { |
| "epoch": 6.32, |
| "learning_rate": 1e-06, |
| "loss": 0.8122, |
| "step": 3350 |
| }, |
| { |
| "epoch": 6.32, |
| "eval_loss": 0.8379948139190674, |
| "eval_runtime": 52.0508, |
| "eval_samples_per_second": 1302.805, |
| "eval_steps_per_second": 2.555, |
| "step": 3350 |
| }, |
| { |
| "epoch": 6.34, |
| "learning_rate": 1e-06, |
| "loss": 0.8173, |
| "step": 3360 |
| }, |
| { |
| "epoch": 6.34, |
| "eval_loss": 0.8377028107643127, |
| "eval_runtime": 52.0007, |
| "eval_samples_per_second": 1304.059, |
| "eval_steps_per_second": 2.558, |
| "step": 3360 |
| }, |
| { |
| "epoch": 6.36, |
| "learning_rate": 1e-06, |
| "loss": 0.8254, |
| "step": 3370 |
| }, |
| { |
| "epoch": 6.36, |
| "eval_loss": 0.8377081155776978, |
| "eval_runtime": 52.0676, |
| "eval_samples_per_second": 1302.383, |
| "eval_steps_per_second": 2.554, |
| "step": 3370 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 1e-06, |
| "loss": 0.82, |
| "step": 3380 |
| }, |
| { |
| "epoch": 6.38, |
| "eval_loss": 0.8372619152069092, |
| "eval_runtime": 52.0154, |
| "eval_samples_per_second": 1303.692, |
| "eval_steps_per_second": 2.557, |
| "step": 3380 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 1e-06, |
| "loss": 0.8347, |
| "step": 3390 |
| }, |
| { |
| "epoch": 6.4, |
| "eval_loss": 0.8370408415794373, |
| "eval_runtime": 51.9385, |
| "eval_samples_per_second": 1305.622, |
| "eval_steps_per_second": 2.561, |
| "step": 3390 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 1e-06, |
| "loss": 0.8195, |
| "step": 3400 |
| }, |
| { |
| "epoch": 6.42, |
| "eval_loss": 0.8368457555770874, |
| "eval_runtime": 52.0455, |
| "eval_samples_per_second": 1302.937, |
| "eval_steps_per_second": 2.555, |
| "step": 3400 |
| }, |
| { |
| "epoch": 6.43, |
| "learning_rate": 1e-06, |
| "loss": 0.8121, |
| "step": 3410 |
| }, |
| { |
| "epoch": 6.43, |
| "eval_loss": 0.8366797566413879, |
| "eval_runtime": 51.9623, |
| "eval_samples_per_second": 1305.024, |
| "eval_steps_per_second": 2.56, |
| "step": 3410 |
| }, |
| { |
| "epoch": 6.45, |
| "learning_rate": 1e-06, |
| "loss": 0.8226, |
| "step": 3420 |
| }, |
| { |
| "epoch": 6.45, |
| "eval_loss": 0.8365644812583923, |
| "eval_runtime": 52.0483, |
| "eval_samples_per_second": 1302.866, |
| "eval_steps_per_second": 2.555, |
| "step": 3420 |
| }, |
| { |
| "epoch": 6.47, |
| "learning_rate": 1e-06, |
| "loss": 0.8278, |
| "step": 3430 |
| }, |
| { |
| "epoch": 6.47, |
| "eval_loss": 0.836248517036438, |
| "eval_runtime": 51.9763, |
| "eval_samples_per_second": 1304.671, |
| "eval_steps_per_second": 2.559, |
| "step": 3430 |
| }, |
| { |
| "epoch": 6.49, |
| "learning_rate": 1e-06, |
| "loss": 0.7992, |
| "step": 3440 |
| }, |
| { |
| "epoch": 6.49, |
| "eval_loss": 0.8359348177909851, |
| "eval_runtime": 51.9796, |
| "eval_samples_per_second": 1304.588, |
| "eval_steps_per_second": 2.559, |
| "step": 3440 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 1e-06, |
| "loss": 0.8262, |
| "step": 3450 |
| }, |
| { |
| "epoch": 6.51, |
| "eval_loss": 0.8360700607299805, |
| "eval_runtime": 51.8816, |
| "eval_samples_per_second": 1307.052, |
| "eval_steps_per_second": 2.564, |
| "step": 3450 |
| }, |
| { |
| "epoch": 6.53, |
| "learning_rate": 1e-06, |
| "loss": 0.8161, |
| "step": 3460 |
| }, |
| { |
| "epoch": 6.53, |
| "eval_loss": 0.8355943560600281, |
| "eval_runtime": 51.9822, |
| "eval_samples_per_second": 1304.524, |
| "eval_steps_per_second": 2.559, |
| "step": 3460 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 1e-06, |
| "loss": 0.8063, |
| "step": 3470 |
| }, |
| { |
| "epoch": 6.55, |
| "eval_loss": 0.8354068398475647, |
| "eval_runtime": 51.8622, |
| "eval_samples_per_second": 1307.541, |
| "eval_steps_per_second": 2.564, |
| "step": 3470 |
| }, |
| { |
| "epoch": 6.57, |
| "learning_rate": 1e-06, |
| "loss": 0.8078, |
| "step": 3480 |
| }, |
| { |
| "epoch": 6.57, |
| "eval_loss": 0.8352031111717224, |
| "eval_runtime": 51.9774, |
| "eval_samples_per_second": 1304.643, |
| "eval_steps_per_second": 2.559, |
| "step": 3480 |
| }, |
| { |
| "epoch": 6.58, |
| "learning_rate": 1e-06, |
| "loss": 0.8024, |
| "step": 3490 |
| }, |
| { |
| "epoch": 6.58, |
| "eval_loss": 0.8349164128303528, |
| "eval_runtime": 51.9339, |
| "eval_samples_per_second": 1305.738, |
| "eval_steps_per_second": 2.561, |
| "step": 3490 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 1e-06, |
| "loss": 0.808, |
| "step": 3500 |
| }, |
| { |
| "epoch": 6.6, |
| "eval_loss": 0.8347747921943665, |
| "eval_runtime": 52.0623, |
| "eval_samples_per_second": 1302.516, |
| "eval_steps_per_second": 2.555, |
| "step": 3500 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 1e-06, |
| "loss": 0.8173, |
| "step": 3510 |
| }, |
| { |
| "epoch": 6.62, |
| "eval_loss": 0.8345584869384766, |
| "eval_runtime": 51.9186, |
| "eval_samples_per_second": 1306.123, |
| "eval_steps_per_second": 2.562, |
| "step": 3510 |
| }, |
| { |
| "epoch": 6.64, |
| "learning_rate": 1e-06, |
| "loss": 0.8211, |
| "step": 3520 |
| }, |
| { |
| "epoch": 6.64, |
| "eval_loss": 0.834367573261261, |
| "eval_runtime": 52.0978, |
| "eval_samples_per_second": 1301.63, |
| "eval_steps_per_second": 2.553, |
| "step": 3520 |
| }, |
| { |
| "epoch": 6.66, |
| "learning_rate": 1e-06, |
| "loss": 0.8349, |
| "step": 3530 |
| }, |
| { |
| "epoch": 6.66, |
| "eval_loss": 0.8342083692550659, |
| "eval_runtime": 52.0093, |
| "eval_samples_per_second": 1303.844, |
| "eval_steps_per_second": 2.557, |
| "step": 3530 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 1e-06, |
| "loss": 0.8306, |
| "step": 3540 |
| }, |
| { |
| "epoch": 6.68, |
| "eval_loss": 0.8338024616241455, |
| "eval_runtime": 51.9465, |
| "eval_samples_per_second": 1305.421, |
| "eval_steps_per_second": 2.56, |
| "step": 3540 |
| }, |
| { |
| "epoch": 6.7, |
| "learning_rate": 1e-06, |
| "loss": 0.8193, |
| "step": 3550 |
| }, |
| { |
| "epoch": 6.7, |
| "eval_loss": 0.8336243033409119, |
| "eval_runtime": 51.9784, |
| "eval_samples_per_second": 1304.619, |
| "eval_steps_per_second": 2.559, |
| "step": 3550 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 1e-06, |
| "loss": 0.8118, |
| "step": 3560 |
| }, |
| { |
| "epoch": 6.72, |
| "eval_loss": 0.8336126804351807, |
| "eval_runtime": 52.0238, |
| "eval_samples_per_second": 1303.479, |
| "eval_steps_per_second": 2.557, |
| "step": 3560 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 1e-06, |
| "loss": 0.8263, |
| "step": 3570 |
| }, |
| { |
| "epoch": 6.74, |
| "eval_loss": 0.8331900238990784, |
| "eval_runtime": 51.9913, |
| "eval_samples_per_second": 1304.296, |
| "eval_steps_per_second": 2.558, |
| "step": 3570 |
| }, |
| { |
| "epoch": 6.75, |
| "learning_rate": 1e-06, |
| "loss": 0.811, |
| "step": 3580 |
| }, |
| { |
| "epoch": 6.75, |
| "eval_loss": 0.8329805731773376, |
| "eval_runtime": 52.0168, |
| "eval_samples_per_second": 1303.656, |
| "eval_steps_per_second": 2.557, |
| "step": 3580 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 1e-06, |
| "loss": 0.8065, |
| "step": 3590 |
| }, |
| { |
| "epoch": 6.77, |
| "eval_loss": 0.8329923152923584, |
| "eval_runtime": 52.0099, |
| "eval_samples_per_second": 1303.83, |
| "eval_steps_per_second": 2.557, |
| "step": 3590 |
| }, |
| { |
| "epoch": 6.79, |
| "learning_rate": 1e-06, |
| "loss": 0.8204, |
| "step": 3600 |
| }, |
| { |
| "epoch": 6.79, |
| "eval_loss": 0.8327195644378662, |
| "eval_runtime": 52.1332, |
| "eval_samples_per_second": 1300.746, |
| "eval_steps_per_second": 2.551, |
| "step": 3600 |
| }, |
| { |
| "epoch": 6.81, |
| "learning_rate": 1e-06, |
| "loss": 0.818, |
| "step": 3610 |
| }, |
| { |
| "epoch": 6.81, |
| "eval_loss": 0.8323770761489868, |
| "eval_runtime": 52.001, |
| "eval_samples_per_second": 1304.052, |
| "eval_steps_per_second": 2.558, |
| "step": 3610 |
| }, |
| { |
| "epoch": 6.83, |
| "learning_rate": 1e-06, |
| "loss": 0.805, |
| "step": 3620 |
| }, |
| { |
| "epoch": 6.83, |
| "eval_loss": 0.832315981388092, |
| "eval_runtime": 51.8866, |
| "eval_samples_per_second": 1306.928, |
| "eval_steps_per_second": 2.563, |
| "step": 3620 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 1e-06, |
| "loss": 0.8161, |
| "step": 3630 |
| }, |
| { |
| "epoch": 6.85, |
| "eval_loss": 0.8319803476333618, |
| "eval_runtime": 51.9838, |
| "eval_samples_per_second": 1304.484, |
| "eval_steps_per_second": 2.558, |
| "step": 3630 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 1e-06, |
| "loss": 0.8269, |
| "step": 3640 |
| }, |
| { |
| "epoch": 6.87, |
| "eval_loss": 0.831838846206665, |
| "eval_runtime": 51.9964, |
| "eval_samples_per_second": 1304.168, |
| "eval_steps_per_second": 2.558, |
| "step": 3640 |
| }, |
| { |
| "epoch": 6.89, |
| "learning_rate": 1e-06, |
| "loss": 0.8244, |
| "step": 3650 |
| }, |
| { |
| "epoch": 6.89, |
| "eval_loss": 0.8317265510559082, |
| "eval_runtime": 51.8435, |
| "eval_samples_per_second": 1308.014, |
| "eval_steps_per_second": 2.565, |
| "step": 3650 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 1e-06, |
| "loss": 0.8007, |
| "step": 3660 |
| }, |
| { |
| "epoch": 6.91, |
| "eval_loss": 0.8313496112823486, |
| "eval_runtime": 51.9846, |
| "eval_samples_per_second": 1304.463, |
| "eval_steps_per_second": 2.558, |
| "step": 3660 |
| }, |
| { |
| "epoch": 6.92, |
| "learning_rate": 1e-06, |
| "loss": 0.8021, |
| "step": 3670 |
| }, |
| { |
| "epoch": 6.92, |
| "eval_loss": 0.8311917781829834, |
| "eval_runtime": 51.9971, |
| "eval_samples_per_second": 1304.15, |
| "eval_steps_per_second": 2.558, |
| "step": 3670 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 1e-06, |
| "loss": 0.8014, |
| "step": 3680 |
| }, |
| { |
| "epoch": 6.94, |
| "eval_loss": 0.8310168981552124, |
| "eval_runtime": 51.8745, |
| "eval_samples_per_second": 1307.231, |
| "eval_steps_per_second": 2.564, |
| "step": 3680 |
| }, |
| { |
| "epoch": 6.96, |
| "learning_rate": 1e-06, |
| "loss": 0.8108, |
| "step": 3690 |
| }, |
| { |
| "epoch": 6.96, |
| "eval_loss": 0.8306861519813538, |
| "eval_runtime": 51.9592, |
| "eval_samples_per_second": 1305.101, |
| "eval_steps_per_second": 2.56, |
| "step": 3690 |
| }, |
| { |
| "epoch": 6.98, |
| "learning_rate": 1e-06, |
| "loss": 0.8007, |
| "step": 3700 |
| }, |
| { |
| "epoch": 6.98, |
| "eval_loss": 0.8306426405906677, |
| "eval_runtime": 52.0214, |
| "eval_samples_per_second": 1303.54, |
| "eval_steps_per_second": 2.557, |
| "step": 3700 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 1e-06, |
| "loss": 0.8008, |
| "step": 3710 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.8303616046905518, |
| "eval_runtime": 52.106, |
| "eval_samples_per_second": 1301.425, |
| "eval_steps_per_second": 2.552, |
| "step": 3710 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 1e-06, |
| "loss": 0.8087, |
| "step": 3720 |
| }, |
| { |
| "epoch": 7.02, |
| "eval_loss": 0.8302958607673645, |
| "eval_runtime": 51.8942, |
| "eval_samples_per_second": 1306.735, |
| "eval_steps_per_second": 2.563, |
| "step": 3720 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 1e-06, |
| "loss": 0.7951, |
| "step": 3730 |
| }, |
| { |
| "epoch": 7.04, |
| "eval_loss": 0.8302793502807617, |
| "eval_runtime": 52.0585, |
| "eval_samples_per_second": 1302.613, |
| "eval_steps_per_second": 2.555, |
| "step": 3730 |
| }, |
| { |
| "epoch": 7.06, |
| "learning_rate": 1e-06, |
| "loss": 0.7948, |
| "step": 3740 |
| }, |
| { |
| "epoch": 7.06, |
| "eval_loss": 0.8299869894981384, |
| "eval_runtime": 51.889, |
| "eval_samples_per_second": 1306.867, |
| "eval_steps_per_second": 2.563, |
| "step": 3740 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 1e-06, |
| "loss": 0.8202, |
| "step": 3750 |
| }, |
| { |
| "epoch": 7.08, |
| "eval_loss": 0.8297907114028931, |
| "eval_runtime": 51.8406, |
| "eval_samples_per_second": 1308.086, |
| "eval_steps_per_second": 2.566, |
| "step": 3750 |
| }, |
| { |
| "epoch": 7.09, |
| "learning_rate": 1e-06, |
| "loss": 0.8065, |
| "step": 3760 |
| }, |
| { |
| "epoch": 7.09, |
| "eval_loss": 0.8296034932136536, |
| "eval_runtime": 51.9961, |
| "eval_samples_per_second": 1304.174, |
| "eval_steps_per_second": 2.558, |
| "step": 3760 |
| }, |
| { |
| "epoch": 7.11, |
| "learning_rate": 1e-06, |
| "loss": 0.8044, |
| "step": 3770 |
| }, |
| { |
| "epoch": 7.11, |
| "eval_loss": 0.829464316368103, |
| "eval_runtime": 51.9978, |
| "eval_samples_per_second": 1304.133, |
| "eval_steps_per_second": 2.558, |
| "step": 3770 |
| }, |
| { |
| "epoch": 7.13, |
| "learning_rate": 1e-06, |
| "loss": 0.8093, |
| "step": 3780 |
| }, |
| { |
| "epoch": 7.13, |
| "eval_loss": 0.8291881084442139, |
| "eval_runtime": 52.0553, |
| "eval_samples_per_second": 1302.693, |
| "eval_steps_per_second": 2.555, |
| "step": 3780 |
| }, |
| { |
| "epoch": 7.15, |
| "learning_rate": 1e-06, |
| "loss": 0.823, |
| "step": 3790 |
| }, |
| { |
| "epoch": 7.15, |
| "eval_loss": 0.8290471434593201, |
| "eval_runtime": 51.9731, |
| "eval_samples_per_second": 1304.751, |
| "eval_steps_per_second": 2.559, |
| "step": 3790 |
| }, |
| { |
| "epoch": 7.17, |
| "learning_rate": 1e-06, |
| "loss": 0.7982, |
| "step": 3800 |
| }, |
| { |
| "epoch": 7.17, |
| "eval_loss": 0.828815758228302, |
| "eval_runtime": 52.0766, |
| "eval_samples_per_second": 1302.159, |
| "eval_steps_per_second": 2.554, |
| "step": 3800 |
| }, |
| { |
| "epoch": 7.19, |
| "learning_rate": 1e-06, |
| "loss": 0.8129, |
| "step": 3810 |
| }, |
| { |
| "epoch": 7.19, |
| "eval_loss": 0.8286548256874084, |
| "eval_runtime": 51.9203, |
| "eval_samples_per_second": 1306.078, |
| "eval_steps_per_second": 2.562, |
| "step": 3810 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 1e-06, |
| "loss": 0.8017, |
| "step": 3820 |
| }, |
| { |
| "epoch": 7.21, |
| "eval_loss": 0.8284364938735962, |
| "eval_runtime": 52.0163, |
| "eval_samples_per_second": 1303.668, |
| "eval_steps_per_second": 2.557, |
| "step": 3820 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 1e-06, |
| "loss": 0.8091, |
| "step": 3830 |
| }, |
| { |
| "epoch": 7.23, |
| "eval_loss": 0.8282648921012878, |
| "eval_runtime": 52.0506, |
| "eval_samples_per_second": 1302.81, |
| "eval_steps_per_second": 2.555, |
| "step": 3830 |
| }, |
| { |
| "epoch": 7.25, |
| "learning_rate": 1e-06, |
| "loss": 0.7956, |
| "step": 3840 |
| }, |
| { |
| "epoch": 7.25, |
| "eval_loss": 0.8282151222229004, |
| "eval_runtime": 52.0166, |
| "eval_samples_per_second": 1303.66, |
| "eval_steps_per_second": 2.557, |
| "step": 3840 |
| }, |
| { |
| "epoch": 7.26, |
| "learning_rate": 1e-06, |
| "loss": 0.8153, |
| "step": 3850 |
| }, |
| { |
| "epoch": 7.26, |
| "eval_loss": 0.8278843760490417, |
| "eval_runtime": 52.0038, |
| "eval_samples_per_second": 1303.982, |
| "eval_steps_per_second": 2.558, |
| "step": 3850 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 1e-06, |
| "loss": 0.8066, |
| "step": 3860 |
| }, |
| { |
| "epoch": 7.28, |
| "eval_loss": 0.8276271820068359, |
| "eval_runtime": 52.0462, |
| "eval_samples_per_second": 1302.919, |
| "eval_steps_per_second": 2.555, |
| "step": 3860 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 1e-06, |
| "loss": 0.8046, |
| "step": 3870 |
| }, |
| { |
| "epoch": 7.3, |
| "eval_loss": 0.8274891376495361, |
| "eval_runtime": 51.955, |
| "eval_samples_per_second": 1305.208, |
| "eval_steps_per_second": 2.56, |
| "step": 3870 |
| }, |
| { |
| "epoch": 7.32, |
| "learning_rate": 1e-06, |
| "loss": 0.7983, |
| "step": 3880 |
| }, |
| { |
| "epoch": 7.32, |
| "eval_loss": 0.8273729085922241, |
| "eval_runtime": 51.9905, |
| "eval_samples_per_second": 1304.315, |
| "eval_steps_per_second": 2.558, |
| "step": 3880 |
| }, |
| { |
| "epoch": 7.34, |
| "learning_rate": 1e-06, |
| "loss": 0.8028, |
| "step": 3890 |
| }, |
| { |
| "epoch": 7.34, |
| "eval_loss": 0.8270576000213623, |
| "eval_runtime": 52.0076, |
| "eval_samples_per_second": 1303.886, |
| "eval_steps_per_second": 2.557, |
| "step": 3890 |
| }, |
| { |
| "epoch": 7.36, |
| "learning_rate": 1e-06, |
| "loss": 0.7898, |
| "step": 3900 |
| }, |
| { |
| "epoch": 7.36, |
| "eval_loss": 0.8267748355865479, |
| "eval_runtime": 51.9676, |
| "eval_samples_per_second": 1304.891, |
| "eval_steps_per_second": 2.559, |
| "step": 3900 |
| }, |
| { |
| "epoch": 7.38, |
| "learning_rate": 1e-06, |
| "loss": 0.8076, |
| "step": 3910 |
| }, |
| { |
| "epoch": 7.38, |
| "eval_loss": 0.82685387134552, |
| "eval_runtime": 51.949, |
| "eval_samples_per_second": 1305.357, |
| "eval_steps_per_second": 2.56, |
| "step": 3910 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 1e-06, |
| "loss": 0.8105, |
| "step": 3920 |
| }, |
| { |
| "epoch": 7.4, |
| "eval_loss": 0.8265158534049988, |
| "eval_runtime": 51.8551, |
| "eval_samples_per_second": 1307.722, |
| "eval_steps_per_second": 2.565, |
| "step": 3920 |
| }, |
| { |
| "epoch": 7.42, |
| "learning_rate": 1e-06, |
| "loss": 0.8111, |
| "step": 3930 |
| }, |
| { |
| "epoch": 7.42, |
| "eval_loss": 0.8262932896614075, |
| "eval_runtime": 51.982, |
| "eval_samples_per_second": 1304.529, |
| "eval_steps_per_second": 2.559, |
| "step": 3930 |
| }, |
| { |
| "epoch": 7.43, |
| "learning_rate": 1e-06, |
| "loss": 0.7957, |
| "step": 3940 |
| }, |
| { |
| "epoch": 7.43, |
| "eval_loss": 0.8261959552764893, |
| "eval_runtime": 52.0057, |
| "eval_samples_per_second": 1303.933, |
| "eval_steps_per_second": 2.557, |
| "step": 3940 |
| }, |
| { |
| "epoch": 7.45, |
| "learning_rate": 1e-06, |
| "loss": 0.7907, |
| "step": 3950 |
| }, |
| { |
| "epoch": 7.45, |
| "eval_loss": 0.8257889747619629, |
| "eval_runtime": 51.9024, |
| "eval_samples_per_second": 1306.529, |
| "eval_steps_per_second": 2.563, |
| "step": 3950 |
| }, |
| { |
| "epoch": 7.47, |
| "learning_rate": 1e-06, |
| "loss": 0.8043, |
| "step": 3960 |
| }, |
| { |
| "epoch": 7.47, |
| "eval_loss": 0.8256938457489014, |
| "eval_runtime": 51.8359, |
| "eval_samples_per_second": 1308.204, |
| "eval_steps_per_second": 2.566, |
| "step": 3960 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 1e-06, |
| "loss": 0.7979, |
| "step": 3970 |
| }, |
| { |
| "epoch": 7.49, |
| "eval_loss": 0.8256092071533203, |
| "eval_runtime": 51.8834, |
| "eval_samples_per_second": 1307.008, |
| "eval_steps_per_second": 2.563, |
| "step": 3970 |
| }, |
| { |
| "epoch": 7.51, |
| "learning_rate": 1e-06, |
| "loss": 0.8023, |
| "step": 3980 |
| }, |
| { |
| "epoch": 7.51, |
| "eval_loss": 0.8254022598266602, |
| "eval_runtime": 52.5271, |
| "eval_samples_per_second": 1290.99, |
| "eval_steps_per_second": 2.532, |
| "step": 3980 |
| }, |
| { |
| "epoch": 7.53, |
| "learning_rate": 1e-06, |
| "loss": 0.8079, |
| "step": 3990 |
| }, |
| { |
| "epoch": 7.53, |
| "eval_loss": 0.8251172304153442, |
| "eval_runtime": 51.9409, |
| "eval_samples_per_second": 1305.561, |
| "eval_steps_per_second": 2.561, |
| "step": 3990 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 1e-06, |
| "loss": 0.8058, |
| "step": 4000 |
| }, |
| { |
| "epoch": 7.55, |
| "eval_loss": 0.8249055743217468, |
| "eval_runtime": 51.9893, |
| "eval_samples_per_second": 1304.345, |
| "eval_steps_per_second": 2.558, |
| "step": 4000 |
| }, |
| { |
| "epoch": 7.57, |
| "learning_rate": 1e-06, |
| "loss": 0.8055, |
| "step": 4010 |
| }, |
| { |
| "epoch": 7.57, |
| "eval_loss": 0.8246675729751587, |
| "eval_runtime": 51.8262, |
| "eval_samples_per_second": 1308.449, |
| "eval_steps_per_second": 2.566, |
| "step": 4010 |
| }, |
| { |
| "epoch": 7.58, |
| "learning_rate": 1e-06, |
| "loss": 0.812, |
| "step": 4020 |
| }, |
| { |
| "epoch": 7.58, |
| "eval_loss": 0.8246596455574036, |
| "eval_runtime": 51.8921, |
| "eval_samples_per_second": 1306.788, |
| "eval_steps_per_second": 2.563, |
| "step": 4020 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 1e-06, |
| "loss": 0.7859, |
| "step": 4030 |
| }, |
| { |
| "epoch": 7.6, |
| "eval_loss": 0.8243203163146973, |
| "eval_runtime": 51.8981, |
| "eval_samples_per_second": 1306.637, |
| "eval_steps_per_second": 2.563, |
| "step": 4030 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 1e-06, |
| "loss": 0.8007, |
| "step": 4040 |
| }, |
| { |
| "epoch": 7.62, |
| "eval_loss": 0.8241901397705078, |
| "eval_runtime": 51.9544, |
| "eval_samples_per_second": 1305.22, |
| "eval_steps_per_second": 2.56, |
| "step": 4040 |
| }, |
| { |
| "epoch": 7.64, |
| "learning_rate": 1e-06, |
| "loss": 0.799, |
| "step": 4050 |
| }, |
| { |
| "epoch": 7.64, |
| "eval_loss": 0.8239621520042419, |
| "eval_runtime": 51.9119, |
| "eval_samples_per_second": 1306.289, |
| "eval_steps_per_second": 2.562, |
| "step": 4050 |
| }, |
| { |
| "epoch": 7.66, |
| "learning_rate": 1e-06, |
| "loss": 0.8007, |
| "step": 4060 |
| }, |
| { |
| "epoch": 7.66, |
| "eval_loss": 0.8240479826927185, |
| "eval_runtime": 52.0052, |
| "eval_samples_per_second": 1303.947, |
| "eval_steps_per_second": 2.557, |
| "step": 4060 |
| }, |
| { |
| "epoch": 7.68, |
| "learning_rate": 1e-06, |
| "loss": 0.7952, |
| "step": 4070 |
| }, |
| { |
| "epoch": 7.68, |
| "eval_loss": 0.8235350847244263, |
| "eval_runtime": 51.9417, |
| "eval_samples_per_second": 1305.54, |
| "eval_steps_per_second": 2.561, |
| "step": 4070 |
| }, |
| { |
| "epoch": 7.7, |
| "learning_rate": 1e-06, |
| "loss": 0.8089, |
| "step": 4080 |
| }, |
| { |
| "epoch": 7.7, |
| "eval_loss": 0.8234580159187317, |
| "eval_runtime": 51.9976, |
| "eval_samples_per_second": 1304.138, |
| "eval_steps_per_second": 2.558, |
| "step": 4080 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 1e-06, |
| "loss": 0.8106, |
| "step": 4090 |
| }, |
| { |
| "epoch": 7.72, |
| "eval_loss": 0.8232220411300659, |
| "eval_runtime": 51.9783, |
| "eval_samples_per_second": 1304.621, |
| "eval_steps_per_second": 2.559, |
| "step": 4090 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 1e-06, |
| "loss": 0.81, |
| "step": 4100 |
| }, |
| { |
| "epoch": 7.74, |
| "eval_loss": 0.8230018615722656, |
| "eval_runtime": 51.9098, |
| "eval_samples_per_second": 1306.343, |
| "eval_steps_per_second": 2.562, |
| "step": 4100 |
| }, |
| { |
| "epoch": 7.75, |
| "learning_rate": 1e-06, |
| "loss": 0.8003, |
| "step": 4110 |
| }, |
| { |
| "epoch": 7.75, |
| "eval_loss": 0.8228157758712769, |
| "eval_runtime": 51.9865, |
| "eval_samples_per_second": 1304.415, |
| "eval_steps_per_second": 2.558, |
| "step": 4110 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 1e-06, |
| "loss": 0.7981, |
| "step": 4120 |
| }, |
| { |
| "epoch": 7.77, |
| "eval_loss": 0.8227835893630981, |
| "eval_runtime": 51.9696, |
| "eval_samples_per_second": 1304.841, |
| "eval_steps_per_second": 2.559, |
| "step": 4120 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 1e-06, |
| "loss": 0.8073, |
| "step": 4130 |
| }, |
| { |
| "epoch": 7.79, |
| "eval_loss": 0.8224825263023376, |
| "eval_runtime": 52.0838, |
| "eval_samples_per_second": 1301.978, |
| "eval_steps_per_second": 2.554, |
| "step": 4130 |
| }, |
| { |
| "epoch": 7.81, |
| "learning_rate": 1e-06, |
| "loss": 0.8023, |
| "step": 4140 |
| }, |
| { |
| "epoch": 7.81, |
| "eval_loss": 0.8221595883369446, |
| "eval_runtime": 52.1232, |
| "eval_samples_per_second": 1300.994, |
| "eval_steps_per_second": 2.552, |
| "step": 4140 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 1e-06, |
| "loss": 0.7924, |
| "step": 4150 |
| }, |
| { |
| "epoch": 7.83, |
| "eval_loss": 0.8220430612564087, |
| "eval_runtime": 52.0247, |
| "eval_samples_per_second": 1303.458, |
| "eval_steps_per_second": 2.556, |
| "step": 4150 |
| }, |
| { |
| "epoch": 7.85, |
| "learning_rate": 1e-06, |
| "loss": 0.793, |
| "step": 4160 |
| }, |
| { |
| "epoch": 7.85, |
| "eval_loss": 0.821935772895813, |
| "eval_runtime": 51.9774, |
| "eval_samples_per_second": 1304.644, |
| "eval_steps_per_second": 2.559, |
| "step": 4160 |
| }, |
| { |
| "epoch": 7.87, |
| "learning_rate": 1e-06, |
| "loss": 0.8021, |
| "step": 4170 |
| }, |
| { |
| "epoch": 7.87, |
| "eval_loss": 0.8216782808303833, |
| "eval_runtime": 52.0293, |
| "eval_samples_per_second": 1303.343, |
| "eval_steps_per_second": 2.556, |
| "step": 4170 |
| }, |
| { |
| "epoch": 7.89, |
| "learning_rate": 1e-06, |
| "loss": 0.8013, |
| "step": 4180 |
| }, |
| { |
| "epoch": 7.89, |
| "eval_loss": 0.821456789970398, |
| "eval_runtime": 51.9831, |
| "eval_samples_per_second": 1304.502, |
| "eval_steps_per_second": 2.559, |
| "step": 4180 |
| }, |
| { |
| "epoch": 7.91, |
| "learning_rate": 1e-06, |
| "loss": 0.799, |
| "step": 4190 |
| }, |
| { |
| "epoch": 7.91, |
| "eval_loss": 0.8213809728622437, |
| "eval_runtime": 51.9984, |
| "eval_samples_per_second": 1304.118, |
| "eval_steps_per_second": 2.558, |
| "step": 4190 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 1e-06, |
| "loss": 0.8031, |
| "step": 4200 |
| }, |
| { |
| "epoch": 7.92, |
| "eval_loss": 0.8211493492126465, |
| "eval_runtime": 52.1677, |
| "eval_samples_per_second": 1299.885, |
| "eval_steps_per_second": 2.549, |
| "step": 4200 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 1e-06, |
| "loss": 0.7866, |
| "step": 4210 |
| }, |
| { |
| "epoch": 7.94, |
| "eval_loss": 0.8209183812141418, |
| "eval_runtime": 52.0364, |
| "eval_samples_per_second": 1303.164, |
| "eval_steps_per_second": 2.556, |
| "step": 4210 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 1e-06, |
| "loss": 0.7912, |
| "step": 4220 |
| }, |
| { |
| "epoch": 7.96, |
| "eval_loss": 0.8207370042800903, |
| "eval_runtime": 52.1686, |
| "eval_samples_per_second": 1299.863, |
| "eval_steps_per_second": 2.549, |
| "step": 4220 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 1e-06, |
| "loss": 0.7894, |
| "step": 4230 |
| }, |
| { |
| "epoch": 7.98, |
| "eval_loss": 0.82054603099823, |
| "eval_runtime": 52.0068, |
| "eval_samples_per_second": 1303.906, |
| "eval_steps_per_second": 2.557, |
| "step": 4230 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 1e-06, |
| "loss": 0.7986, |
| "step": 4240 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.8204940557479858, |
| "eval_runtime": 52.0562, |
| "eval_samples_per_second": 1302.668, |
| "eval_steps_per_second": 2.555, |
| "step": 4240 |
| }, |
| { |
| "epoch": 8.02, |
| "learning_rate": 1e-06, |
| "loss": 0.7956, |
| "step": 4250 |
| }, |
| { |
| "epoch": 8.02, |
| "eval_loss": 0.8204708099365234, |
| "eval_runtime": 52.0158, |
| "eval_samples_per_second": 1303.681, |
| "eval_steps_per_second": 2.557, |
| "step": 4250 |
| }, |
| { |
| "epoch": 8.04, |
| "learning_rate": 1e-06, |
| "loss": 0.7977, |
| "step": 4260 |
| }, |
| { |
| "epoch": 8.04, |
| "eval_loss": 0.8203235864639282, |
| "eval_runtime": 51.9618, |
| "eval_samples_per_second": 1305.036, |
| "eval_steps_per_second": 2.56, |
| "step": 4260 |
| }, |
| { |
| "epoch": 8.06, |
| "learning_rate": 1e-06, |
| "loss": 0.7893, |
| "step": 4270 |
| }, |
| { |
| "epoch": 8.06, |
| "eval_loss": 0.8200653195381165, |
| "eval_runtime": 52.0065, |
| "eval_samples_per_second": 1303.915, |
| "eval_steps_per_second": 2.557, |
| "step": 4270 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 1e-06, |
| "loss": 0.7886, |
| "step": 4280 |
| }, |
| { |
| "epoch": 8.08, |
| "eval_loss": 0.8199793696403503, |
| "eval_runtime": 52.0989, |
| "eval_samples_per_second": 1301.601, |
| "eval_steps_per_second": 2.553, |
| "step": 4280 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 1e-06, |
| "loss": 0.8015, |
| "step": 4290 |
| }, |
| { |
| "epoch": 8.09, |
| "eval_loss": 0.819825291633606, |
| "eval_runtime": 52.0141, |
| "eval_samples_per_second": 1303.723, |
| "eval_steps_per_second": 2.557, |
| "step": 4290 |
| }, |
| { |
| "epoch": 8.11, |
| "learning_rate": 1e-06, |
| "loss": 0.7975, |
| "step": 4300 |
| }, |
| { |
| "epoch": 8.11, |
| "eval_loss": 0.8197045922279358, |
| "eval_runtime": 51.996, |
| "eval_samples_per_second": 1304.176, |
| "eval_steps_per_second": 2.558, |
| "step": 4300 |
| }, |
| { |
| "epoch": 8.13, |
| "learning_rate": 1e-06, |
| "loss": 0.7905, |
| "step": 4310 |
| }, |
| { |
| "epoch": 8.13, |
| "eval_loss": 0.8193663954734802, |
| "eval_runtime": 52.1388, |
| "eval_samples_per_second": 1300.606, |
| "eval_steps_per_second": 2.551, |
| "step": 4310 |
| }, |
| { |
| "epoch": 8.15, |
| "learning_rate": 1e-06, |
| "loss": 0.7859, |
| "step": 4320 |
| }, |
| { |
| "epoch": 8.15, |
| "eval_loss": 0.8192564249038696, |
| "eval_runtime": 52.064, |
| "eval_samples_per_second": 1302.475, |
| "eval_steps_per_second": 2.555, |
| "step": 4320 |
| }, |
| { |
| "epoch": 8.17, |
| "learning_rate": 1e-06, |
| "loss": 0.7936, |
| "step": 4330 |
| }, |
| { |
| "epoch": 8.17, |
| "eval_loss": 0.8193119168281555, |
| "eval_runtime": 52.2011, |
| "eval_samples_per_second": 1299.053, |
| "eval_steps_per_second": 2.548, |
| "step": 4330 |
| }, |
| { |
| "epoch": 8.19, |
| "learning_rate": 1e-06, |
| "loss": 0.8034, |
| "step": 4340 |
| }, |
| { |
| "epoch": 8.19, |
| "eval_loss": 0.8189331293106079, |
| "eval_runtime": 51.9711, |
| "eval_samples_per_second": 1304.802, |
| "eval_steps_per_second": 2.559, |
| "step": 4340 |
| }, |
| { |
| "epoch": 8.21, |
| "learning_rate": 1e-06, |
| "loss": 0.7855, |
| "step": 4350 |
| }, |
| { |
| "epoch": 8.21, |
| "eval_loss": 0.8188046813011169, |
| "eval_runtime": 52.0472, |
| "eval_samples_per_second": 1302.895, |
| "eval_steps_per_second": 2.555, |
| "step": 4350 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 1e-06, |
| "loss": 0.7841, |
| "step": 4360 |
| }, |
| { |
| "epoch": 8.23, |
| "eval_loss": 0.8185828924179077, |
| "eval_runtime": 52.0856, |
| "eval_samples_per_second": 1301.935, |
| "eval_steps_per_second": 2.553, |
| "step": 4360 |
| }, |
| { |
| "epoch": 8.25, |
| "learning_rate": 1e-06, |
| "loss": 0.7798, |
| "step": 4370 |
| }, |
| { |
| "epoch": 8.25, |
| "eval_loss": 0.8185929656028748, |
| "eval_runtime": 52.0044, |
| "eval_samples_per_second": 1303.968, |
| "eval_steps_per_second": 2.557, |
| "step": 4370 |
| }, |
| { |
| "epoch": 8.26, |
| "learning_rate": 1e-06, |
| "loss": 0.7829, |
| "step": 4380 |
| }, |
| { |
| "epoch": 8.26, |
| "eval_loss": 0.8181660771369934, |
| "eval_runtime": 52.0905, |
| "eval_samples_per_second": 1301.811, |
| "eval_steps_per_second": 2.553, |
| "step": 4380 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 1e-06, |
| "loss": 0.7891, |
| "step": 4390 |
| }, |
| { |
| "epoch": 8.28, |
| "eval_loss": 0.8180428147315979, |
| "eval_runtime": 52.0875, |
| "eval_samples_per_second": 1301.887, |
| "eval_steps_per_second": 2.553, |
| "step": 4390 |
| }, |
| { |
| "epoch": 8.3, |
| "learning_rate": 1e-06, |
| "loss": 0.7813, |
| "step": 4400 |
| }, |
| { |
| "epoch": 8.3, |
| "eval_loss": 0.8179446458816528, |
| "eval_runtime": 51.9869, |
| "eval_samples_per_second": 1304.404, |
| "eval_steps_per_second": 2.558, |
| "step": 4400 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 1e-06, |
| "loss": 0.7976, |
| "step": 4410 |
| }, |
| { |
| "epoch": 8.32, |
| "eval_loss": 0.8177404403686523, |
| "eval_runtime": 52.004, |
| "eval_samples_per_second": 1303.975, |
| "eval_steps_per_second": 2.557, |
| "step": 4410 |
| }, |
| { |
| "epoch": 8.34, |
| "learning_rate": 1e-06, |
| "loss": 0.7898, |
| "step": 4420 |
| }, |
| { |
| "epoch": 8.34, |
| "eval_loss": 0.8177515268325806, |
| "eval_runtime": 52.0623, |
| "eval_samples_per_second": 1302.515, |
| "eval_steps_per_second": 2.555, |
| "step": 4420 |
| }, |
| { |
| "epoch": 8.36, |
| "learning_rate": 1e-06, |
| "loss": 0.8042, |
| "step": 4430 |
| }, |
| { |
| "epoch": 8.36, |
| "eval_loss": 0.8172406554222107, |
| "eval_runtime": 52.0842, |
| "eval_samples_per_second": 1301.969, |
| "eval_steps_per_second": 2.554, |
| "step": 4430 |
| }, |
| { |
| "epoch": 8.38, |
| "learning_rate": 1e-06, |
| "loss": 0.7951, |
| "step": 4440 |
| }, |
| { |
| "epoch": 8.38, |
| "eval_loss": 0.8171444535255432, |
| "eval_runtime": 52.1302, |
| "eval_samples_per_second": 1300.821, |
| "eval_steps_per_second": 2.551, |
| "step": 4440 |
| }, |
| { |
| "epoch": 8.4, |
| "learning_rate": 1e-06, |
| "loss": 0.7918, |
| "step": 4450 |
| }, |
| { |
| "epoch": 8.4, |
| "eval_loss": 0.8172018527984619, |
| "eval_runtime": 52.3027, |
| "eval_samples_per_second": 1296.531, |
| "eval_steps_per_second": 2.543, |
| "step": 4450 |
| }, |
| { |
| "epoch": 8.42, |
| "learning_rate": 1e-06, |
| "loss": 0.7979, |
| "step": 4460 |
| }, |
| { |
| "epoch": 8.42, |
| "eval_loss": 0.8168366551399231, |
| "eval_runtime": 52.1192, |
| "eval_samples_per_second": 1301.094, |
| "eval_steps_per_second": 2.552, |
| "step": 4460 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 1e-06, |
| "loss": 0.7939, |
| "step": 4470 |
| }, |
| { |
| "epoch": 8.43, |
| "eval_loss": 0.8166252970695496, |
| "eval_runtime": 52.0263, |
| "eval_samples_per_second": 1303.417, |
| "eval_steps_per_second": 2.556, |
| "step": 4470 |
| }, |
| { |
| "epoch": 8.45, |
| "learning_rate": 1e-06, |
| "loss": 0.7914, |
| "step": 4480 |
| }, |
| { |
| "epoch": 8.45, |
| "eval_loss": 0.8164658546447754, |
| "eval_runtime": 52.148, |
| "eval_samples_per_second": 1300.377, |
| "eval_steps_per_second": 2.55, |
| "step": 4480 |
| }, |
| { |
| "epoch": 8.47, |
| "learning_rate": 1e-06, |
| "loss": 0.7915, |
| "step": 4490 |
| }, |
| { |
| "epoch": 8.47, |
| "eval_loss": 0.8164568543434143, |
| "eval_runtime": 52.2088, |
| "eval_samples_per_second": 1298.861, |
| "eval_steps_per_second": 2.547, |
| "step": 4490 |
| }, |
| { |
| "epoch": 8.49, |
| "learning_rate": 1e-06, |
| "loss": 0.7834, |
| "step": 4500 |
| }, |
| { |
| "epoch": 8.49, |
| "eval_loss": 0.8161565065383911, |
| "eval_runtime": 52.0345, |
| "eval_samples_per_second": 1303.211, |
| "eval_steps_per_second": 2.556, |
| "step": 4500 |
| }, |
| { |
| "epoch": 8.51, |
| "learning_rate": 1e-06, |
| "loss": 0.7859, |
| "step": 4510 |
| }, |
| { |
| "epoch": 8.51, |
| "eval_loss": 0.8159014582633972, |
| "eval_runtime": 52.2477, |
| "eval_samples_per_second": 1297.895, |
| "eval_steps_per_second": 2.546, |
| "step": 4510 |
| }, |
| { |
| "epoch": 8.53, |
| "learning_rate": 1e-06, |
| "loss": 0.8038, |
| "step": 4520 |
| }, |
| { |
| "epoch": 8.53, |
| "eval_loss": 0.815627932548523, |
| "eval_runtime": 51.918, |
| "eval_samples_per_second": 1306.136, |
| "eval_steps_per_second": 2.562, |
| "step": 4520 |
| }, |
| { |
| "epoch": 8.55, |
| "learning_rate": 1e-06, |
| "loss": 0.7982, |
| "step": 4530 |
| }, |
| { |
| "epoch": 8.55, |
| "eval_loss": 0.8155547380447388, |
| "eval_runtime": 52.0742, |
| "eval_samples_per_second": 1302.218, |
| "eval_steps_per_second": 2.554, |
| "step": 4530 |
| }, |
| { |
| "epoch": 8.57, |
| "learning_rate": 1e-06, |
| "loss": 0.7947, |
| "step": 4540 |
| }, |
| { |
| "epoch": 8.57, |
| "eval_loss": 0.8155472278594971, |
| "eval_runtime": 52.0767, |
| "eval_samples_per_second": 1302.157, |
| "eval_steps_per_second": 2.554, |
| "step": 4540 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 1e-06, |
| "loss": 0.7756, |
| "step": 4550 |
| }, |
| { |
| "epoch": 8.58, |
| "eval_loss": 0.8152143955230713, |
| "eval_runtime": 51.9632, |
| "eval_samples_per_second": 1305.0, |
| "eval_steps_per_second": 2.56, |
| "step": 4550 |
| }, |
| { |
| "epoch": 8.6, |
| "learning_rate": 1e-06, |
| "loss": 0.7886, |
| "step": 4560 |
| }, |
| { |
| "epoch": 8.6, |
| "eval_loss": 0.8149456977844238, |
| "eval_runtime": 52.1021, |
| "eval_samples_per_second": 1301.523, |
| "eval_steps_per_second": 2.553, |
| "step": 4560 |
| }, |
| { |
| "epoch": 8.62, |
| "learning_rate": 1e-06, |
| "loss": 0.7979, |
| "step": 4570 |
| }, |
| { |
| "epoch": 8.62, |
| "eval_loss": 0.8148903846740723, |
| "eval_runtime": 52.1236, |
| "eval_samples_per_second": 1300.986, |
| "eval_steps_per_second": 2.552, |
| "step": 4570 |
| }, |
| { |
| "epoch": 8.64, |
| "learning_rate": 1e-06, |
| "loss": 0.7932, |
| "step": 4580 |
| }, |
| { |
| "epoch": 8.64, |
| "eval_loss": 0.814667284488678, |
| "eval_runtime": 52.1496, |
| "eval_samples_per_second": 1300.337, |
| "eval_steps_per_second": 2.55, |
| "step": 4580 |
| }, |
| { |
| "epoch": 8.66, |
| "learning_rate": 1e-06, |
| "loss": 0.7864, |
| "step": 4590 |
| }, |
| { |
| "epoch": 8.66, |
| "eval_loss": 0.81462162733078, |
| "eval_runtime": 52.1366, |
| "eval_samples_per_second": 1300.66, |
| "eval_steps_per_second": 2.551, |
| "step": 4590 |
| }, |
| { |
| "epoch": 8.68, |
| "learning_rate": 1e-06, |
| "loss": 0.7999, |
| "step": 4600 |
| }, |
| { |
| "epoch": 8.68, |
| "eval_loss": 0.8142070174217224, |
| "eval_runtime": 52.1471, |
| "eval_samples_per_second": 1300.398, |
| "eval_steps_per_second": 2.55, |
| "step": 4600 |
| }, |
| { |
| "epoch": 8.7, |
| "learning_rate": 1e-06, |
| "loss": 0.7952, |
| "step": 4610 |
| }, |
| { |
| "epoch": 8.7, |
| "eval_loss": 0.8141977787017822, |
| "eval_runtime": 52.1159, |
| "eval_samples_per_second": 1301.176, |
| "eval_steps_per_second": 2.552, |
| "step": 4610 |
| }, |
| { |
| "epoch": 8.72, |
| "learning_rate": 1e-06, |
| "loss": 0.7779, |
| "step": 4620 |
| }, |
| { |
| "epoch": 8.72, |
| "eval_loss": 0.8140039443969727, |
| "eval_runtime": 52.0607, |
| "eval_samples_per_second": 1302.556, |
| "eval_steps_per_second": 2.555, |
| "step": 4620 |
| }, |
| { |
| "epoch": 8.74, |
| "learning_rate": 1e-06, |
| "loss": 0.7838, |
| "step": 4630 |
| }, |
| { |
| "epoch": 8.74, |
| "eval_loss": 0.8139258027076721, |
| "eval_runtime": 51.9226, |
| "eval_samples_per_second": 1306.02, |
| "eval_steps_per_second": 2.562, |
| "step": 4630 |
| }, |
| { |
| "epoch": 8.75, |
| "learning_rate": 1e-06, |
| "loss": 0.7842, |
| "step": 4640 |
| }, |
| { |
| "epoch": 8.75, |
| "eval_loss": 0.8135305047035217, |
| "eval_runtime": 52.1327, |
| "eval_samples_per_second": 1300.757, |
| "eval_steps_per_second": 2.551, |
| "step": 4640 |
| }, |
| { |
| "epoch": 8.77, |
| "learning_rate": 1e-06, |
| "loss": 0.7916, |
| "step": 4650 |
| }, |
| { |
| "epoch": 8.77, |
| "eval_loss": 0.8136957287788391, |
| "eval_runtime": 52.1197, |
| "eval_samples_per_second": 1301.083, |
| "eval_steps_per_second": 2.552, |
| "step": 4650 |
| }, |
| { |
| "epoch": 8.79, |
| "learning_rate": 1e-06, |
| "loss": 0.7843, |
| "step": 4660 |
| }, |
| { |
| "epoch": 8.79, |
| "eval_loss": 0.813274085521698, |
| "eval_runtime": 52.1225, |
| "eval_samples_per_second": 1301.011, |
| "eval_steps_per_second": 2.552, |
| "step": 4660 |
| }, |
| { |
| "epoch": 8.81, |
| "learning_rate": 1e-06, |
| "loss": 0.7789, |
| "step": 4670 |
| }, |
| { |
| "epoch": 8.81, |
| "eval_loss": 0.8131061792373657, |
| "eval_runtime": 52.2467, |
| "eval_samples_per_second": 1297.919, |
| "eval_steps_per_second": 2.546, |
| "step": 4670 |
| }, |
| { |
| "epoch": 8.83, |
| "learning_rate": 1e-06, |
| "loss": 0.7848, |
| "step": 4680 |
| }, |
| { |
| "epoch": 8.83, |
| "eval_loss": 0.812892735004425, |
| "eval_runtime": 52.0469, |
| "eval_samples_per_second": 1302.903, |
| "eval_steps_per_second": 2.555, |
| "step": 4680 |
| }, |
| { |
| "epoch": 8.85, |
| "learning_rate": 1e-06, |
| "loss": 0.7941, |
| "step": 4690 |
| }, |
| { |
| "epoch": 8.85, |
| "eval_loss": 0.8127599954605103, |
| "eval_runtime": 52.14, |
| "eval_samples_per_second": 1300.576, |
| "eval_steps_per_second": 2.551, |
| "step": 4690 |
| }, |
| { |
| "epoch": 8.87, |
| "learning_rate": 1e-06, |
| "loss": 0.7715, |
| "step": 4700 |
| }, |
| { |
| "epoch": 8.87, |
| "eval_loss": 0.8127383589744568, |
| "eval_runtime": 52.1476, |
| "eval_samples_per_second": 1300.386, |
| "eval_steps_per_second": 2.55, |
| "step": 4700 |
| }, |
| { |
| "epoch": 8.89, |
| "learning_rate": 1e-06, |
| "loss": 0.7845, |
| "step": 4710 |
| }, |
| { |
| "epoch": 8.89, |
| "eval_loss": 0.8124809861183167, |
| "eval_runtime": 52.1147, |
| "eval_samples_per_second": 1301.206, |
| "eval_steps_per_second": 2.552, |
| "step": 4710 |
| }, |
| { |
| "epoch": 8.91, |
| "learning_rate": 1e-06, |
| "loss": 0.793, |
| "step": 4720 |
| }, |
| { |
| "epoch": 8.91, |
| "eval_loss": 0.8122683167457581, |
| "eval_runtime": 52.1513, |
| "eval_samples_per_second": 1300.294, |
| "eval_steps_per_second": 2.55, |
| "step": 4720 |
| }, |
| { |
| "epoch": 8.92, |
| "learning_rate": 1e-06, |
| "loss": 0.7831, |
| "step": 4730 |
| }, |
| { |
| "epoch": 8.92, |
| "eval_loss": 0.8123452663421631, |
| "eval_runtime": 52.0764, |
| "eval_samples_per_second": 1302.165, |
| "eval_steps_per_second": 2.554, |
| "step": 4730 |
| }, |
| { |
| "epoch": 8.94, |
| "learning_rate": 1e-06, |
| "loss": 0.7676, |
| "step": 4740 |
| }, |
| { |
| "epoch": 8.94, |
| "eval_loss": 0.8120014071464539, |
| "eval_runtime": 52.2681, |
| "eval_samples_per_second": 1297.387, |
| "eval_steps_per_second": 2.545, |
| "step": 4740 |
| }, |
| { |
| "epoch": 8.96, |
| "learning_rate": 1e-06, |
| "loss": 0.8054, |
| "step": 4750 |
| }, |
| { |
| "epoch": 8.96, |
| "eval_loss": 0.8117390871047974, |
| "eval_runtime": 52.1213, |
| "eval_samples_per_second": 1301.043, |
| "eval_steps_per_second": 2.552, |
| "step": 4750 |
| }, |
| { |
| "epoch": 8.98, |
| "learning_rate": 1e-06, |
| "loss": 0.7902, |
| "step": 4760 |
| }, |
| { |
| "epoch": 8.98, |
| "eval_loss": 0.811499297618866, |
| "eval_runtime": 52.1351, |
| "eval_samples_per_second": 1300.698, |
| "eval_steps_per_second": 2.551, |
| "step": 4760 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 1e-06, |
| "loss": 0.7635, |
| "step": 4770 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.8113887310028076, |
| "eval_runtime": 52.1292, |
| "eval_samples_per_second": 1300.846, |
| "eval_steps_per_second": 2.551, |
| "step": 4770 |
| }, |
| { |
| "epoch": 9.02, |
| "learning_rate": 1e-06, |
| "loss": 0.7836, |
| "step": 4780 |
| }, |
| { |
| "epoch": 9.02, |
| "eval_loss": 0.8115408420562744, |
| "eval_runtime": 52.2174, |
| "eval_samples_per_second": 1298.648, |
| "eval_steps_per_second": 2.547, |
| "step": 4780 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 1e-06, |
| "loss": 0.7819, |
| "step": 4790 |
| }, |
| { |
| "epoch": 9.04, |
| "eval_loss": 0.8114977478981018, |
| "eval_runtime": 52.1485, |
| "eval_samples_per_second": 1300.362, |
| "eval_steps_per_second": 2.55, |
| "step": 4790 |
| }, |
| { |
| "epoch": 9.06, |
| "learning_rate": 1e-06, |
| "loss": 0.7802, |
| "step": 4800 |
| }, |
| { |
| "epoch": 9.06, |
| "eval_loss": 0.8112668395042419, |
| "eval_runtime": 52.4451, |
| "eval_samples_per_second": 1293.009, |
| "eval_steps_per_second": 2.536, |
| "step": 4800 |
| }, |
| { |
| "epoch": 9.08, |
| "learning_rate": 1e-06, |
| "loss": 0.7808, |
| "step": 4810 |
| }, |
| { |
| "epoch": 9.08, |
| "eval_loss": 0.8110054135322571, |
| "eval_runtime": 52.0098, |
| "eval_samples_per_second": 1303.83, |
| "eval_steps_per_second": 2.557, |
| "step": 4810 |
| }, |
| { |
| "epoch": 9.09, |
| "learning_rate": 1e-06, |
| "loss": 0.7942, |
| "step": 4820 |
| }, |
| { |
| "epoch": 9.09, |
| "eval_loss": 0.8112225532531738, |
| "eval_runtime": 52.1881, |
| "eval_samples_per_second": 1299.377, |
| "eval_steps_per_second": 2.548, |
| "step": 4820 |
| }, |
| { |
| "epoch": 9.11, |
| "learning_rate": 1e-06, |
| "loss": 0.7899, |
| "step": 4830 |
| }, |
| { |
| "epoch": 9.11, |
| "eval_loss": 0.8108929991722107, |
| "eval_runtime": 52.179, |
| "eval_samples_per_second": 1299.604, |
| "eval_steps_per_second": 2.549, |
| "step": 4830 |
| }, |
| { |
| "epoch": 9.13, |
| "learning_rate": 1e-06, |
| "loss": 0.7814, |
| "step": 4840 |
| }, |
| { |
| "epoch": 9.13, |
| "eval_loss": 0.8106999397277832, |
| "eval_runtime": 52.0828, |
| "eval_samples_per_second": 1302.005, |
| "eval_steps_per_second": 2.554, |
| "step": 4840 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 1e-06, |
| "loss": 0.7886, |
| "step": 4850 |
| }, |
| { |
| "epoch": 9.15, |
| "eval_loss": 0.810533881187439, |
| "eval_runtime": 52.1124, |
| "eval_samples_per_second": 1301.263, |
| "eval_steps_per_second": 2.552, |
| "step": 4850 |
| }, |
| { |
| "epoch": 9.17, |
| "learning_rate": 1e-06, |
| "loss": 0.7697, |
| "step": 4860 |
| }, |
| { |
| "epoch": 9.17, |
| "eval_loss": 0.8104182481765747, |
| "eval_runtime": 52.0327, |
| "eval_samples_per_second": 1303.256, |
| "eval_steps_per_second": 2.556, |
| "step": 4860 |
| }, |
| { |
| "epoch": 9.19, |
| "learning_rate": 1e-06, |
| "loss": 0.7859, |
| "step": 4870 |
| }, |
| { |
| "epoch": 9.19, |
| "eval_loss": 0.8101289868354797, |
| "eval_runtime": 52.1542, |
| "eval_samples_per_second": 1300.221, |
| "eval_steps_per_second": 2.55, |
| "step": 4870 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 1e-06, |
| "loss": 0.7718, |
| "step": 4880 |
| }, |
| { |
| "epoch": 9.21, |
| "eval_loss": 0.8099629878997803, |
| "eval_runtime": 52.087, |
| "eval_samples_per_second": 1301.899, |
| "eval_steps_per_second": 2.553, |
| "step": 4880 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 1e-06, |
| "loss": 0.771, |
| "step": 4890 |
| }, |
| { |
| "epoch": 9.23, |
| "eval_loss": 0.8100450038909912, |
| "eval_runtime": 52.1185, |
| "eval_samples_per_second": 1301.112, |
| "eval_steps_per_second": 2.552, |
| "step": 4890 |
| }, |
| { |
| "epoch": 9.25, |
| "learning_rate": 1e-06, |
| "loss": 0.7852, |
| "step": 4900 |
| }, |
| { |
| "epoch": 9.25, |
| "eval_loss": 0.8099735379219055, |
| "eval_runtime": 52.051, |
| "eval_samples_per_second": 1302.8, |
| "eval_steps_per_second": 2.555, |
| "step": 4900 |
| }, |
| { |
| "epoch": 9.26, |
| "learning_rate": 1e-06, |
| "loss": 0.7722, |
| "step": 4910 |
| }, |
| { |
| "epoch": 9.26, |
| "eval_loss": 0.8093700408935547, |
| "eval_runtime": 51.9299, |
| "eval_samples_per_second": 1305.836, |
| "eval_steps_per_second": 2.561, |
| "step": 4910 |
| }, |
| { |
| "epoch": 9.28, |
| "learning_rate": 1e-06, |
| "loss": 0.7699, |
| "step": 4920 |
| }, |
| { |
| "epoch": 9.28, |
| "eval_loss": 0.8094404935836792, |
| "eval_runtime": 52.6564, |
| "eval_samples_per_second": 1287.821, |
| "eval_steps_per_second": 2.526, |
| "step": 4920 |
| }, |
| { |
| "epoch": 9.3, |
| "learning_rate": 1e-06, |
| "loss": 0.7847, |
| "step": 4930 |
| }, |
| { |
| "epoch": 9.3, |
| "eval_loss": 0.8093037605285645, |
| "eval_runtime": 52.1908, |
| "eval_samples_per_second": 1299.309, |
| "eval_steps_per_second": 2.548, |
| "step": 4930 |
| }, |
| { |
| "epoch": 9.32, |
| "learning_rate": 1e-06, |
| "loss": 0.776, |
| "step": 4940 |
| }, |
| { |
| "epoch": 9.32, |
| "eval_loss": 0.8089527487754822, |
| "eval_runtime": 52.1434, |
| "eval_samples_per_second": 1300.49, |
| "eval_steps_per_second": 2.551, |
| "step": 4940 |
| }, |
| { |
| "epoch": 9.34, |
| "learning_rate": 1e-06, |
| "loss": 0.7727, |
| "step": 4950 |
| }, |
| { |
| "epoch": 9.34, |
| "eval_loss": 0.8089351058006287, |
| "eval_runtime": 52.0839, |
| "eval_samples_per_second": 1301.976, |
| "eval_steps_per_second": 2.554, |
| "step": 4950 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 1e-06, |
| "loss": 0.782, |
| "step": 4960 |
| }, |
| { |
| "epoch": 9.36, |
| "eval_loss": 0.8087407350540161, |
| "eval_runtime": 52.211, |
| "eval_samples_per_second": 1298.808, |
| "eval_steps_per_second": 2.547, |
| "step": 4960 |
| }, |
| { |
| "epoch": 9.38, |
| "learning_rate": 1e-06, |
| "loss": 0.7715, |
| "step": 4970 |
| }, |
| { |
| "epoch": 9.38, |
| "eval_loss": 0.8086969256401062, |
| "eval_runtime": 51.9535, |
| "eval_samples_per_second": 1305.245, |
| "eval_steps_per_second": 2.56, |
| "step": 4970 |
| }, |
| { |
| "epoch": 9.4, |
| "learning_rate": 1e-06, |
| "loss": 0.7812, |
| "step": 4980 |
| }, |
| { |
| "epoch": 9.4, |
| "eval_loss": 0.8085045218467712, |
| "eval_runtime": 52.0964, |
| "eval_samples_per_second": 1301.664, |
| "eval_steps_per_second": 2.553, |
| "step": 4980 |
| }, |
| { |
| "epoch": 9.42, |
| "learning_rate": 1e-06, |
| "loss": 0.7648, |
| "step": 4990 |
| }, |
| { |
| "epoch": 9.42, |
| "eval_loss": 0.8081462383270264, |
| "eval_runtime": 51.9735, |
| "eval_samples_per_second": 1304.741, |
| "eval_steps_per_second": 2.559, |
| "step": 4990 |
| }, |
| { |
| "epoch": 9.43, |
| "learning_rate": 1e-06, |
| "loss": 0.7694, |
| "step": 5000 |
| }, |
| { |
| "epoch": 9.43, |
| "eval_loss": 0.8081194758415222, |
| "eval_runtime": 52.1059, |
| "eval_samples_per_second": 1301.426, |
| "eval_steps_per_second": 2.552, |
| "step": 5000 |
| }, |
| { |
| "epoch": 9.45, |
| "learning_rate": 1e-06, |
| "loss": 0.7824, |
| "step": 5010 |
| }, |
| { |
| "epoch": 9.45, |
| "eval_loss": 0.8081274032592773, |
| "eval_runtime": 52.1903, |
| "eval_samples_per_second": 1299.322, |
| "eval_steps_per_second": 2.548, |
| "step": 5010 |
| }, |
| { |
| "epoch": 9.47, |
| "learning_rate": 1e-06, |
| "loss": 0.7822, |
| "step": 5020 |
| }, |
| { |
| "epoch": 9.47, |
| "eval_loss": 0.8078827261924744, |
| "eval_runtime": 52.0034, |
| "eval_samples_per_second": 1303.991, |
| "eval_steps_per_second": 2.558, |
| "step": 5020 |
| }, |
| { |
| "epoch": 9.49, |
| "learning_rate": 1e-06, |
| "loss": 0.7766, |
| "step": 5030 |
| }, |
| { |
| "epoch": 9.49, |
| "eval_loss": 0.807519793510437, |
| "eval_runtime": 52.2252, |
| "eval_samples_per_second": 1298.453, |
| "eval_steps_per_second": 2.547, |
| "step": 5030 |
| }, |
| { |
| "epoch": 9.51, |
| "learning_rate": 1e-06, |
| "loss": 0.7732, |
| "step": 5040 |
| }, |
| { |
| "epoch": 9.51, |
| "eval_loss": 0.807347297668457, |
| "eval_runtime": 52.0984, |
| "eval_samples_per_second": 1301.614, |
| "eval_steps_per_second": 2.553, |
| "step": 5040 |
| }, |
| { |
| "epoch": 9.53, |
| "learning_rate": 1e-06, |
| "loss": 0.7861, |
| "step": 5050 |
| }, |
| { |
| "epoch": 9.53, |
| "eval_loss": 0.8072365522384644, |
| "eval_runtime": 52.0257, |
| "eval_samples_per_second": 1303.434, |
| "eval_steps_per_second": 2.556, |
| "step": 5050 |
| }, |
| { |
| "epoch": 9.55, |
| "learning_rate": 1e-06, |
| "loss": 0.7828, |
| "step": 5060 |
| }, |
| { |
| "epoch": 9.55, |
| "eval_loss": 0.8069567084312439, |
| "eval_runtime": 52.0026, |
| "eval_samples_per_second": 1304.011, |
| "eval_steps_per_second": 2.558, |
| "step": 5060 |
| }, |
| { |
| "epoch": 9.57, |
| "learning_rate": 1e-06, |
| "loss": 0.79, |
| "step": 5070 |
| }, |
| { |
| "epoch": 9.57, |
| "eval_loss": 0.807108998298645, |
| "eval_runtime": 52.0409, |
| "eval_samples_per_second": 1303.052, |
| "eval_steps_per_second": 2.556, |
| "step": 5070 |
| }, |
| { |
| "epoch": 9.58, |
| "learning_rate": 1e-06, |
| "loss": 0.764, |
| "step": 5080 |
| }, |
| { |
| "epoch": 9.58, |
| "eval_loss": 0.8068212270736694, |
| "eval_runtime": 51.974, |
| "eval_samples_per_second": 1304.729, |
| "eval_steps_per_second": 2.559, |
| "step": 5080 |
| }, |
| { |
| "epoch": 9.6, |
| "learning_rate": 1e-06, |
| "loss": 0.7667, |
| "step": 5090 |
| }, |
| { |
| "epoch": 9.6, |
| "eval_loss": 0.8066729307174683, |
| "eval_runtime": 52.1256, |
| "eval_samples_per_second": 1300.934, |
| "eval_steps_per_second": 2.552, |
| "step": 5090 |
| }, |
| { |
| "epoch": 9.62, |
| "learning_rate": 1e-06, |
| "loss": 0.7745, |
| "step": 5100 |
| }, |
| { |
| "epoch": 9.62, |
| "eval_loss": 0.8065735101699829, |
| "eval_runtime": 52.0723, |
| "eval_samples_per_second": 1302.266, |
| "eval_steps_per_second": 2.554, |
| "step": 5100 |
| }, |
| { |
| "epoch": 9.64, |
| "learning_rate": 1e-06, |
| "loss": 0.774, |
| "step": 5110 |
| }, |
| { |
| "epoch": 9.64, |
| "eval_loss": 0.8063581585884094, |
| "eval_runtime": 51.992, |
| "eval_samples_per_second": 1304.277, |
| "eval_steps_per_second": 2.558, |
| "step": 5110 |
| }, |
| { |
| "epoch": 9.66, |
| "learning_rate": 1e-06, |
| "loss": 0.7775, |
| "step": 5120 |
| }, |
| { |
| "epoch": 9.66, |
| "eval_loss": 0.8062164783477783, |
| "eval_runtime": 52.2512, |
| "eval_samples_per_second": 1297.809, |
| "eval_steps_per_second": 2.545, |
| "step": 5120 |
| }, |
| { |
| "epoch": 9.68, |
| "learning_rate": 1e-06, |
| "loss": 0.7571, |
| "step": 5130 |
| }, |
| { |
| "epoch": 9.68, |
| "eval_loss": 0.8059104084968567, |
| "eval_runtime": 52.076, |
| "eval_samples_per_second": 1302.173, |
| "eval_steps_per_second": 2.554, |
| "step": 5130 |
| }, |
| { |
| "epoch": 9.7, |
| "learning_rate": 1e-06, |
| "loss": 0.7807, |
| "step": 5140 |
| }, |
| { |
| "epoch": 9.7, |
| "eval_loss": 0.8059520125389099, |
| "eval_runtime": 52.109, |
| "eval_samples_per_second": 1301.349, |
| "eval_steps_per_second": 2.552, |
| "step": 5140 |
| }, |
| { |
| "epoch": 9.72, |
| "learning_rate": 1e-06, |
| "loss": 0.7838, |
| "step": 5150 |
| }, |
| { |
| "epoch": 9.72, |
| "eval_loss": 0.8056530952453613, |
| "eval_runtime": 51.8958, |
| "eval_samples_per_second": 1306.695, |
| "eval_steps_per_second": 2.563, |
| "step": 5150 |
| }, |
| { |
| "epoch": 9.74, |
| "learning_rate": 1e-06, |
| "loss": 0.7773, |
| "step": 5160 |
| }, |
| { |
| "epoch": 9.74, |
| "eval_loss": 0.8054617047309875, |
| "eval_runtime": 52.2537, |
| "eval_samples_per_second": 1297.746, |
| "eval_steps_per_second": 2.545, |
| "step": 5160 |
| }, |
| { |
| "epoch": 9.75, |
| "learning_rate": 1e-06, |
| "loss": 0.7917, |
| "step": 5170 |
| }, |
| { |
| "epoch": 9.75, |
| "eval_loss": 0.8053807616233826, |
| "eval_runtime": 52.0793, |
| "eval_samples_per_second": 1302.09, |
| "eval_steps_per_second": 2.554, |
| "step": 5170 |
| }, |
| { |
| "epoch": 9.77, |
| "learning_rate": 1e-06, |
| "loss": 0.7904, |
| "step": 5180 |
| }, |
| { |
| "epoch": 9.77, |
| "eval_loss": 0.8053616881370544, |
| "eval_runtime": 51.9922, |
| "eval_samples_per_second": 1304.272, |
| "eval_steps_per_second": 2.558, |
| "step": 5180 |
| }, |
| { |
| "epoch": 9.79, |
| "learning_rate": 1e-06, |
| "loss": 0.781, |
| "step": 5190 |
| }, |
| { |
| "epoch": 9.79, |
| "eval_loss": 0.8050292134284973, |
| "eval_runtime": 52.2751, |
| "eval_samples_per_second": 1297.214, |
| "eval_steps_per_second": 2.544, |
| "step": 5190 |
| }, |
| { |
| "epoch": 9.81, |
| "learning_rate": 1e-06, |
| "loss": 0.7674, |
| "step": 5200 |
| }, |
| { |
| "epoch": 9.81, |
| "eval_loss": 0.8047987222671509, |
| "eval_runtime": 52.0988, |
| "eval_samples_per_second": 1301.603, |
| "eval_steps_per_second": 2.553, |
| "step": 5200 |
| }, |
| { |
| "epoch": 9.83, |
| "learning_rate": 1e-06, |
| "loss": 0.7704, |
| "step": 5210 |
| }, |
| { |
| "epoch": 9.83, |
| "eval_loss": 0.8047385215759277, |
| "eval_runtime": 52.0987, |
| "eval_samples_per_second": 1301.607, |
| "eval_steps_per_second": 2.553, |
| "step": 5210 |
| }, |
| { |
| "epoch": 9.85, |
| "learning_rate": 1e-06, |
| "loss": 0.7526, |
| "step": 5220 |
| }, |
| { |
| "epoch": 9.85, |
| "eval_loss": 0.8044944405555725, |
| "eval_runtime": 52.1435, |
| "eval_samples_per_second": 1300.489, |
| "eval_steps_per_second": 2.551, |
| "step": 5220 |
| }, |
| { |
| "epoch": 9.87, |
| "learning_rate": 1e-06, |
| "loss": 0.784, |
| "step": 5230 |
| }, |
| { |
| "epoch": 9.87, |
| "eval_loss": 0.8044453859329224, |
| "eval_runtime": 52.2575, |
| "eval_samples_per_second": 1297.652, |
| "eval_steps_per_second": 2.545, |
| "step": 5230 |
| }, |
| { |
| "epoch": 9.89, |
| "learning_rate": 1e-06, |
| "loss": 0.7644, |
| "step": 5240 |
| }, |
| { |
| "epoch": 9.89, |
| "eval_loss": 0.8041695356369019, |
| "eval_runtime": 52.2688, |
| "eval_samples_per_second": 1297.372, |
| "eval_steps_per_second": 2.545, |
| "step": 5240 |
| }, |
| { |
| "epoch": 9.91, |
| "learning_rate": 1e-06, |
| "loss": 0.7684, |
| "step": 5250 |
| }, |
| { |
| "epoch": 9.91, |
| "eval_loss": 0.804131031036377, |
| "eval_runtime": 52.1426, |
| "eval_samples_per_second": 1300.51, |
| "eval_steps_per_second": 2.551, |
| "step": 5250 |
| }, |
| { |
| "epoch": 9.92, |
| "learning_rate": 1e-06, |
| "loss": 0.7565, |
| "step": 5260 |
| }, |
| { |
| "epoch": 9.92, |
| "eval_loss": 0.8040266036987305, |
| "eval_runtime": 52.0192, |
| "eval_samples_per_second": 1303.595, |
| "eval_steps_per_second": 2.557, |
| "step": 5260 |
| }, |
| { |
| "epoch": 9.94, |
| "learning_rate": 1e-06, |
| "loss": 0.7873, |
| "step": 5270 |
| }, |
| { |
| "epoch": 9.94, |
| "eval_loss": 0.8036627769470215, |
| "eval_runtime": 52.3717, |
| "eval_samples_per_second": 1294.821, |
| "eval_steps_per_second": 2.54, |
| "step": 5270 |
| }, |
| { |
| "epoch": 9.96, |
| "learning_rate": 1e-06, |
| "loss": 0.7871, |
| "step": 5280 |
| }, |
| { |
| "epoch": 9.96, |
| "eval_loss": 0.8036572337150574, |
| "eval_runtime": 52.2188, |
| "eval_samples_per_second": 1298.613, |
| "eval_steps_per_second": 2.547, |
| "step": 5280 |
| }, |
| { |
| "epoch": 9.98, |
| "learning_rate": 1e-06, |
| "loss": 0.7719, |
| "step": 5290 |
| }, |
| { |
| "epoch": 9.98, |
| "eval_loss": 0.8034039735794067, |
| "eval_runtime": 51.9811, |
| "eval_samples_per_second": 1304.551, |
| "eval_steps_per_second": 2.559, |
| "step": 5290 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 1e-06, |
| "loss": 0.7852, |
| "step": 5300 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.8034818172454834, |
| "eval_runtime": 52.3185, |
| "eval_samples_per_second": 1296.138, |
| "eval_steps_per_second": 2.542, |
| "step": 5300 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5300, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 10, |
| "total_flos": 767247589048320.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|