| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 8.012820512820513, |
| "eval_steps": 500, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9903846153846154e-05, |
| "loss": 5.8218, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.980769230769231e-05, |
| "loss": 5.2499, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9711538461538465e-05, |
| "loss": 5.1236, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.961538461538462e-05, |
| "loss": 4.8363, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9519230769230776e-05, |
| "loss": 4.5805, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.942307692307693e-05, |
| "loss": 4.5276, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.932692307692308e-05, |
| "loss": 4.3871, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.923076923076924e-05, |
| "loss": 4.2706, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9134615384615384e-05, |
| "loss": 4.0906, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.9038461538461536e-05, |
| "loss": 4.1704, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8942307692307695e-05, |
| "loss": 3.9014, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.884615384615385e-05, |
| "loss": 4.0338, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.875e-05, |
| "loss": 4.011, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.865384615384616e-05, |
| "loss": 3.8017, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.855769230769231e-05, |
| "loss": 3.819, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.846153846153846e-05, |
| "loss": 3.8157, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.836538461538462e-05, |
| "loss": 3.7675, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.826923076923077e-05, |
| "loss": 3.756, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.8173076923076925e-05, |
| "loss": 3.7439, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.8076923076923084e-05, |
| "loss": 3.761, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.7980769230769236e-05, |
| "loss": 3.7318, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.788461538461539e-05, |
| "loss": 3.745, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.778846153846154e-05, |
| "loss": 3.784, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.76923076923077e-05, |
| "loss": 3.6114, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.7596153846153844e-05, |
| "loss": 3.637, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.75e-05, |
| "loss": 3.5881, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.7403846153846155e-05, |
| "loss": 3.5336, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.730769230769231e-05, |
| "loss": 3.5313, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.7211538461538465e-05, |
| "loss": 3.6357, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.711538461538462e-05, |
| "loss": 3.5818, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.701923076923077e-05, |
| "loss": 3.5017, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.692307692307693e-05, |
| "loss": 3.5815, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.682692307692308e-05, |
| "loss": 3.611, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.673076923076923e-05, |
| "loss": 3.6096, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 4.6634615384615384e-05, |
| "loss": 3.5436, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.653846153846154e-05, |
| "loss": 3.5258, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.6442307692307695e-05, |
| "loss": 3.5277, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 4.634615384615385e-05, |
| "loss": 3.505, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.6250000000000006e-05, |
| "loss": 3.4665, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.615384615384616e-05, |
| "loss": 3.4245, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 4.605769230769231e-05, |
| "loss": 3.4547, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 4.596153846153846e-05, |
| "loss": 3.4382, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.5865384615384614e-05, |
| "loss": 3.482, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 4.576923076923077e-05, |
| "loss": 3.4795, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.5673076923076925e-05, |
| "loss": 3.5018, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 4.557692307692308e-05, |
| "loss": 3.448, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.548076923076923e-05, |
| "loss": 3.4206, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 4.538461538461539e-05, |
| "loss": 3.528, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.528846153846154e-05, |
| "loss": 3.475, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.519230769230769e-05, |
| "loss": 3.4142, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 4.509615384615385e-05, |
| "loss": 3.4156, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.5e-05, |
| "loss": 3.3274, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.4903846153846155e-05, |
| "loss": 3.3594, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 4.4807692307692314e-05, |
| "loss": 3.2517, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 4.4711538461538466e-05, |
| "loss": 3.2934, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 4.461538461538462e-05, |
| "loss": 3.2986, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 4.451923076923077e-05, |
| "loss": 3.2655, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 4.442307692307692e-05, |
| "loss": 3.4058, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 4.4326923076923074e-05, |
| "loss": 3.265, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 4.423076923076923e-05, |
| "loss": 3.3208, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 4.4134615384615385e-05, |
| "loss": 3.238, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 4.403846153846154e-05, |
| "loss": 3.2153, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 4.3942307692307695e-05, |
| "loss": 3.264, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 4.384615384615385e-05, |
| "loss": 3.2911, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 4.375e-05, |
| "loss": 3.3027, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 4.365384615384616e-05, |
| "loss": 3.2589, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 4.355769230769231e-05, |
| "loss": 3.3683, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 4.346153846153846e-05, |
| "loss": 3.2849, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 4.336538461538462e-05, |
| "loss": 3.2397, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 4.326923076923077e-05, |
| "loss": 3.2128, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 4.3173076923076925e-05, |
| "loss": 3.1944, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 4.3076923076923084e-05, |
| "loss": 3.1837, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 4.2980769230769236e-05, |
| "loss": 3.1793, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 4.288461538461538e-05, |
| "loss": 3.1447, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 4.278846153846154e-05, |
| "loss": 3.1028, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 4.269230769230769e-05, |
| "loss": 3.2471, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 4.2596153846153844e-05, |
| "loss": 3.1855, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 4.25e-05, |
| "loss": 3.1817, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 4.2403846153846155e-05, |
| "loss": 3.214, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 4.230769230769231e-05, |
| "loss": 2.997, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 4.2211538461538466e-05, |
| "loss": 3.2059, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 4.211538461538462e-05, |
| "loss": 3.1517, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 4.201923076923077e-05, |
| "loss": 3.2236, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.192307692307693e-05, |
| "loss": 3.1939, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.182692307692308e-05, |
| "loss": 3.1143, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.173076923076923e-05, |
| "loss": 3.2261, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 4.163461538461539e-05, |
| "loss": 3.2095, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 4.1538461538461544e-05, |
| "loss": 3.2498, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 4.1442307692307696e-05, |
| "loss": 3.1493, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 4.134615384615385e-05, |
| "loss": 3.0919, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 4.125e-05, |
| "loss": 3.2617, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 4.115384615384615e-05, |
| "loss": 3.1447, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 4.105769230769231e-05, |
| "loss": 3.2025, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 4.096153846153846e-05, |
| "loss": 3.1829, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 4.0865384615384615e-05, |
| "loss": 3.2216, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 4.0769230769230773e-05, |
| "loss": 3.1391, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 4.0673076923076926e-05, |
| "loss": 3.1344, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 4.057692307692308e-05, |
| "loss": 3.1699, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 4.0480769230769236e-05, |
| "loss": 3.0138, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 4.038461538461539e-05, |
| "loss": 3.1157, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 4.028846153846154e-05, |
| "loss": 3.1173, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 4.019230769230769e-05, |
| "loss": 3.2019, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 4.009615384615385e-05, |
| "loss": 3.0801, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 4e-05, |
| "loss": 3.0888, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 3.9903846153846155e-05, |
| "loss": 3.0332, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 3.980769230769231e-05, |
| "loss": 2.9418, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 3.971153846153846e-05, |
| "loss": 2.933, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 3.961538461538462e-05, |
| "loss": 3.0467, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 3.951923076923077e-05, |
| "loss": 3.0991, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 3.942307692307692e-05, |
| "loss": 2.913, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 3.932692307692308e-05, |
| "loss": 3.0531, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.923076923076923e-05, |
| "loss": 2.9838, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.9134615384615385e-05, |
| "loss": 2.9406, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.903846153846154e-05, |
| "loss": 3.046, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.8942307692307696e-05, |
| "loss": 3.006, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.884615384615385e-05, |
| "loss": 2.9774, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.875e-05, |
| "loss": 2.9937, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 3.865384615384616e-05, |
| "loss": 2.9737, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 3.855769230769231e-05, |
| "loss": 2.9772, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 3.846153846153846e-05, |
| "loss": 2.8405, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 3.836538461538462e-05, |
| "loss": 3.0856, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 3.826923076923077e-05, |
| "loss": 2.9947, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 3.8173076923076926e-05, |
| "loss": 2.9251, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 3.807692307692308e-05, |
| "loss": 2.9613, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 3.798076923076923e-05, |
| "loss": 2.9402, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 3.788461538461538e-05, |
| "loss": 2.8823, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.778846153846154e-05, |
| "loss": 2.8906, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 3.769230769230769e-05, |
| "loss": 2.9475, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 3.7596153846153845e-05, |
| "loss": 2.9999, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 2.9503, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.7403846153846156e-05, |
| "loss": 3.0112, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 3.730769230769231e-05, |
| "loss": 3.0096, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 3.7211538461538466e-05, |
| "loss": 2.9804, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 3.711538461538462e-05, |
| "loss": 2.9203, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 3.701923076923077e-05, |
| "loss": 2.939, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 3.692307692307693e-05, |
| "loss": 2.8455, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 3.682692307692308e-05, |
| "loss": 2.9651, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 3.673076923076923e-05, |
| "loss": 2.9528, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 3.6634615384615385e-05, |
| "loss": 2.8042, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 3.653846153846154e-05, |
| "loss": 2.8311, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 3.644230769230769e-05, |
| "loss": 2.8888, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 3.634615384615385e-05, |
| "loss": 2.9151, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 3.625e-05, |
| "loss": 2.9463, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.615384615384615e-05, |
| "loss": 2.957, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.605769230769231e-05, |
| "loss": 2.9473, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.596153846153846e-05, |
| "loss": 2.9994, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 3.5865384615384615e-05, |
| "loss": 3.0486, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 3.5769230769230774e-05, |
| "loss": 2.9487, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 3.5673076923076926e-05, |
| "loss": 3.0173, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 3.557692307692308e-05, |
| "loss": 2.8656, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 3.548076923076924e-05, |
| "loss": 2.8834, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 3.538461538461539e-05, |
| "loss": 2.9829, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 3.528846153846154e-05, |
| "loss": 3.001, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 3.51923076923077e-05, |
| "loss": 2.9618, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 3.5096153846153845e-05, |
| "loss": 2.7964, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 3.5e-05, |
| "loss": 2.89, |
| "step": 936 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 3.4903846153846156e-05, |
| "loss": 2.7191, |
| "step": 942 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 3.480769230769231e-05, |
| "loss": 2.7875, |
| "step": 948 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 3.471153846153846e-05, |
| "loss": 2.8498, |
| "step": 954 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": 2.7858, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 3.451923076923077e-05, |
| "loss": 2.7435, |
| "step": 966 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 3.442307692307692e-05, |
| "loss": 2.8449, |
| "step": 972 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 3.432692307692308e-05, |
| "loss": 2.7817, |
| "step": 978 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 3.4230769230769234e-05, |
| "loss": 2.8797, |
| "step": 984 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 3.4134615384615386e-05, |
| "loss": 2.7719, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 3.4038461538461544e-05, |
| "loss": 2.7919, |
| "step": 996 |
| }, |
| { |
| "epoch": 3.21, |
| "learning_rate": 3.3942307692307696e-05, |
| "loss": 2.8072, |
| "step": 1002 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 3.384615384615385e-05, |
| "loss": 2.7281, |
| "step": 1008 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 3.375000000000001e-05, |
| "loss": 2.8431, |
| "step": 1014 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 3.365384615384616e-05, |
| "loss": 2.7154, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.29, |
| "learning_rate": 3.3557692307692304e-05, |
| "loss": 2.83, |
| "step": 1026 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 3.346153846153846e-05, |
| "loss": 2.8434, |
| "step": 1032 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 3.3365384615384615e-05, |
| "loss": 2.8399, |
| "step": 1038 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 3.326923076923077e-05, |
| "loss": 2.7072, |
| "step": 1044 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 3.3173076923076926e-05, |
| "loss": 2.773, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.38, |
| "learning_rate": 3.307692307692308e-05, |
| "loss": 2.822, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 3.298076923076923e-05, |
| "loss": 2.9056, |
| "step": 1062 |
| }, |
| { |
| "epoch": 3.42, |
| "learning_rate": 3.288461538461539e-05, |
| "loss": 2.749, |
| "step": 1068 |
| }, |
| { |
| "epoch": 3.44, |
| "learning_rate": 3.278846153846154e-05, |
| "loss": 2.7579, |
| "step": 1074 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 3.269230769230769e-05, |
| "loss": 2.7444, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 3.2596153846153845e-05, |
| "loss": 2.818, |
| "step": 1086 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 3.2500000000000004e-05, |
| "loss": 2.6285, |
| "step": 1092 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 3.2403846153846156e-05, |
| "loss": 2.6473, |
| "step": 1098 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 3.230769230769231e-05, |
| "loss": 2.7093, |
| "step": 1104 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 3.221153846153847e-05, |
| "loss": 2.732, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 3.211538461538462e-05, |
| "loss": 2.7064, |
| "step": 1116 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 3.201923076923077e-05, |
| "loss": 2.9072, |
| "step": 1122 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 3.192307692307692e-05, |
| "loss": 2.7544, |
| "step": 1128 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 3.1826923076923075e-05, |
| "loss": 2.738, |
| "step": 1134 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 3.1730769230769234e-05, |
| "loss": 2.6721, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.67, |
| "learning_rate": 3.1634615384615386e-05, |
| "loss": 2.8218, |
| "step": 1146 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 3.153846153846154e-05, |
| "loss": 2.7553, |
| "step": 1152 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 3.144230769230769e-05, |
| "loss": 2.8186, |
| "step": 1158 |
| }, |
| { |
| "epoch": 3.73, |
| "learning_rate": 3.134615384615385e-05, |
| "loss": 2.7511, |
| "step": 1164 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 3.125e-05, |
| "loss": 2.6013, |
| "step": 1170 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 3.115384615384615e-05, |
| "loss": 2.7191, |
| "step": 1176 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 3.105769230769231e-05, |
| "loss": 2.7695, |
| "step": 1182 |
| }, |
| { |
| "epoch": 3.81, |
| "learning_rate": 3.0961538461538464e-05, |
| "loss": 2.738, |
| "step": 1188 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 3.0865384615384616e-05, |
| "loss": 2.7202, |
| "step": 1194 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 3.0769230769230774e-05, |
| "loss": 2.6429, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 3.0673076923076926e-05, |
| "loss": 2.6742, |
| "step": 1206 |
| }, |
| { |
| "epoch": 3.88, |
| "learning_rate": 3.057692307692308e-05, |
| "loss": 2.7048, |
| "step": 1212 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 3.0480769230769234e-05, |
| "loss": 2.7016, |
| "step": 1218 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 3.0384615384615382e-05, |
| "loss": 2.7375, |
| "step": 1224 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 3.0288461538461538e-05, |
| "loss": 2.7926, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 3.0192307692307693e-05, |
| "loss": 2.7038, |
| "step": 1236 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 3.0096153846153845e-05, |
| "loss": 2.8294, |
| "step": 1242 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 3e-05, |
| "loss": 2.7543, |
| "step": 1248 |
| }, |
| { |
| "epoch": 4.02, |
| "learning_rate": 2.9903846153846156e-05, |
| "loss": 2.6022, |
| "step": 1254 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 2.9807692307692308e-05, |
| "loss": 2.6609, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 2.9711538461538464e-05, |
| "loss": 2.6091, |
| "step": 1266 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 2.9615384615384616e-05, |
| "loss": 2.5386, |
| "step": 1272 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 2.951923076923077e-05, |
| "loss": 2.6655, |
| "step": 1278 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 2.9423076923076926e-05, |
| "loss": 2.6023, |
| "step": 1284 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 2.932692307692308e-05, |
| "loss": 2.6491, |
| "step": 1290 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 2.9230769230769234e-05, |
| "loss": 2.6548, |
| "step": 1296 |
| }, |
| { |
| "epoch": 4.17, |
| "learning_rate": 2.913461538461539e-05, |
| "loss": 2.584, |
| "step": 1302 |
| }, |
| { |
| "epoch": 4.19, |
| "learning_rate": 2.903846153846154e-05, |
| "loss": 2.5447, |
| "step": 1308 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 2.8942307692307697e-05, |
| "loss": 2.5931, |
| "step": 1314 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 2.8846153846153845e-05, |
| "loss": 2.503, |
| "step": 1320 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 2.8749999999999997e-05, |
| "loss": 2.6097, |
| "step": 1326 |
| }, |
| { |
| "epoch": 4.27, |
| "learning_rate": 2.8653846153846153e-05, |
| "loss": 2.6926, |
| "step": 1332 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 2.855769230769231e-05, |
| "loss": 2.5829, |
| "step": 1338 |
| }, |
| { |
| "epoch": 4.31, |
| "learning_rate": 2.846153846153846e-05, |
| "loss": 2.6712, |
| "step": 1344 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 2.8365384615384616e-05, |
| "loss": 2.3988, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 2.826923076923077e-05, |
| "loss": 2.5714, |
| "step": 1356 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 2.8173076923076923e-05, |
| "loss": 2.6627, |
| "step": 1362 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 2.807692307692308e-05, |
| "loss": 2.7195, |
| "step": 1368 |
| }, |
| { |
| "epoch": 4.4, |
| "learning_rate": 2.7980769230769234e-05, |
| "loss": 2.7181, |
| "step": 1374 |
| }, |
| { |
| "epoch": 4.42, |
| "learning_rate": 2.7884615384615386e-05, |
| "loss": 2.626, |
| "step": 1380 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 2.778846153846154e-05, |
| "loss": 2.638, |
| "step": 1386 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 2.7692307692307694e-05, |
| "loss": 2.6275, |
| "step": 1392 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 2.759615384615385e-05, |
| "loss": 2.6399, |
| "step": 1398 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 2.7500000000000004e-05, |
| "loss": 2.6894, |
| "step": 1404 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 2.7403846153846156e-05, |
| "loss": 2.5917, |
| "step": 1410 |
| }, |
| { |
| "epoch": 4.54, |
| "learning_rate": 2.7307692307692305e-05, |
| "loss": 2.5564, |
| "step": 1416 |
| }, |
| { |
| "epoch": 4.56, |
| "learning_rate": 2.721153846153846e-05, |
| "loss": 2.6134, |
| "step": 1422 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 2.7115384615384616e-05, |
| "loss": 2.6375, |
| "step": 1428 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 2.7019230769230768e-05, |
| "loss": 2.6301, |
| "step": 1434 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 2.6923076923076923e-05, |
| "loss": 2.5679, |
| "step": 1440 |
| }, |
| { |
| "epoch": 4.63, |
| "learning_rate": 2.682692307692308e-05, |
| "loss": 2.6704, |
| "step": 1446 |
| }, |
| { |
| "epoch": 4.65, |
| "learning_rate": 2.673076923076923e-05, |
| "loss": 2.6778, |
| "step": 1452 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 2.6634615384615386e-05, |
| "loss": 2.5626, |
| "step": 1458 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 2.6538461538461538e-05, |
| "loss": 2.5557, |
| "step": 1464 |
| }, |
| { |
| "epoch": 4.71, |
| "learning_rate": 2.6442307692307694e-05, |
| "loss": 2.5245, |
| "step": 1470 |
| }, |
| { |
| "epoch": 4.73, |
| "learning_rate": 2.634615384615385e-05, |
| "loss": 2.5189, |
| "step": 1476 |
| }, |
| { |
| "epoch": 4.75, |
| "learning_rate": 2.625e-05, |
| "loss": 2.4994, |
| "step": 1482 |
| }, |
| { |
| "epoch": 4.77, |
| "learning_rate": 2.6153846153846157e-05, |
| "loss": 2.472, |
| "step": 1488 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 2.6057692307692312e-05, |
| "loss": 2.5451, |
| "step": 1494 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 2.5961538461538464e-05, |
| "loss": 2.5559, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 2.586538461538462e-05, |
| "loss": 2.5902, |
| "step": 1506 |
| }, |
| { |
| "epoch": 4.85, |
| "learning_rate": 2.5769230769230768e-05, |
| "loss": 2.6841, |
| "step": 1512 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 2.5673076923076923e-05, |
| "loss": 2.6092, |
| "step": 1518 |
| }, |
| { |
| "epoch": 4.88, |
| "learning_rate": 2.5576923076923075e-05, |
| "loss": 2.5646, |
| "step": 1524 |
| }, |
| { |
| "epoch": 4.9, |
| "learning_rate": 2.548076923076923e-05, |
| "loss": 2.5201, |
| "step": 1530 |
| }, |
| { |
| "epoch": 4.92, |
| "learning_rate": 2.5384615384615383e-05, |
| "loss": 2.5622, |
| "step": 1536 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 2.528846153846154e-05, |
| "loss": 2.6323, |
| "step": 1542 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 2.5192307692307694e-05, |
| "loss": 2.7179, |
| "step": 1548 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 2.5096153846153846e-05, |
| "loss": 2.6441, |
| "step": 1554 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 2.5e-05, |
| "loss": 2.668, |
| "step": 1560 |
| }, |
| { |
| "epoch": 5.02, |
| "learning_rate": 2.4903846153846157e-05, |
| "loss": 2.5535, |
| "step": 1566 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 2.480769230769231e-05, |
| "loss": 2.4569, |
| "step": 1572 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 2.4711538461538464e-05, |
| "loss": 2.5378, |
| "step": 1578 |
| }, |
| { |
| "epoch": 5.08, |
| "learning_rate": 2.461538461538462e-05, |
| "loss": 2.537, |
| "step": 1584 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 2.4519230769230768e-05, |
| "loss": 2.4913, |
| "step": 1590 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 2.4423076923076924e-05, |
| "loss": 2.5156, |
| "step": 1596 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 2.432692307692308e-05, |
| "loss": 2.493, |
| "step": 1602 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 2.423076923076923e-05, |
| "loss": 2.5357, |
| "step": 1608 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 2.4134615384615386e-05, |
| "loss": 2.4508, |
| "step": 1614 |
| }, |
| { |
| "epoch": 5.19, |
| "learning_rate": 2.4038461538461542e-05, |
| "loss": 2.5045, |
| "step": 1620 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 2.3942307692307694e-05, |
| "loss": 2.4617, |
| "step": 1626 |
| }, |
| { |
| "epoch": 5.23, |
| "learning_rate": 2.384615384615385e-05, |
| "loss": 2.5392, |
| "step": 1632 |
| }, |
| { |
| "epoch": 5.25, |
| "learning_rate": 2.375e-05, |
| "loss": 2.3976, |
| "step": 1638 |
| }, |
| { |
| "epoch": 5.27, |
| "learning_rate": 2.3653846153846153e-05, |
| "loss": 2.5288, |
| "step": 1644 |
| }, |
| { |
| "epoch": 5.29, |
| "learning_rate": 2.355769230769231e-05, |
| "loss": 2.5243, |
| "step": 1650 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 2.3461538461538464e-05, |
| "loss": 2.5018, |
| "step": 1656 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 2.3365384615384616e-05, |
| "loss": 2.5947, |
| "step": 1662 |
| }, |
| { |
| "epoch": 5.35, |
| "learning_rate": 2.326923076923077e-05, |
| "loss": 2.5226, |
| "step": 1668 |
| }, |
| { |
| "epoch": 5.37, |
| "learning_rate": 2.3173076923076924e-05, |
| "loss": 2.4687, |
| "step": 1674 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 2.307692307692308e-05, |
| "loss": 2.5297, |
| "step": 1680 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 2.298076923076923e-05, |
| "loss": 2.5457, |
| "step": 1686 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 2.2884615384615387e-05, |
| "loss": 2.4218, |
| "step": 1692 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 2.278846153846154e-05, |
| "loss": 2.5219, |
| "step": 1698 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 2.2692307692307694e-05, |
| "loss": 2.4858, |
| "step": 1704 |
| }, |
| { |
| "epoch": 5.48, |
| "learning_rate": 2.2596153846153846e-05, |
| "loss": 2.4715, |
| "step": 1710 |
| }, |
| { |
| "epoch": 5.5, |
| "learning_rate": 2.25e-05, |
| "loss": 2.3294, |
| "step": 1716 |
| }, |
| { |
| "epoch": 5.52, |
| "learning_rate": 2.2403846153846157e-05, |
| "loss": 2.5062, |
| "step": 1722 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 2.230769230769231e-05, |
| "loss": 2.392, |
| "step": 1728 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 2.221153846153846e-05, |
| "loss": 2.4092, |
| "step": 1734 |
| }, |
| { |
| "epoch": 5.58, |
| "learning_rate": 2.2115384615384616e-05, |
| "loss": 2.4257, |
| "step": 1740 |
| }, |
| { |
| "epoch": 5.6, |
| "learning_rate": 2.201923076923077e-05, |
| "loss": 2.4426, |
| "step": 1746 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 2.1923076923076924e-05, |
| "loss": 2.4672, |
| "step": 1752 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 2.182692307692308e-05, |
| "loss": 2.5468, |
| "step": 1758 |
| }, |
| { |
| "epoch": 5.65, |
| "learning_rate": 2.173076923076923e-05, |
| "loss": 2.3917, |
| "step": 1764 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 2.1634615384615387e-05, |
| "loss": 2.3895, |
| "step": 1770 |
| }, |
| { |
| "epoch": 5.69, |
| "learning_rate": 2.1538461538461542e-05, |
| "loss": 2.5246, |
| "step": 1776 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 2.144230769230769e-05, |
| "loss": 2.5025, |
| "step": 1782 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 2.1346153846153846e-05, |
| "loss": 2.4898, |
| "step": 1788 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 2.125e-05, |
| "loss": 2.5358, |
| "step": 1794 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 2.1153846153846154e-05, |
| "loss": 2.5346, |
| "step": 1800 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 2.105769230769231e-05, |
| "loss": 2.3057, |
| "step": 1806 |
| }, |
| { |
| "epoch": 5.81, |
| "learning_rate": 2.0961538461538464e-05, |
| "loss": 2.5392, |
| "step": 1812 |
| }, |
| { |
| "epoch": 5.83, |
| "learning_rate": 2.0865384615384616e-05, |
| "loss": 2.5815, |
| "step": 1818 |
| }, |
| { |
| "epoch": 5.85, |
| "learning_rate": 2.0769230769230772e-05, |
| "loss": 2.2718, |
| "step": 1824 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 2.0673076923076924e-05, |
| "loss": 2.474, |
| "step": 1830 |
| }, |
| { |
| "epoch": 5.88, |
| "learning_rate": 2.0576923076923076e-05, |
| "loss": 2.4821, |
| "step": 1836 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 2.048076923076923e-05, |
| "loss": 2.5334, |
| "step": 1842 |
| }, |
| { |
| "epoch": 5.92, |
| "learning_rate": 2.0384615384615387e-05, |
| "loss": 2.3877, |
| "step": 1848 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 2.028846153846154e-05, |
| "loss": 2.3301, |
| "step": 1854 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 2.0192307692307694e-05, |
| "loss": 2.545, |
| "step": 1860 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 2.0096153846153846e-05, |
| "loss": 2.3865, |
| "step": 1866 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 2e-05, |
| "loss": 2.4531, |
| "step": 1872 |
| }, |
| { |
| "epoch": 6.02, |
| "learning_rate": 1.9903846153846154e-05, |
| "loss": 2.4046, |
| "step": 1878 |
| }, |
| { |
| "epoch": 6.04, |
| "learning_rate": 1.980769230769231e-05, |
| "loss": 2.476, |
| "step": 1884 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 1.971153846153846e-05, |
| "loss": 2.3547, |
| "step": 1890 |
| }, |
| { |
| "epoch": 6.08, |
| "learning_rate": 1.9615384615384617e-05, |
| "loss": 2.5396, |
| "step": 1896 |
| }, |
| { |
| "epoch": 6.1, |
| "learning_rate": 1.951923076923077e-05, |
| "loss": 2.3868, |
| "step": 1902 |
| }, |
| { |
| "epoch": 6.12, |
| "learning_rate": 1.9423076923076924e-05, |
| "loss": 2.2733, |
| "step": 1908 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 1.932692307692308e-05, |
| "loss": 2.283, |
| "step": 1914 |
| }, |
| { |
| "epoch": 6.15, |
| "learning_rate": 1.923076923076923e-05, |
| "loss": 2.4231, |
| "step": 1920 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 1.9134615384615383e-05, |
| "loss": 2.375, |
| "step": 1926 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 1.903846153846154e-05, |
| "loss": 2.3824, |
| "step": 1932 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 1.894230769230769e-05, |
| "loss": 2.3903, |
| "step": 1938 |
| }, |
| { |
| "epoch": 6.23, |
| "learning_rate": 1.8846153846153846e-05, |
| "loss": 2.2854, |
| "step": 1944 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 1.8750000000000002e-05, |
| "loss": 2.2358, |
| "step": 1950 |
| }, |
| { |
| "epoch": 6.27, |
| "learning_rate": 1.8653846153846154e-05, |
| "loss": 2.4751, |
| "step": 1956 |
| }, |
| { |
| "epoch": 6.29, |
| "learning_rate": 1.855769230769231e-05, |
| "loss": 2.3434, |
| "step": 1962 |
| }, |
| { |
| "epoch": 6.31, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 2.4088, |
| "step": 1968 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 1.8365384615384617e-05, |
| "loss": 2.2746, |
| "step": 1974 |
| }, |
| { |
| "epoch": 6.35, |
| "learning_rate": 1.826923076923077e-05, |
| "loss": 2.4396, |
| "step": 1980 |
| }, |
| { |
| "epoch": 6.37, |
| "learning_rate": 1.8173076923076924e-05, |
| "loss": 2.3663, |
| "step": 1986 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 1.8076923076923076e-05, |
| "loss": 2.4488, |
| "step": 1992 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 1.798076923076923e-05, |
| "loss": 2.3495, |
| "step": 1998 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 1.7884615384615387e-05, |
| "loss": 2.4099, |
| "step": 2004 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 1.778846153846154e-05, |
| "loss": 2.4343, |
| "step": 2010 |
| }, |
| { |
| "epoch": 6.46, |
| "learning_rate": 1.7692307692307694e-05, |
| "loss": 2.3508, |
| "step": 2016 |
| }, |
| { |
| "epoch": 6.48, |
| "learning_rate": 1.759615384615385e-05, |
| "loss": 2.4724, |
| "step": 2022 |
| }, |
| { |
| "epoch": 6.5, |
| "learning_rate": 1.75e-05, |
| "loss": 2.4746, |
| "step": 2028 |
| }, |
| { |
| "epoch": 6.52, |
| "learning_rate": 1.7403846153846154e-05, |
| "loss": 2.3899, |
| "step": 2034 |
| }, |
| { |
| "epoch": 6.54, |
| "learning_rate": 1.730769230769231e-05, |
| "loss": 2.3465, |
| "step": 2040 |
| }, |
| { |
| "epoch": 6.56, |
| "learning_rate": 1.721153846153846e-05, |
| "loss": 2.3984, |
| "step": 2046 |
| }, |
| { |
| "epoch": 6.58, |
| "learning_rate": 1.7115384615384617e-05, |
| "loss": 2.3041, |
| "step": 2052 |
| }, |
| { |
| "epoch": 6.6, |
| "learning_rate": 1.7019230769230772e-05, |
| "loss": 2.3035, |
| "step": 2058 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 1.6923076923076924e-05, |
| "loss": 2.2846, |
| "step": 2064 |
| }, |
| { |
| "epoch": 6.63, |
| "learning_rate": 1.682692307692308e-05, |
| "loss": 2.4577, |
| "step": 2070 |
| }, |
| { |
| "epoch": 6.65, |
| "learning_rate": 1.673076923076923e-05, |
| "loss": 2.3338, |
| "step": 2076 |
| }, |
| { |
| "epoch": 6.67, |
| "learning_rate": 1.6634615384615384e-05, |
| "loss": 2.3623, |
| "step": 2082 |
| }, |
| { |
| "epoch": 6.69, |
| "learning_rate": 1.653846153846154e-05, |
| "loss": 2.3601, |
| "step": 2088 |
| }, |
| { |
| "epoch": 6.71, |
| "learning_rate": 1.6442307692307695e-05, |
| "loss": 2.34, |
| "step": 2094 |
| }, |
| { |
| "epoch": 6.73, |
| "learning_rate": 1.6346153846153847e-05, |
| "loss": 2.3451, |
| "step": 2100 |
| }, |
| { |
| "epoch": 6.75, |
| "learning_rate": 1.6250000000000002e-05, |
| "loss": 2.3392, |
| "step": 2106 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 1.6153846153846154e-05, |
| "loss": 2.4011, |
| "step": 2112 |
| }, |
| { |
| "epoch": 6.79, |
| "learning_rate": 1.605769230769231e-05, |
| "loss": 2.382, |
| "step": 2118 |
| }, |
| { |
| "epoch": 6.81, |
| "learning_rate": 1.596153846153846e-05, |
| "loss": 2.4005, |
| "step": 2124 |
| }, |
| { |
| "epoch": 6.83, |
| "learning_rate": 1.5865384615384617e-05, |
| "loss": 2.3398, |
| "step": 2130 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 1.576923076923077e-05, |
| "loss": 2.3156, |
| "step": 2136 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 1.5673076923076924e-05, |
| "loss": 2.3249, |
| "step": 2142 |
| }, |
| { |
| "epoch": 6.88, |
| "learning_rate": 1.5576923076923076e-05, |
| "loss": 2.2891, |
| "step": 2148 |
| }, |
| { |
| "epoch": 6.9, |
| "learning_rate": 1.5480769230769232e-05, |
| "loss": 2.3154, |
| "step": 2154 |
| }, |
| { |
| "epoch": 6.92, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 2.3642, |
| "step": 2160 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 1.528846153846154e-05, |
| "loss": 2.3303, |
| "step": 2166 |
| }, |
| { |
| "epoch": 6.96, |
| "learning_rate": 1.5192307692307691e-05, |
| "loss": 2.4364, |
| "step": 2172 |
| }, |
| { |
| "epoch": 6.98, |
| "learning_rate": 1.5096153846153847e-05, |
| "loss": 2.4258, |
| "step": 2178 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 1.5e-05, |
| "loss": 2.3566, |
| "step": 2184 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 1.4903846153846154e-05, |
| "loss": 2.2916, |
| "step": 2190 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 1.4807692307692308e-05, |
| "loss": 2.3829, |
| "step": 2196 |
| }, |
| { |
| "epoch": 7.06, |
| "learning_rate": 1.4711538461538463e-05, |
| "loss": 2.242, |
| "step": 2202 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 1.4615384615384617e-05, |
| "loss": 2.2546, |
| "step": 2208 |
| }, |
| { |
| "epoch": 7.1, |
| "learning_rate": 1.451923076923077e-05, |
| "loss": 2.1526, |
| "step": 2214 |
| }, |
| { |
| "epoch": 7.12, |
| "learning_rate": 1.4423076923076923e-05, |
| "loss": 2.2073, |
| "step": 2220 |
| }, |
| { |
| "epoch": 7.13, |
| "learning_rate": 1.4326923076923076e-05, |
| "loss": 2.2992, |
| "step": 2226 |
| }, |
| { |
| "epoch": 7.15, |
| "learning_rate": 1.423076923076923e-05, |
| "loss": 2.2544, |
| "step": 2232 |
| }, |
| { |
| "epoch": 7.17, |
| "learning_rate": 1.4134615384615386e-05, |
| "loss": 2.2632, |
| "step": 2238 |
| }, |
| { |
| "epoch": 7.19, |
| "learning_rate": 1.403846153846154e-05, |
| "loss": 2.3158, |
| "step": 2244 |
| }, |
| { |
| "epoch": 7.21, |
| "learning_rate": 1.3942307692307693e-05, |
| "loss": 2.2981, |
| "step": 2250 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 1.3846153846153847e-05, |
| "loss": 2.2468, |
| "step": 2256 |
| }, |
| { |
| "epoch": 7.25, |
| "learning_rate": 1.3750000000000002e-05, |
| "loss": 2.4333, |
| "step": 2262 |
| }, |
| { |
| "epoch": 7.27, |
| "learning_rate": 1.3653846153846153e-05, |
| "loss": 2.3005, |
| "step": 2268 |
| }, |
| { |
| "epoch": 7.29, |
| "learning_rate": 1.3557692307692308e-05, |
| "loss": 2.2629, |
| "step": 2274 |
| }, |
| { |
| "epoch": 7.31, |
| "learning_rate": 1.3461538461538462e-05, |
| "loss": 2.3122, |
| "step": 2280 |
| }, |
| { |
| "epoch": 7.33, |
| "learning_rate": 1.3365384615384615e-05, |
| "loss": 2.3257, |
| "step": 2286 |
| }, |
| { |
| "epoch": 7.35, |
| "learning_rate": 1.3269230769230769e-05, |
| "loss": 2.4191, |
| "step": 2292 |
| }, |
| { |
| "epoch": 7.37, |
| "learning_rate": 1.3173076923076925e-05, |
| "loss": 2.2431, |
| "step": 2298 |
| }, |
| { |
| "epoch": 7.38, |
| "learning_rate": 1.3076923076923078e-05, |
| "loss": 2.3716, |
| "step": 2304 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 1.2980769230769232e-05, |
| "loss": 2.1685, |
| "step": 2310 |
| }, |
| { |
| "epoch": 7.42, |
| "learning_rate": 1.2884615384615384e-05, |
| "loss": 2.2519, |
| "step": 2316 |
| }, |
| { |
| "epoch": 7.44, |
| "learning_rate": 1.2788461538461538e-05, |
| "loss": 2.2242, |
| "step": 2322 |
| }, |
| { |
| "epoch": 7.46, |
| "learning_rate": 1.2692307692307691e-05, |
| "loss": 2.3481, |
| "step": 2328 |
| }, |
| { |
| "epoch": 7.48, |
| "learning_rate": 1.2596153846153847e-05, |
| "loss": 2.3117, |
| "step": 2334 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 1.25e-05, |
| "loss": 2.2924, |
| "step": 2340 |
| }, |
| { |
| "epoch": 7.52, |
| "learning_rate": 1.2403846153846154e-05, |
| "loss": 2.2933, |
| "step": 2346 |
| }, |
| { |
| "epoch": 7.54, |
| "learning_rate": 1.230769230769231e-05, |
| "loss": 2.298, |
| "step": 2352 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 1.2211538461538462e-05, |
| "loss": 2.2145, |
| "step": 2358 |
| }, |
| { |
| "epoch": 7.58, |
| "learning_rate": 1.2115384615384615e-05, |
| "loss": 2.3118, |
| "step": 2364 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 1.2019230769230771e-05, |
| "loss": 2.3132, |
| "step": 2370 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 1.1923076923076925e-05, |
| "loss": 2.4653, |
| "step": 2376 |
| }, |
| { |
| "epoch": 7.63, |
| "learning_rate": 1.1826923076923077e-05, |
| "loss": 2.2083, |
| "step": 2382 |
| }, |
| { |
| "epoch": 7.65, |
| "learning_rate": 1.1730769230769232e-05, |
| "loss": 2.2876, |
| "step": 2388 |
| }, |
| { |
| "epoch": 7.67, |
| "learning_rate": 1.1634615384615386e-05, |
| "loss": 2.2115, |
| "step": 2394 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 1.153846153846154e-05, |
| "loss": 2.3116, |
| "step": 2400 |
| }, |
| { |
| "epoch": 7.71, |
| "learning_rate": 1.1442307692307693e-05, |
| "loss": 2.2858, |
| "step": 2406 |
| }, |
| { |
| "epoch": 7.73, |
| "learning_rate": 1.1346153846153847e-05, |
| "loss": 2.2525, |
| "step": 2412 |
| }, |
| { |
| "epoch": 7.75, |
| "learning_rate": 1.125e-05, |
| "loss": 2.3215, |
| "step": 2418 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 1.1153846153846154e-05, |
| "loss": 2.3884, |
| "step": 2424 |
| }, |
| { |
| "epoch": 7.79, |
| "learning_rate": 1.1057692307692308e-05, |
| "loss": 2.3767, |
| "step": 2430 |
| }, |
| { |
| "epoch": 7.81, |
| "learning_rate": 1.0961538461538462e-05, |
| "loss": 2.1214, |
| "step": 2436 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 1.0865384615384616e-05, |
| "loss": 2.3003, |
| "step": 2442 |
| }, |
| { |
| "epoch": 7.85, |
| "learning_rate": 1.0769230769230771e-05, |
| "loss": 2.426, |
| "step": 2448 |
| }, |
| { |
| "epoch": 7.87, |
| "learning_rate": 1.0673076923076923e-05, |
| "loss": 2.2785, |
| "step": 2454 |
| }, |
| { |
| "epoch": 7.88, |
| "learning_rate": 1.0576923076923077e-05, |
| "loss": 2.3252, |
| "step": 2460 |
| }, |
| { |
| "epoch": 7.9, |
| "learning_rate": 1.0480769230769232e-05, |
| "loss": 2.2636, |
| "step": 2466 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 1.0384615384615386e-05, |
| "loss": 2.2732, |
| "step": 2472 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 1.0288461538461538e-05, |
| "loss": 2.1934, |
| "step": 2478 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 1.0192307692307693e-05, |
| "loss": 2.2967, |
| "step": 2484 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 1.0096153846153847e-05, |
| "loss": 2.2939, |
| "step": 2490 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 1e-05, |
| "loss": 2.3393, |
| "step": 2496 |
| } |
| ], |
| "logging_steps": 6, |
| "max_steps": 3120, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "total_flos": 1007380809216000.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|