| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.2051282051282053, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9903846153846154e-05, |
| "loss": 5.8218, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.980769230769231e-05, |
| "loss": 5.2499, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.9711538461538465e-05, |
| "loss": 5.1236, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.961538461538462e-05, |
| "loss": 4.8363, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.9519230769230776e-05, |
| "loss": 4.5805, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.942307692307693e-05, |
| "loss": 4.5276, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.932692307692308e-05, |
| "loss": 4.3871, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.923076923076924e-05, |
| "loss": 4.2706, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.9134615384615384e-05, |
| "loss": 4.0906, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.9038461538461536e-05, |
| "loss": 4.1704, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.8942307692307695e-05, |
| "loss": 3.9014, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.884615384615385e-05, |
| "loss": 4.0338, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.875e-05, |
| "loss": 4.011, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.865384615384616e-05, |
| "loss": 3.8017, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.855769230769231e-05, |
| "loss": 3.819, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.846153846153846e-05, |
| "loss": 3.8157, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.836538461538462e-05, |
| "loss": 3.7675, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.826923076923077e-05, |
| "loss": 3.756, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.8173076923076925e-05, |
| "loss": 3.7439, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.8076923076923084e-05, |
| "loss": 3.761, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.7980769230769236e-05, |
| "loss": 3.7318, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.788461538461539e-05, |
| "loss": 3.745, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.778846153846154e-05, |
| "loss": 3.784, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.76923076923077e-05, |
| "loss": 3.6114, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.7596153846153844e-05, |
| "loss": 3.637, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.75e-05, |
| "loss": 3.5881, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.7403846153846155e-05, |
| "loss": 3.5336, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.730769230769231e-05, |
| "loss": 3.5313, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.7211538461538465e-05, |
| "loss": 3.6357, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.711538461538462e-05, |
| "loss": 3.5818, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.701923076923077e-05, |
| "loss": 3.5017, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.692307692307693e-05, |
| "loss": 3.5815, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.682692307692308e-05, |
| "loss": 3.611, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.673076923076923e-05, |
| "loss": 3.6096, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 4.6634615384615384e-05, |
| "loss": 3.5436, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.653846153846154e-05, |
| "loss": 3.5258, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.6442307692307695e-05, |
| "loss": 3.5277, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 4.634615384615385e-05, |
| "loss": 3.505, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.6250000000000006e-05, |
| "loss": 3.4665, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.615384615384616e-05, |
| "loss": 3.4245, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 4.605769230769231e-05, |
| "loss": 3.4547, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 4.596153846153846e-05, |
| "loss": 3.4382, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.5865384615384614e-05, |
| "loss": 3.482, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 4.576923076923077e-05, |
| "loss": 3.4795, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 4.5673076923076925e-05, |
| "loss": 3.5018, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 4.557692307692308e-05, |
| "loss": 3.448, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.548076923076923e-05, |
| "loss": 3.4206, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 4.538461538461539e-05, |
| "loss": 3.528, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 4.528846153846154e-05, |
| "loss": 3.475, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.519230769230769e-05, |
| "loss": 3.4142, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 4.509615384615385e-05, |
| "loss": 3.4156, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.5e-05, |
| "loss": 3.3274, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.4903846153846155e-05, |
| "loss": 3.3594, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 4.4807692307692314e-05, |
| "loss": 3.2517, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 4.4711538461538466e-05, |
| "loss": 3.2934, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 4.461538461538462e-05, |
| "loss": 3.2986, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 4.451923076923077e-05, |
| "loss": 3.2655, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 4.442307692307692e-05, |
| "loss": 3.4058, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 4.4326923076923074e-05, |
| "loss": 3.265, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 4.423076923076923e-05, |
| "loss": 3.3208, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 4.4134615384615385e-05, |
| "loss": 3.238, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 4.403846153846154e-05, |
| "loss": 3.2153, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 4.3942307692307695e-05, |
| "loss": 3.264, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 4.384615384615385e-05, |
| "loss": 3.2911, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 4.375e-05, |
| "loss": 3.3027, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 4.365384615384616e-05, |
| "loss": 3.2589, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 4.355769230769231e-05, |
| "loss": 3.3683, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 4.346153846153846e-05, |
| "loss": 3.2849, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 4.336538461538462e-05, |
| "loss": 3.2397, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 4.326923076923077e-05, |
| "loss": 3.2128, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 4.3173076923076925e-05, |
| "loss": 3.1944, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 4.3076923076923084e-05, |
| "loss": 3.1837, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 4.2980769230769236e-05, |
| "loss": 3.1793, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 4.288461538461538e-05, |
| "loss": 3.1447, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 4.278846153846154e-05, |
| "loss": 3.1028, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 4.269230769230769e-05, |
| "loss": 3.2471, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 4.2596153846153844e-05, |
| "loss": 3.1855, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 4.25e-05, |
| "loss": 3.1817, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 4.2403846153846155e-05, |
| "loss": 3.214, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 4.230769230769231e-05, |
| "loss": 2.997, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 4.2211538461538466e-05, |
| "loss": 3.2059, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 4.211538461538462e-05, |
| "loss": 3.1517, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 4.201923076923077e-05, |
| "loss": 3.2236, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 4.192307692307693e-05, |
| "loss": 3.1939, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 4.182692307692308e-05, |
| "loss": 3.1143, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.173076923076923e-05, |
| "loss": 3.2261, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 4.163461538461539e-05, |
| "loss": 3.2095, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 4.1538461538461544e-05, |
| "loss": 3.2498, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 4.1442307692307696e-05, |
| "loss": 3.1493, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 4.134615384615385e-05, |
| "loss": 3.0919, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 4.125e-05, |
| "loss": 3.2617, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 4.115384615384615e-05, |
| "loss": 3.1447, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 4.105769230769231e-05, |
| "loss": 3.2025, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 4.096153846153846e-05, |
| "loss": 3.1829, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 4.0865384615384615e-05, |
| "loss": 3.2216, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 4.0769230769230773e-05, |
| "loss": 3.1391, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 4.0673076923076926e-05, |
| "loss": 3.1344, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 4.057692307692308e-05, |
| "loss": 3.1699, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 4.0480769230769236e-05, |
| "loss": 3.0138, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 4.038461538461539e-05, |
| "loss": 3.1157, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 4.028846153846154e-05, |
| "loss": 3.1173, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 4.019230769230769e-05, |
| "loss": 3.2019, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 4.009615384615385e-05, |
| "loss": 3.0801, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 4e-05, |
| "loss": 3.0888, |
| "step": 624 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 3.9903846153846155e-05, |
| "loss": 3.0332, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 3.980769230769231e-05, |
| "loss": 2.9418, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 3.971153846153846e-05, |
| "loss": 2.933, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 3.961538461538462e-05, |
| "loss": 3.0467, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 3.951923076923077e-05, |
| "loss": 3.0991, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 3.942307692307692e-05, |
| "loss": 2.913, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 3.932692307692308e-05, |
| "loss": 3.0531, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.923076923076923e-05, |
| "loss": 2.9838, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 3.9134615384615385e-05, |
| "loss": 2.9406, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 3.903846153846154e-05, |
| "loss": 3.046, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 3.8942307692307696e-05, |
| "loss": 3.006, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 3.884615384615385e-05, |
| "loss": 2.9774, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 3.875e-05, |
| "loss": 2.9937, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 3.865384615384616e-05, |
| "loss": 2.9737, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 3.855769230769231e-05, |
| "loss": 2.9772, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 3.846153846153846e-05, |
| "loss": 2.8405, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 3.836538461538462e-05, |
| "loss": 3.0856, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 3.826923076923077e-05, |
| "loss": 2.9947, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 3.8173076923076926e-05, |
| "loss": 2.9251, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 3.807692307692308e-05, |
| "loss": 2.9613, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 3.798076923076923e-05, |
| "loss": 2.9402, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 3.788461538461538e-05, |
| "loss": 2.8823, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 3.778846153846154e-05, |
| "loss": 2.8906, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 3.769230769230769e-05, |
| "loss": 2.9475, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 3.7596153846153845e-05, |
| "loss": 2.9999, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 2.9503, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 3.7403846153846156e-05, |
| "loss": 3.0112, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 3.730769230769231e-05, |
| "loss": 3.0096, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 3.7211538461538466e-05, |
| "loss": 2.9804, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 3.711538461538462e-05, |
| "loss": 2.9203, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 3.701923076923077e-05, |
| "loss": 2.939, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 3.692307692307693e-05, |
| "loss": 2.8455, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 3.682692307692308e-05, |
| "loss": 2.9651, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 3.673076923076923e-05, |
| "loss": 2.9528, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 3.6634615384615385e-05, |
| "loss": 2.8042, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 3.653846153846154e-05, |
| "loss": 2.8311, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 3.644230769230769e-05, |
| "loss": 2.8888, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 3.634615384615385e-05, |
| "loss": 2.9151, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 3.625e-05, |
| "loss": 2.9463, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.615384615384615e-05, |
| "loss": 2.957, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.605769230769231e-05, |
| "loss": 2.9473, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.596153846153846e-05, |
| "loss": 2.9994, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 3.5865384615384615e-05, |
| "loss": 3.0486, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 3.5769230769230774e-05, |
| "loss": 2.9487, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 3.5673076923076926e-05, |
| "loss": 3.0173, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 3.557692307692308e-05, |
| "loss": 2.8656, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 3.548076923076924e-05, |
| "loss": 2.8834, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 3.538461538461539e-05, |
| "loss": 2.9829, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 3.528846153846154e-05, |
| "loss": 3.001, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 3.51923076923077e-05, |
| "loss": 2.9618, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 3.5096153846153845e-05, |
| "loss": 2.7964, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 3.5e-05, |
| "loss": 2.89, |
| "step": 936 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 3.4903846153846156e-05, |
| "loss": 2.7191, |
| "step": 942 |
| }, |
| { |
| "epoch": 3.04, |
| "learning_rate": 3.480769230769231e-05, |
| "loss": 2.7875, |
| "step": 948 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 3.471153846153846e-05, |
| "loss": 2.8498, |
| "step": 954 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": 2.7858, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 3.451923076923077e-05, |
| "loss": 2.7435, |
| "step": 966 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 3.442307692307692e-05, |
| "loss": 2.8449, |
| "step": 972 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 3.432692307692308e-05, |
| "loss": 2.7817, |
| "step": 978 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 3.4230769230769234e-05, |
| "loss": 2.8797, |
| "step": 984 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 3.4134615384615386e-05, |
| "loss": 2.7719, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 3.4038461538461544e-05, |
| "loss": 2.7919, |
| "step": 996 |
| } |
| ], |
| "logging_steps": 6, |
| "max_steps": 3120, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "total_flos": 402675517440000.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|