| { | |
| "best_metric": 0.8738669753074646, | |
| "best_model_checkpoint": "./results/checkpoint-5960", | |
| "epoch": 2.9979879275653922, | |
| "global_step": 5960, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 8.8266, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 8.881924629211426, | |
| "eval_runtime": 0.9616, | |
| "eval_samples_per_second": 229.834, | |
| "eval_steps_per_second": 229.834, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 8.6751, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 8.58967399597168, | |
| "eval_runtime": 0.9574, | |
| "eval_samples_per_second": 230.827, | |
| "eval_steps_per_second": 230.827, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3e-06, | |
| "loss": 9.4142, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 8.080711364746094, | |
| "eval_runtime": 0.9575, | |
| "eval_samples_per_second": 230.813, | |
| "eval_steps_per_second": 230.813, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 6.9745, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 7.486766338348389, | |
| "eval_runtime": 0.9544, | |
| "eval_samples_per_second": 231.562, | |
| "eval_steps_per_second": 231.562, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5e-06, | |
| "loss": 6.6743, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 6.813642501831055, | |
| "eval_runtime": 0.9524, | |
| "eval_samples_per_second": 232.057, | |
| "eval_steps_per_second": 232.057, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 6e-06, | |
| "loss": 6.2099, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 5.959489345550537, | |
| "eval_runtime": 0.9558, | |
| "eval_samples_per_second": 231.214, | |
| "eval_steps_per_second": 231.214, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 4.9803, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 5.174376964569092, | |
| "eval_runtime": 0.9509, | |
| "eval_samples_per_second": 232.408, | |
| "eval_steps_per_second": 232.408, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 4.7144, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 4.122838020324707, | |
| "eval_runtime": 0.9547, | |
| "eval_samples_per_second": 231.497, | |
| "eval_steps_per_second": 231.497, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9e-06, | |
| "loss": 3.6935, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 3.0819661617279053, | |
| "eval_runtime": 0.957, | |
| "eval_samples_per_second": 230.938, | |
| "eval_steps_per_second": 230.938, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1e-05, | |
| "loss": 3.4562, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.7372376918792725, | |
| "eval_runtime": 0.9523, | |
| "eval_samples_per_second": 232.072, | |
| "eval_steps_per_second": 232.072, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 2.5252, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.583921432495117, | |
| "eval_runtime": 0.9625, | |
| "eval_samples_per_second": 229.618, | |
| "eval_steps_per_second": 229.618, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.2e-05, | |
| "loss": 2.2326, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.460407257080078, | |
| "eval_runtime": 0.9495, | |
| "eval_samples_per_second": 232.765, | |
| "eval_steps_per_second": 232.765, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 2.753, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.367825508117676, | |
| "eval_runtime": 0.9534, | |
| "eval_samples_per_second": 231.809, | |
| "eval_steps_per_second": 231.809, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 2.4226, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.286923885345459, | |
| "eval_runtime": 0.9561, | |
| "eval_samples_per_second": 231.137, | |
| "eval_steps_per_second": 231.137, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.5e-05, | |
| "loss": 2.3105, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 2.209494113922119, | |
| "eval_runtime": 0.9563, | |
| "eval_samples_per_second": 231.103, | |
| "eval_steps_per_second": 231.103, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 1.881, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 2.143799304962158, | |
| "eval_runtime": 0.9551, | |
| "eval_samples_per_second": 231.382, | |
| "eval_steps_per_second": 231.382, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 2.2803, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 2.091071128845215, | |
| "eval_runtime": 0.9551, | |
| "eval_samples_per_second": 231.398, | |
| "eval_steps_per_second": 231.398, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.8e-05, | |
| "loss": 1.9634, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 2.0437965393066406, | |
| "eval_runtime": 0.9559, | |
| "eval_samples_per_second": 231.207, | |
| "eval_steps_per_second": 231.207, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 1.9e-05, | |
| "loss": 2.8158, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 2.005643367767334, | |
| "eval_runtime": 0.9522, | |
| "eval_samples_per_second": 232.084, | |
| "eval_steps_per_second": 232.084, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2e-05, | |
| "loss": 1.9818, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 1.9821481704711914, | |
| "eval_runtime": 0.9585, | |
| "eval_samples_per_second": 230.573, | |
| "eval_steps_per_second": 230.573, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.1e-05, | |
| "loss": 1.4871, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 1.9364498853683472, | |
| "eval_runtime": 0.9734, | |
| "eval_samples_per_second": 227.035, | |
| "eval_steps_per_second": 227.035, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 2.3802, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 1.899760127067566, | |
| "eval_runtime": 0.9575, | |
| "eval_samples_per_second": 230.799, | |
| "eval_steps_per_second": 230.799, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": 2.4274, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 1.8640791177749634, | |
| "eval_runtime": 0.9627, | |
| "eval_samples_per_second": 229.557, | |
| "eval_steps_per_second": 229.557, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.4e-05, | |
| "loss": 2.1448, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 1.8371107578277588, | |
| "eval_runtime": 0.9546, | |
| "eval_samples_per_second": 231.505, | |
| "eval_steps_per_second": 231.505, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.7214, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 1.8137286901474, | |
| "eval_runtime": 0.9561, | |
| "eval_samples_per_second": 231.15, | |
| "eval_steps_per_second": 231.15, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 2.0771, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 1.7837375402450562, | |
| "eval_runtime": 0.9549, | |
| "eval_samples_per_second": 231.447, | |
| "eval_steps_per_second": 231.447, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 1.5333, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 1.7670602798461914, | |
| "eval_runtime": 0.9699, | |
| "eval_samples_per_second": 227.855, | |
| "eval_steps_per_second": 227.855, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 1.7843, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 1.7456753253936768, | |
| "eval_runtime": 0.9712, | |
| "eval_samples_per_second": 227.562, | |
| "eval_steps_per_second": 227.562, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2.9e-05, | |
| "loss": 2.0183, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 1.7233577966690063, | |
| "eval_runtime": 0.958, | |
| "eval_samples_per_second": 230.677, | |
| "eval_steps_per_second": 230.677, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 3e-05, | |
| "loss": 1.8216, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 1.6905887126922607, | |
| "eval_runtime": 0.9606, | |
| "eval_samples_per_second": 230.053, | |
| "eval_steps_per_second": 230.053, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 3.1e-05, | |
| "loss": 1.888, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 1.6758980751037598, | |
| "eval_runtime": 0.9674, | |
| "eval_samples_per_second": 228.455, | |
| "eval_steps_per_second": 228.455, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 1.8896, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 1.666009545326233, | |
| "eval_runtime": 0.9583, | |
| "eval_samples_per_second": 230.614, | |
| "eval_steps_per_second": 230.614, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 3.3e-05, | |
| "loss": 1.3656, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 1.653030276298523, | |
| "eval_runtime": 0.9609, | |
| "eval_samples_per_second": 229.999, | |
| "eval_steps_per_second": 229.999, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 1.236, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 1.6311849355697632, | |
| "eval_runtime": 0.9613, | |
| "eval_samples_per_second": 229.907, | |
| "eval_steps_per_second": 229.907, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.5e-05, | |
| "loss": 1.8584, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 1.6052279472351074, | |
| "eval_runtime": 0.965, | |
| "eval_samples_per_second": 229.005, | |
| "eval_steps_per_second": 229.005, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 3.6e-05, | |
| "loss": 2.239, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 1.5936833620071411, | |
| "eval_runtime": 0.9553, | |
| "eval_samples_per_second": 231.337, | |
| "eval_steps_per_second": 231.337, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.7e-05, | |
| "loss": 2.0927, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 1.5970150232315063, | |
| "eval_runtime": 0.9567, | |
| "eval_samples_per_second": 231.009, | |
| "eval_steps_per_second": 231.009, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 3.8e-05, | |
| "loss": 2.1403, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 1.5708112716674805, | |
| "eval_runtime": 0.9665, | |
| "eval_samples_per_second": 228.655, | |
| "eval_steps_per_second": 228.655, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 1.847, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 1.559340000152588, | |
| "eval_runtime": 0.9557, | |
| "eval_samples_per_second": 231.241, | |
| "eval_steps_per_second": 231.241, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4e-05, | |
| "loss": 1.8602, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 1.5475170612335205, | |
| "eval_runtime": 0.9571, | |
| "eval_samples_per_second": 230.908, | |
| "eval_steps_per_second": 230.908, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.1e-05, | |
| "loss": 1.7433, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 1.5273540019989014, | |
| "eval_runtime": 0.963, | |
| "eval_samples_per_second": 229.481, | |
| "eval_steps_per_second": 229.481, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.2e-05, | |
| "loss": 2.3477, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 1.5230623483657837, | |
| "eval_runtime": 0.9627, | |
| "eval_samples_per_second": 229.563, | |
| "eval_steps_per_second": 229.563, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.3e-05, | |
| "loss": 1.7683, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 1.5165642499923706, | |
| "eval_runtime": 0.9591, | |
| "eval_samples_per_second": 230.414, | |
| "eval_steps_per_second": 230.414, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 2.2624, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 1.5028367042541504, | |
| "eval_runtime": 0.9556, | |
| "eval_samples_per_second": 231.276, | |
| "eval_steps_per_second": 231.276, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.5e-05, | |
| "loss": 1.6912, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 1.502634882926941, | |
| "eval_runtime": 0.9564, | |
| "eval_samples_per_second": 231.064, | |
| "eval_steps_per_second": 231.064, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 1.7615, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 1.504465937614441, | |
| "eval_runtime": 0.9662, | |
| "eval_samples_per_second": 228.741, | |
| "eval_steps_per_second": 228.741, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.7e-05, | |
| "loss": 1.7967, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 1.4737327098846436, | |
| "eval_runtime": 0.9569, | |
| "eval_samples_per_second": 230.949, | |
| "eval_steps_per_second": 230.949, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8e-05, | |
| "loss": 1.8437, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 1.4702118635177612, | |
| "eval_runtime": 0.9597, | |
| "eval_samples_per_second": 230.291, | |
| "eval_steps_per_second": 230.291, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.9e-05, | |
| "loss": 1.1438, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 1.464814305305481, | |
| "eval_runtime": 0.9614, | |
| "eval_samples_per_second": 229.881, | |
| "eval_steps_per_second": 229.881, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 5e-05, | |
| "loss": 1.6064, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 1.470233678817749, | |
| "eval_runtime": 0.9662, | |
| "eval_samples_per_second": 228.74, | |
| "eval_steps_per_second": 228.74, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.9908491947291365e-05, | |
| "loss": 1.5118, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 1.4819172620773315, | |
| "eval_runtime": 0.9632, | |
| "eval_samples_per_second": 229.455, | |
| "eval_steps_per_second": 229.455, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.981698389458272e-05, | |
| "loss": 1.881, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 1.4433797597885132, | |
| "eval_runtime": 0.9566, | |
| "eval_samples_per_second": 231.033, | |
| "eval_steps_per_second": 231.033, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.9725475841874085e-05, | |
| "loss": 2.1581, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 1.419671654701233, | |
| "eval_runtime": 0.9583, | |
| "eval_samples_per_second": 230.618, | |
| "eval_steps_per_second": 230.618, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.963396778916545e-05, | |
| "loss": 2.0712, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 1.410863995552063, | |
| "eval_runtime": 0.9559, | |
| "eval_samples_per_second": 231.207, | |
| "eval_steps_per_second": 231.207, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.954245973645681e-05, | |
| "loss": 1.3743, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 1.4000402688980103, | |
| "eval_runtime": 0.9555, | |
| "eval_samples_per_second": 231.286, | |
| "eval_steps_per_second": 231.286, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.945095168374817e-05, | |
| "loss": 1.903, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 1.4015473127365112, | |
| "eval_runtime": 0.959, | |
| "eval_samples_per_second": 230.446, | |
| "eval_steps_per_second": 230.446, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.935944363103953e-05, | |
| "loss": 1.4631, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 1.411484956741333, | |
| "eval_runtime": 0.9552, | |
| "eval_samples_per_second": 231.362, | |
| "eval_steps_per_second": 231.362, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.92679355783309e-05, | |
| "loss": 0.9568, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 1.398590326309204, | |
| "eval_runtime": 0.9589, | |
| "eval_samples_per_second": 230.48, | |
| "eval_steps_per_second": 230.48, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.9176427525622256e-05, | |
| "loss": 1.7517, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 1.393122911453247, | |
| "eval_runtime": 0.9582, | |
| "eval_samples_per_second": 230.642, | |
| "eval_steps_per_second": 230.642, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.908491947291362e-05, | |
| "loss": 1.6826, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 1.3865119218826294, | |
| "eval_runtime": 0.9539, | |
| "eval_samples_per_second": 231.676, | |
| "eval_steps_per_second": 231.676, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.899341142020498e-05, | |
| "loss": 1.2387, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 1.3691400289535522, | |
| "eval_runtime": 0.9531, | |
| "eval_samples_per_second": 231.884, | |
| "eval_steps_per_second": 231.884, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.8901903367496345e-05, | |
| "loss": 1.3739, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 1.354348063468933, | |
| "eval_runtime": 0.9498, | |
| "eval_samples_per_second": 232.677, | |
| "eval_steps_per_second": 232.677, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.88103953147877e-05, | |
| "loss": 1.5507, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 1.3457704782485962, | |
| "eval_runtime": 0.9512, | |
| "eval_samples_per_second": 232.33, | |
| "eval_steps_per_second": 232.33, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.8718887262079065e-05, | |
| "loss": 1.125, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 1.3545788526535034, | |
| "eval_runtime": 0.9488, | |
| "eval_samples_per_second": 232.923, | |
| "eval_steps_per_second": 232.923, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.862737920937043e-05, | |
| "loss": 1.0714, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 1.3615885972976685, | |
| "eval_runtime": 0.9543, | |
| "eval_samples_per_second": 231.572, | |
| "eval_steps_per_second": 231.572, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.853587115666179e-05, | |
| "loss": 1.8402, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 1.336154580116272, | |
| "eval_runtime": 0.955, | |
| "eval_samples_per_second": 231.416, | |
| "eval_steps_per_second": 231.416, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.844436310395315e-05, | |
| "loss": 0.9943, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 1.3274977207183838, | |
| "eval_runtime": 0.953, | |
| "eval_samples_per_second": 231.905, | |
| "eval_steps_per_second": 231.905, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.835285505124451e-05, | |
| "loss": 0.7192, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 1.3557361364364624, | |
| "eval_runtime": 0.95, | |
| "eval_samples_per_second": 232.627, | |
| "eval_steps_per_second": 232.627, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.826134699853587e-05, | |
| "loss": 1.185, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 1.3398077487945557, | |
| "eval_runtime": 0.9487, | |
| "eval_samples_per_second": 232.96, | |
| "eval_steps_per_second": 232.96, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.8169838945827236e-05, | |
| "loss": 1.7011, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 1.3305870294570923, | |
| "eval_runtime": 0.9517, | |
| "eval_samples_per_second": 232.225, | |
| "eval_steps_per_second": 232.225, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.807833089311859e-05, | |
| "loss": 1.2217, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 1.3255352973937988, | |
| "eval_runtime": 0.9531, | |
| "eval_samples_per_second": 231.868, | |
| "eval_steps_per_second": 231.868, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.7986822840409956e-05, | |
| "loss": 1.1109, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 1.3300060033798218, | |
| "eval_runtime": 0.9533, | |
| "eval_samples_per_second": 231.835, | |
| "eval_steps_per_second": 231.835, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.789531478770132e-05, | |
| "loss": 1.6623, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 1.3133432865142822, | |
| "eval_runtime": 0.9532, | |
| "eval_samples_per_second": 231.857, | |
| "eval_steps_per_second": 231.857, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.780380673499268e-05, | |
| "loss": 1.2793, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 1.3158154487609863, | |
| "eval_runtime": 0.9571, | |
| "eval_samples_per_second": 230.91, | |
| "eval_steps_per_second": 230.91, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.7712298682284045e-05, | |
| "loss": 0.9004, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 1.3127093315124512, | |
| "eval_runtime": 0.9551, | |
| "eval_samples_per_second": 231.391, | |
| "eval_steps_per_second": 231.391, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.762079062957541e-05, | |
| "loss": 1.2883, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 1.294966459274292, | |
| "eval_runtime": 0.9473, | |
| "eval_samples_per_second": 233.288, | |
| "eval_steps_per_second": 233.288, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.752928257686677e-05, | |
| "loss": 1.265, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 1.2921117544174194, | |
| "eval_runtime": 0.9529, | |
| "eval_samples_per_second": 231.932, | |
| "eval_steps_per_second": 231.932, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.743777452415813e-05, | |
| "loss": 1.2239, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 1.2857725620269775, | |
| "eval_runtime": 0.9543, | |
| "eval_samples_per_second": 231.584, | |
| "eval_steps_per_second": 231.584, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.734626647144949e-05, | |
| "loss": 2.1154, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 1.2766932249069214, | |
| "eval_runtime": 0.9565, | |
| "eval_samples_per_second": 231.054, | |
| "eval_steps_per_second": 231.054, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.725475841874085e-05, | |
| "loss": 1.3946, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 1.2669578790664673, | |
| "eval_runtime": 0.955, | |
| "eval_samples_per_second": 231.403, | |
| "eval_steps_per_second": 231.403, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.7163250366032216e-05, | |
| "loss": 1.4104, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 1.2644246816635132, | |
| "eval_runtime": 0.9528, | |
| "eval_samples_per_second": 231.955, | |
| "eval_steps_per_second": 231.955, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.707174231332357e-05, | |
| "loss": 1.537, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 1.2694379091262817, | |
| "eval_runtime": 0.9604, | |
| "eval_samples_per_second": 230.102, | |
| "eval_steps_per_second": 230.102, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.6980234260614936e-05, | |
| "loss": 1.763, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_loss": 1.2598960399627686, | |
| "eval_runtime": 0.9538, | |
| "eval_samples_per_second": 231.696, | |
| "eval_steps_per_second": 231.696, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.68887262079063e-05, | |
| "loss": 1.1782, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_loss": 1.254785180091858, | |
| "eval_runtime": 0.9595, | |
| "eval_samples_per_second": 230.338, | |
| "eval_steps_per_second": 230.338, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.679721815519766e-05, | |
| "loss": 1.1035, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_loss": 1.2500094175338745, | |
| "eval_runtime": 0.9515, | |
| "eval_samples_per_second": 232.253, | |
| "eval_steps_per_second": 232.253, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.670571010248902e-05, | |
| "loss": 0.8982, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_loss": 1.2746824026107788, | |
| "eval_runtime": 0.9528, | |
| "eval_samples_per_second": 231.939, | |
| "eval_steps_per_second": 231.939, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.661420204978038e-05, | |
| "loss": 0.8505, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_loss": 1.2610752582550049, | |
| "eval_runtime": 0.9595, | |
| "eval_samples_per_second": 230.318, | |
| "eval_steps_per_second": 230.318, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.6522693997071744e-05, | |
| "loss": 1.1741, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_loss": 1.2515610456466675, | |
| "eval_runtime": 0.9586, | |
| "eval_samples_per_second": 230.533, | |
| "eval_steps_per_second": 230.533, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.643118594436311e-05, | |
| "loss": 1.5263, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 1.2399991750717163, | |
| "eval_runtime": 0.9649, | |
| "eval_samples_per_second": 229.03, | |
| "eval_steps_per_second": 229.03, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.633967789165447e-05, | |
| "loss": 0.8784, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 1.2559657096862793, | |
| "eval_runtime": 0.9671, | |
| "eval_samples_per_second": 228.507, | |
| "eval_steps_per_second": 228.507, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.624816983894583e-05, | |
| "loss": 1.32, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 1.2556980848312378, | |
| "eval_runtime": 0.9597, | |
| "eval_samples_per_second": 230.278, | |
| "eval_steps_per_second": 230.278, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.6156661786237196e-05, | |
| "loss": 1.5417, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 1.236193060874939, | |
| "eval_runtime": 0.9587, | |
| "eval_samples_per_second": 230.532, | |
| "eval_steps_per_second": 230.532, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.606515373352855e-05, | |
| "loss": 0.7619, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 1.2272766828536987, | |
| "eval_runtime": 0.9643, | |
| "eval_samples_per_second": 229.187, | |
| "eval_steps_per_second": 229.187, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.5973645680819915e-05, | |
| "loss": 1.4071, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 1.2210743427276611, | |
| "eval_runtime": 0.9622, | |
| "eval_samples_per_second": 229.674, | |
| "eval_steps_per_second": 229.674, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.588213762811128e-05, | |
| "loss": 1.2309, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_loss": 1.2143139839172363, | |
| "eval_runtime": 0.9605, | |
| "eval_samples_per_second": 230.088, | |
| "eval_steps_per_second": 230.088, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.579062957540264e-05, | |
| "loss": 1.0308, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_loss": 1.215963363647461, | |
| "eval_runtime": 0.968, | |
| "eval_samples_per_second": 228.307, | |
| "eval_steps_per_second": 228.307, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.5699121522694e-05, | |
| "loss": 0.7845, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 1.2274197340011597, | |
| "eval_runtime": 0.9712, | |
| "eval_samples_per_second": 227.556, | |
| "eval_steps_per_second": 227.556, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.560761346998536e-05, | |
| "loss": 1.8056, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 1.198835015296936, | |
| "eval_runtime": 0.9605, | |
| "eval_samples_per_second": 230.079, | |
| "eval_steps_per_second": 230.079, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.5516105417276724e-05, | |
| "loss": 1.1374, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_loss": 1.2004196643829346, | |
| "eval_runtime": 0.9623, | |
| "eval_samples_per_second": 229.652, | |
| "eval_steps_per_second": 229.652, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.542459736456808e-05, | |
| "loss": 1.3452, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_loss": 1.2066832780838013, | |
| "eval_runtime": 0.9585, | |
| "eval_samples_per_second": 230.566, | |
| "eval_steps_per_second": 230.566, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.533308931185944e-05, | |
| "loss": 1.3906, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 1.1961842775344849, | |
| "eval_runtime": 0.9558, | |
| "eval_samples_per_second": 231.226, | |
| "eval_steps_per_second": 231.226, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.5241581259150806e-05, | |
| "loss": 1.25, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 1.1854100227355957, | |
| "eval_runtime": 0.9583, | |
| "eval_samples_per_second": 230.615, | |
| "eval_steps_per_second": 230.615, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.515007320644217e-05, | |
| "loss": 1.2051, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_loss": 1.184650182723999, | |
| "eval_runtime": 0.9622, | |
| "eval_samples_per_second": 229.673, | |
| "eval_steps_per_second": 229.673, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.5058565153733526e-05, | |
| "loss": 0.9596, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_loss": 1.2029228210449219, | |
| "eval_runtime": 0.9618, | |
| "eval_samples_per_second": 229.779, | |
| "eval_steps_per_second": 229.779, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.496705710102489e-05, | |
| "loss": 1.3183, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_loss": 1.180678367614746, | |
| "eval_runtime": 0.9664, | |
| "eval_samples_per_second": 228.688, | |
| "eval_steps_per_second": 228.688, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.487554904831626e-05, | |
| "loss": 1.006, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_loss": 1.1920363903045654, | |
| "eval_runtime": 0.9641, | |
| "eval_samples_per_second": 229.239, | |
| "eval_steps_per_second": 229.239, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.4784040995607615e-05, | |
| "loss": 0.9121, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_loss": 1.1910887956619263, | |
| "eval_runtime": 0.9586, | |
| "eval_samples_per_second": 230.539, | |
| "eval_steps_per_second": 230.539, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.469253294289898e-05, | |
| "loss": 2.176, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_loss": 1.1731313467025757, | |
| "eval_runtime": 0.9599, | |
| "eval_samples_per_second": 230.234, | |
| "eval_steps_per_second": 230.234, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.460102489019034e-05, | |
| "loss": 1.7789, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_loss": 1.1691458225250244, | |
| "eval_runtime": 0.9571, | |
| "eval_samples_per_second": 230.9, | |
| "eval_steps_per_second": 230.9, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.4509516837481704e-05, | |
| "loss": 1.4745, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_loss": 1.1677985191345215, | |
| "eval_runtime": 0.9655, | |
| "eval_samples_per_second": 228.905, | |
| "eval_steps_per_second": 228.905, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.441800878477306e-05, | |
| "loss": 0.9953, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 1.1719450950622559, | |
| "eval_runtime": 0.9648, | |
| "eval_samples_per_second": 229.061, | |
| "eval_steps_per_second": 229.061, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.432650073206442e-05, | |
| "loss": 1.298, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 1.1667665243148804, | |
| "eval_runtime": 0.9554, | |
| "eval_samples_per_second": 231.322, | |
| "eval_steps_per_second": 231.322, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.4234992679355786e-05, | |
| "loss": 1.331, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_loss": 1.1645408868789673, | |
| "eval_runtime": 0.9701, | |
| "eval_samples_per_second": 227.801, | |
| "eval_steps_per_second": 227.801, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.414348462664715e-05, | |
| "loss": 1.3143, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_loss": 1.1627283096313477, | |
| "eval_runtime": 0.9574, | |
| "eval_samples_per_second": 230.829, | |
| "eval_steps_per_second": 230.829, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.4051976573938506e-05, | |
| "loss": 1.0592, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 1.1780223846435547, | |
| "eval_runtime": 0.9545, | |
| "eval_samples_per_second": 231.531, | |
| "eval_steps_per_second": 231.531, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.396046852122987e-05, | |
| "loss": 1.1096, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 1.1691240072250366, | |
| "eval_runtime": 0.9554, | |
| "eval_samples_per_second": 231.325, | |
| "eval_steps_per_second": 231.325, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.386896046852123e-05, | |
| "loss": 1.1909, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 1.1668262481689453, | |
| "eval_runtime": 0.9576, | |
| "eval_samples_per_second": 230.789, | |
| "eval_steps_per_second": 230.789, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.3777452415812595e-05, | |
| "loss": 1.0633, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 1.1455942392349243, | |
| "eval_runtime": 0.9577, | |
| "eval_samples_per_second": 230.772, | |
| "eval_steps_per_second": 230.772, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.368594436310395e-05, | |
| "loss": 1.8141, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 1.1431795358657837, | |
| "eval_runtime": 0.9668, | |
| "eval_samples_per_second": 228.581, | |
| "eval_steps_per_second": 228.581, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.3594436310395314e-05, | |
| "loss": 1.4685, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 1.1381019353866577, | |
| "eval_runtime": 0.956, | |
| "eval_samples_per_second": 231.169, | |
| "eval_steps_per_second": 231.169, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.350292825768668e-05, | |
| "loss": 0.8561, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_loss": 1.1337873935699463, | |
| "eval_runtime": 0.9539, | |
| "eval_samples_per_second": 231.673, | |
| "eval_steps_per_second": 231.673, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.341142020497804e-05, | |
| "loss": 1.5765, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_loss": 1.1372144222259521, | |
| "eval_runtime": 0.9569, | |
| "eval_samples_per_second": 230.945, | |
| "eval_steps_per_second": 230.945, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.33199121522694e-05, | |
| "loss": 1.1333, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_loss": 1.1468873023986816, | |
| "eval_runtime": 0.9528, | |
| "eval_samples_per_second": 231.958, | |
| "eval_steps_per_second": 231.958, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.3228404099560766e-05, | |
| "loss": 1.2821, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_loss": 1.1511610746383667, | |
| "eval_runtime": 0.9553, | |
| "eval_samples_per_second": 231.336, | |
| "eval_steps_per_second": 231.336, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.313689604685213e-05, | |
| "loss": 1.2568, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 1.142638087272644, | |
| "eval_runtime": 0.9539, | |
| "eval_samples_per_second": 231.668, | |
| "eval_steps_per_second": 231.668, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.3045387994143486e-05, | |
| "loss": 1.4843, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 1.1400805711746216, | |
| "eval_runtime": 0.952, | |
| "eval_samples_per_second": 232.148, | |
| "eval_steps_per_second": 232.148, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.295387994143485e-05, | |
| "loss": 1.4071, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 1.1460652351379395, | |
| "eval_runtime": 0.9548, | |
| "eval_samples_per_second": 231.466, | |
| "eval_steps_per_second": 231.466, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.286237188872621e-05, | |
| "loss": 1.3752, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 1.1431547403335571, | |
| "eval_runtime": 0.957, | |
| "eval_samples_per_second": 230.923, | |
| "eval_steps_per_second": 230.923, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.2770863836017575e-05, | |
| "loss": 1.4032, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 1.1358671188354492, | |
| "eval_runtime": 0.9636, | |
| "eval_samples_per_second": 229.352, | |
| "eval_steps_per_second": 229.352, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.267935578330893e-05, | |
| "loss": 1.2408, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 1.1381754875183105, | |
| "eval_runtime": 0.9667, | |
| "eval_samples_per_second": 228.608, | |
| "eval_steps_per_second": 228.608, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.2587847730600294e-05, | |
| "loss": 1.783, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_loss": 1.12925386428833, | |
| "eval_runtime": 0.962, | |
| "eval_samples_per_second": 229.723, | |
| "eval_steps_per_second": 229.723, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.249633967789166e-05, | |
| "loss": 0.8271, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_loss": 1.1325204372406006, | |
| "eval_runtime": 0.9527, | |
| "eval_samples_per_second": 231.97, | |
| "eval_steps_per_second": 231.97, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.240483162518302e-05, | |
| "loss": 1.2088, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_loss": 1.1299396753311157, | |
| "eval_runtime": 0.9591, | |
| "eval_samples_per_second": 230.424, | |
| "eval_steps_per_second": 230.424, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.2313323572474376e-05, | |
| "loss": 1.2929, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_loss": 1.1217139959335327, | |
| "eval_runtime": 0.959, | |
| "eval_samples_per_second": 230.459, | |
| "eval_steps_per_second": 230.459, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.222181551976574e-05, | |
| "loss": 1.4657, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 1.1141811609268188, | |
| "eval_runtime": 0.9593, | |
| "eval_samples_per_second": 230.382, | |
| "eval_steps_per_second": 230.382, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.21303074670571e-05, | |
| "loss": 1.1904, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 1.1085792779922485, | |
| "eval_runtime": 0.9613, | |
| "eval_samples_per_second": 229.902, | |
| "eval_steps_per_second": 229.902, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.203879941434846e-05, | |
| "loss": 1.3595, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_loss": 1.1091320514678955, | |
| "eval_runtime": 0.9559, | |
| "eval_samples_per_second": 231.188, | |
| "eval_steps_per_second": 231.188, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.194729136163983e-05, | |
| "loss": 1.6091, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_loss": 1.0970268249511719, | |
| "eval_runtime": 0.9652, | |
| "eval_samples_per_second": 228.965, | |
| "eval_steps_per_second": 228.965, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.185578330893119e-05, | |
| "loss": 1.0164, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 1.0938998460769653, | |
| "eval_runtime": 0.961, | |
| "eval_samples_per_second": 229.965, | |
| "eval_steps_per_second": 229.965, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.1764275256222555e-05, | |
| "loss": 1.2481, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 1.104056477546692, | |
| "eval_runtime": 0.9576, | |
| "eval_samples_per_second": 230.787, | |
| "eval_steps_per_second": 230.787, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.167276720351391e-05, | |
| "loss": 1.2545, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_loss": 1.0931925773620605, | |
| "eval_runtime": 0.9559, | |
| "eval_samples_per_second": 231.206, | |
| "eval_steps_per_second": 231.206, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.1581259150805274e-05, | |
| "loss": 0.6638, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_loss": 1.0907045602798462, | |
| "eval_runtime": 0.9563, | |
| "eval_samples_per_second": 231.097, | |
| "eval_steps_per_second": 231.097, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.148975109809664e-05, | |
| "loss": 1.454, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_loss": 1.094430685043335, | |
| "eval_runtime": 0.9605, | |
| "eval_samples_per_second": 230.083, | |
| "eval_steps_per_second": 230.083, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.139824304538799e-05, | |
| "loss": 1.3201, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_loss": 1.087951421737671, | |
| "eval_runtime": 0.9643, | |
| "eval_samples_per_second": 229.171, | |
| "eval_steps_per_second": 229.171, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.1306734992679356e-05, | |
| "loss": 1.4448, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_loss": 1.0853744745254517, | |
| "eval_runtime": 0.9556, | |
| "eval_samples_per_second": 231.257, | |
| "eval_steps_per_second": 231.257, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.121522693997072e-05, | |
| "loss": 0.9926, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_loss": 1.079766035079956, | |
| "eval_runtime": 0.956, | |
| "eval_samples_per_second": 231.172, | |
| "eval_steps_per_second": 231.172, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.112371888726208e-05, | |
| "loss": 1.3543, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_loss": 1.0773032903671265, | |
| "eval_runtime": 0.9572, | |
| "eval_samples_per_second": 230.88, | |
| "eval_steps_per_second": 230.88, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.103221083455344e-05, | |
| "loss": 1.3722, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_loss": 1.0744614601135254, | |
| "eval_runtime": 0.96, | |
| "eval_samples_per_second": 230.216, | |
| "eval_steps_per_second": 230.216, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.09407027818448e-05, | |
| "loss": 1.3096, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_loss": 1.0665591955184937, | |
| "eval_runtime": 0.9664, | |
| "eval_samples_per_second": 228.678, | |
| "eval_steps_per_second": 228.678, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.0849194729136165e-05, | |
| "loss": 1.2229, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_loss": 1.063207745552063, | |
| "eval_runtime": 0.9534, | |
| "eval_samples_per_second": 231.803, | |
| "eval_steps_per_second": 231.803, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.075768667642753e-05, | |
| "loss": 1.3215, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_loss": 1.0718333721160889, | |
| "eval_runtime": 0.9536, | |
| "eval_samples_per_second": 231.755, | |
| "eval_steps_per_second": 231.755, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.0666178623718884e-05, | |
| "loss": 0.9868, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_loss": 1.0817238092422485, | |
| "eval_runtime": 0.9533, | |
| "eval_samples_per_second": 231.827, | |
| "eval_steps_per_second": 231.827, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.057467057101025e-05, | |
| "loss": 1.0924, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 1.0640230178833008, | |
| "eval_runtime": 0.9552, | |
| "eval_samples_per_second": 231.364, | |
| "eval_steps_per_second": 231.364, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.048316251830162e-05, | |
| "loss": 1.1202, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 1.0627166032791138, | |
| "eval_runtime": 0.9558, | |
| "eval_samples_per_second": 231.218, | |
| "eval_steps_per_second": 231.218, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.039165446559297e-05, | |
| "loss": 0.4092, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_loss": 1.0712403059005737, | |
| "eval_runtime": 0.9594, | |
| "eval_samples_per_second": 230.347, | |
| "eval_steps_per_second": 230.347, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.0300146412884336e-05, | |
| "loss": 1.4682, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_loss": 1.067681908607483, | |
| "eval_runtime": 0.9565, | |
| "eval_samples_per_second": 231.063, | |
| "eval_steps_per_second": 231.063, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.02086383601757e-05, | |
| "loss": 0.9136, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_loss": 1.0556412935256958, | |
| "eval_runtime": 0.9649, | |
| "eval_samples_per_second": 229.04, | |
| "eval_steps_per_second": 229.04, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.011713030746706e-05, | |
| "loss": 0.7701, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_loss": 1.0573418140411377, | |
| "eval_runtime": 0.9569, | |
| "eval_samples_per_second": 230.954, | |
| "eval_steps_per_second": 230.954, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.002562225475842e-05, | |
| "loss": 0.7752, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 1.052242398262024, | |
| "eval_runtime": 0.9554, | |
| "eval_samples_per_second": 231.311, | |
| "eval_steps_per_second": 231.311, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.993411420204978e-05, | |
| "loss": 1.0774, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 1.0520769357681274, | |
| "eval_runtime": 0.953, | |
| "eval_samples_per_second": 231.897, | |
| "eval_steps_per_second": 231.897, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.9842606149341145e-05, | |
| "loss": 1.3267, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_loss": 1.0504757165908813, | |
| "eval_runtime": 0.9581, | |
| "eval_samples_per_second": 230.665, | |
| "eval_steps_per_second": 230.665, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.975109809663251e-05, | |
| "loss": 1.3089, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_loss": 1.0495104789733887, | |
| "eval_runtime": 0.9523, | |
| "eval_samples_per_second": 232.068, | |
| "eval_steps_per_second": 232.068, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.9659590043923864e-05, | |
| "loss": 0.945, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_loss": 1.0474438667297363, | |
| "eval_runtime": 0.9571, | |
| "eval_samples_per_second": 230.913, | |
| "eval_steps_per_second": 230.913, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.956808199121523e-05, | |
| "loss": 0.8179, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_loss": 1.0598093271255493, | |
| "eval_runtime": 0.9538, | |
| "eval_samples_per_second": 231.703, | |
| "eval_steps_per_second": 231.703, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.947657393850659e-05, | |
| "loss": 0.5567, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_loss": 1.0466389656066895, | |
| "eval_runtime": 0.9523, | |
| "eval_samples_per_second": 232.062, | |
| "eval_steps_per_second": 232.062, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.938506588579795e-05, | |
| "loss": 1.1638, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 1.0368340015411377, | |
| "eval_runtime": 0.953, | |
| "eval_samples_per_second": 231.894, | |
| "eval_steps_per_second": 231.894, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.929355783308931e-05, | |
| "loss": 0.8247, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 1.03778076171875, | |
| "eval_runtime": 0.9505, | |
| "eval_samples_per_second": 232.512, | |
| "eval_steps_per_second": 232.512, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.920204978038067e-05, | |
| "loss": 1.2614, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 1.0412153005599976, | |
| "eval_runtime": 0.9754, | |
| "eval_samples_per_second": 226.565, | |
| "eval_steps_per_second": 226.565, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.9110541727672036e-05, | |
| "loss": 1.0292, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 1.0404280424118042, | |
| "eval_runtime": 0.9699, | |
| "eval_samples_per_second": 227.849, | |
| "eval_steps_per_second": 227.849, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.90190336749634e-05, | |
| "loss": 0.7724, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_loss": 1.0412232875823975, | |
| "eval_runtime": 0.9483, | |
| "eval_samples_per_second": 233.059, | |
| "eval_steps_per_second": 233.059, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.892752562225476e-05, | |
| "loss": 1.1667, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_loss": 1.0384390354156494, | |
| "eval_runtime": 0.9483, | |
| "eval_samples_per_second": 233.039, | |
| "eval_steps_per_second": 233.039, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.8836017569546125e-05, | |
| "loss": 0.9633, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_loss": 1.0354883670806885, | |
| "eval_runtime": 0.949, | |
| "eval_samples_per_second": 232.874, | |
| "eval_steps_per_second": 232.874, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.874450951683749e-05, | |
| "loss": 0.9494, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_loss": 1.0366523265838623, | |
| "eval_runtime": 0.9465, | |
| "eval_samples_per_second": 233.496, | |
| "eval_steps_per_second": 233.496, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.8653001464128844e-05, | |
| "loss": 0.7663, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_loss": 1.0304687023162842, | |
| "eval_runtime": 0.9478, | |
| "eval_samples_per_second": 233.16, | |
| "eval_steps_per_second": 233.16, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.856149341142021e-05, | |
| "loss": 1.1879, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_loss": 1.0454905033111572, | |
| "eval_runtime": 0.9507, | |
| "eval_samples_per_second": 232.451, | |
| "eval_steps_per_second": 232.451, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.846998535871157e-05, | |
| "loss": 1.4189, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_loss": 1.0361486673355103, | |
| "eval_runtime": 0.9528, | |
| "eval_samples_per_second": 231.936, | |
| "eval_steps_per_second": 231.936, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.837847730600293e-05, | |
| "loss": 0.926, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_loss": 1.0228257179260254, | |
| "eval_runtime": 0.9485, | |
| "eval_samples_per_second": 232.987, | |
| "eval_steps_per_second": 232.987, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.828696925329429e-05, | |
| "loss": 1.2822, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 1.0228904485702515, | |
| "eval_runtime": 0.9521, | |
| "eval_samples_per_second": 232.122, | |
| "eval_steps_per_second": 232.122, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.819546120058565e-05, | |
| "loss": 1.2466, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 1.0200704336166382, | |
| "eval_runtime": 0.9498, | |
| "eval_samples_per_second": 232.691, | |
| "eval_steps_per_second": 232.691, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.8103953147877016e-05, | |
| "loss": 1.2439, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_loss": 1.0220086574554443, | |
| "eval_runtime": 0.9511, | |
| "eval_samples_per_second": 232.371, | |
| "eval_steps_per_second": 232.371, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.801244509516838e-05, | |
| "loss": 1.0168, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_loss": 1.0307519435882568, | |
| "eval_runtime": 0.9532, | |
| "eval_samples_per_second": 231.842, | |
| "eval_steps_per_second": 231.842, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.7920937042459735e-05, | |
| "loss": 0.8673, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_loss": 1.0238043069839478, | |
| "eval_runtime": 0.9483, | |
| "eval_samples_per_second": 233.053, | |
| "eval_steps_per_second": 233.053, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.78294289897511e-05, | |
| "loss": 0.9448, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_loss": 1.016867756843567, | |
| "eval_runtime": 0.9515, | |
| "eval_samples_per_second": 232.265, | |
| "eval_steps_per_second": 232.265, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.773792093704246e-05, | |
| "loss": 0.8879, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 1.0296683311462402, | |
| "eval_runtime": 0.9483, | |
| "eval_samples_per_second": 233.038, | |
| "eval_steps_per_second": 233.038, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.764641288433382e-05, | |
| "loss": 0.9863, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 1.0213333368301392, | |
| "eval_runtime": 0.9496, | |
| "eval_samples_per_second": 232.735, | |
| "eval_steps_per_second": 232.735, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.755490483162519e-05, | |
| "loss": 1.2896, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_loss": 1.0285658836364746, | |
| "eval_runtime": 0.9509, | |
| "eval_samples_per_second": 232.403, | |
| "eval_steps_per_second": 232.403, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.746339677891655e-05, | |
| "loss": 0.9763, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_loss": 1.0154588222503662, | |
| "eval_runtime": 0.953, | |
| "eval_samples_per_second": 231.893, | |
| "eval_steps_per_second": 231.893, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.737188872620791e-05, | |
| "loss": 1.3132, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 1.0065727233886719, | |
| "eval_runtime": 0.9546, | |
| "eval_samples_per_second": 231.499, | |
| "eval_steps_per_second": 231.499, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.728038067349927e-05, | |
| "loss": 1.071, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 1.0023553371429443, | |
| "eval_runtime": 0.9511, | |
| "eval_samples_per_second": 232.374, | |
| "eval_steps_per_second": 232.374, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.718887262079063e-05, | |
| "loss": 0.6794, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 0.9976633787155151, | |
| "eval_runtime": 0.9509, | |
| "eval_samples_per_second": 232.415, | |
| "eval_steps_per_second": 232.415, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.7097364568081995e-05, | |
| "loss": 1.3206, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 0.996859610080719, | |
| "eval_runtime": 0.9569, | |
| "eval_samples_per_second": 230.959, | |
| "eval_steps_per_second": 230.959, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.700585651537335e-05, | |
| "loss": 1.5452, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_loss": 0.9938207268714905, | |
| "eval_runtime": 0.9696, | |
| "eval_samples_per_second": 227.934, | |
| "eval_steps_per_second": 227.934, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.6914348462664715e-05, | |
| "loss": 0.8027, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_loss": 1.0008713006973267, | |
| "eval_runtime": 0.957, | |
| "eval_samples_per_second": 230.919, | |
| "eval_steps_per_second": 230.919, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.682284040995608e-05, | |
| "loss": 1.1007, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_loss": 0.9957152605056763, | |
| "eval_runtime": 0.9598, | |
| "eval_samples_per_second": 230.246, | |
| "eval_steps_per_second": 230.246, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.673133235724744e-05, | |
| "loss": 1.1719, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_loss": 0.991396963596344, | |
| "eval_runtime": 0.9584, | |
| "eval_samples_per_second": 230.604, | |
| "eval_steps_per_second": 230.604, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.66398243045388e-05, | |
| "loss": 1.585, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 0.9895206689834595, | |
| "eval_runtime": 0.9539, | |
| "eval_samples_per_second": 231.679, | |
| "eval_steps_per_second": 231.679, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.654831625183016e-05, | |
| "loss": 0.6906, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 0.9879434704780579, | |
| "eval_runtime": 0.9549, | |
| "eval_samples_per_second": 231.427, | |
| "eval_steps_per_second": 231.427, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.645680819912152e-05, | |
| "loss": 1.2103, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.9838025569915771, | |
| "eval_runtime": 0.9623, | |
| "eval_samples_per_second": 229.658, | |
| "eval_steps_per_second": 229.658, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.6365300146412886e-05, | |
| "loss": 1.5245, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.9860533475875854, | |
| "eval_runtime": 0.958, | |
| "eval_samples_per_second": 230.678, | |
| "eval_steps_per_second": 230.678, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.627379209370424e-05, | |
| "loss": 1.0269, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "eval_loss": 0.9929577708244324, | |
| "eval_runtime": 0.9587, | |
| "eval_samples_per_second": 230.521, | |
| "eval_steps_per_second": 230.521, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.618228404099561e-05, | |
| "loss": 1.0424, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "eval_loss": 0.9965341091156006, | |
| "eval_runtime": 0.9521, | |
| "eval_samples_per_second": 232.127, | |
| "eval_steps_per_second": 232.127, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.6090775988286975e-05, | |
| "loss": 0.8317, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 0.9904709458351135, | |
| "eval_runtime": 0.9552, | |
| "eval_samples_per_second": 231.361, | |
| "eval_steps_per_second": 231.361, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.599926793557833e-05, | |
| "loss": 0.6817, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 0.9927685856819153, | |
| "eval_runtime": 0.9605, | |
| "eval_samples_per_second": 230.089, | |
| "eval_steps_per_second": 230.089, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.5907759882869695e-05, | |
| "loss": 1.0936, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_loss": 0.987776517868042, | |
| "eval_runtime": 0.9589, | |
| "eval_samples_per_second": 230.475, | |
| "eval_steps_per_second": 230.475, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.581625183016106e-05, | |
| "loss": 0.8312, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_loss": 0.9806318283081055, | |
| "eval_runtime": 0.9662, | |
| "eval_samples_per_second": 228.728, | |
| "eval_steps_per_second": 228.728, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.572474377745242e-05, | |
| "loss": 0.7925, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_loss": 0.9841748476028442, | |
| "eval_runtime": 0.9661, | |
| "eval_samples_per_second": 228.748, | |
| "eval_steps_per_second": 228.748, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.563323572474378e-05, | |
| "loss": 0.9318, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_loss": 1.0040175914764404, | |
| "eval_runtime": 0.9633, | |
| "eval_samples_per_second": 229.424, | |
| "eval_steps_per_second": 229.424, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.554172767203514e-05, | |
| "loss": 1.0127, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_loss": 0.9929489493370056, | |
| "eval_runtime": 0.9567, | |
| "eval_samples_per_second": 231.014, | |
| "eval_steps_per_second": 231.014, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.54502196193265e-05, | |
| "loss": 0.9305, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_loss": 0.9910447001457214, | |
| "eval_runtime": 0.9577, | |
| "eval_samples_per_second": 230.759, | |
| "eval_steps_per_second": 230.759, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.5358711566617866e-05, | |
| "loss": 0.8096, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_loss": 0.981231153011322, | |
| "eval_runtime": 0.9607, | |
| "eval_samples_per_second": 230.043, | |
| "eval_steps_per_second": 230.043, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.526720351390922e-05, | |
| "loss": 1.1506, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_loss": 0.9730401635169983, | |
| "eval_runtime": 0.9557, | |
| "eval_samples_per_second": 231.239, | |
| "eval_steps_per_second": 231.239, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.5175695461200586e-05, | |
| "loss": 0.6672, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_loss": 0.9780378341674805, | |
| "eval_runtime": 0.9595, | |
| "eval_samples_per_second": 230.319, | |
| "eval_steps_per_second": 230.319, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.508418740849195e-05, | |
| "loss": 0.6234, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_loss": 1.0018703937530518, | |
| "eval_runtime": 0.9575, | |
| "eval_samples_per_second": 230.82, | |
| "eval_steps_per_second": 230.82, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.499267935578331e-05, | |
| "loss": 0.6104, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 1.0167769193649292, | |
| "eval_runtime": 0.9636, | |
| "eval_samples_per_second": 229.34, | |
| "eval_steps_per_second": 229.34, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.490117130307467e-05, | |
| "loss": 0.8127, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 0.9948529005050659, | |
| "eval_runtime": 0.9614, | |
| "eval_samples_per_second": 229.883, | |
| "eval_steps_per_second": 229.883, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.480966325036603e-05, | |
| "loss": 1.1747, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 0.9925655126571655, | |
| "eval_runtime": 0.9669, | |
| "eval_samples_per_second": 228.556, | |
| "eval_steps_per_second": 228.556, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.47181551976574e-05, | |
| "loss": 0.8788, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 0.9960690140724182, | |
| "eval_runtime": 0.9671, | |
| "eval_samples_per_second": 228.517, | |
| "eval_steps_per_second": 228.517, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.462664714494876e-05, | |
| "loss": 1.1523, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_loss": 0.9957139492034912, | |
| "eval_runtime": 0.9792, | |
| "eval_samples_per_second": 225.698, | |
| "eval_steps_per_second": 225.698, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.453513909224012e-05, | |
| "loss": 0.8191, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_loss": 0.9894182682037354, | |
| "eval_runtime": 1.0005, | |
| "eval_samples_per_second": 220.883, | |
| "eval_steps_per_second": 220.883, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.444363103953148e-05, | |
| "loss": 0.9451, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_loss": 0.9845729470252991, | |
| "eval_runtime": 1.0063, | |
| "eval_samples_per_second": 219.62, | |
| "eval_steps_per_second": 219.62, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.4352122986822846e-05, | |
| "loss": 1.0535, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_loss": 0.9834311008453369, | |
| "eval_runtime": 1.005, | |
| "eval_samples_per_second": 219.905, | |
| "eval_steps_per_second": 219.905, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.42606149341142e-05, | |
| "loss": 1.0205, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_loss": 0.9936839938163757, | |
| "eval_runtime": 1.0108, | |
| "eval_samples_per_second": 218.63, | |
| "eval_steps_per_second": 218.63, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.4169106881405566e-05, | |
| "loss": 0.6887, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_loss": 0.9953834414482117, | |
| "eval_runtime": 1.0014, | |
| "eval_samples_per_second": 220.692, | |
| "eval_steps_per_second": 220.692, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.407759882869693e-05, | |
| "loss": 1.2094, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_loss": 0.9885807633399963, | |
| "eval_runtime": 1.0043, | |
| "eval_samples_per_second": 220.058, | |
| "eval_steps_per_second": 220.058, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.398609077598829e-05, | |
| "loss": 1.2841, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_loss": 0.9784919023513794, | |
| "eval_runtime": 1.0109, | |
| "eval_samples_per_second": 218.617, | |
| "eval_steps_per_second": 218.617, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.389458272327965e-05, | |
| "loss": 1.3851, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_loss": 0.9803484678268433, | |
| "eval_runtime": 1.0124, | |
| "eval_samples_per_second": 218.289, | |
| "eval_steps_per_second": 218.289, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.380307467057101e-05, | |
| "loss": 1.0305, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_loss": 0.9807111024856567, | |
| "eval_runtime": 0.9944, | |
| "eval_samples_per_second": 222.251, | |
| "eval_steps_per_second": 222.251, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.3711566617862374e-05, | |
| "loss": 1.0227, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "eval_loss": 0.9797064065933228, | |
| "eval_runtime": 0.9959, | |
| "eval_samples_per_second": 221.915, | |
| "eval_steps_per_second": 221.915, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.362005856515373e-05, | |
| "loss": 0.9356, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "eval_loss": 0.9765809178352356, | |
| "eval_runtime": 1.002, | |
| "eval_samples_per_second": 220.55, | |
| "eval_steps_per_second": 220.55, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.352855051244509e-05, | |
| "loss": 0.6793, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_loss": 0.9791857600212097, | |
| "eval_runtime": 1.0113, | |
| "eval_samples_per_second": 218.526, | |
| "eval_steps_per_second": 218.526, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.3437042459736456e-05, | |
| "loss": 0.4783, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_loss": 0.9832126498222351, | |
| "eval_runtime": 1.0102, | |
| "eval_samples_per_second": 218.763, | |
| "eval_steps_per_second": 218.763, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.334553440702782e-05, | |
| "loss": 1.0296, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 0.9764915108680725, | |
| "eval_runtime": 1.0003, | |
| "eval_samples_per_second": 220.924, | |
| "eval_steps_per_second": 220.924, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.325402635431918e-05, | |
| "loss": 0.7206, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 0.9718484878540039, | |
| "eval_runtime": 1.0006, | |
| "eval_samples_per_second": 220.877, | |
| "eval_steps_per_second": 220.877, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.3162518301610545e-05, | |
| "loss": 0.5916, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_loss": 0.9733506441116333, | |
| "eval_runtime": 1.0129, | |
| "eval_samples_per_second": 218.19, | |
| "eval_steps_per_second": 218.19, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.307101024890191e-05, | |
| "loss": 0.6649, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_loss": 0.9809523224830627, | |
| "eval_runtime": 1.0146, | |
| "eval_samples_per_second": 217.816, | |
| "eval_steps_per_second": 217.816, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.2979502196193265e-05, | |
| "loss": 1.0807, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_loss": 0.9720730185508728, | |
| "eval_runtime": 1.0013, | |
| "eval_samples_per_second": 220.718, | |
| "eval_steps_per_second": 220.718, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.288799414348463e-05, | |
| "loss": 1.3233, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_loss": 0.9704715609550476, | |
| "eval_runtime": 1.0058, | |
| "eval_samples_per_second": 219.732, | |
| "eval_steps_per_second": 219.732, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.279648609077599e-05, | |
| "loss": 1.1487, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 0.9685859680175781, | |
| "eval_runtime": 1.0131, | |
| "eval_samples_per_second": 218.14, | |
| "eval_steps_per_second": 218.14, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.2704978038067354e-05, | |
| "loss": 1.0377, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 0.9717361927032471, | |
| "eval_runtime": 1.0005, | |
| "eval_samples_per_second": 220.896, | |
| "eval_steps_per_second": 220.896, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.261346998535871e-05, | |
| "loss": 0.9167, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_loss": 0.9671441912651062, | |
| "eval_runtime": 1.0004, | |
| "eval_samples_per_second": 220.906, | |
| "eval_steps_per_second": 220.906, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.252196193265007e-05, | |
| "loss": 0.7281, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_loss": 0.9680237770080566, | |
| "eval_runtime": 1.0021, | |
| "eval_samples_per_second": 220.54, | |
| "eval_steps_per_second": 220.54, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.2430453879941436e-05, | |
| "loss": 0.8004, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 0.9747995734214783, | |
| "eval_runtime": 1.0036, | |
| "eval_samples_per_second": 220.209, | |
| "eval_steps_per_second": 220.209, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.23389458272328e-05, | |
| "loss": 0.8977, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 0.9737166166305542, | |
| "eval_runtime": 1.0119, | |
| "eval_samples_per_second": 218.394, | |
| "eval_steps_per_second": 218.394, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.2247437774524156e-05, | |
| "loss": 0.9832, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_loss": 0.9728427529335022, | |
| "eval_runtime": 1.0004, | |
| "eval_samples_per_second": 220.905, | |
| "eval_steps_per_second": 220.905, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.215592972181552e-05, | |
| "loss": 0.6606, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_loss": 0.9685813784599304, | |
| "eval_runtime": 1.0122, | |
| "eval_samples_per_second": 218.342, | |
| "eval_steps_per_second": 218.342, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.206442166910688e-05, | |
| "loss": 1.381, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_loss": 0.9650890231132507, | |
| "eval_runtime": 1.011, | |
| "eval_samples_per_second": 218.599, | |
| "eval_steps_per_second": 218.599, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.1972913616398245e-05, | |
| "loss": 1.0271, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_loss": 0.9636799097061157, | |
| "eval_runtime": 0.9935, | |
| "eval_samples_per_second": 222.452, | |
| "eval_steps_per_second": 222.452, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.18814055636896e-05, | |
| "loss": 0.5892, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_loss": 0.9688751697540283, | |
| "eval_runtime": 0.9933, | |
| "eval_samples_per_second": 222.501, | |
| "eval_steps_per_second": 222.501, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.178989751098097e-05, | |
| "loss": 1.0916, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_loss": 0.9761135578155518, | |
| "eval_runtime": 1.0022, | |
| "eval_samples_per_second": 220.512, | |
| "eval_steps_per_second": 220.512, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.1698389458272334e-05, | |
| "loss": 0.5791, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_loss": 0.9762537479400635, | |
| "eval_runtime": 0.9973, | |
| "eval_samples_per_second": 221.59, | |
| "eval_steps_per_second": 221.59, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.160688140556369e-05, | |
| "loss": 0.9677, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_loss": 0.9627185463905334, | |
| "eval_runtime": 0.9991, | |
| "eval_samples_per_second": 221.207, | |
| "eval_steps_per_second": 221.207, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.151537335285505e-05, | |
| "loss": 0.5414, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 0.9636002779006958, | |
| "eval_runtime": 1.0073, | |
| "eval_samples_per_second": 219.391, | |
| "eval_steps_per_second": 219.391, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.1423865300146416e-05, | |
| "loss": 1.0274, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 0.9611531496047974, | |
| "eval_runtime": 1.0061, | |
| "eval_samples_per_second": 219.661, | |
| "eval_steps_per_second": 219.661, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.133235724743778e-05, | |
| "loss": 0.9746, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_loss": 0.9561561942100525, | |
| "eval_runtime": 0.9769, | |
| "eval_samples_per_second": 226.233, | |
| "eval_steps_per_second": 226.233, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.1240849194729136e-05, | |
| "loss": 0.7808, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_loss": 0.9541120529174805, | |
| "eval_runtime": 0.9794, | |
| "eval_samples_per_second": 225.658, | |
| "eval_steps_per_second": 225.658, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.11493411420205e-05, | |
| "loss": 1.3157, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_loss": 0.9499970078468323, | |
| "eval_runtime": 0.9821, | |
| "eval_samples_per_second": 225.03, | |
| "eval_steps_per_second": 225.03, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.105783308931186e-05, | |
| "loss": 1.0578, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_loss": 0.9560282230377197, | |
| "eval_runtime": 1.0141, | |
| "eval_samples_per_second": 217.924, | |
| "eval_steps_per_second": 217.924, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.0966325036603225e-05, | |
| "loss": 0.918, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_loss": 0.9617719650268555, | |
| "eval_runtime": 1.0055, | |
| "eval_samples_per_second": 219.798, | |
| "eval_steps_per_second": 219.798, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.087481698389458e-05, | |
| "loss": 1.0932, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_loss": 0.9546635746955872, | |
| "eval_runtime": 0.9984, | |
| "eval_samples_per_second": 221.362, | |
| "eval_steps_per_second": 221.362, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.0783308931185944e-05, | |
| "loss": 0.7853, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_loss": 0.9565637707710266, | |
| "eval_runtime": 1.0003, | |
| "eval_samples_per_second": 220.931, | |
| "eval_steps_per_second": 220.931, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.069180087847731e-05, | |
| "loss": 1.0242, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_loss": 0.9546669125556946, | |
| "eval_runtime": 1.0157, | |
| "eval_samples_per_second": 217.58, | |
| "eval_steps_per_second": 217.58, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.060029282576867e-05, | |
| "loss": 0.8443, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_loss": 0.9528049826622009, | |
| "eval_runtime": 1.003, | |
| "eval_samples_per_second": 220.335, | |
| "eval_steps_per_second": 220.335, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.050878477306003e-05, | |
| "loss": 1.0552, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_loss": 0.9504651427268982, | |
| "eval_runtime": 1.0082, | |
| "eval_samples_per_second": 219.193, | |
| "eval_steps_per_second": 219.193, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.041727672035139e-05, | |
| "loss": 0.8124, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_loss": 0.9550299048423767, | |
| "eval_runtime": 0.998, | |
| "eval_samples_per_second": 221.439, | |
| "eval_steps_per_second": 221.439, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.0325768667642756e-05, | |
| "loss": 0.3986, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_loss": 0.9648869633674622, | |
| "eval_runtime": 0.998, | |
| "eval_samples_per_second": 221.432, | |
| "eval_steps_per_second": 221.432, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.023426061493412e-05, | |
| "loss": 1.2103, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 0.9549727439880371, | |
| "eval_runtime": 0.9919, | |
| "eval_samples_per_second": 222.814, | |
| "eval_steps_per_second": 222.814, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.014275256222548e-05, | |
| "loss": 1.0141, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 0.9561426043510437, | |
| "eval_runtime": 0.9964, | |
| "eval_samples_per_second": 221.796, | |
| "eval_steps_per_second": 221.796, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.005124450951684e-05, | |
| "loss": 0.8462, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_loss": 0.9633329510688782, | |
| "eval_runtime": 0.998, | |
| "eval_samples_per_second": 221.441, | |
| "eval_steps_per_second": 221.441, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.99597364568082e-05, | |
| "loss": 1.1226, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_loss": 0.9539953470230103, | |
| "eval_runtime": 1.0009, | |
| "eval_samples_per_second": 220.808, | |
| "eval_steps_per_second": 220.808, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.9868228404099564e-05, | |
| "loss": 1.2035, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 0.9506434202194214, | |
| "eval_runtime": 1.0177, | |
| "eval_samples_per_second": 217.153, | |
| "eval_steps_per_second": 217.153, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.9776720351390924e-05, | |
| "loss": 0.875, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 0.952269971370697, | |
| "eval_runtime": 1.0114, | |
| "eval_samples_per_second": 218.508, | |
| "eval_steps_per_second": 218.508, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.9685212298682287e-05, | |
| "loss": 0.3386, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_loss": 0.9609423279762268, | |
| "eval_runtime": 0.9983, | |
| "eval_samples_per_second": 221.366, | |
| "eval_steps_per_second": 221.366, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.9593704245973647e-05, | |
| "loss": 1.33, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_loss": 0.9606022834777832, | |
| "eval_runtime": 0.9947, | |
| "eval_samples_per_second": 222.182, | |
| "eval_steps_per_second": 222.182, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.950219619326501e-05, | |
| "loss": 0.6649, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_loss": 0.9508395791053772, | |
| "eval_runtime": 0.9961, | |
| "eval_samples_per_second": 221.873, | |
| "eval_steps_per_second": 221.873, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.941068814055637e-05, | |
| "loss": 0.8999, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_loss": 0.946878969669342, | |
| "eval_runtime": 0.9912, | |
| "eval_samples_per_second": 222.954, | |
| "eval_steps_per_second": 222.954, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.931918008784773e-05, | |
| "loss": 0.755, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_loss": 0.9452105760574341, | |
| "eval_runtime": 0.9938, | |
| "eval_samples_per_second": 222.387, | |
| "eval_steps_per_second": 222.387, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.9227672035139092e-05, | |
| "loss": 0.5725, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_loss": 0.9571672677993774, | |
| "eval_runtime": 0.9854, | |
| "eval_samples_per_second": 224.27, | |
| "eval_steps_per_second": 224.27, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.9136163982430452e-05, | |
| "loss": 0.8084, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_loss": 0.9507928490638733, | |
| "eval_runtime": 0.9977, | |
| "eval_samples_per_second": 221.506, | |
| "eval_steps_per_second": 221.506, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.9044655929721815e-05, | |
| "loss": 1.2283, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_loss": 0.9474687576293945, | |
| "eval_runtime": 0.9983, | |
| "eval_samples_per_second": 221.375, | |
| "eval_steps_per_second": 221.375, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.8953147877013174e-05, | |
| "loss": 1.3059, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 0.9431435465812683, | |
| "eval_runtime": 0.9983, | |
| "eval_samples_per_second": 221.378, | |
| "eval_steps_per_second": 221.378, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.8861639824304544e-05, | |
| "loss": 0.6347, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 0.9469897150993347, | |
| "eval_runtime": 1.0047, | |
| "eval_samples_per_second": 219.968, | |
| "eval_steps_per_second": 219.968, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.8770131771595904e-05, | |
| "loss": 1.072, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_loss": 0.9481067061424255, | |
| "eval_runtime": 0.9965, | |
| "eval_samples_per_second": 221.78, | |
| "eval_steps_per_second": 221.78, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.8678623718887264e-05, | |
| "loss": 1.109, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_loss": 0.9449201822280884, | |
| "eval_runtime": 0.9947, | |
| "eval_samples_per_second": 222.188, | |
| "eval_steps_per_second": 222.188, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.8587115666178627e-05, | |
| "loss": 1.3118, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_loss": 0.9385907649993896, | |
| "eval_runtime": 0.9958, | |
| "eval_samples_per_second": 221.925, | |
| "eval_steps_per_second": 221.925, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.8495607613469986e-05, | |
| "loss": 0.878, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_loss": 0.9543187618255615, | |
| "eval_runtime": 0.9992, | |
| "eval_samples_per_second": 221.183, | |
| "eval_steps_per_second": 221.183, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.840409956076135e-05, | |
| "loss": 0.2237, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_loss": 0.9715589284896851, | |
| "eval_runtime": 0.996, | |
| "eval_samples_per_second": 221.897, | |
| "eval_steps_per_second": 221.897, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.831259150805271e-05, | |
| "loss": 0.7382, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_loss": 0.9529742002487183, | |
| "eval_runtime": 1.0044, | |
| "eval_samples_per_second": 220.035, | |
| "eval_steps_per_second": 220.035, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.8221083455344072e-05, | |
| "loss": 0.8128, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 0.9429596066474915, | |
| "eval_runtime": 1.001, | |
| "eval_samples_per_second": 220.781, | |
| "eval_steps_per_second": 220.781, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.8129575402635432e-05, | |
| "loss": 0.7577, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 0.9350069761276245, | |
| "eval_runtime": 0.996, | |
| "eval_samples_per_second": 221.894, | |
| "eval_steps_per_second": 221.894, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.8038067349926795e-05, | |
| "loss": 1.0114, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_loss": 0.931096613407135, | |
| "eval_runtime": 1.0088, | |
| "eval_samples_per_second": 219.075, | |
| "eval_steps_per_second": 219.075, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.7946559297218154e-05, | |
| "loss": 1.1864, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_loss": 0.9336085319519043, | |
| "eval_runtime": 1.0011, | |
| "eval_samples_per_second": 220.746, | |
| "eval_steps_per_second": 220.746, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.7855051244509518e-05, | |
| "loss": 0.7451, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_loss": 0.9325514435768127, | |
| "eval_runtime": 0.9934, | |
| "eval_samples_per_second": 222.479, | |
| "eval_steps_per_second": 222.479, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.7763543191800877e-05, | |
| "loss": 0.6565, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_loss": 0.9335077404975891, | |
| "eval_runtime": 0.9961, | |
| "eval_samples_per_second": 221.864, | |
| "eval_steps_per_second": 221.864, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.767203513909224e-05, | |
| "loss": 0.857, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_loss": 0.9355486631393433, | |
| "eval_runtime": 0.9917, | |
| "eval_samples_per_second": 222.856, | |
| "eval_steps_per_second": 222.856, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.75805270863836e-05, | |
| "loss": 0.881, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_loss": 0.9264810085296631, | |
| "eval_runtime": 0.992, | |
| "eval_samples_per_second": 222.79, | |
| "eval_steps_per_second": 222.79, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.7489019033674963e-05, | |
| "loss": 0.6847, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_loss": 0.928283154964447, | |
| "eval_runtime": 1.0075, | |
| "eval_samples_per_second": 219.359, | |
| "eval_steps_per_second": 219.359, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.739751098096633e-05, | |
| "loss": 0.768, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_loss": 0.9357666969299316, | |
| "eval_runtime": 1.0042, | |
| "eval_samples_per_second": 220.086, | |
| "eval_steps_per_second": 220.086, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.730600292825769e-05, | |
| "loss": 1.1393, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_loss": 0.9425435662269592, | |
| "eval_runtime": 0.9944, | |
| "eval_samples_per_second": 222.234, | |
| "eval_steps_per_second": 222.234, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.7214494875549052e-05, | |
| "loss": 1.0572, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_loss": 0.9292907118797302, | |
| "eval_runtime": 0.9967, | |
| "eval_samples_per_second": 221.728, | |
| "eval_steps_per_second": 221.728, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.7122986822840412e-05, | |
| "loss": 0.4611, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "eval_loss": 0.9363278746604919, | |
| "eval_runtime": 0.9924, | |
| "eval_samples_per_second": 222.688, | |
| "eval_steps_per_second": 222.688, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.7031478770131775e-05, | |
| "loss": 0.7708, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "eval_loss": 0.9379692077636719, | |
| "eval_runtime": 1.0142, | |
| "eval_samples_per_second": 217.907, | |
| "eval_steps_per_second": 217.907, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.6939970717423134e-05, | |
| "loss": 0.7672, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_loss": 0.9304003119468689, | |
| "eval_runtime": 0.9952, | |
| "eval_samples_per_second": 222.072, | |
| "eval_steps_per_second": 222.072, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.6848462664714497e-05, | |
| "loss": 1.1156, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_loss": 0.9290468096733093, | |
| "eval_runtime": 0.9912, | |
| "eval_samples_per_second": 222.955, | |
| "eval_steps_per_second": 222.955, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.6756954612005857e-05, | |
| "loss": 1.2475, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_loss": 0.9259029030799866, | |
| "eval_runtime": 0.9945, | |
| "eval_samples_per_second": 222.22, | |
| "eval_steps_per_second": 222.22, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.666544655929722e-05, | |
| "loss": 0.6684, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_loss": 0.9293370842933655, | |
| "eval_runtime": 1.0017, | |
| "eval_samples_per_second": 220.618, | |
| "eval_steps_per_second": 220.618, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.657393850658858e-05, | |
| "loss": 0.9708, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_loss": 0.925391674041748, | |
| "eval_runtime": 1.0081, | |
| "eval_samples_per_second": 219.225, | |
| "eval_steps_per_second": 219.225, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.6482430453879943e-05, | |
| "loss": 0.517, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_loss": 0.9237042665481567, | |
| "eval_runtime": 0.9965, | |
| "eval_samples_per_second": 221.78, | |
| "eval_steps_per_second": 221.78, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.6390922401171303e-05, | |
| "loss": 0.6535, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_loss": 0.9264727830886841, | |
| "eval_runtime": 1.0039, | |
| "eval_samples_per_second": 220.14, | |
| "eval_steps_per_second": 220.14, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.6299414348462666e-05, | |
| "loss": 0.8305, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_loss": 0.9250357151031494, | |
| "eval_runtime": 0.9956, | |
| "eval_samples_per_second": 221.966, | |
| "eval_steps_per_second": 221.966, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6207906295754025e-05, | |
| "loss": 0.7461, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_loss": 0.9280180931091309, | |
| "eval_runtime": 1.0051, | |
| "eval_samples_per_second": 219.88, | |
| "eval_steps_per_second": 219.88, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.6116398243045388e-05, | |
| "loss": 1.002, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_loss": 0.9301855564117432, | |
| "eval_runtime": 1.0018, | |
| "eval_samples_per_second": 220.602, | |
| "eval_steps_per_second": 220.602, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.6024890190336748e-05, | |
| "loss": 0.8358, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_loss": 0.9324009418487549, | |
| "eval_runtime": 0.9975, | |
| "eval_samples_per_second": 221.564, | |
| "eval_steps_per_second": 221.564, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.5933382137628114e-05, | |
| "loss": 0.9178, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_loss": 0.9264141321182251, | |
| "eval_runtime": 1.0079, | |
| "eval_samples_per_second": 219.258, | |
| "eval_steps_per_second": 219.258, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.5841874084919477e-05, | |
| "loss": 0.9611, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_loss": 0.9219666123390198, | |
| "eval_runtime": 1.013, | |
| "eval_samples_per_second": 218.154, | |
| "eval_steps_per_second": 218.154, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.5750366032210837e-05, | |
| "loss": 1.1209, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_loss": 0.91802978515625, | |
| "eval_runtime": 1.0188, | |
| "eval_samples_per_second": 216.913, | |
| "eval_steps_per_second": 216.913, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.56588579795022e-05, | |
| "loss": 0.515, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_loss": 0.9171512722969055, | |
| "eval_runtime": 0.999, | |
| "eval_samples_per_second": 221.218, | |
| "eval_steps_per_second": 221.218, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.556734992679356e-05, | |
| "loss": 0.5548, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_loss": 0.9176234006881714, | |
| "eval_runtime": 0.9965, | |
| "eval_samples_per_second": 221.776, | |
| "eval_steps_per_second": 221.776, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.5475841874084923e-05, | |
| "loss": 0.9413, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 0.9156973361968994, | |
| "eval_runtime": 1.0239, | |
| "eval_samples_per_second": 215.837, | |
| "eval_steps_per_second": 215.837, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.5384333821376283e-05, | |
| "loss": 0.6407, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 0.91624915599823, | |
| "eval_runtime": 1.0004, | |
| "eval_samples_per_second": 220.921, | |
| "eval_steps_per_second": 220.921, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5292825768667646e-05, | |
| "loss": 0.8255, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 0.9145430326461792, | |
| "eval_runtime": 1.0176, | |
| "eval_samples_per_second": 217.178, | |
| "eval_steps_per_second": 217.178, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.5201317715959005e-05, | |
| "loss": 0.644, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 0.9187570810317993, | |
| "eval_runtime": 1.0, | |
| "eval_samples_per_second": 220.998, | |
| "eval_steps_per_second": 220.998, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.5109809663250365e-05, | |
| "loss": 1.004, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_loss": 0.9200632572174072, | |
| "eval_runtime": 1.0044, | |
| "eval_samples_per_second": 220.032, | |
| "eval_steps_per_second": 220.032, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.5018301610541728e-05, | |
| "loss": 1.117, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_loss": 0.9216576218605042, | |
| "eval_runtime": 1.0023, | |
| "eval_samples_per_second": 220.504, | |
| "eval_steps_per_second": 220.504, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.492679355783309e-05, | |
| "loss": 0.8919, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_loss": 0.9223524928092957, | |
| "eval_runtime": 1.0022, | |
| "eval_samples_per_second": 220.506, | |
| "eval_steps_per_second": 220.506, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.4835285505124454e-05, | |
| "loss": 0.7564, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_loss": 0.9232571125030518, | |
| "eval_runtime": 0.9994, | |
| "eval_samples_per_second": 221.134, | |
| "eval_steps_per_second": 221.134, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.4743777452415814e-05, | |
| "loss": 1.2078, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_loss": 0.9224601984024048, | |
| "eval_runtime": 0.9987, | |
| "eval_samples_per_second": 221.291, | |
| "eval_steps_per_second": 221.291, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.4652269399707177e-05, | |
| "loss": 1.1681, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_loss": 0.9187254905700684, | |
| "eval_runtime": 0.9976, | |
| "eval_samples_per_second": 221.53, | |
| "eval_steps_per_second": 221.53, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.4560761346998536e-05, | |
| "loss": 0.6295, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_loss": 0.9133083820343018, | |
| "eval_runtime": 1.004, | |
| "eval_samples_per_second": 220.13, | |
| "eval_steps_per_second": 220.13, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.44692532942899e-05, | |
| "loss": 0.7138, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_loss": 0.9143015146255493, | |
| "eval_runtime": 1.0084, | |
| "eval_samples_per_second": 219.149, | |
| "eval_steps_per_second": 219.149, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.437774524158126e-05, | |
| "loss": 0.7402, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_loss": 0.914252519607544, | |
| "eval_runtime": 1.0003, | |
| "eval_samples_per_second": 220.944, | |
| "eval_steps_per_second": 220.944, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.4286237188872622e-05, | |
| "loss": 0.8711, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_loss": 0.9134473204612732, | |
| "eval_runtime": 0.9962, | |
| "eval_samples_per_second": 221.842, | |
| "eval_steps_per_second": 221.842, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.4194729136163985e-05, | |
| "loss": 0.8411, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_loss": 0.9173144698143005, | |
| "eval_runtime": 1.0086, | |
| "eval_samples_per_second": 219.116, | |
| "eval_steps_per_second": 219.116, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.4103221083455345e-05, | |
| "loss": 1.0449, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_loss": 0.9242233633995056, | |
| "eval_runtime": 0.9989, | |
| "eval_samples_per_second": 221.239, | |
| "eval_steps_per_second": 221.239, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.4011713030746708e-05, | |
| "loss": 0.8045, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_loss": 0.9207603335380554, | |
| "eval_runtime": 0.9992, | |
| "eval_samples_per_second": 221.177, | |
| "eval_steps_per_second": 221.177, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.3920204978038068e-05, | |
| "loss": 0.6825, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_loss": 0.9101746082305908, | |
| "eval_runtime": 0.9998, | |
| "eval_samples_per_second": 221.048, | |
| "eval_steps_per_second": 221.048, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.382869692532943e-05, | |
| "loss": 0.9593, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_loss": 0.9140764474868774, | |
| "eval_runtime": 1.0111, | |
| "eval_samples_per_second": 218.575, | |
| "eval_steps_per_second": 218.575, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.373718887262079e-05, | |
| "loss": 1.1421, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 0.9256561398506165, | |
| "eval_runtime": 0.9915, | |
| "eval_samples_per_second": 222.895, | |
| "eval_steps_per_second": 222.895, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.3645680819912153e-05, | |
| "loss": 0.7391, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 0.9206637144088745, | |
| "eval_runtime": 1.019, | |
| "eval_samples_per_second": 216.885, | |
| "eval_steps_per_second": 216.885, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.3554172767203513e-05, | |
| "loss": 0.9972, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_loss": 0.9138339161872864, | |
| "eval_runtime": 1.0004, | |
| "eval_samples_per_second": 220.91, | |
| "eval_steps_per_second": 220.91, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.346266471449488e-05, | |
| "loss": 0.8846, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_loss": 0.9148464202880859, | |
| "eval_runtime": 0.9958, | |
| "eval_samples_per_second": 221.922, | |
| "eval_steps_per_second": 221.922, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.337115666178624e-05, | |
| "loss": 1.1577, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_loss": 0.9139500856399536, | |
| "eval_runtime": 0.9895, | |
| "eval_samples_per_second": 223.337, | |
| "eval_steps_per_second": 223.337, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.3279648609077602e-05, | |
| "loss": 0.6081, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_loss": 0.9128450155258179, | |
| "eval_runtime": 0.989, | |
| "eval_samples_per_second": 223.452, | |
| "eval_steps_per_second": 223.452, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.3188140556368962e-05, | |
| "loss": 0.7401, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 0.913173496723175, | |
| "eval_runtime": 0.9887, | |
| "eval_samples_per_second": 223.535, | |
| "eval_steps_per_second": 223.535, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.3096632503660325e-05, | |
| "loss": 1.3848, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 0.9190066456794739, | |
| "eval_runtime": 0.988, | |
| "eval_samples_per_second": 223.685, | |
| "eval_steps_per_second": 223.685, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.3005124450951684e-05, | |
| "loss": 0.7132, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_loss": 0.9255449175834656, | |
| "eval_runtime": 0.9903, | |
| "eval_samples_per_second": 223.155, | |
| "eval_steps_per_second": 223.155, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.2913616398243044e-05, | |
| "loss": 0.4649, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_loss": 0.9188918471336365, | |
| "eval_runtime": 0.9908, | |
| "eval_samples_per_second": 223.051, | |
| "eval_steps_per_second": 223.051, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.2822108345534407e-05, | |
| "loss": 1.1004, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_loss": 0.9166772961616516, | |
| "eval_runtime": 0.9925, | |
| "eval_samples_per_second": 222.666, | |
| "eval_steps_per_second": 222.666, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.273060029282577e-05, | |
| "loss": 0.9214, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_loss": 0.9138955473899841, | |
| "eval_runtime": 1.0045, | |
| "eval_samples_per_second": 220.008, | |
| "eval_steps_per_second": 220.008, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.2639092240117133e-05, | |
| "loss": 0.6908, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 0.915971577167511, | |
| "eval_runtime": 0.997, | |
| "eval_samples_per_second": 221.673, | |
| "eval_steps_per_second": 221.673, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.2547584187408493e-05, | |
| "loss": 0.7928, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 0.9109075665473938, | |
| "eval_runtime": 0.9912, | |
| "eval_samples_per_second": 222.963, | |
| "eval_steps_per_second": 222.963, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.2456076134699856e-05, | |
| "loss": 1.132, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_loss": 0.9082236886024475, | |
| "eval_runtime": 0.9933, | |
| "eval_samples_per_second": 222.497, | |
| "eval_steps_per_second": 222.497, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.2364568081991216e-05, | |
| "loss": 0.4522, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_loss": 0.9198694229125977, | |
| "eval_runtime": 0.9979, | |
| "eval_samples_per_second": 221.459, | |
| "eval_steps_per_second": 221.459, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.227306002928258e-05, | |
| "loss": 0.8204, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_loss": 0.9264371991157532, | |
| "eval_runtime": 0.9942, | |
| "eval_samples_per_second": 222.291, | |
| "eval_steps_per_second": 222.291, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.218155197657394e-05, | |
| "loss": 0.7667, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_loss": 0.9129782915115356, | |
| "eval_runtime": 0.9945, | |
| "eval_samples_per_second": 222.216, | |
| "eval_steps_per_second": 222.216, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.20900439238653e-05, | |
| "loss": 1.3389, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 0.9032291769981384, | |
| "eval_runtime": 0.9954, | |
| "eval_samples_per_second": 222.02, | |
| "eval_steps_per_second": 222.02, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.1998535871156664e-05, | |
| "loss": 0.7115, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 0.9023773074150085, | |
| "eval_runtime": 1.0021, | |
| "eval_samples_per_second": 220.538, | |
| "eval_steps_per_second": 220.538, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.1907027818448024e-05, | |
| "loss": 0.7246, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 0.9083009362220764, | |
| "eval_runtime": 0.9974, | |
| "eval_samples_per_second": 221.579, | |
| "eval_steps_per_second": 221.579, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.1815519765739387e-05, | |
| "loss": 0.8387, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 0.9141191244125366, | |
| "eval_runtime": 0.9947, | |
| "eval_samples_per_second": 222.173, | |
| "eval_steps_per_second": 222.173, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.1724011713030747e-05, | |
| "loss": 0.9222, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_loss": 0.9129745960235596, | |
| "eval_runtime": 0.9989, | |
| "eval_samples_per_second": 221.247, | |
| "eval_steps_per_second": 221.247, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.163250366032211e-05, | |
| "loss": 1.0447, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_loss": 0.9043551683425903, | |
| "eval_runtime": 0.992, | |
| "eval_samples_per_second": 222.79, | |
| "eval_steps_per_second": 222.79, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.154099560761347e-05, | |
| "loss": 1.0981, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_loss": 0.9048230051994324, | |
| "eval_runtime": 1.004, | |
| "eval_samples_per_second": 220.117, | |
| "eval_steps_per_second": 220.117, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.1449487554904833e-05, | |
| "loss": 0.6937, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_loss": 0.9051251411437988, | |
| "eval_runtime": 0.9896, | |
| "eval_samples_per_second": 223.316, | |
| "eval_steps_per_second": 223.316, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.1357979502196192e-05, | |
| "loss": 0.9807, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_loss": 0.9107654690742493, | |
| "eval_runtime": 0.9982, | |
| "eval_samples_per_second": 221.406, | |
| "eval_steps_per_second": 221.406, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 2.126647144948756e-05, | |
| "loss": 0.875, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_loss": 0.9070552587509155, | |
| "eval_runtime": 0.9994, | |
| "eval_samples_per_second": 221.141, | |
| "eval_steps_per_second": 221.141, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.1174963396778918e-05, | |
| "loss": 0.12, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_loss": 0.9132441878318787, | |
| "eval_runtime": 0.989, | |
| "eval_samples_per_second": 223.469, | |
| "eval_steps_per_second": 223.469, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 2.108345534407028e-05, | |
| "loss": 1.0182, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_loss": 0.9071489572525024, | |
| "eval_runtime": 0.9963, | |
| "eval_samples_per_second": 221.819, | |
| "eval_steps_per_second": 221.819, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.099194729136164e-05, | |
| "loss": 0.5262, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_loss": 0.9050890207290649, | |
| "eval_runtime": 0.9957, | |
| "eval_samples_per_second": 221.96, | |
| "eval_steps_per_second": 221.96, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 2.0900439238653004e-05, | |
| "loss": 1.1146, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_loss": 0.903910219669342, | |
| "eval_runtime": 1.0082, | |
| "eval_samples_per_second": 219.213, | |
| "eval_steps_per_second": 219.213, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 2.0808931185944364e-05, | |
| "loss": 1.0651, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 0.8979349732398987, | |
| "eval_runtime": 0.995, | |
| "eval_samples_per_second": 222.117, | |
| "eval_steps_per_second": 222.117, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 2.0717423133235723e-05, | |
| "loss": 0.6354, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 0.8976560831069946, | |
| "eval_runtime": 0.9991, | |
| "eval_samples_per_second": 221.197, | |
| "eval_steps_per_second": 221.197, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 2.0625915080527086e-05, | |
| "loss": 0.6519, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_loss": 0.9032682776451111, | |
| "eval_runtime": 0.9932, | |
| "eval_samples_per_second": 222.51, | |
| "eval_steps_per_second": 222.51, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 2.053440702781845e-05, | |
| "loss": 0.7339, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_loss": 0.8973162174224854, | |
| "eval_runtime": 0.9916, | |
| "eval_samples_per_second": 222.877, | |
| "eval_steps_per_second": 222.877, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.0442898975109812e-05, | |
| "loss": 0.5961, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_loss": 0.9044464230537415, | |
| "eval_runtime": 1.0037, | |
| "eval_samples_per_second": 220.194, | |
| "eval_steps_per_second": 220.194, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.0351390922401172e-05, | |
| "loss": 0.9241, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_loss": 0.9145064949989319, | |
| "eval_runtime": 0.9989, | |
| "eval_samples_per_second": 221.235, | |
| "eval_steps_per_second": 221.235, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.0259882869692535e-05, | |
| "loss": 0.8266, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 0.9076390266418457, | |
| "eval_runtime": 0.9911, | |
| "eval_samples_per_second": 222.994, | |
| "eval_steps_per_second": 222.994, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.0168374816983895e-05, | |
| "loss": 1.2164, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 0.8995986580848694, | |
| "eval_runtime": 0.99, | |
| "eval_samples_per_second": 223.242, | |
| "eval_steps_per_second": 223.242, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.0076866764275258e-05, | |
| "loss": 0.7718, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_loss": 0.8990570902824402, | |
| "eval_runtime": 0.9948, | |
| "eval_samples_per_second": 222.144, | |
| "eval_steps_per_second": 222.144, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.9985358711566618e-05, | |
| "loss": 0.6008, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_loss": 0.9060280919075012, | |
| "eval_runtime": 0.9957, | |
| "eval_samples_per_second": 221.946, | |
| "eval_steps_per_second": 221.946, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.989385065885798e-05, | |
| "loss": 1.3144, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "eval_loss": 0.9031801819801331, | |
| "eval_runtime": 0.9908, | |
| "eval_samples_per_second": 223.048, | |
| "eval_steps_per_second": 223.048, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.9802342606149344e-05, | |
| "loss": 0.7852, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "eval_loss": 0.8996669054031372, | |
| "eval_runtime": 0.9939, | |
| "eval_samples_per_second": 222.347, | |
| "eval_steps_per_second": 222.347, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.9710834553440703e-05, | |
| "loss": 0.6096, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 0.9040927290916443, | |
| "eval_runtime": 1.0, | |
| "eval_samples_per_second": 221.008, | |
| "eval_steps_per_second": 221.008, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.9619326500732066e-05, | |
| "loss": 1.1556, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 0.8982120752334595, | |
| "eval_runtime": 0.9924, | |
| "eval_samples_per_second": 222.693, | |
| "eval_steps_per_second": 222.693, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.9527818448023426e-05, | |
| "loss": 0.679, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_loss": 0.8950504660606384, | |
| "eval_runtime": 0.9933, | |
| "eval_samples_per_second": 222.489, | |
| "eval_steps_per_second": 222.489, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.943631039531479e-05, | |
| "loss": 1.1343, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_loss": 0.894602358341217, | |
| "eval_runtime": 0.9943, | |
| "eval_samples_per_second": 222.258, | |
| "eval_steps_per_second": 222.258, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.934480234260615e-05, | |
| "loss": 1.1132, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_loss": 0.8948307037353516, | |
| "eval_runtime": 0.9944, | |
| "eval_samples_per_second": 222.238, | |
| "eval_steps_per_second": 222.238, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9253294289897512e-05, | |
| "loss": 0.9618, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_loss": 0.8963745832443237, | |
| "eval_runtime": 0.9899, | |
| "eval_samples_per_second": 223.252, | |
| "eval_steps_per_second": 223.252, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.916178623718887e-05, | |
| "loss": 1.2705, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_loss": 0.8955935835838318, | |
| "eval_runtime": 0.9969, | |
| "eval_samples_per_second": 221.691, | |
| "eval_steps_per_second": 221.691, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.9070278184480238e-05, | |
| "loss": 1.2233, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_loss": 0.8896967172622681, | |
| "eval_runtime": 1.0009, | |
| "eval_samples_per_second": 220.808, | |
| "eval_steps_per_second": 220.808, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.8978770131771598e-05, | |
| "loss": 0.9835, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_loss": 0.8930822014808655, | |
| "eval_runtime": 1.0072, | |
| "eval_samples_per_second": 219.427, | |
| "eval_steps_per_second": 219.427, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.888726207906296e-05, | |
| "loss": 0.7342, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_loss": 0.9027377367019653, | |
| "eval_runtime": 0.9925, | |
| "eval_samples_per_second": 222.677, | |
| "eval_steps_per_second": 222.677, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.879575402635432e-05, | |
| "loss": 0.5454, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_loss": 0.9098514914512634, | |
| "eval_runtime": 0.9995, | |
| "eval_samples_per_second": 221.103, | |
| "eval_steps_per_second": 221.103, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.870424597364568e-05, | |
| "loss": 1.0659, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_loss": 0.9146174788475037, | |
| "eval_runtime": 1.005, | |
| "eval_samples_per_second": 219.91, | |
| "eval_steps_per_second": 219.91, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.8612737920937043e-05, | |
| "loss": 0.6418, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 0.9074834585189819, | |
| "eval_runtime": 1.0003, | |
| "eval_samples_per_second": 220.934, | |
| "eval_steps_per_second": 220.934, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.8521229868228403e-05, | |
| "loss": 0.9448, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 0.9007245898246765, | |
| "eval_runtime": 1.0037, | |
| "eval_samples_per_second": 220.186, | |
| "eval_steps_per_second": 220.186, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.8429721815519766e-05, | |
| "loss": 1.2571, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_loss": 0.8986222743988037, | |
| "eval_runtime": 0.9981, | |
| "eval_samples_per_second": 221.427, | |
| "eval_steps_per_second": 221.427, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.833821376281113e-05, | |
| "loss": 0.8203, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_loss": 0.9077406525611877, | |
| "eval_runtime": 1.0229, | |
| "eval_samples_per_second": 216.045, | |
| "eval_steps_per_second": 216.045, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.8246705710102492e-05, | |
| "loss": 0.5973, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.9096502065658569, | |
| "eval_runtime": 0.9989, | |
| "eval_samples_per_second": 221.244, | |
| "eval_steps_per_second": 221.244, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.815519765739385e-05, | |
| "loss": 0.8533, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.9049461483955383, | |
| "eval_runtime": 1.0008, | |
| "eval_samples_per_second": 220.823, | |
| "eval_steps_per_second": 220.823, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.8063689604685214e-05, | |
| "loss": 0.5243, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_loss": 0.9102587103843689, | |
| "eval_runtime": 1.002, | |
| "eval_samples_per_second": 220.552, | |
| "eval_steps_per_second": 220.552, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.7972181551976574e-05, | |
| "loss": 0.6593, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_loss": 0.9104261994361877, | |
| "eval_runtime": 1.003, | |
| "eval_samples_per_second": 220.345, | |
| "eval_steps_per_second": 220.345, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.7880673499267937e-05, | |
| "loss": 0.6231, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_loss": 0.9062316417694092, | |
| "eval_runtime": 1.0036, | |
| "eval_samples_per_second": 220.212, | |
| "eval_steps_per_second": 220.212, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.7789165446559297e-05, | |
| "loss": 0.3603, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_loss": 0.9053772687911987, | |
| "eval_runtime": 1.0082, | |
| "eval_samples_per_second": 219.206, | |
| "eval_steps_per_second": 219.206, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.769765739385066e-05, | |
| "loss": 0.2853, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "eval_loss": 0.906538188457489, | |
| "eval_runtime": 1.0121, | |
| "eval_samples_per_second": 218.366, | |
| "eval_steps_per_second": 218.366, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.7606149341142023e-05, | |
| "loss": 0.7358, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "eval_loss": 0.9098958373069763, | |
| "eval_runtime": 0.9996, | |
| "eval_samples_per_second": 221.089, | |
| "eval_steps_per_second": 221.089, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.7514641288433383e-05, | |
| "loss": 0.7306, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 0.9099730253219604, | |
| "eval_runtime": 1.022, | |
| "eval_samples_per_second": 216.244, | |
| "eval_steps_per_second": 216.244, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.7423133235724746e-05, | |
| "loss": 0.9585, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 0.9043567776679993, | |
| "eval_runtime": 1.0036, | |
| "eval_samples_per_second": 220.202, | |
| "eval_steps_per_second": 220.202, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.7331625183016105e-05, | |
| "loss": 0.7872, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_loss": 0.9016985893249512, | |
| "eval_runtime": 1.0221, | |
| "eval_samples_per_second": 216.211, | |
| "eval_steps_per_second": 216.211, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.7240117130307468e-05, | |
| "loss": 0.3869, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_loss": 0.9016183614730835, | |
| "eval_runtime": 1.0178, | |
| "eval_samples_per_second": 217.136, | |
| "eval_steps_per_second": 217.136, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.7148609077598828e-05, | |
| "loss": 0.7671, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_loss": 0.9008880257606506, | |
| "eval_runtime": 1.0404, | |
| "eval_samples_per_second": 212.421, | |
| "eval_steps_per_second": 212.421, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.705710102489019e-05, | |
| "loss": 1.1349, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_loss": 0.9024242758750916, | |
| "eval_runtime": 1.0106, | |
| "eval_samples_per_second": 218.676, | |
| "eval_steps_per_second": 218.676, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.696559297218155e-05, | |
| "loss": 0.5429, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_loss": 0.9048688411712646, | |
| "eval_runtime": 1.0083, | |
| "eval_samples_per_second": 219.18, | |
| "eval_steps_per_second": 219.18, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.6874084919472917e-05, | |
| "loss": 0.3887, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_loss": 0.9077779650688171, | |
| "eval_runtime": 1.0047, | |
| "eval_samples_per_second": 219.96, | |
| "eval_steps_per_second": 219.96, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.6782576866764277e-05, | |
| "loss": 0.9391, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "eval_loss": 0.9069033265113831, | |
| "eval_runtime": 0.9966, | |
| "eval_samples_per_second": 221.763, | |
| "eval_steps_per_second": 221.763, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.669106881405564e-05, | |
| "loss": 0.4818, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "eval_loss": 0.9083073735237122, | |
| "eval_runtime": 0.9997, | |
| "eval_samples_per_second": 221.068, | |
| "eval_steps_per_second": 221.068, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.6599560761347e-05, | |
| "loss": 0.6984, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_loss": 0.9053521752357483, | |
| "eval_runtime": 1.0076, | |
| "eval_samples_per_second": 219.329, | |
| "eval_steps_per_second": 219.329, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.650805270863836e-05, | |
| "loss": 0.9192, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_loss": 0.896609902381897, | |
| "eval_runtime": 0.9977, | |
| "eval_samples_per_second": 221.508, | |
| "eval_steps_per_second": 221.508, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.6416544655929722e-05, | |
| "loss": 0.7977, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_loss": 0.8977360129356384, | |
| "eval_runtime": 1.0005, | |
| "eval_samples_per_second": 220.891, | |
| "eval_steps_per_second": 220.891, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.6325036603221082e-05, | |
| "loss": 0.5356, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_loss": 0.8997947573661804, | |
| "eval_runtime": 1.0055, | |
| "eval_samples_per_second": 219.8, | |
| "eval_steps_per_second": 219.8, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6233528550512445e-05, | |
| "loss": 1.1228, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 0.8971582651138306, | |
| "eval_runtime": 1.0272, | |
| "eval_samples_per_second": 215.151, | |
| "eval_steps_per_second": 215.151, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.6142020497803808e-05, | |
| "loss": 0.5621, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 0.9032644033432007, | |
| "eval_runtime": 1.0142, | |
| "eval_samples_per_second": 217.912, | |
| "eval_steps_per_second": 217.912, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.605051244509517e-05, | |
| "loss": 1.2936, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_loss": 0.907042920589447, | |
| "eval_runtime": 0.9958, | |
| "eval_samples_per_second": 221.935, | |
| "eval_steps_per_second": 221.935, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.595900439238653e-05, | |
| "loss": 0.695, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_loss": 0.8966180682182312, | |
| "eval_runtime": 1.0048, | |
| "eval_samples_per_second": 219.942, | |
| "eval_steps_per_second": 219.942, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.5867496339677894e-05, | |
| "loss": 0.8678, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_loss": 0.8943141102790833, | |
| "eval_runtime": 1.0064, | |
| "eval_samples_per_second": 219.6, | |
| "eval_steps_per_second": 219.6, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.5775988286969253e-05, | |
| "loss": 0.621, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_loss": 0.9032623767852783, | |
| "eval_runtime": 1.0029, | |
| "eval_samples_per_second": 220.37, | |
| "eval_steps_per_second": 220.37, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.5684480234260616e-05, | |
| "loss": 0.5612, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_loss": 0.9057121276855469, | |
| "eval_runtime": 0.9993, | |
| "eval_samples_per_second": 221.164, | |
| "eval_steps_per_second": 221.164, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.5592972181551976e-05, | |
| "loss": 0.6432, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_loss": 0.9013468027114868, | |
| "eval_runtime": 1.0066, | |
| "eval_samples_per_second": 219.544, | |
| "eval_steps_per_second": 219.544, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.550146412884334e-05, | |
| "loss": 0.8455, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_loss": 0.9007333517074585, | |
| "eval_runtime": 1.0108, | |
| "eval_samples_per_second": 218.644, | |
| "eval_steps_per_second": 218.644, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.5409956076134702e-05, | |
| "loss": 0.8689, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_loss": 0.8959780335426331, | |
| "eval_runtime": 1.0128, | |
| "eval_samples_per_second": 218.204, | |
| "eval_steps_per_second": 218.204, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.5318448023426062e-05, | |
| "loss": 0.8177, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_loss": 0.8960052728652954, | |
| "eval_runtime": 1.0159, | |
| "eval_samples_per_second": 217.546, | |
| "eval_steps_per_second": 217.546, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.5226939970717425e-05, | |
| "loss": 0.788, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_loss": 0.8973272442817688, | |
| "eval_runtime": 1.0007, | |
| "eval_samples_per_second": 220.85, | |
| "eval_steps_per_second": 220.85, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.5135431918008786e-05, | |
| "loss": 0.6841, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_loss": 0.9006291627883911, | |
| "eval_runtime": 1.0026, | |
| "eval_samples_per_second": 220.425, | |
| "eval_steps_per_second": 220.425, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.5043923865300148e-05, | |
| "loss": 0.542, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_loss": 0.8980528116226196, | |
| "eval_runtime": 1.0054, | |
| "eval_samples_per_second": 219.805, | |
| "eval_steps_per_second": 219.805, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.4952415812591509e-05, | |
| "loss": 0.6806, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_loss": 0.8979552388191223, | |
| "eval_runtime": 0.9967, | |
| "eval_samples_per_second": 221.735, | |
| "eval_steps_per_second": 221.735, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.486090775988287e-05, | |
| "loss": 1.0197, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_loss": 0.8998746275901794, | |
| "eval_runtime": 1.0002, | |
| "eval_samples_per_second": 220.951, | |
| "eval_steps_per_second": 220.951, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.476939970717423e-05, | |
| "loss": 0.4407, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_loss": 0.9042376279830933, | |
| "eval_runtime": 1.016, | |
| "eval_samples_per_second": 217.526, | |
| "eval_steps_per_second": 217.526, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.4677891654465595e-05, | |
| "loss": 1.164, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_loss": 0.9058394432067871, | |
| "eval_runtime": 1.0659, | |
| "eval_samples_per_second": 207.344, | |
| "eval_steps_per_second": 207.344, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.4586383601756956e-05, | |
| "loss": 0.9107, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 0.8990769982337952, | |
| "eval_runtime": 1.0349, | |
| "eval_samples_per_second": 213.546, | |
| "eval_steps_per_second": 213.546, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.4494875549048317e-05, | |
| "loss": 0.5929, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 0.8981482982635498, | |
| "eval_runtime": 0.9883, | |
| "eval_samples_per_second": 223.619, | |
| "eval_steps_per_second": 223.619, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.4403367496339679e-05, | |
| "loss": 0.7521, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_loss": 0.9011045694351196, | |
| "eval_runtime": 1.0001, | |
| "eval_samples_per_second": 220.974, | |
| "eval_steps_per_second": 220.974, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.431185944363104e-05, | |
| "loss": 1.1089, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_loss": 0.8992206454277039, | |
| "eval_runtime": 1.0047, | |
| "eval_samples_per_second": 219.967, | |
| "eval_steps_per_second": 219.967, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4220351390922401e-05, | |
| "loss": 0.8715, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_loss": 0.894744873046875, | |
| "eval_runtime": 1.0219, | |
| "eval_samples_per_second": 216.255, | |
| "eval_steps_per_second": 216.255, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.4128843338213763e-05, | |
| "loss": 0.223, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_loss": 0.895232617855072, | |
| "eval_runtime": 0.9972, | |
| "eval_samples_per_second": 221.62, | |
| "eval_steps_per_second": 221.62, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.4037335285505124e-05, | |
| "loss": 1.2705, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_loss": 0.8937389254570007, | |
| "eval_runtime": 0.9979, | |
| "eval_samples_per_second": 221.469, | |
| "eval_steps_per_second": 221.469, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.3945827232796487e-05, | |
| "loss": 0.8429, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_loss": 0.8975211977958679, | |
| "eval_runtime": 1.0413, | |
| "eval_samples_per_second": 212.232, | |
| "eval_steps_per_second": 212.232, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.3854319180087849e-05, | |
| "loss": 0.8727, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_loss": 0.8977360129356384, | |
| "eval_runtime": 1.0236, | |
| "eval_samples_per_second": 215.915, | |
| "eval_steps_per_second": 215.915, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.376281112737921e-05, | |
| "loss": 0.7136, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_loss": 0.897425651550293, | |
| "eval_runtime": 1.0065, | |
| "eval_samples_per_second": 219.581, | |
| "eval_steps_per_second": 219.581, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3671303074670571e-05, | |
| "loss": 0.8114, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_loss": 0.8991961479187012, | |
| "eval_runtime": 1.0014, | |
| "eval_samples_per_second": 220.697, | |
| "eval_steps_per_second": 220.697, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.3579795021961933e-05, | |
| "loss": 0.5067, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_loss": 0.8997776508331299, | |
| "eval_runtime": 1.044, | |
| "eval_samples_per_second": 211.689, | |
| "eval_steps_per_second": 211.689, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.3488286969253294e-05, | |
| "loss": 0.7263, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_loss": 0.8950998783111572, | |
| "eval_runtime": 1.0054, | |
| "eval_samples_per_second": 219.802, | |
| "eval_steps_per_second": 219.802, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.3396778916544655e-05, | |
| "loss": 0.5715, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_loss": 0.8947245478630066, | |
| "eval_runtime": 1.0081, | |
| "eval_samples_per_second": 219.22, | |
| "eval_steps_per_second": 219.22, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.3305270863836017e-05, | |
| "loss": 0.6422, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_loss": 0.8984644412994385, | |
| "eval_runtime": 1.0113, | |
| "eval_samples_per_second": 218.534, | |
| "eval_steps_per_second": 218.534, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.3213762811127381e-05, | |
| "loss": 0.4204, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_loss": 0.8999758362770081, | |
| "eval_runtime": 1.0031, | |
| "eval_samples_per_second": 220.311, | |
| "eval_steps_per_second": 220.311, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.3122254758418743e-05, | |
| "loss": 0.8309, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_loss": 0.9033350348472595, | |
| "eval_runtime": 1.0079, | |
| "eval_samples_per_second": 219.26, | |
| "eval_steps_per_second": 219.26, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.3030746705710104e-05, | |
| "loss": 0.5329, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_loss": 0.9023324251174927, | |
| "eval_runtime": 1.0049, | |
| "eval_samples_per_second": 219.912, | |
| "eval_steps_per_second": 219.912, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.2939238653001465e-05, | |
| "loss": 1.1072, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_loss": 0.8925628066062927, | |
| "eval_runtime": 1.0224, | |
| "eval_samples_per_second": 216.151, | |
| "eval_steps_per_second": 216.151, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.2847730600292827e-05, | |
| "loss": 0.3493, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_loss": 0.8900936841964722, | |
| "eval_runtime": 1.0036, | |
| "eval_samples_per_second": 220.211, | |
| "eval_steps_per_second": 220.211, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.2756222547584188e-05, | |
| "loss": 0.7056, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_loss": 0.8923791646957397, | |
| "eval_runtime": 1.0236, | |
| "eval_samples_per_second": 215.914, | |
| "eval_steps_per_second": 215.914, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.2664714494875548e-05, | |
| "loss": 0.562, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_loss": 0.8951922655105591, | |
| "eval_runtime": 1.0108, | |
| "eval_samples_per_second": 218.633, | |
| "eval_steps_per_second": 218.633, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.2573206442166909e-05, | |
| "loss": 1.082, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_loss": 0.8923650979995728, | |
| "eval_runtime": 1.0076, | |
| "eval_samples_per_second": 219.334, | |
| "eval_steps_per_second": 219.334, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.2481698389458272e-05, | |
| "loss": 0.6974, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_loss": 0.8902477025985718, | |
| "eval_runtime": 1.0048, | |
| "eval_samples_per_second": 219.94, | |
| "eval_steps_per_second": 219.94, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.2390190336749635e-05, | |
| "loss": 0.5137, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_loss": 0.8948673605918884, | |
| "eval_runtime": 1.0056, | |
| "eval_samples_per_second": 219.766, | |
| "eval_steps_per_second": 219.766, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.2298682284040997e-05, | |
| "loss": 0.6971, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_loss": 0.8960031270980835, | |
| "eval_runtime": 1.0208, | |
| "eval_samples_per_second": 216.489, | |
| "eval_steps_per_second": 216.489, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.2207174231332358e-05, | |
| "loss": 1.3381, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_loss": 0.8894782662391663, | |
| "eval_runtime": 1.0122, | |
| "eval_samples_per_second": 218.333, | |
| "eval_steps_per_second": 218.333, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.211566617862372e-05, | |
| "loss": 0.9586, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_loss": 0.8867659568786621, | |
| "eval_runtime": 1.0094, | |
| "eval_samples_per_second": 218.94, | |
| "eval_steps_per_second": 218.94, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.2024158125915082e-05, | |
| "loss": 0.4076, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "eval_loss": 0.8893986940383911, | |
| "eval_runtime": 1.0013, | |
| "eval_samples_per_second": 220.705, | |
| "eval_steps_per_second": 220.705, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.1932650073206444e-05, | |
| "loss": 0.847, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "eval_loss": 0.8909441232681274, | |
| "eval_runtime": 1.0012, | |
| "eval_samples_per_second": 220.733, | |
| "eval_steps_per_second": 220.733, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.1841142020497805e-05, | |
| "loss": 0.8145, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_loss": 0.8907016515731812, | |
| "eval_runtime": 1.001, | |
| "eval_samples_per_second": 220.785, | |
| "eval_steps_per_second": 220.785, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.1749633967789166e-05, | |
| "loss": 0.4424, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_loss": 0.8914759159088135, | |
| "eval_runtime": 1.012, | |
| "eval_samples_per_second": 218.377, | |
| "eval_steps_per_second": 218.377, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.1658125915080528e-05, | |
| "loss": 0.5713, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_loss": 0.8976419568061829, | |
| "eval_runtime": 1.0069, | |
| "eval_samples_per_second": 219.475, | |
| "eval_steps_per_second": 219.475, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.1566617862371889e-05, | |
| "loss": 0.6782, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_loss": 0.8994885683059692, | |
| "eval_runtime": 0.9983, | |
| "eval_samples_per_second": 221.365, | |
| "eval_steps_per_second": 221.365, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.147510980966325e-05, | |
| "loss": 0.4682, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_loss": 0.9000893235206604, | |
| "eval_runtime": 1.002, | |
| "eval_samples_per_second": 220.565, | |
| "eval_steps_per_second": 220.565, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.1383601756954612e-05, | |
| "loss": 0.4482, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_loss": 0.8948642015457153, | |
| "eval_runtime": 1.0012, | |
| "eval_samples_per_second": 220.734, | |
| "eval_steps_per_second": 220.734, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1292093704245975e-05, | |
| "loss": 0.5259, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_loss": 0.8957579135894775, | |
| "eval_runtime": 0.9997, | |
| "eval_samples_per_second": 221.075, | |
| "eval_steps_per_second": 221.075, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.1200585651537336e-05, | |
| "loss": 0.6456, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_loss": 0.8964543342590332, | |
| "eval_runtime": 1.0085, | |
| "eval_samples_per_second": 219.135, | |
| "eval_steps_per_second": 219.135, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.1109077598828698e-05, | |
| "loss": 0.6959, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_loss": 0.8979729413986206, | |
| "eval_runtime": 1.0048, | |
| "eval_samples_per_second": 219.942, | |
| "eval_steps_per_second": 219.942, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.1017569546120059e-05, | |
| "loss": 0.8039, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_loss": 0.8941496014595032, | |
| "eval_runtime": 1.0013, | |
| "eval_samples_per_second": 220.71, | |
| "eval_steps_per_second": 220.71, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.0926061493411422e-05, | |
| "loss": 0.8743, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_loss": 0.8881919980049133, | |
| "eval_runtime": 1.0131, | |
| "eval_samples_per_second": 218.151, | |
| "eval_steps_per_second": 218.151, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.0834553440702783e-05, | |
| "loss": 0.7932, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_loss": 0.8856217265129089, | |
| "eval_runtime": 1.0099, | |
| "eval_samples_per_second": 218.832, | |
| "eval_steps_per_second": 218.832, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.0743045387994145e-05, | |
| "loss": 0.551, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_loss": 0.8860388994216919, | |
| "eval_runtime": 1.0037, | |
| "eval_samples_per_second": 220.187, | |
| "eval_steps_per_second": 220.187, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.0651537335285506e-05, | |
| "loss": 1.1144, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_loss": 0.8845486640930176, | |
| "eval_runtime": 1.003, | |
| "eval_samples_per_second": 220.341, | |
| "eval_steps_per_second": 220.341, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.0560029282576867e-05, | |
| "loss": 0.8202, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_loss": 0.8838350176811218, | |
| "eval_runtime": 1.0079, | |
| "eval_samples_per_second": 219.264, | |
| "eval_steps_per_second": 219.264, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.0468521229868229e-05, | |
| "loss": 0.8216, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_loss": 0.8857699632644653, | |
| "eval_runtime": 1.0039, | |
| "eval_samples_per_second": 220.146, | |
| "eval_steps_per_second": 220.146, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.037701317715959e-05, | |
| "loss": 0.913, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_loss": 0.8844879269599915, | |
| "eval_runtime": 1.002, | |
| "eval_samples_per_second": 220.553, | |
| "eval_steps_per_second": 220.553, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.0285505124450951e-05, | |
| "loss": 0.8252, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_loss": 0.8838766813278198, | |
| "eval_runtime": 1.0052, | |
| "eval_samples_per_second": 219.85, | |
| "eval_steps_per_second": 219.85, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0193997071742314e-05, | |
| "loss": 1.0437, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "eval_loss": 0.8805793523788452, | |
| "eval_runtime": 1.0111, | |
| "eval_samples_per_second": 218.575, | |
| "eval_steps_per_second": 218.575, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 1.0102489019033676e-05, | |
| "loss": 0.4831, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "eval_loss": 0.8810590505599976, | |
| "eval_runtime": 0.9994, | |
| "eval_samples_per_second": 221.142, | |
| "eval_steps_per_second": 221.142, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.0010980966325037e-05, | |
| "loss": 0.9598, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_loss": 0.8848403096199036, | |
| "eval_runtime": 1.0118, | |
| "eval_samples_per_second": 218.421, | |
| "eval_steps_per_second": 218.421, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.919472913616399e-06, | |
| "loss": 1.1448, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_loss": 0.8866164088249207, | |
| "eval_runtime": 1.0002, | |
| "eval_samples_per_second": 220.949, | |
| "eval_steps_per_second": 220.949, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.827964860907762e-06, | |
| "loss": 0.9504, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_loss": 0.8847929835319519, | |
| "eval_runtime": 0.9987, | |
| "eval_samples_per_second": 221.287, | |
| "eval_steps_per_second": 221.287, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.736456808199123e-06, | |
| "loss": 1.1163, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_loss": 0.8818463087081909, | |
| "eval_runtime": 0.9994, | |
| "eval_samples_per_second": 221.139, | |
| "eval_steps_per_second": 221.139, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.644948755490484e-06, | |
| "loss": 0.9589, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_loss": 0.8802144527435303, | |
| "eval_runtime": 0.9993, | |
| "eval_samples_per_second": 221.146, | |
| "eval_steps_per_second": 221.146, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 9.553440702781846e-06, | |
| "loss": 1.1822, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_loss": 0.8798092007637024, | |
| "eval_runtime": 0.9959, | |
| "eval_samples_per_second": 221.904, | |
| "eval_steps_per_second": 221.904, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 9.461932650073207e-06, | |
| "loss": 0.8328, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_loss": 0.8807886838912964, | |
| "eval_runtime": 1.0014, | |
| "eval_samples_per_second": 220.693, | |
| "eval_steps_per_second": 220.693, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 9.370424597364568e-06, | |
| "loss": 0.6027, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_loss": 0.8822863698005676, | |
| "eval_runtime": 0.9987, | |
| "eval_samples_per_second": 221.278, | |
| "eval_steps_per_second": 221.278, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 9.27891654465593e-06, | |
| "loss": 1.019, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_loss": 0.8839148879051208, | |
| "eval_runtime": 0.9885, | |
| "eval_samples_per_second": 223.569, | |
| "eval_steps_per_second": 223.569, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 9.187408491947291e-06, | |
| "loss": 0.936, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_loss": 0.8859519362449646, | |
| "eval_runtime": 0.9866, | |
| "eval_samples_per_second": 224.003, | |
| "eval_steps_per_second": 224.003, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 9.095900439238654e-06, | |
| "loss": 0.8023, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_loss": 0.887075662612915, | |
| "eval_runtime": 0.9929, | |
| "eval_samples_per_second": 222.588, | |
| "eval_steps_per_second": 222.588, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 9.004392386530015e-06, | |
| "loss": 1.0101, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_loss": 0.8854756951332092, | |
| "eval_runtime": 0.9924, | |
| "eval_samples_per_second": 222.7, | |
| "eval_steps_per_second": 222.7, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.912884333821377e-06, | |
| "loss": 0.989, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_loss": 0.8838714361190796, | |
| "eval_runtime": 0.9964, | |
| "eval_samples_per_second": 221.788, | |
| "eval_steps_per_second": 221.788, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.821376281112738e-06, | |
| "loss": 0.6067, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_loss": 0.8805833458900452, | |
| "eval_runtime": 0.9886, | |
| "eval_samples_per_second": 223.54, | |
| "eval_steps_per_second": 223.54, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.729868228404101e-06, | |
| "loss": 0.4761, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_loss": 0.8800801634788513, | |
| "eval_runtime": 0.9919, | |
| "eval_samples_per_second": 222.812, | |
| "eval_steps_per_second": 222.812, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 8.638360175695463e-06, | |
| "loss": 0.9411, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_loss": 0.8769246935844421, | |
| "eval_runtime": 1.0075, | |
| "eval_samples_per_second": 219.354, | |
| "eval_steps_per_second": 219.354, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 8.546852122986824e-06, | |
| "loss": 0.4522, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_loss": 0.8762901425361633, | |
| "eval_runtime": 0.9968, | |
| "eval_samples_per_second": 221.703, | |
| "eval_steps_per_second": 221.703, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 8.455344070278185e-06, | |
| "loss": 0.8327, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_loss": 0.8773915767669678, | |
| "eval_runtime": 1.0124, | |
| "eval_samples_per_second": 218.303, | |
| "eval_steps_per_second": 218.303, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 8.363836017569547e-06, | |
| "loss": 1.0405, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_loss": 0.8796553611755371, | |
| "eval_runtime": 0.9917, | |
| "eval_samples_per_second": 222.857, | |
| "eval_steps_per_second": 222.857, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.272327964860908e-06, | |
| "loss": 0.9218, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_loss": 0.8763919472694397, | |
| "eval_runtime": 0.9931, | |
| "eval_samples_per_second": 222.546, | |
| "eval_steps_per_second": 222.546, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 8.18081991215227e-06, | |
| "loss": 0.6071, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_loss": 0.8748125433921814, | |
| "eval_runtime": 0.9943, | |
| "eval_samples_per_second": 222.264, | |
| "eval_steps_per_second": 222.264, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 8.08931185944363e-06, | |
| "loss": 0.885, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_loss": 0.8777368068695068, | |
| "eval_runtime": 1.0145, | |
| "eval_samples_per_second": 217.842, | |
| "eval_steps_per_second": 217.842, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.997803806734994e-06, | |
| "loss": 0.6058, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_loss": 0.8796027898788452, | |
| "eval_runtime": 0.9924, | |
| "eval_samples_per_second": 222.686, | |
| "eval_steps_per_second": 222.686, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.906295754026355e-06, | |
| "loss": 0.5881, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 0.8780397176742554, | |
| "eval_runtime": 0.9986, | |
| "eval_samples_per_second": 221.306, | |
| "eval_steps_per_second": 221.306, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.814787701317716e-06, | |
| "loss": 0.4406, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 0.8805845379829407, | |
| "eval_runtime": 0.9958, | |
| "eval_samples_per_second": 221.937, | |
| "eval_steps_per_second": 221.937, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.723279648609078e-06, | |
| "loss": 0.9626, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_loss": 0.8792752027511597, | |
| "eval_runtime": 0.9918, | |
| "eval_samples_per_second": 222.827, | |
| "eval_steps_per_second": 222.827, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.63177159590044e-06, | |
| "loss": 0.2071, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_loss": 0.8789242506027222, | |
| "eval_runtime": 1.0012, | |
| "eval_samples_per_second": 220.734, | |
| "eval_steps_per_second": 220.734, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 7.540263543191801e-06, | |
| "loss": 0.4227, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_loss": 0.8812814354896545, | |
| "eval_runtime": 0.9991, | |
| "eval_samples_per_second": 221.19, | |
| "eval_steps_per_second": 221.19, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 7.448755490483163e-06, | |
| "loss": 1.0321, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_loss": 0.8809959292411804, | |
| "eval_runtime": 1.0032, | |
| "eval_samples_per_second": 220.306, | |
| "eval_steps_per_second": 220.306, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.357247437774524e-06, | |
| "loss": 0.8141, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_loss": 0.881201446056366, | |
| "eval_runtime": 0.9965, | |
| "eval_samples_per_second": 221.787, | |
| "eval_steps_per_second": 221.787, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 7.265739385065887e-06, | |
| "loss": 0.9206, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_loss": 0.8817071914672852, | |
| "eval_runtime": 0.9992, | |
| "eval_samples_per_second": 221.184, | |
| "eval_steps_per_second": 221.184, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 7.174231332357248e-06, | |
| "loss": 0.7468, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_loss": 0.8844788670539856, | |
| "eval_runtime": 1.0113, | |
| "eval_samples_per_second": 218.524, | |
| "eval_steps_per_second": 218.524, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 7.082723279648609e-06, | |
| "loss": 0.4643, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_loss": 0.8853857517242432, | |
| "eval_runtime": 1.0014, | |
| "eval_samples_per_second": 220.687, | |
| "eval_steps_per_second": 220.687, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.99121522693997e-06, | |
| "loss": 0.3561, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_loss": 0.887630820274353, | |
| "eval_runtime": 0.9993, | |
| "eval_samples_per_second": 221.153, | |
| "eval_steps_per_second": 221.153, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.899707174231333e-06, | |
| "loss": 0.479, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_loss": 0.8909384608268738, | |
| "eval_runtime": 1.0006, | |
| "eval_samples_per_second": 220.877, | |
| "eval_steps_per_second": 220.877, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.808199121522695e-06, | |
| "loss": 0.7082, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "eval_loss": 0.8923120498657227, | |
| "eval_runtime": 1.0141, | |
| "eval_samples_per_second": 217.921, | |
| "eval_steps_per_second": 217.921, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.716691068814056e-06, | |
| "loss": 0.4282, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "eval_loss": 0.888241708278656, | |
| "eval_runtime": 1.0092, | |
| "eval_samples_per_second": 218.976, | |
| "eval_steps_per_second": 218.976, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.625183016105417e-06, | |
| "loss": 0.6731, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_loss": 0.885208249092102, | |
| "eval_runtime": 0.9993, | |
| "eval_samples_per_second": 221.163, | |
| "eval_steps_per_second": 221.163, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.53367496339678e-06, | |
| "loss": 0.9022, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_loss": 0.8838015198707581, | |
| "eval_runtime": 0.9971, | |
| "eval_samples_per_second": 221.649, | |
| "eval_steps_per_second": 221.649, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.442166910688141e-06, | |
| "loss": 0.4623, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "eval_loss": 0.8837681412696838, | |
| "eval_runtime": 1.0014, | |
| "eval_samples_per_second": 220.694, | |
| "eval_steps_per_second": 220.694, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 6.350658857979502e-06, | |
| "loss": 1.0055, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "eval_loss": 0.8835201263427734, | |
| "eval_runtime": 0.9915, | |
| "eval_samples_per_second": 222.894, | |
| "eval_steps_per_second": 222.894, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.259150805270864e-06, | |
| "loss": 0.6244, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_loss": 0.8837607502937317, | |
| "eval_runtime": 0.9904, | |
| "eval_samples_per_second": 223.145, | |
| "eval_steps_per_second": 223.145, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 6.167642752562226e-06, | |
| "loss": 0.4699, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_loss": 0.8821557760238647, | |
| "eval_runtime": 0.9948, | |
| "eval_samples_per_second": 222.165, | |
| "eval_steps_per_second": 222.165, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 6.076134699853587e-06, | |
| "loss": 0.5148, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_loss": 0.8820451498031616, | |
| "eval_runtime": 0.9925, | |
| "eval_samples_per_second": 222.661, | |
| "eval_steps_per_second": 222.661, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.9846266471449486e-06, | |
| "loss": 0.1828, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_loss": 0.8836950063705444, | |
| "eval_runtime": 0.9938, | |
| "eval_samples_per_second": 222.379, | |
| "eval_steps_per_second": 222.379, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.893118594436311e-06, | |
| "loss": 0.5911, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_loss": 0.8837562203407288, | |
| "eval_runtime": 0.9973, | |
| "eval_samples_per_second": 221.598, | |
| "eval_steps_per_second": 221.598, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.801610541727672e-06, | |
| "loss": 0.8317, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_loss": 0.8834384679794312, | |
| "eval_runtime": 0.9931, | |
| "eval_samples_per_second": 222.529, | |
| "eval_steps_per_second": 222.529, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.710102489019034e-06, | |
| "loss": 1.1709, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 0.8813214302062988, | |
| "eval_runtime": 0.9912, | |
| "eval_samples_per_second": 222.961, | |
| "eval_steps_per_second": 222.961, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.618594436310396e-06, | |
| "loss": 0.5461, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 0.8793975710868835, | |
| "eval_runtime": 0.9944, | |
| "eval_samples_per_second": 222.248, | |
| "eval_steps_per_second": 222.248, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.527086383601757e-06, | |
| "loss": 1.1389, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "eval_loss": 0.8754835724830627, | |
| "eval_runtime": 1.0097, | |
| "eval_samples_per_second": 218.873, | |
| "eval_steps_per_second": 218.873, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.435578330893118e-06, | |
| "loss": 0.7979, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "eval_loss": 0.8739802241325378, | |
| "eval_runtime": 0.9967, | |
| "eval_samples_per_second": 221.727, | |
| "eval_steps_per_second": 221.727, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5.3440702781844806e-06, | |
| "loss": 0.5774, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_loss": 0.8745384812355042, | |
| "eval_runtime": 1.0029, | |
| "eval_samples_per_second": 220.369, | |
| "eval_steps_per_second": 220.369, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 5.252562225475842e-06, | |
| "loss": 0.6384, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_loss": 0.8761541247367859, | |
| "eval_runtime": 1.0145, | |
| "eval_samples_per_second": 217.839, | |
| "eval_steps_per_second": 217.839, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.161054172767204e-06, | |
| "loss": 0.4634, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_loss": 0.8765793442726135, | |
| "eval_runtime": 1.0079, | |
| "eval_samples_per_second": 219.266, | |
| "eval_steps_per_second": 219.266, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 5.0695461200585655e-06, | |
| "loss": 0.5765, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_loss": 0.8775705099105835, | |
| "eval_runtime": 0.9953, | |
| "eval_samples_per_second": 222.041, | |
| "eval_steps_per_second": 222.041, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.978038067349927e-06, | |
| "loss": 0.7886, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "eval_loss": 0.8787454962730408, | |
| "eval_runtime": 0.9931, | |
| "eval_samples_per_second": 222.525, | |
| "eval_steps_per_second": 222.525, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.886530014641288e-06, | |
| "loss": 1.0283, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "eval_loss": 0.8788084983825684, | |
| "eval_runtime": 1.0049, | |
| "eval_samples_per_second": 219.914, | |
| "eval_steps_per_second": 219.914, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.79502196193265e-06, | |
| "loss": 0.5732, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "eval_loss": 0.8806591629981995, | |
| "eval_runtime": 0.9982, | |
| "eval_samples_per_second": 221.404, | |
| "eval_steps_per_second": 221.404, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.703513909224012e-06, | |
| "loss": 0.6771, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "eval_loss": 0.8825165629386902, | |
| "eval_runtime": 1.0077, | |
| "eval_samples_per_second": 219.302, | |
| "eval_steps_per_second": 219.302, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.612005856515374e-06, | |
| "loss": 0.7993, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_loss": 0.8819605112075806, | |
| "eval_runtime": 0.984, | |
| "eval_samples_per_second": 224.593, | |
| "eval_steps_per_second": 224.593, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.520497803806735e-06, | |
| "loss": 0.6496, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_loss": 0.8803231120109558, | |
| "eval_runtime": 1.006, | |
| "eval_samples_per_second": 219.681, | |
| "eval_steps_per_second": 219.681, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.428989751098097e-06, | |
| "loss": 0.3994, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_loss": 0.8783472180366516, | |
| "eval_runtime": 1.0, | |
| "eval_samples_per_second": 220.992, | |
| "eval_steps_per_second": 220.992, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.337481698389458e-06, | |
| "loss": 0.6052, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_loss": 0.8770036101341248, | |
| "eval_runtime": 0.9969, | |
| "eval_samples_per_second": 221.696, | |
| "eval_steps_per_second": 221.696, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.24597364568082e-06, | |
| "loss": 0.8144, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_loss": 0.8765595555305481, | |
| "eval_runtime": 1.0019, | |
| "eval_samples_per_second": 220.578, | |
| "eval_steps_per_second": 220.578, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 4.1544655929721815e-06, | |
| "loss": 0.6044, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_loss": 0.876719057559967, | |
| "eval_runtime": 0.9979, | |
| "eval_samples_per_second": 221.464, | |
| "eval_steps_per_second": 221.464, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 4.062957540263544e-06, | |
| "loss": 0.795, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_loss": 0.8779201507568359, | |
| "eval_runtime": 0.9993, | |
| "eval_samples_per_second": 221.145, | |
| "eval_steps_per_second": 221.145, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.971449487554905e-06, | |
| "loss": 0.8205, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_loss": 0.8794294595718384, | |
| "eval_runtime": 0.991, | |
| "eval_samples_per_second": 223.01, | |
| "eval_steps_per_second": 223.01, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.8799414348462664e-06, | |
| "loss": 0.624, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_loss": 0.8792880773544312, | |
| "eval_runtime": 0.9899, | |
| "eval_samples_per_second": 223.254, | |
| "eval_steps_per_second": 223.254, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.7884333821376282e-06, | |
| "loss": 0.7718, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_loss": 0.8786001205444336, | |
| "eval_runtime": 0.9931, | |
| "eval_samples_per_second": 222.54, | |
| "eval_steps_per_second": 222.54, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.69692532942899e-06, | |
| "loss": 0.5014, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_loss": 0.8761178851127625, | |
| "eval_runtime": 0.9908, | |
| "eval_samples_per_second": 223.052, | |
| "eval_steps_per_second": 223.052, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.6054172767203514e-06, | |
| "loss": 0.7651, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_loss": 0.875769317150116, | |
| "eval_runtime": 0.996, | |
| "eval_samples_per_second": 221.898, | |
| "eval_steps_per_second": 221.898, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.513909224011713e-06, | |
| "loss": 0.6789, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_loss": 0.8760455846786499, | |
| "eval_runtime": 0.9903, | |
| "eval_samples_per_second": 223.16, | |
| "eval_steps_per_second": 223.16, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.4224011713030745e-06, | |
| "loss": 1.2432, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_loss": 0.875510573387146, | |
| "eval_runtime": 0.9949, | |
| "eval_samples_per_second": 222.126, | |
| "eval_steps_per_second": 222.126, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.3308931185944367e-06, | |
| "loss": 0.7199, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 0.8750349283218384, | |
| "eval_runtime": 0.9913, | |
| "eval_samples_per_second": 222.943, | |
| "eval_steps_per_second": 222.943, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.239385065885798e-06, | |
| "loss": 0.6854, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 0.8758002519607544, | |
| "eval_runtime": 1.0124, | |
| "eval_samples_per_second": 218.284, | |
| "eval_steps_per_second": 218.284, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.14787701317716e-06, | |
| "loss": 0.6829, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "eval_loss": 0.8758509159088135, | |
| "eval_runtime": 0.9927, | |
| "eval_samples_per_second": 222.616, | |
| "eval_steps_per_second": 222.616, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.0563689604685216e-06, | |
| "loss": 0.6762, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "eval_loss": 0.8760889768600464, | |
| "eval_runtime": 0.9931, | |
| "eval_samples_per_second": 222.527, | |
| "eval_steps_per_second": 222.527, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.964860907759883e-06, | |
| "loss": 0.5172, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_loss": 0.8758929371833801, | |
| "eval_runtime": 0.997, | |
| "eval_samples_per_second": 221.667, | |
| "eval_steps_per_second": 221.667, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.8733528550512447e-06, | |
| "loss": 0.507, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_loss": 0.8757414817810059, | |
| "eval_runtime": 0.9947, | |
| "eval_samples_per_second": 222.185, | |
| "eval_steps_per_second": 222.185, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.7818448023426065e-06, | |
| "loss": 0.6824, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_loss": 0.8757789731025696, | |
| "eval_runtime": 1.0115, | |
| "eval_samples_per_second": 218.483, | |
| "eval_steps_per_second": 218.483, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.690336749633968e-06, | |
| "loss": 0.823, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_loss": 0.8769077062606812, | |
| "eval_runtime": 0.9955, | |
| "eval_samples_per_second": 221.997, | |
| "eval_steps_per_second": 221.997, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.5988286969253296e-06, | |
| "loss": 0.791, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_loss": 0.8774532675743103, | |
| "eval_runtime": 0.9897, | |
| "eval_samples_per_second": 223.305, | |
| "eval_steps_per_second": 223.305, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.5073206442166914e-06, | |
| "loss": 1.2633, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_loss": 0.8784446120262146, | |
| "eval_runtime": 0.9938, | |
| "eval_samples_per_second": 222.372, | |
| "eval_steps_per_second": 222.372, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.4158125915080528e-06, | |
| "loss": 0.3029, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_loss": 0.879937469959259, | |
| "eval_runtime": 1.0007, | |
| "eval_samples_per_second": 220.855, | |
| "eval_steps_per_second": 220.855, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.3243045387994145e-06, | |
| "loss": 0.954, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_loss": 0.8803999423980713, | |
| "eval_runtime": 0.9959, | |
| "eval_samples_per_second": 221.902, | |
| "eval_steps_per_second": 221.902, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.2327964860907763e-06, | |
| "loss": 0.8125, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_loss": 0.8793956637382507, | |
| "eval_runtime": 0.9927, | |
| "eval_samples_per_second": 222.621, | |
| "eval_steps_per_second": 222.621, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.1412884333821377e-06, | |
| "loss": 1.0554, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_loss": 0.8773664236068726, | |
| "eval_runtime": 0.9937, | |
| "eval_samples_per_second": 222.405, | |
| "eval_steps_per_second": 222.405, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.0497803806734994e-06, | |
| "loss": 0.2374, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_loss": 0.8770005106925964, | |
| "eval_runtime": 0.9923, | |
| "eval_samples_per_second": 222.709, | |
| "eval_steps_per_second": 222.709, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.958272327964861e-06, | |
| "loss": 0.5341, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_loss": 0.8772048354148865, | |
| "eval_runtime": 1.0041, | |
| "eval_samples_per_second": 220.098, | |
| "eval_steps_per_second": 220.098, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.8667642752562228e-06, | |
| "loss": 0.8124, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_loss": 0.8775603771209717, | |
| "eval_runtime": 1.0009, | |
| "eval_samples_per_second": 220.801, | |
| "eval_steps_per_second": 220.801, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.7752562225475843e-06, | |
| "loss": 0.8315, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_loss": 0.8778148889541626, | |
| "eval_runtime": 0.9966, | |
| "eval_samples_per_second": 221.751, | |
| "eval_steps_per_second": 221.751, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.683748169838946e-06, | |
| "loss": 0.6822, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 0.8776082396507263, | |
| "eval_runtime": 0.9918, | |
| "eval_samples_per_second": 222.82, | |
| "eval_steps_per_second": 222.82, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.5922401171303077e-06, | |
| "loss": 0.8981, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 0.8771771192550659, | |
| "eval_runtime": 0.9943, | |
| "eval_samples_per_second": 222.263, | |
| "eval_steps_per_second": 222.263, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.5007320644216692e-06, | |
| "loss": 0.7869, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_loss": 0.8763890266418457, | |
| "eval_runtime": 1.0014, | |
| "eval_samples_per_second": 220.697, | |
| "eval_steps_per_second": 220.697, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.4092240117130308e-06, | |
| "loss": 0.5915, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_loss": 0.8759109973907471, | |
| "eval_runtime": 0.9882, | |
| "eval_samples_per_second": 223.635, | |
| "eval_steps_per_second": 223.635, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.3177159590043926e-06, | |
| "loss": 0.7693, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_loss": 0.875470757484436, | |
| "eval_runtime": 1.0011, | |
| "eval_samples_per_second": 220.764, | |
| "eval_steps_per_second": 220.764, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.2262079062957541e-06, | |
| "loss": 1.1183, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_loss": 0.8752223253250122, | |
| "eval_runtime": 0.9934, | |
| "eval_samples_per_second": 222.46, | |
| "eval_steps_per_second": 222.46, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.1346998535871157e-06, | |
| "loss": 1.0614, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_loss": 0.8747947216033936, | |
| "eval_runtime": 0.9864, | |
| "eval_samples_per_second": 224.052, | |
| "eval_steps_per_second": 224.052, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0431918008784775e-06, | |
| "loss": 0.6841, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_loss": 0.8746020197868347, | |
| "eval_runtime": 1.0047, | |
| "eval_samples_per_second": 219.973, | |
| "eval_steps_per_second": 219.973, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 9.51683748169839e-07, | |
| "loss": 1.0103, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_loss": 0.8747249245643616, | |
| "eval_runtime": 0.992, | |
| "eval_samples_per_second": 222.792, | |
| "eval_steps_per_second": 222.792, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.601756954612006e-07, | |
| "loss": 0.9452, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_loss": 0.8745383620262146, | |
| "eval_runtime": 1.0006, | |
| "eval_samples_per_second": 220.866, | |
| "eval_steps_per_second": 220.866, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.686676427525623e-07, | |
| "loss": 1.1954, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_loss": 0.8742164373397827, | |
| "eval_runtime": 0.9975, | |
| "eval_samples_per_second": 221.552, | |
| "eval_steps_per_second": 221.552, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.77159590043924e-07, | |
| "loss": 0.4939, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_loss": 0.8739895820617676, | |
| "eval_runtime": 0.9941, | |
| "eval_samples_per_second": 222.321, | |
| "eval_steps_per_second": 222.321, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.856515373352855e-07, | |
| "loss": 0.9654, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_loss": 0.8739458322525024, | |
| "eval_runtime": 0.9947, | |
| "eval_samples_per_second": 222.187, | |
| "eval_steps_per_second": 222.187, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.941434846266472e-07, | |
| "loss": 0.6801, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_loss": 0.874081015586853, | |
| "eval_runtime": 0.9944, | |
| "eval_samples_per_second": 222.236, | |
| "eval_steps_per_second": 222.236, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 4.026354319180088e-07, | |
| "loss": 0.9845, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_loss": 0.8740487098693848, | |
| "eval_runtime": 0.9968, | |
| "eval_samples_per_second": 221.703, | |
| "eval_steps_per_second": 221.703, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.1112737920937043e-07, | |
| "loss": 0.7604, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_loss": 0.8739599585533142, | |
| "eval_runtime": 0.9951, | |
| "eval_samples_per_second": 222.085, | |
| "eval_steps_per_second": 222.085, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.1961932650073207e-07, | |
| "loss": 0.8234, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_loss": 0.8739203810691833, | |
| "eval_runtime": 1.0052, | |
| "eval_samples_per_second": 219.853, | |
| "eval_steps_per_second": 219.853, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.2811127379209371e-07, | |
| "loss": 1.0262, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_loss": 0.8738732933998108, | |
| "eval_runtime": 0.9956, | |
| "eval_samples_per_second": 221.969, | |
| "eval_steps_per_second": 221.969, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 3.6603221083455345e-08, | |
| "loss": 0.8696, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.8738669753074646, | |
| "eval_runtime": 0.9974, | |
| "eval_samples_per_second": 221.579, | |
| "eval_steps_per_second": 221.579, | |
| "step": 5960 | |
| } | |
| ], | |
| "max_steps": 5964, | |
| "num_train_epochs": 3, | |
| "total_flos": 194666079191040.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |