| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 76.3076923076923, |
| "eval_steps": 500, |
| "global_step": 9920, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.038461538461538464, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9980000000000006e-05, |
| "loss": 1.7957, |
| "mean_token_accuracy": 0.6374512881040573, |
| "num_tokens": 17597.0, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.07692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9955e-05, |
| "loss": 1.7508, |
| "mean_token_accuracy": 0.6454170644283295, |
| "num_tokens": 35641.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.11538461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9930000000000005e-05, |
| "loss": 1.6757, |
| "mean_token_accuracy": 0.6618378311395645, |
| "num_tokens": 54379.0, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.15384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9905000000000004e-05, |
| "loss": 1.7597, |
| "mean_token_accuracy": 0.6484396398067475, |
| "num_tokens": 71860.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.19230769230769232, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9880000000000004e-05, |
| "loss": 1.669, |
| "mean_token_accuracy": 0.6693991690874099, |
| "num_tokens": 90064.0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.23076923076923078, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9855e-05, |
| "loss": 1.7247, |
| "mean_token_accuracy": 0.6533850133419037, |
| "num_tokens": 108700.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.983e-05, |
| "loss": 1.7422, |
| "mean_token_accuracy": 0.6492581188678741, |
| "num_tokens": 126746.0, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9805e-05, |
| "loss": 1.8267, |
| "mean_token_accuracy": 0.6348982483148575, |
| "num_tokens": 144478.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.34615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.978e-05, |
| "loss": 1.7499, |
| "mean_token_accuracy": 0.6513226598501205, |
| "num_tokens": 163267.0, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9755e-05, |
| "loss": 1.8227, |
| "mean_token_accuracy": 0.6310899078845977, |
| "num_tokens": 181224.0, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.973000000000001e-05, |
| "loss": 1.7641, |
| "mean_token_accuracy": 0.6496782422065734, |
| "num_tokens": 199090.0, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.46153846153846156, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9705e-05, |
| "loss": 1.7405, |
| "mean_token_accuracy": 0.6453521817922592, |
| "num_tokens": 217701.0, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9680000000000005e-05, |
| "loss": 1.7214, |
| "mean_token_accuracy": 0.658767506480217, |
| "num_tokens": 236196.0, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.5384615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9655000000000005e-05, |
| "loss": 1.7138, |
| "mean_token_accuracy": 0.6556680232286454, |
| "num_tokens": 254323.0, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5769230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9630000000000004e-05, |
| "loss": 1.6761, |
| "mean_token_accuracy": 0.6626079142093658, |
| "num_tokens": 273788.0, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9605000000000004e-05, |
| "loss": 1.7992, |
| "mean_token_accuracy": 0.6399475276470185, |
| "num_tokens": 291226.0, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6538461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 4.958e-05, |
| "loss": 1.7466, |
| "mean_token_accuracy": 0.645823523402214, |
| "num_tokens": 309131.0, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.6923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9555e-05, |
| "loss": 1.6855, |
| "mean_token_accuracy": 0.6573584616184235, |
| "num_tokens": 328164.0, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.7307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 4.953e-05, |
| "loss": 1.7877, |
| "mean_token_accuracy": 0.636717626452446, |
| "num_tokens": 346359.0, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9505e-05, |
| "loss": 1.6851, |
| "mean_token_accuracy": 0.6630265355110169, |
| "num_tokens": 364985.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.948000000000001e-05, |
| "loss": 1.7477, |
| "mean_token_accuracy": 0.6559244930744171, |
| "num_tokens": 382893.0, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.8461538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9455e-05, |
| "loss": 1.7006, |
| "mean_token_accuracy": 0.6579539209604264, |
| "num_tokens": 401596.0, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9430000000000006e-05, |
| "loss": 1.7604, |
| "mean_token_accuracy": 0.6474329262971878, |
| "num_tokens": 419595.0, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9405e-05, |
| "loss": 1.761, |
| "mean_token_accuracy": 0.644348555803299, |
| "num_tokens": 437633.0, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.9615384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9380000000000005e-05, |
| "loss": 1.7521, |
| "mean_token_accuracy": 0.6455009877681732, |
| "num_tokens": 455648.0, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9355000000000004e-05, |
| "loss": 1.7483, |
| "mean_token_accuracy": 0.6529789954423905, |
| "num_tokens": 474105.0, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.0384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9330000000000004e-05, |
| "loss": 1.7308, |
| "mean_token_accuracy": 0.6532562792301178, |
| "num_tokens": 492861.0, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.0769230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9305e-05, |
| "loss": 1.7815, |
| "mean_token_accuracy": 0.6435200899839402, |
| "num_tokens": 510988.0, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.1153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.928e-05, |
| "loss": 1.8093, |
| "mean_token_accuracy": 0.6363381177186966, |
| "num_tokens": 528455.0, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.1538461538461537, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9255e-05, |
| "loss": 1.7437, |
| "mean_token_accuracy": 0.651939743757248, |
| "num_tokens": 546439.0, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.1923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.923e-05, |
| "loss": 1.7413, |
| "mean_token_accuracy": 0.6526101022958756, |
| "num_tokens": 564750.0, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.2307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9205e-05, |
| "loss": 1.7649, |
| "mean_token_accuracy": 0.6482356518507004, |
| "num_tokens": 582901.0, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.2692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.918000000000001e-05, |
| "loss": 1.7267, |
| "mean_token_accuracy": 0.6545612186193466, |
| "num_tokens": 601222.0, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.3076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9155e-05, |
| "loss": 1.7107, |
| "mean_token_accuracy": 0.6566696882247924, |
| "num_tokens": 619157.0, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.3461538461538463, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9130000000000006e-05, |
| "loss": 1.6747, |
| "mean_token_accuracy": 0.6630686283111572, |
| "num_tokens": 638896.0, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.3846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9105e-05, |
| "loss": 1.7635, |
| "mean_token_accuracy": 0.6433518409729004, |
| "num_tokens": 656329.0, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.4230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9080000000000004e-05, |
| "loss": 1.759, |
| "mean_token_accuracy": 0.6476027637720108, |
| "num_tokens": 674377.0, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.4615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9055000000000004e-05, |
| "loss": 1.7288, |
| "mean_token_accuracy": 0.6561032950878143, |
| "num_tokens": 692477.0, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.903e-05, |
| "loss": 1.6869, |
| "mean_token_accuracy": 0.6576298862695694, |
| "num_tokens": 711509.0, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9005e-05, |
| "loss": 1.8134, |
| "mean_token_accuracy": 0.6296403974294662, |
| "num_tokens": 728738.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.5769230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.898e-05, |
| "loss": 1.7075, |
| "mean_token_accuracy": 0.6586553543806076, |
| "num_tokens": 747160.0, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.6153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8955e-05, |
| "loss": 1.7835, |
| "mean_token_accuracy": 0.6415642440319062, |
| "num_tokens": 765489.0, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.6538461538461537, |
| "grad_norm": 0.0, |
| "learning_rate": 4.893e-05, |
| "loss": 1.6952, |
| "mean_token_accuracy": 0.6612388670444489, |
| "num_tokens": 783973.0, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.6923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8905e-05, |
| "loss": 1.7186, |
| "mean_token_accuracy": 0.6559958964586258, |
| "num_tokens": 802295.0, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.7307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8880000000000006e-05, |
| "loss": 1.7534, |
| "mean_token_accuracy": 0.647523045539856, |
| "num_tokens": 820482.0, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.7692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8855e-05, |
| "loss": 1.7675, |
| "mean_token_accuracy": 0.6462311297655106, |
| "num_tokens": 838437.0, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.8076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8830000000000005e-05, |
| "loss": 1.753, |
| "mean_token_accuracy": 0.6491482555866241, |
| "num_tokens": 856808.0, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.8461538461538463, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8805e-05, |
| "loss": 1.756, |
| "mean_token_accuracy": 0.6505926042795181, |
| "num_tokens": 874848.0, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.8846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8780000000000004e-05, |
| "loss": 1.7598, |
| "mean_token_accuracy": 0.6512235313653946, |
| "num_tokens": 893014.0, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8755e-05, |
| "loss": 1.748, |
| "mean_token_accuracy": 0.6448923200368881, |
| "num_tokens": 911604.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.9615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 4.873e-05, |
| "loss": 1.7326, |
| "mean_token_accuracy": 0.6498657464981079, |
| "num_tokens": 929944.0, |
| "step": 255 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8705e-05, |
| "loss": 1.6613, |
| "mean_token_accuracy": 0.6733784437179565, |
| "num_tokens": 948210.0, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.0384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 4.868e-05, |
| "loss": 1.74, |
| "mean_token_accuracy": 0.6532588005065918, |
| "num_tokens": 966435.0, |
| "step": 265 |
| }, |
| { |
| "epoch": 2.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8655e-05, |
| "loss": 1.6645, |
| "mean_token_accuracy": 0.6623793184757233, |
| "num_tokens": 985701.0, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.1153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.863e-05, |
| "loss": 1.7274, |
| "mean_token_accuracy": 0.6545837461948395, |
| "num_tokens": 1004078.0, |
| "step": 275 |
| }, |
| { |
| "epoch": 2.1538461538461537, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8605e-05, |
| "loss": 1.7044, |
| "mean_token_accuracy": 0.6571849673986435, |
| "num_tokens": 1023219.0, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.1923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8580000000000006e-05, |
| "loss": 1.7217, |
| "mean_token_accuracy": 0.6511914372444153, |
| "num_tokens": 1041512.0, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8555e-05, |
| "loss": 1.7571, |
| "mean_token_accuracy": 0.6483366042375565, |
| "num_tokens": 1059633.0, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.269230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8530000000000005e-05, |
| "loss": 1.6675, |
| "mean_token_accuracy": 0.6683109492063523, |
| "num_tokens": 1078768.0, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.3076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8505e-05, |
| "loss": 1.7856, |
| "mean_token_accuracy": 0.6457105249166488, |
| "num_tokens": 1096433.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.3461538461538463, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8480000000000003e-05, |
| "loss": 1.7599, |
| "mean_token_accuracy": 0.6529647082090377, |
| "num_tokens": 1114140.0, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.3846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8455e-05, |
| "loss": 1.7591, |
| "mean_token_accuracy": 0.6483295440673829, |
| "num_tokens": 1132366.0, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.843e-05, |
| "loss": 1.6774, |
| "mean_token_accuracy": 0.660743135213852, |
| "num_tokens": 1151832.0, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.4615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8405e-05, |
| "loss": 1.8422, |
| "mean_token_accuracy": 0.6320727407932282, |
| "num_tokens": 1168795.0, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.838e-05, |
| "loss": 1.7624, |
| "mean_token_accuracy": 0.6471054494380951, |
| "num_tokens": 1186567.0, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.5384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8355e-05, |
| "loss": 1.7518, |
| "mean_token_accuracy": 0.6520386338233948, |
| "num_tokens": 1204773.0, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.5769230769230766, |
| "grad_norm": 0.0, |
| "learning_rate": 4.833e-05, |
| "loss": 1.8146, |
| "mean_token_accuracy": 0.63519766330719, |
| "num_tokens": 1222245.0, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.6153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8305e-05, |
| "loss": 1.7304, |
| "mean_token_accuracy": 0.6472829192876816, |
| "num_tokens": 1240309.0, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.6538461538461537, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8280000000000005e-05, |
| "loss": 1.7343, |
| "mean_token_accuracy": 0.6456574499607086, |
| "num_tokens": 1258875.0, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.6923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8255e-05, |
| "loss": 1.7172, |
| "mean_token_accuracy": 0.6524255007505417, |
| "num_tokens": 1277367.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.730769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8230000000000004e-05, |
| "loss": 1.6652, |
| "mean_token_accuracy": 0.6617098182439805, |
| "num_tokens": 1296866.0, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.769230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8205000000000003e-05, |
| "loss": 1.6834, |
| "mean_token_accuracy": 0.662646809220314, |
| "num_tokens": 1315690.0, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.8076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.818e-05, |
| "loss": 1.7963, |
| "mean_token_accuracy": 0.6397953987121582, |
| "num_tokens": 1333332.0, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.8461538461538463, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8155e-05, |
| "loss": 1.7771, |
| "mean_token_accuracy": 0.6447630435228348, |
| "num_tokens": 1351095.0, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.8846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.813e-05, |
| "loss": 1.7453, |
| "mean_token_accuracy": 0.6486737489700317, |
| "num_tokens": 1369302.0, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.9230769230769234, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8105e-05, |
| "loss": 1.7446, |
| "mean_token_accuracy": 0.6489187270402909, |
| "num_tokens": 1387456.0, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.9615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 4.808e-05, |
| "loss": 1.7578, |
| "mean_token_accuracy": 0.6475874185562134, |
| "num_tokens": 1404820.0, |
| "step": 385 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8055e-05, |
| "loss": 1.8244, |
| "mean_token_accuracy": 0.636838635802269, |
| "num_tokens": 1422315.0, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.0384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8030000000000006e-05, |
| "loss": 1.7325, |
| "mean_token_accuracy": 0.6527264744043351, |
| "num_tokens": 1440658.0, |
| "step": 395 |
| }, |
| { |
| "epoch": 3.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8005e-05, |
| "loss": 1.7647, |
| "mean_token_accuracy": 0.6484371930360794, |
| "num_tokens": 1458704.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.1153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7980000000000005e-05, |
| "loss": 1.7741, |
| "mean_token_accuracy": 0.6491596013307571, |
| "num_tokens": 1476178.0, |
| "step": 405 |
| }, |
| { |
| "epoch": 3.1538461538461537, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7955e-05, |
| "loss": 1.7011, |
| "mean_token_accuracy": 0.6573891878128052, |
| "num_tokens": 1494800.0, |
| "step": 410 |
| }, |
| { |
| "epoch": 3.1923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7930000000000004e-05, |
| "loss": 1.8141, |
| "mean_token_accuracy": 0.6418175488710404, |
| "num_tokens": 1512686.0, |
| "step": 415 |
| }, |
| { |
| "epoch": 3.230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7905e-05, |
| "loss": 1.7422, |
| "mean_token_accuracy": 0.6511821269989013, |
| "num_tokens": 1530549.0, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.269230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.788e-05, |
| "loss": 1.7655, |
| "mean_token_accuracy": 0.6506276488304138, |
| "num_tokens": 1548502.0, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.3076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7855e-05, |
| "loss": 1.6594, |
| "mean_token_accuracy": 0.6622169464826584, |
| "num_tokens": 1568042.0, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.3461538461538463, |
| "grad_norm": 0.0, |
| "learning_rate": 4.783e-05, |
| "loss": 1.6894, |
| "mean_token_accuracy": 0.6561817824840546, |
| "num_tokens": 1586468.0, |
| "step": 435 |
| }, |
| { |
| "epoch": 3.3846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7805e-05, |
| "loss": 1.6675, |
| "mean_token_accuracy": 0.6636441439390183, |
| "num_tokens": 1605918.0, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.778e-05, |
| "loss": 1.7238, |
| "mean_token_accuracy": 0.6505128175020218, |
| "num_tokens": 1624275.0, |
| "step": 445 |
| }, |
| { |
| "epoch": 3.4615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7755e-05, |
| "loss": 1.684, |
| "mean_token_accuracy": 0.6628347337245941, |
| "num_tokens": 1642694.0, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7730000000000005e-05, |
| "loss": 1.7789, |
| "mean_token_accuracy": 0.6409151911735534, |
| "num_tokens": 1660710.0, |
| "step": 455 |
| }, |
| { |
| "epoch": 3.5384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7705e-05, |
| "loss": 1.8119, |
| "mean_token_accuracy": 0.6357834041118622, |
| "num_tokens": 1678569.0, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.5769230769230766, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7680000000000004e-05, |
| "loss": 1.7048, |
| "mean_token_accuracy": 0.6561109334230423, |
| "num_tokens": 1697511.0, |
| "step": 465 |
| }, |
| { |
| "epoch": 3.6153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7655e-05, |
| "loss": 1.6402, |
| "mean_token_accuracy": 0.6671018153429031, |
| "num_tokens": 1717113.0, |
| "step": 470 |
| }, |
| { |
| "epoch": 3.6538461538461537, |
| "grad_norm": 0.0, |
| "learning_rate": 4.763e-05, |
| "loss": 1.7797, |
| "mean_token_accuracy": 0.6456864833831787, |
| "num_tokens": 1735083.0, |
| "step": 475 |
| }, |
| { |
| "epoch": 3.6923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7605e-05, |
| "loss": 1.83, |
| "mean_token_accuracy": 0.6383150666952133, |
| "num_tokens": 1752374.0, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.730769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.758e-05, |
| "loss": 1.7517, |
| "mean_token_accuracy": 0.6486301571130753, |
| "num_tokens": 1770507.0, |
| "step": 485 |
| }, |
| { |
| "epoch": 3.769230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7555e-05, |
| "loss": 1.786, |
| "mean_token_accuracy": 0.6404696077108383, |
| "num_tokens": 1788454.0, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.8076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.753e-05, |
| "loss": 1.7256, |
| "mean_token_accuracy": 0.6483594387769699, |
| "num_tokens": 1807208.0, |
| "step": 495 |
| }, |
| { |
| "epoch": 3.8461538461538463, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7505e-05, |
| "loss": 1.6791, |
| "mean_token_accuracy": 0.6660710662603379, |
| "num_tokens": 1826017.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.8846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.748e-05, |
| "loss": 1.7516, |
| "mean_token_accuracy": 0.6497161477804184, |
| "num_tokens": 1843251.0, |
| "step": 505 |
| }, |
| { |
| "epoch": 3.9230769230769234, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7455000000000006e-05, |
| "loss": 1.7586, |
| "mean_token_accuracy": 0.6480034649372101, |
| "num_tokens": 1861334.0, |
| "step": 510 |
| }, |
| { |
| "epoch": 3.9615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7430000000000005e-05, |
| "loss": 1.7783, |
| "mean_token_accuracy": 0.6420908480882644, |
| "num_tokens": 1879077.0, |
| "step": 515 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7405000000000004e-05, |
| "loss": 1.7984, |
| "mean_token_accuracy": 0.6410065919160843, |
| "num_tokens": 1896420.0, |
| "step": 520 |
| }, |
| { |
| "epoch": 4.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7380000000000004e-05, |
| "loss": 1.7327, |
| "mean_token_accuracy": 0.651974669098854, |
| "num_tokens": 1915163.0, |
| "step": 525 |
| }, |
| { |
| "epoch": 4.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7355e-05, |
| "loss": 1.8077, |
| "mean_token_accuracy": 0.6375770300626755, |
| "num_tokens": 1932820.0, |
| "step": 530 |
| }, |
| { |
| "epoch": 4.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.733e-05, |
| "loss": 1.7939, |
| "mean_token_accuracy": 0.635331529378891, |
| "num_tokens": 1951154.0, |
| "step": 535 |
| }, |
| { |
| "epoch": 4.153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7305e-05, |
| "loss": 1.7697, |
| "mean_token_accuracy": 0.6442851930856704, |
| "num_tokens": 1968989.0, |
| "step": 540 |
| }, |
| { |
| "epoch": 4.1923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.728e-05, |
| "loss": 1.7535, |
| "mean_token_accuracy": 0.6471328794956207, |
| "num_tokens": 1987280.0, |
| "step": 545 |
| }, |
| { |
| "epoch": 4.230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.725500000000001e-05, |
| "loss": 1.7691, |
| "mean_token_accuracy": 0.6456631302833558, |
| "num_tokens": 2005118.0, |
| "step": 550 |
| }, |
| { |
| "epoch": 4.269230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.723e-05, |
| "loss": 1.6754, |
| "mean_token_accuracy": 0.6634574323892594, |
| "num_tokens": 2024373.0, |
| "step": 555 |
| }, |
| { |
| "epoch": 4.3076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7205000000000006e-05, |
| "loss": 1.7345, |
| "mean_token_accuracy": 0.6540951490402221, |
| "num_tokens": 2042349.0, |
| "step": 560 |
| }, |
| { |
| "epoch": 4.346153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.718e-05, |
| "loss": 1.7127, |
| "mean_token_accuracy": 0.6528139978647232, |
| "num_tokens": 2060824.0, |
| "step": 565 |
| }, |
| { |
| "epoch": 4.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7155000000000005e-05, |
| "loss": 1.7282, |
| "mean_token_accuracy": 0.6538840979337692, |
| "num_tokens": 2079449.0, |
| "step": 570 |
| }, |
| { |
| "epoch": 4.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7130000000000004e-05, |
| "loss": 1.719, |
| "mean_token_accuracy": 0.6608097106218338, |
| "num_tokens": 2097502.0, |
| "step": 575 |
| }, |
| { |
| "epoch": 4.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7105000000000004e-05, |
| "loss": 1.6963, |
| "mean_token_accuracy": 0.6655160158872604, |
| "num_tokens": 2115911.0, |
| "step": 580 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.708e-05, |
| "loss": 1.7936, |
| "mean_token_accuracy": 0.635148537158966, |
| "num_tokens": 2133543.0, |
| "step": 585 |
| }, |
| { |
| "epoch": 4.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7055e-05, |
| "loss": 1.7272, |
| "mean_token_accuracy": 0.6549283742904664, |
| "num_tokens": 2151767.0, |
| "step": 590 |
| }, |
| { |
| "epoch": 4.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.703e-05, |
| "loss": 1.7624, |
| "mean_token_accuracy": 0.6483795702457428, |
| "num_tokens": 2169614.0, |
| "step": 595 |
| }, |
| { |
| "epoch": 4.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7005e-05, |
| "loss": 1.7502, |
| "mean_token_accuracy": 0.6509936511516571, |
| "num_tokens": 2187439.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.653846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.698e-05, |
| "loss": 1.712, |
| "mean_token_accuracy": 0.6584094524383545, |
| "num_tokens": 2206096.0, |
| "step": 605 |
| }, |
| { |
| "epoch": 4.6923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.695500000000001e-05, |
| "loss": 1.6726, |
| "mean_token_accuracy": 0.6688409745693207, |
| "num_tokens": 2225085.0, |
| "step": 610 |
| }, |
| { |
| "epoch": 4.730769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.693e-05, |
| "loss": 1.6953, |
| "mean_token_accuracy": 0.6592785984277725, |
| "num_tokens": 2243907.0, |
| "step": 615 |
| }, |
| { |
| "epoch": 4.769230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6905000000000006e-05, |
| "loss": 1.7659, |
| "mean_token_accuracy": 0.6406325221061706, |
| "num_tokens": 2261272.0, |
| "step": 620 |
| }, |
| { |
| "epoch": 4.8076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.688e-05, |
| "loss": 1.7167, |
| "mean_token_accuracy": 0.6534736841917038, |
| "num_tokens": 2279807.0, |
| "step": 625 |
| }, |
| { |
| "epoch": 4.846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6855000000000005e-05, |
| "loss": 1.8479, |
| "mean_token_accuracy": 0.6247967123985291, |
| "num_tokens": 2297161.0, |
| "step": 630 |
| }, |
| { |
| "epoch": 4.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6830000000000004e-05, |
| "loss": 1.8183, |
| "mean_token_accuracy": 0.6385220259428024, |
| "num_tokens": 2314811.0, |
| "step": 635 |
| }, |
| { |
| "epoch": 4.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6805e-05, |
| "loss": 1.6373, |
| "mean_token_accuracy": 0.6711235165596008, |
| "num_tokens": 2333816.0, |
| "step": 640 |
| }, |
| { |
| "epoch": 4.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.678e-05, |
| "loss": 1.7512, |
| "mean_token_accuracy": 0.647821244597435, |
| "num_tokens": 2352150.0, |
| "step": 645 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6755e-05, |
| "loss": 1.7376, |
| "mean_token_accuracy": 0.6522481203079223, |
| "num_tokens": 2370525.0, |
| "step": 650 |
| }, |
| { |
| "epoch": 5.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.673e-05, |
| "loss": 1.7032, |
| "mean_token_accuracy": 0.66204434633255, |
| "num_tokens": 2388858.0, |
| "step": 655 |
| }, |
| { |
| "epoch": 5.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.670500000000001e-05, |
| "loss": 1.6985, |
| "mean_token_accuracy": 0.6554162800312042, |
| "num_tokens": 2408143.0, |
| "step": 660 |
| }, |
| { |
| "epoch": 5.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.668e-05, |
| "loss": 1.7786, |
| "mean_token_accuracy": 0.645899361371994, |
| "num_tokens": 2425600.0, |
| "step": 665 |
| }, |
| { |
| "epoch": 5.153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6655000000000006e-05, |
| "loss": 1.7416, |
| "mean_token_accuracy": 0.6496942937374115, |
| "num_tokens": 2443792.0, |
| "step": 670 |
| }, |
| { |
| "epoch": 5.1923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.663e-05, |
| "loss": 1.7879, |
| "mean_token_accuracy": 0.6398061364889145, |
| "num_tokens": 2461179.0, |
| "step": 675 |
| }, |
| { |
| "epoch": 5.230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6605000000000005e-05, |
| "loss": 1.7348, |
| "mean_token_accuracy": 0.642570036649704, |
| "num_tokens": 2480028.0, |
| "step": 680 |
| }, |
| { |
| "epoch": 5.269230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6580000000000005e-05, |
| "loss": 1.7696, |
| "mean_token_accuracy": 0.6482695400714874, |
| "num_tokens": 2497710.0, |
| "step": 685 |
| }, |
| { |
| "epoch": 5.3076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6555000000000004e-05, |
| "loss": 1.7291, |
| "mean_token_accuracy": 0.655092054605484, |
| "num_tokens": 2515935.0, |
| "step": 690 |
| }, |
| { |
| "epoch": 5.346153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6530000000000003e-05, |
| "loss": 1.7177, |
| "mean_token_accuracy": 0.6567189127206803, |
| "num_tokens": 2534143.0, |
| "step": 695 |
| }, |
| { |
| "epoch": 5.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6505e-05, |
| "loss": 1.771, |
| "mean_token_accuracy": 0.6448938339948654, |
| "num_tokens": 2552295.0, |
| "step": 700 |
| }, |
| { |
| "epoch": 5.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.648e-05, |
| "loss": 1.7079, |
| "mean_token_accuracy": 0.6543023705482482, |
| "num_tokens": 2571330.0, |
| "step": 705 |
| }, |
| { |
| "epoch": 5.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6455e-05, |
| "loss": 1.6662, |
| "mean_token_accuracy": 0.6654859989881515, |
| "num_tokens": 2590405.0, |
| "step": 710 |
| }, |
| { |
| "epoch": 5.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.643e-05, |
| "loss": 1.7794, |
| "mean_token_accuracy": 0.6464732140302658, |
| "num_tokens": 2607594.0, |
| "step": 715 |
| }, |
| { |
| "epoch": 5.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.640500000000001e-05, |
| "loss": 1.7341, |
| "mean_token_accuracy": 0.6509989589452744, |
| "num_tokens": 2625915.0, |
| "step": 720 |
| }, |
| { |
| "epoch": 5.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.638e-05, |
| "loss": 1.6809, |
| "mean_token_accuracy": 0.6608490258455276, |
| "num_tokens": 2644604.0, |
| "step": 725 |
| }, |
| { |
| "epoch": 5.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6355000000000006e-05, |
| "loss": 1.7797, |
| "mean_token_accuracy": 0.6429726481437683, |
| "num_tokens": 2662473.0, |
| "step": 730 |
| }, |
| { |
| "epoch": 5.653846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.633e-05, |
| "loss": 1.7277, |
| "mean_token_accuracy": 0.6474452137947082, |
| "num_tokens": 2681136.0, |
| "step": 735 |
| }, |
| { |
| "epoch": 5.6923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6305000000000005e-05, |
| "loss": 1.7779, |
| "mean_token_accuracy": 0.6477038502693176, |
| "num_tokens": 2698512.0, |
| "step": 740 |
| }, |
| { |
| "epoch": 5.730769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6280000000000004e-05, |
| "loss": 1.7583, |
| "mean_token_accuracy": 0.6484469920396805, |
| "num_tokens": 2716683.0, |
| "step": 745 |
| }, |
| { |
| "epoch": 5.769230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6255000000000004e-05, |
| "loss": 1.7656, |
| "mean_token_accuracy": 0.6475028216838836, |
| "num_tokens": 2734221.0, |
| "step": 750 |
| }, |
| { |
| "epoch": 5.8076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.623e-05, |
| "loss": 1.7389, |
| "mean_token_accuracy": 0.6497486799955368, |
| "num_tokens": 2752696.0, |
| "step": 755 |
| }, |
| { |
| "epoch": 5.846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6205e-05, |
| "loss": 1.751, |
| "mean_token_accuracy": 0.6450219005346298, |
| "num_tokens": 2771304.0, |
| "step": 760 |
| }, |
| { |
| "epoch": 5.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.618e-05, |
| "loss": 1.6985, |
| "mean_token_accuracy": 0.655203464627266, |
| "num_tokens": 2789693.0, |
| "step": 765 |
| }, |
| { |
| "epoch": 5.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6155e-05, |
| "loss": 1.7607, |
| "mean_token_accuracy": 0.6439933836460113, |
| "num_tokens": 2808244.0, |
| "step": 770 |
| }, |
| { |
| "epoch": 5.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.613e-05, |
| "loss": 1.7215, |
| "mean_token_accuracy": 0.6554672598838807, |
| "num_tokens": 2826964.0, |
| "step": 775 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.610500000000001e-05, |
| "loss": 1.8066, |
| "mean_token_accuracy": 0.6397966831922531, |
| "num_tokens": 2844630.0, |
| "step": 780 |
| }, |
| { |
| "epoch": 6.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.608e-05, |
| "loss": 1.7057, |
| "mean_token_accuracy": 0.6596891850233078, |
| "num_tokens": 2863298.0, |
| "step": 785 |
| }, |
| { |
| "epoch": 6.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6055000000000005e-05, |
| "loss": 1.7406, |
| "mean_token_accuracy": 0.6515646994113922, |
| "num_tokens": 2880896.0, |
| "step": 790 |
| }, |
| { |
| "epoch": 6.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.603e-05, |
| "loss": 1.7509, |
| "mean_token_accuracy": 0.6432402580976486, |
| "num_tokens": 2899195.0, |
| "step": 795 |
| }, |
| { |
| "epoch": 6.153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6005000000000004e-05, |
| "loss": 1.7547, |
| "mean_token_accuracy": 0.6499321848154068, |
| "num_tokens": 2916949.0, |
| "step": 800 |
| }, |
| { |
| "epoch": 6.1923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5980000000000004e-05, |
| "loss": 1.7903, |
| "mean_token_accuracy": 0.6418056339025497, |
| "num_tokens": 2935129.0, |
| "step": 805 |
| }, |
| { |
| "epoch": 6.230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5955e-05, |
| "loss": 1.7601, |
| "mean_token_accuracy": 0.64379281103611, |
| "num_tokens": 2953207.0, |
| "step": 810 |
| }, |
| { |
| "epoch": 6.269230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.593e-05, |
| "loss": 1.7694, |
| "mean_token_accuracy": 0.6440808981657028, |
| "num_tokens": 2971018.0, |
| "step": 815 |
| }, |
| { |
| "epoch": 6.3076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5905e-05, |
| "loss": 1.6246, |
| "mean_token_accuracy": 0.6741865813732147, |
| "num_tokens": 2990306.0, |
| "step": 820 |
| }, |
| { |
| "epoch": 6.346153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.588e-05, |
| "loss": 1.7449, |
| "mean_token_accuracy": 0.6494547843933105, |
| "num_tokens": 3008827.0, |
| "step": 825 |
| }, |
| { |
| "epoch": 6.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5855e-05, |
| "loss": 1.716, |
| "mean_token_accuracy": 0.6559219211339951, |
| "num_tokens": 3027465.0, |
| "step": 830 |
| }, |
| { |
| "epoch": 6.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.583e-05, |
| "loss": 1.7343, |
| "mean_token_accuracy": 0.6473995357751846, |
| "num_tokens": 3046088.0, |
| "step": 835 |
| }, |
| { |
| "epoch": 6.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5805000000000006e-05, |
| "loss": 1.7288, |
| "mean_token_accuracy": 0.653452581167221, |
| "num_tokens": 3064572.0, |
| "step": 840 |
| }, |
| { |
| "epoch": 6.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.578e-05, |
| "loss": 1.7867, |
| "mean_token_accuracy": 0.6452230870723724, |
| "num_tokens": 3081959.0, |
| "step": 845 |
| }, |
| { |
| "epoch": 6.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5755000000000005e-05, |
| "loss": 1.7674, |
| "mean_token_accuracy": 0.6463764935731888, |
| "num_tokens": 3100123.0, |
| "step": 850 |
| }, |
| { |
| "epoch": 6.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.573e-05, |
| "loss": 1.7434, |
| "mean_token_accuracy": 0.6475829243659973, |
| "num_tokens": 3117886.0, |
| "step": 855 |
| }, |
| { |
| "epoch": 6.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5705000000000004e-05, |
| "loss": 1.6876, |
| "mean_token_accuracy": 0.6611418306827546, |
| "num_tokens": 3136818.0, |
| "step": 860 |
| }, |
| { |
| "epoch": 6.653846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.568e-05, |
| "loss": 1.6901, |
| "mean_token_accuracy": 0.6592308074235916, |
| "num_tokens": 3156269.0, |
| "step": 865 |
| }, |
| { |
| "epoch": 6.6923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5655e-05, |
| "loss": 1.7783, |
| "mean_token_accuracy": 0.6404279589653015, |
| "num_tokens": 3174097.0, |
| "step": 870 |
| }, |
| { |
| "epoch": 6.730769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.563e-05, |
| "loss": 1.7347, |
| "mean_token_accuracy": 0.649324044585228, |
| "num_tokens": 3192152.0, |
| "step": 875 |
| }, |
| { |
| "epoch": 6.769230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5605e-05, |
| "loss": 1.7318, |
| "mean_token_accuracy": 0.6562680333852768, |
| "num_tokens": 3210441.0, |
| "step": 880 |
| }, |
| { |
| "epoch": 6.8076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.558e-05, |
| "loss": 1.754, |
| "mean_token_accuracy": 0.6517583757638932, |
| "num_tokens": 3228565.0, |
| "step": 885 |
| }, |
| { |
| "epoch": 6.846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5555e-05, |
| "loss": 1.719, |
| "mean_token_accuracy": 0.6558358192443847, |
| "num_tokens": 3246686.0, |
| "step": 890 |
| }, |
| { |
| "epoch": 6.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.553e-05, |
| "loss": 1.7535, |
| "mean_token_accuracy": 0.6434608608484268, |
| "num_tokens": 3264528.0, |
| "step": 895 |
| }, |
| { |
| "epoch": 6.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5505000000000006e-05, |
| "loss": 1.7823, |
| "mean_token_accuracy": 0.6435014694929123, |
| "num_tokens": 3282692.0, |
| "step": 900 |
| }, |
| { |
| "epoch": 6.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.548e-05, |
| "loss": 1.8069, |
| "mean_token_accuracy": 0.6398796170949936, |
| "num_tokens": 3300093.0, |
| "step": 905 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5455000000000004e-05, |
| "loss": 1.7239, |
| "mean_token_accuracy": 0.6568648606538773, |
| "num_tokens": 3318735.0, |
| "step": 910 |
| }, |
| { |
| "epoch": 7.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.543e-05, |
| "loss": 1.6958, |
| "mean_token_accuracy": 0.657955089211464, |
| "num_tokens": 3337799.0, |
| "step": 915 |
| }, |
| { |
| "epoch": 7.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5405e-05, |
| "loss": 1.704, |
| "mean_token_accuracy": 0.6572122246026992, |
| "num_tokens": 3356448.0, |
| "step": 920 |
| }, |
| { |
| "epoch": 7.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.538e-05, |
| "loss": 1.7811, |
| "mean_token_accuracy": 0.6435384958982467, |
| "num_tokens": 3374165.0, |
| "step": 925 |
| }, |
| { |
| "epoch": 7.153846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5355e-05, |
| "loss": 1.7932, |
| "mean_token_accuracy": 0.643210482597351, |
| "num_tokens": 3391722.0, |
| "step": 930 |
| }, |
| { |
| "epoch": 7.1923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.533e-05, |
| "loss": 1.7659, |
| "mean_token_accuracy": 0.6444118946790696, |
| "num_tokens": 3409413.0, |
| "step": 935 |
| }, |
| { |
| "epoch": 7.230769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5305e-05, |
| "loss": 1.6323, |
| "mean_token_accuracy": 0.672014307975769, |
| "num_tokens": 3428160.0, |
| "step": 940 |
| }, |
| { |
| "epoch": 7.269230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.528e-05, |
| "loss": 1.7143, |
| "mean_token_accuracy": 0.6540875136852264, |
| "num_tokens": 3447098.0, |
| "step": 945 |
| }, |
| { |
| "epoch": 7.3076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5255000000000006e-05, |
| "loss": 1.7804, |
| "mean_token_accuracy": 0.6401316851377488, |
| "num_tokens": 3465037.0, |
| "step": 950 |
| }, |
| { |
| "epoch": 7.346153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.523e-05, |
| "loss": 1.7542, |
| "mean_token_accuracy": 0.6487213045358657, |
| "num_tokens": 3483393.0, |
| "step": 955 |
| }, |
| { |
| "epoch": 7.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5205000000000005e-05, |
| "loss": 1.7863, |
| "mean_token_accuracy": 0.6442375689744949, |
| "num_tokens": 3500954.0, |
| "step": 960 |
| }, |
| { |
| "epoch": 7.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.518e-05, |
| "loss": 1.807, |
| "mean_token_accuracy": 0.6401415497064591, |
| "num_tokens": 3519001.0, |
| "step": 965 |
| }, |
| { |
| "epoch": 7.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5155000000000004e-05, |
| "loss": 1.7395, |
| "mean_token_accuracy": 0.6544391065835953, |
| "num_tokens": 3537230.0, |
| "step": 970 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.513e-05, |
| "loss": 1.7505, |
| "mean_token_accuracy": 0.6461744338274003, |
| "num_tokens": 3555470.0, |
| "step": 975 |
| }, |
| { |
| "epoch": 7.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5105e-05, |
| "loss": 1.7005, |
| "mean_token_accuracy": 0.6609607338905334, |
| "num_tokens": 3574088.0, |
| "step": 980 |
| }, |
| { |
| "epoch": 7.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.508e-05, |
| "loss": 1.7752, |
| "mean_token_accuracy": 0.6407621741294861, |
| "num_tokens": 3592361.0, |
| "step": 985 |
| }, |
| { |
| "epoch": 7.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5055e-05, |
| "loss": 1.7903, |
| "mean_token_accuracy": 0.6438402742147445, |
| "num_tokens": 3610243.0, |
| "step": 990 |
| }, |
| { |
| "epoch": 7.653846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.503e-05, |
| "loss": 1.7926, |
| "mean_token_accuracy": 0.6445695728063583, |
| "num_tokens": 3627858.0, |
| "step": 995 |
| }, |
| { |
| "epoch": 7.6923076923076925, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5005e-05, |
| "loss": 1.679, |
| "mean_token_accuracy": 0.6652253627777099, |
| "num_tokens": 3646594.0, |
| "step": 1000 |
| }, |
| { |
| "epoch": 7.730769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 4.498e-05, |
| "loss": 1.8077, |
| "mean_token_accuracy": 0.6364081174135208, |
| "num_tokens": 3664583.0, |
| "step": 1005 |
| }, |
| { |
| "epoch": 7.769230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4955000000000006e-05, |
| "loss": 1.7589, |
| "mean_token_accuracy": 0.6449447929859161, |
| "num_tokens": 3682471.0, |
| "step": 1010 |
| }, |
| { |
| "epoch": 7.8076923076923075, |
| "grad_norm": 0.0, |
| "learning_rate": 4.493e-05, |
| "loss": 1.6992, |
| "mean_token_accuracy": 0.6603027373552323, |
| "num_tokens": 3701373.0, |
| "step": 1015 |
| }, |
| { |
| "epoch": 7.846153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4905000000000005e-05, |
| "loss": 1.6505, |
| "mean_token_accuracy": 0.6682931154966354, |
| "num_tokens": 3720280.0, |
| "step": 1020 |
| }, |
| { |
| "epoch": 7.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.488e-05, |
| "loss": 1.7144, |
| "mean_token_accuracy": 0.6550876766443252, |
| "num_tokens": 3738885.0, |
| "step": 1025 |
| }, |
| { |
| "epoch": 7.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4855e-05, |
| "loss": 1.6457, |
| "mean_token_accuracy": 0.6694303750991821, |
| "num_tokens": 3757638.0, |
| "step": 1030 |
| }, |
| { |
| "epoch": 7.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.483e-05, |
| "loss": 1.8101, |
| "mean_token_accuracy": 0.6355878323316574, |
| "num_tokens": 3775161.0, |
| "step": 1035 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4805e-05, |
| "loss": 1.7585, |
| "mean_token_accuracy": 0.6487736940383911, |
| "num_tokens": 3792840.0, |
| "step": 1040 |
| }, |
| { |
| "epoch": 8.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.478e-05, |
| "loss": 1.7554, |
| "mean_token_accuracy": 0.6465101599693298, |
| "num_tokens": 3810513.0, |
| "step": 1045 |
| }, |
| { |
| "epoch": 8.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4755e-05, |
| "loss": 1.7955, |
| "mean_token_accuracy": 0.6376824468374253, |
| "num_tokens": 3828269.0, |
| "step": 1050 |
| }, |
| { |
| "epoch": 8.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.473e-05, |
| "loss": 1.7313, |
| "mean_token_accuracy": 0.6513699233531952, |
| "num_tokens": 3846785.0, |
| "step": 1055 |
| }, |
| { |
| "epoch": 8.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4705e-05, |
| "loss": 1.779, |
| "mean_token_accuracy": 0.6457870662212372, |
| "num_tokens": 3864589.0, |
| "step": 1060 |
| }, |
| { |
| "epoch": 8.192307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.468e-05, |
| "loss": 1.7206, |
| "mean_token_accuracy": 0.655270129442215, |
| "num_tokens": 3882774.0, |
| "step": 1065 |
| }, |
| { |
| "epoch": 8.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4655000000000005e-05, |
| "loss": 1.6979, |
| "mean_token_accuracy": 0.6581394881010055, |
| "num_tokens": 3901162.0, |
| "step": 1070 |
| }, |
| { |
| "epoch": 8.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.463e-05, |
| "loss": 1.7762, |
| "mean_token_accuracy": 0.6421261847019195, |
| "num_tokens": 3919194.0, |
| "step": 1075 |
| }, |
| { |
| "epoch": 8.307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4605000000000004e-05, |
| "loss": 1.7165, |
| "mean_token_accuracy": 0.6560532629489899, |
| "num_tokens": 3937378.0, |
| "step": 1080 |
| }, |
| { |
| "epoch": 8.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.458e-05, |
| "loss": 1.7614, |
| "mean_token_accuracy": 0.6535450726747513, |
| "num_tokens": 3955373.0, |
| "step": 1085 |
| }, |
| { |
| "epoch": 8.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4555e-05, |
| "loss": 1.6481, |
| "mean_token_accuracy": 0.6688576519489289, |
| "num_tokens": 3974246.0, |
| "step": 1090 |
| }, |
| { |
| "epoch": 8.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.453e-05, |
| "loss": 1.6684, |
| "mean_token_accuracy": 0.6643936514854432, |
| "num_tokens": 3992871.0, |
| "step": 1095 |
| }, |
| { |
| "epoch": 8.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4505e-05, |
| "loss": 1.7571, |
| "mean_token_accuracy": 0.6398821622133255, |
| "num_tokens": 4011349.0, |
| "step": 1100 |
| }, |
| { |
| "epoch": 8.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.448e-05, |
| "loss": 1.784, |
| "mean_token_accuracy": 0.6396127253770828, |
| "num_tokens": 4029422.0, |
| "step": 1105 |
| }, |
| { |
| "epoch": 8.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4455e-05, |
| "loss": 1.7972, |
| "mean_token_accuracy": 0.6439882755279541, |
| "num_tokens": 4047081.0, |
| "step": 1110 |
| }, |
| { |
| "epoch": 8.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.443e-05, |
| "loss": 1.8174, |
| "mean_token_accuracy": 0.6325599551200867, |
| "num_tokens": 4065118.0, |
| "step": 1115 |
| }, |
| { |
| "epoch": 8.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4405e-05, |
| "loss": 1.7934, |
| "mean_token_accuracy": 0.6423143684864044, |
| "num_tokens": 4083327.0, |
| "step": 1120 |
| }, |
| { |
| "epoch": 8.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.438e-05, |
| "loss": 1.746, |
| "mean_token_accuracy": 0.646085262298584, |
| "num_tokens": 4101731.0, |
| "step": 1125 |
| }, |
| { |
| "epoch": 8.692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4355000000000005e-05, |
| "loss": 1.7223, |
| "mean_token_accuracy": 0.6499712377786636, |
| "num_tokens": 4120160.0, |
| "step": 1130 |
| }, |
| { |
| "epoch": 8.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4330000000000004e-05, |
| "loss": 1.7363, |
| "mean_token_accuracy": 0.6554156959056854, |
| "num_tokens": 4138405.0, |
| "step": 1135 |
| }, |
| { |
| "epoch": 8.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4305000000000004e-05, |
| "loss": 1.6461, |
| "mean_token_accuracy": 0.6685263246297837, |
| "num_tokens": 4157763.0, |
| "step": 1140 |
| }, |
| { |
| "epoch": 8.807692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.428e-05, |
| "loss": 1.7541, |
| "mean_token_accuracy": 0.6515760749578476, |
| "num_tokens": 4176205.0, |
| "step": 1145 |
| }, |
| { |
| "epoch": 8.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4255e-05, |
| "loss": 1.6547, |
| "mean_token_accuracy": 0.6698046773672104, |
| "num_tokens": 4195030.0, |
| "step": 1150 |
| }, |
| { |
| "epoch": 8.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.423e-05, |
| "loss": 1.7975, |
| "mean_token_accuracy": 0.638244041800499, |
| "num_tokens": 4212955.0, |
| "step": 1155 |
| }, |
| { |
| "epoch": 8.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4205e-05, |
| "loss": 1.7643, |
| "mean_token_accuracy": 0.6474886476993561, |
| "num_tokens": 4230913.0, |
| "step": 1160 |
| }, |
| { |
| "epoch": 8.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.418000000000001e-05, |
| "loss": 1.763, |
| "mean_token_accuracy": 0.6492164492607116, |
| "num_tokens": 4248472.0, |
| "step": 1165 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4155e-05, |
| "loss": 1.6968, |
| "mean_token_accuracy": 0.661212831735611, |
| "num_tokens": 4266945.0, |
| "step": 1170 |
| }, |
| { |
| "epoch": 9.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4130000000000006e-05, |
| "loss": 1.6492, |
| "mean_token_accuracy": 0.6669099152088165, |
| "num_tokens": 4285875.0, |
| "step": 1175 |
| }, |
| { |
| "epoch": 9.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4105e-05, |
| "loss": 1.7311, |
| "mean_token_accuracy": 0.6541598349809646, |
| "num_tokens": 4304365.0, |
| "step": 1180 |
| }, |
| { |
| "epoch": 9.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4080000000000005e-05, |
| "loss": 1.6924, |
| "mean_token_accuracy": 0.6604053825139999, |
| "num_tokens": 4323536.0, |
| "step": 1185 |
| }, |
| { |
| "epoch": 9.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4055000000000004e-05, |
| "loss": 1.8026, |
| "mean_token_accuracy": 0.6396621257066727, |
| "num_tokens": 4341032.0, |
| "step": 1190 |
| }, |
| { |
| "epoch": 9.192307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4030000000000004e-05, |
| "loss": 1.7641, |
| "mean_token_accuracy": 0.644762921333313, |
| "num_tokens": 4358653.0, |
| "step": 1195 |
| }, |
| { |
| "epoch": 9.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4005e-05, |
| "loss": 1.8077, |
| "mean_token_accuracy": 0.6364401876926422, |
| "num_tokens": 4376472.0, |
| "step": 1200 |
| }, |
| { |
| "epoch": 9.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.398e-05, |
| "loss": 1.7567, |
| "mean_token_accuracy": 0.6473460882902146, |
| "num_tokens": 4394898.0, |
| "step": 1205 |
| }, |
| { |
| "epoch": 9.307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3955e-05, |
| "loss": 1.6926, |
| "mean_token_accuracy": 0.6541574269533157, |
| "num_tokens": 4413886.0, |
| "step": 1210 |
| }, |
| { |
| "epoch": 9.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.393e-05, |
| "loss": 1.6822, |
| "mean_token_accuracy": 0.6630573570728302, |
| "num_tokens": 4432735.0, |
| "step": 1215 |
| }, |
| { |
| "epoch": 9.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3905e-05, |
| "loss": 1.7395, |
| "mean_token_accuracy": 0.6511468648910522, |
| "num_tokens": 4450737.0, |
| "step": 1220 |
| }, |
| { |
| "epoch": 9.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.388000000000001e-05, |
| "loss": 1.7185, |
| "mean_token_accuracy": 0.6551208138465882, |
| "num_tokens": 4469277.0, |
| "step": 1225 |
| }, |
| { |
| "epoch": 9.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3855e-05, |
| "loss": 1.7301, |
| "mean_token_accuracy": 0.6529223084449768, |
| "num_tokens": 4487471.0, |
| "step": 1230 |
| }, |
| { |
| "epoch": 9.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3830000000000006e-05, |
| "loss": 1.7984, |
| "mean_token_accuracy": 0.6414839684963226, |
| "num_tokens": 4504876.0, |
| "step": 1235 |
| }, |
| { |
| "epoch": 9.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3805000000000005e-05, |
| "loss": 1.7501, |
| "mean_token_accuracy": 0.650190019607544, |
| "num_tokens": 4522905.0, |
| "step": 1240 |
| }, |
| { |
| "epoch": 9.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3780000000000004e-05, |
| "loss": 1.7429, |
| "mean_token_accuracy": 0.6522337943315506, |
| "num_tokens": 4541124.0, |
| "step": 1245 |
| }, |
| { |
| "epoch": 9.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3755000000000004e-05, |
| "loss": 1.6859, |
| "mean_token_accuracy": 0.6608754128217698, |
| "num_tokens": 4560264.0, |
| "step": 1250 |
| }, |
| { |
| "epoch": 9.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.373e-05, |
| "loss": 1.6919, |
| "mean_token_accuracy": 0.6609419882297516, |
| "num_tokens": 4579633.0, |
| "step": 1255 |
| }, |
| { |
| "epoch": 9.692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3705e-05, |
| "loss": 1.7671, |
| "mean_token_accuracy": 0.6452909797430039, |
| "num_tokens": 4597291.0, |
| "step": 1260 |
| }, |
| { |
| "epoch": 9.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.368e-05, |
| "loss": 1.7981, |
| "mean_token_accuracy": 0.6409468352794647, |
| "num_tokens": 4615469.0, |
| "step": 1265 |
| }, |
| { |
| "epoch": 9.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3655e-05, |
| "loss": 1.7488, |
| "mean_token_accuracy": 0.6545175701379776, |
| "num_tokens": 4633258.0, |
| "step": 1270 |
| }, |
| { |
| "epoch": 9.807692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.363000000000001e-05, |
| "loss": 1.7886, |
| "mean_token_accuracy": 0.6439413070678711, |
| "num_tokens": 4650771.0, |
| "step": 1275 |
| }, |
| { |
| "epoch": 9.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3605e-05, |
| "loss": 1.6981, |
| "mean_token_accuracy": 0.658900797367096, |
| "num_tokens": 4669522.0, |
| "step": 1280 |
| }, |
| { |
| "epoch": 9.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3580000000000006e-05, |
| "loss": 1.778, |
| "mean_token_accuracy": 0.6451369762420655, |
| "num_tokens": 4687769.0, |
| "step": 1285 |
| }, |
| { |
| "epoch": 9.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3555e-05, |
| "loss": 1.733, |
| "mean_token_accuracy": 0.6531021893024445, |
| "num_tokens": 4705563.0, |
| "step": 1290 |
| }, |
| { |
| "epoch": 9.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3530000000000005e-05, |
| "loss": 1.7758, |
| "mean_token_accuracy": 0.6376697093248367, |
| "num_tokens": 4723178.0, |
| "step": 1295 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3505000000000004e-05, |
| "loss": 1.7658, |
| "mean_token_accuracy": 0.6424418658018112, |
| "num_tokens": 4741050.0, |
| "step": 1300 |
| }, |
| { |
| "epoch": 10.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3480000000000004e-05, |
| "loss": 1.7156, |
| "mean_token_accuracy": 0.6535384625196456, |
| "num_tokens": 4759671.0, |
| "step": 1305 |
| }, |
| { |
| "epoch": 10.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3455e-05, |
| "loss": 1.7894, |
| "mean_token_accuracy": 0.6466272503137589, |
| "num_tokens": 4777543.0, |
| "step": 1310 |
| }, |
| { |
| "epoch": 10.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.343e-05, |
| "loss": 1.7418, |
| "mean_token_accuracy": 0.6525053322315216, |
| "num_tokens": 4795767.0, |
| "step": 1315 |
| }, |
| { |
| "epoch": 10.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3405e-05, |
| "loss": 1.8019, |
| "mean_token_accuracy": 0.641847088932991, |
| "num_tokens": 4812856.0, |
| "step": 1320 |
| }, |
| { |
| "epoch": 10.192307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.338e-05, |
| "loss": 1.713, |
| "mean_token_accuracy": 0.6589140474796296, |
| "num_tokens": 4831209.0, |
| "step": 1325 |
| }, |
| { |
| "epoch": 10.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3355e-05, |
| "loss": 1.7601, |
| "mean_token_accuracy": 0.6469725668430328, |
| "num_tokens": 4849251.0, |
| "step": 1330 |
| }, |
| { |
| "epoch": 10.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.333000000000001e-05, |
| "loss": 1.7413, |
| "mean_token_accuracy": 0.6520713210105896, |
| "num_tokens": 4867548.0, |
| "step": 1335 |
| }, |
| { |
| "epoch": 10.307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3305e-05, |
| "loss": 1.7205, |
| "mean_token_accuracy": 0.650857400894165, |
| "num_tokens": 4885905.0, |
| "step": 1340 |
| }, |
| { |
| "epoch": 10.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3280000000000006e-05, |
| "loss": 1.7016, |
| "mean_token_accuracy": 0.6605552464723587, |
| "num_tokens": 4904774.0, |
| "step": 1345 |
| }, |
| { |
| "epoch": 10.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3255e-05, |
| "loss": 1.6779, |
| "mean_token_accuracy": 0.6666248500347137, |
| "num_tokens": 4923475.0, |
| "step": 1350 |
| }, |
| { |
| "epoch": 10.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3230000000000005e-05, |
| "loss": 1.7332, |
| "mean_token_accuracy": 0.6488200217485428, |
| "num_tokens": 4942112.0, |
| "step": 1355 |
| }, |
| { |
| "epoch": 10.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3205000000000004e-05, |
| "loss": 1.7457, |
| "mean_token_accuracy": 0.6494636863470078, |
| "num_tokens": 4960202.0, |
| "step": 1360 |
| }, |
| { |
| "epoch": 10.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.318e-05, |
| "loss": 1.7592, |
| "mean_token_accuracy": 0.6439968854188919, |
| "num_tokens": 4978469.0, |
| "step": 1365 |
| }, |
| { |
| "epoch": 10.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3155e-05, |
| "loss": 1.7408, |
| "mean_token_accuracy": 0.6498746544122695, |
| "num_tokens": 4997586.0, |
| "step": 1370 |
| }, |
| { |
| "epoch": 10.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.313e-05, |
| "loss": 1.7511, |
| "mean_token_accuracy": 0.647632023692131, |
| "num_tokens": 5015966.0, |
| "step": 1375 |
| }, |
| { |
| "epoch": 10.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3105e-05, |
| "loss": 1.7565, |
| "mean_token_accuracy": 0.6496962130069732, |
| "num_tokens": 5033532.0, |
| "step": 1380 |
| }, |
| { |
| "epoch": 10.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.308e-05, |
| "loss": 1.7098, |
| "mean_token_accuracy": 0.6550800025463104, |
| "num_tokens": 5052100.0, |
| "step": 1385 |
| }, |
| { |
| "epoch": 10.692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3055e-05, |
| "loss": 1.7241, |
| "mean_token_accuracy": 0.6496223241090775, |
| "num_tokens": 5070966.0, |
| "step": 1390 |
| }, |
| { |
| "epoch": 10.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3030000000000006e-05, |
| "loss": 1.7204, |
| "mean_token_accuracy": 0.6540403842926026, |
| "num_tokens": 5089950.0, |
| "step": 1395 |
| }, |
| { |
| "epoch": 10.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3005e-05, |
| "loss": 1.7644, |
| "mean_token_accuracy": 0.6442721575498581, |
| "num_tokens": 5107960.0, |
| "step": 1400 |
| }, |
| { |
| "epoch": 10.807692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2980000000000005e-05, |
| "loss": 1.7286, |
| "mean_token_accuracy": 0.6496645718812942, |
| "num_tokens": 5126317.0, |
| "step": 1405 |
| }, |
| { |
| "epoch": 10.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2955e-05, |
| "loss": 1.7809, |
| "mean_token_accuracy": 0.6493318378925323, |
| "num_tokens": 5143608.0, |
| "step": 1410 |
| }, |
| { |
| "epoch": 10.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2930000000000004e-05, |
| "loss": 1.7568, |
| "mean_token_accuracy": 0.6470446825027466, |
| "num_tokens": 5161679.0, |
| "step": 1415 |
| }, |
| { |
| "epoch": 10.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2905000000000003e-05, |
| "loss": 1.7742, |
| "mean_token_accuracy": 0.6468683093786239, |
| "num_tokens": 5179282.0, |
| "step": 1420 |
| }, |
| { |
| "epoch": 10.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.288e-05, |
| "loss": 1.6856, |
| "mean_token_accuracy": 0.6599813520908355, |
| "num_tokens": 5197538.0, |
| "step": 1425 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2855e-05, |
| "loss": 1.7956, |
| "mean_token_accuracy": 0.642606571316719, |
| "num_tokens": 5215155.0, |
| "step": 1430 |
| }, |
| { |
| "epoch": 11.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.283e-05, |
| "loss": 1.7845, |
| "mean_token_accuracy": 0.6424670696258545, |
| "num_tokens": 5232784.0, |
| "step": 1435 |
| }, |
| { |
| "epoch": 11.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2805e-05, |
| "loss": 1.7199, |
| "mean_token_accuracy": 0.6572603791952133, |
| "num_tokens": 5250874.0, |
| "step": 1440 |
| }, |
| { |
| "epoch": 11.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.278e-05, |
| "loss": 1.6408, |
| "mean_token_accuracy": 0.6750534921884537, |
| "num_tokens": 5270343.0, |
| "step": 1445 |
| }, |
| { |
| "epoch": 11.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2755e-05, |
| "loss": 1.7426, |
| "mean_token_accuracy": 0.6530660510063171, |
| "num_tokens": 5287928.0, |
| "step": 1450 |
| }, |
| { |
| "epoch": 11.192307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2730000000000006e-05, |
| "loss": 1.7704, |
| "mean_token_accuracy": 0.6498903334140778, |
| "num_tokens": 5305899.0, |
| "step": 1455 |
| }, |
| { |
| "epoch": 11.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2705e-05, |
| "loss": 1.8094, |
| "mean_token_accuracy": 0.6360325515270233, |
| "num_tokens": 5323814.0, |
| "step": 1460 |
| }, |
| { |
| "epoch": 11.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2680000000000005e-05, |
| "loss": 1.7991, |
| "mean_token_accuracy": 0.6365718424320221, |
| "num_tokens": 5341443.0, |
| "step": 1465 |
| }, |
| { |
| "epoch": 11.307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2655e-05, |
| "loss": 1.6735, |
| "mean_token_accuracy": 0.6595575481653213, |
| "num_tokens": 5361091.0, |
| "step": 1470 |
| }, |
| { |
| "epoch": 11.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2630000000000004e-05, |
| "loss": 1.8037, |
| "mean_token_accuracy": 0.639024817943573, |
| "num_tokens": 5378623.0, |
| "step": 1475 |
| }, |
| { |
| "epoch": 11.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2605e-05, |
| "loss": 1.7528, |
| "mean_token_accuracy": 0.6455299586057663, |
| "num_tokens": 5397097.0, |
| "step": 1480 |
| }, |
| { |
| "epoch": 11.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.258e-05, |
| "loss": 1.7405, |
| "mean_token_accuracy": 0.6521054923534393, |
| "num_tokens": 5415277.0, |
| "step": 1485 |
| }, |
| { |
| "epoch": 11.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2555e-05, |
| "loss": 1.7168, |
| "mean_token_accuracy": 0.6546613723039627, |
| "num_tokens": 5433912.0, |
| "step": 1490 |
| }, |
| { |
| "epoch": 11.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.253e-05, |
| "loss": 1.6738, |
| "mean_token_accuracy": 0.6659036576747894, |
| "num_tokens": 5453181.0, |
| "step": 1495 |
| }, |
| { |
| "epoch": 11.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2505e-05, |
| "loss": 1.7129, |
| "mean_token_accuracy": 0.6556811064481736, |
| "num_tokens": 5472123.0, |
| "step": 1500 |
| }, |
| { |
| "epoch": 11.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.248e-05, |
| "loss": 1.7642, |
| "mean_token_accuracy": 0.6464402437210083, |
| "num_tokens": 5489385.0, |
| "step": 1505 |
| }, |
| { |
| "epoch": 11.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2455e-05, |
| "loss": 1.7674, |
| "mean_token_accuracy": 0.6475972950458526, |
| "num_tokens": 5506841.0, |
| "step": 1510 |
| }, |
| { |
| "epoch": 11.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2430000000000005e-05, |
| "loss": 1.7238, |
| "mean_token_accuracy": 0.6543399661779403, |
| "num_tokens": 5525059.0, |
| "step": 1515 |
| }, |
| { |
| "epoch": 11.692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2405e-05, |
| "loss": 1.7736, |
| "mean_token_accuracy": 0.6412011951208114, |
| "num_tokens": 5542986.0, |
| "step": 1520 |
| }, |
| { |
| "epoch": 11.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2380000000000004e-05, |
| "loss": 1.7368, |
| "mean_token_accuracy": 0.6486877024173736, |
| "num_tokens": 5560991.0, |
| "step": 1525 |
| }, |
| { |
| "epoch": 11.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2355000000000004e-05, |
| "loss": 1.7506, |
| "mean_token_accuracy": 0.6509437531232833, |
| "num_tokens": 5579072.0, |
| "step": 1530 |
| }, |
| { |
| "epoch": 11.807692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.233e-05, |
| "loss": 1.7534, |
| "mean_token_accuracy": 0.6474777191877366, |
| "num_tokens": 5597799.0, |
| "step": 1535 |
| }, |
| { |
| "epoch": 11.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2305e-05, |
| "loss": 1.7265, |
| "mean_token_accuracy": 0.6489449918270112, |
| "num_tokens": 5616381.0, |
| "step": 1540 |
| }, |
| { |
| "epoch": 11.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.228e-05, |
| "loss": 1.7094, |
| "mean_token_accuracy": 0.6595662713050843, |
| "num_tokens": 5635297.0, |
| "step": 1545 |
| }, |
| { |
| "epoch": 11.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2255e-05, |
| "loss": 1.7149, |
| "mean_token_accuracy": 0.6520039916038514, |
| "num_tokens": 5653729.0, |
| "step": 1550 |
| }, |
| { |
| "epoch": 11.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.223e-05, |
| "loss": 1.7762, |
| "mean_token_accuracy": 0.6417809814214707, |
| "num_tokens": 5671517.0, |
| "step": 1555 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2205e-05, |
| "loss": 1.7578, |
| "mean_token_accuracy": 0.6531469017267227, |
| "num_tokens": 5689260.0, |
| "step": 1560 |
| }, |
| { |
| "epoch": 12.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2180000000000006e-05, |
| "loss": 1.7791, |
| "mean_token_accuracy": 0.6447166264057159, |
| "num_tokens": 5706593.0, |
| "step": 1565 |
| }, |
| { |
| "epoch": 12.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2155e-05, |
| "loss": 1.7705, |
| "mean_token_accuracy": 0.6480188548564911, |
| "num_tokens": 5724377.0, |
| "step": 1570 |
| }, |
| { |
| "epoch": 12.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2130000000000005e-05, |
| "loss": 1.7288, |
| "mean_token_accuracy": 0.653497377038002, |
| "num_tokens": 5742391.0, |
| "step": 1575 |
| }, |
| { |
| "epoch": 12.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2105e-05, |
| "loss": 1.8046, |
| "mean_token_accuracy": 0.6407176822423934, |
| "num_tokens": 5759857.0, |
| "step": 1580 |
| }, |
| { |
| "epoch": 12.192307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2080000000000004e-05, |
| "loss": 1.8206, |
| "mean_token_accuracy": 0.6390235781669616, |
| "num_tokens": 5777131.0, |
| "step": 1585 |
| }, |
| { |
| "epoch": 12.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2055e-05, |
| "loss": 1.7858, |
| "mean_token_accuracy": 0.6427943378686904, |
| "num_tokens": 5795346.0, |
| "step": 1590 |
| }, |
| { |
| "epoch": 12.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.203e-05, |
| "loss": 1.7938, |
| "mean_token_accuracy": 0.6385728657245636, |
| "num_tokens": 5812776.0, |
| "step": 1595 |
| }, |
| { |
| "epoch": 12.307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2005e-05, |
| "loss": 1.7442, |
| "mean_token_accuracy": 0.6511879444122315, |
| "num_tokens": 5831416.0, |
| "step": 1600 |
| }, |
| { |
| "epoch": 12.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.198e-05, |
| "loss": 1.7307, |
| "mean_token_accuracy": 0.6532302528619767, |
| "num_tokens": 5850125.0, |
| "step": 1605 |
| }, |
| { |
| "epoch": 12.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1955e-05, |
| "loss": 1.7729, |
| "mean_token_accuracy": 0.6425417333841323, |
| "num_tokens": 5868205.0, |
| "step": 1610 |
| }, |
| { |
| "epoch": 12.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.193e-05, |
| "loss": 1.6777, |
| "mean_token_accuracy": 0.6608704894781112, |
| "num_tokens": 5887309.0, |
| "step": 1615 |
| }, |
| { |
| "epoch": 12.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1905e-05, |
| "loss": 1.7262, |
| "mean_token_accuracy": 0.651836696267128, |
| "num_tokens": 5906272.0, |
| "step": 1620 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1880000000000006e-05, |
| "loss": 1.7005, |
| "mean_token_accuracy": 0.6608695417642594, |
| "num_tokens": 5924826.0, |
| "step": 1625 |
| }, |
| { |
| "epoch": 12.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1855e-05, |
| "loss": 1.6497, |
| "mean_token_accuracy": 0.6752032458782196, |
| "num_tokens": 5944267.0, |
| "step": 1630 |
| }, |
| { |
| "epoch": 12.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1830000000000004e-05, |
| "loss": 1.7544, |
| "mean_token_accuracy": 0.6387367337942124, |
| "num_tokens": 5962881.0, |
| "step": 1635 |
| }, |
| { |
| "epoch": 12.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1805e-05, |
| "loss": 1.7518, |
| "mean_token_accuracy": 0.6540467470884324, |
| "num_tokens": 5980352.0, |
| "step": 1640 |
| }, |
| { |
| "epoch": 12.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.178e-05, |
| "loss": 1.6636, |
| "mean_token_accuracy": 0.6685753583908081, |
| "num_tokens": 5998878.0, |
| "step": 1645 |
| }, |
| { |
| "epoch": 12.692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1755e-05, |
| "loss": 1.7515, |
| "mean_token_accuracy": 0.6498158782720566, |
| "num_tokens": 6016722.0, |
| "step": 1650 |
| }, |
| { |
| "epoch": 12.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.173e-05, |
| "loss": 1.6561, |
| "mean_token_accuracy": 0.6655506461858749, |
| "num_tokens": 6035815.0, |
| "step": 1655 |
| }, |
| { |
| "epoch": 12.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1705e-05, |
| "loss": 1.7659, |
| "mean_token_accuracy": 0.6420869499444961, |
| "num_tokens": 6053674.0, |
| "step": 1660 |
| }, |
| { |
| "epoch": 12.807692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.168e-05, |
| "loss": 1.7457, |
| "mean_token_accuracy": 0.6514944612979889, |
| "num_tokens": 6071380.0, |
| "step": 1665 |
| }, |
| { |
| "epoch": 12.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1655e-05, |
| "loss": 1.7276, |
| "mean_token_accuracy": 0.6513452738523483, |
| "num_tokens": 6090024.0, |
| "step": 1670 |
| }, |
| { |
| "epoch": 12.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.163e-05, |
| "loss": 1.7536, |
| "mean_token_accuracy": 0.6504515618085861, |
| "num_tokens": 6108154.0, |
| "step": 1675 |
| }, |
| { |
| "epoch": 12.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1605e-05, |
| "loss": 1.7592, |
| "mean_token_accuracy": 0.6449838757514954, |
| "num_tokens": 6126273.0, |
| "step": 1680 |
| }, |
| { |
| "epoch": 12.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1580000000000005e-05, |
| "loss": 1.6927, |
| "mean_token_accuracy": 0.6574580699205399, |
| "num_tokens": 6144991.0, |
| "step": 1685 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1555e-05, |
| "loss": 1.7501, |
| "mean_token_accuracy": 0.6433569133281708, |
| "num_tokens": 6163365.0, |
| "step": 1690 |
| }, |
| { |
| "epoch": 13.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1530000000000004e-05, |
| "loss": 1.7762, |
| "mean_token_accuracy": 0.6436060070991516, |
| "num_tokens": 6181008.0, |
| "step": 1695 |
| }, |
| { |
| "epoch": 13.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1504999999999996e-05, |
| "loss": 1.8095, |
| "mean_token_accuracy": 0.6394443988800049, |
| "num_tokens": 6198379.0, |
| "step": 1700 |
| }, |
| { |
| "epoch": 13.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.148e-05, |
| "loss": 1.7128, |
| "mean_token_accuracy": 0.6577008962631226, |
| "num_tokens": 6217116.0, |
| "step": 1705 |
| }, |
| { |
| "epoch": 13.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1455e-05, |
| "loss": 1.7454, |
| "mean_token_accuracy": 0.6536228835582734, |
| "num_tokens": 6235654.0, |
| "step": 1710 |
| }, |
| { |
| "epoch": 13.192307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.143e-05, |
| "loss": 1.8063, |
| "mean_token_accuracy": 0.6444395124912262, |
| "num_tokens": 6253216.0, |
| "step": 1715 |
| }, |
| { |
| "epoch": 13.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1405e-05, |
| "loss": 1.7436, |
| "mean_token_accuracy": 0.6505606740713119, |
| "num_tokens": 6270726.0, |
| "step": 1720 |
| }, |
| { |
| "epoch": 13.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.138e-05, |
| "loss": 1.7738, |
| "mean_token_accuracy": 0.6452360033988953, |
| "num_tokens": 6287830.0, |
| "step": 1725 |
| }, |
| { |
| "epoch": 13.307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1355e-05, |
| "loss": 1.6774, |
| "mean_token_accuracy": 0.6625390321016311, |
| "num_tokens": 6306547.0, |
| "step": 1730 |
| }, |
| { |
| "epoch": 13.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.133e-05, |
| "loss": 1.6985, |
| "mean_token_accuracy": 0.6607826173305511, |
| "num_tokens": 6325192.0, |
| "step": 1735 |
| }, |
| { |
| "epoch": 13.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1305e-05, |
| "loss": 1.7158, |
| "mean_token_accuracy": 0.6505321115255356, |
| "num_tokens": 6344059.0, |
| "step": 1740 |
| }, |
| { |
| "epoch": 13.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1280000000000005e-05, |
| "loss": 1.7164, |
| "mean_token_accuracy": 0.652134558558464, |
| "num_tokens": 6362364.0, |
| "step": 1745 |
| }, |
| { |
| "epoch": 13.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1255e-05, |
| "loss": 1.7474, |
| "mean_token_accuracy": 0.6487911343574524, |
| "num_tokens": 6380492.0, |
| "step": 1750 |
| }, |
| { |
| "epoch": 13.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.123e-05, |
| "loss": 1.7241, |
| "mean_token_accuracy": 0.6540584862232208, |
| "num_tokens": 6399116.0, |
| "step": 1755 |
| }, |
| { |
| "epoch": 13.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1205e-05, |
| "loss": 1.6792, |
| "mean_token_accuracy": 0.6668368667364121, |
| "num_tokens": 6417790.0, |
| "step": 1760 |
| }, |
| { |
| "epoch": 13.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.118e-05, |
| "loss": 1.7109, |
| "mean_token_accuracy": 0.651064109802246, |
| "num_tokens": 6436016.0, |
| "step": 1765 |
| }, |
| { |
| "epoch": 13.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1155e-05, |
| "loss": 1.7844, |
| "mean_token_accuracy": 0.6407437533140182, |
| "num_tokens": 6453594.0, |
| "step": 1770 |
| }, |
| { |
| "epoch": 13.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.113e-05, |
| "loss": 1.7622, |
| "mean_token_accuracy": 0.642905455827713, |
| "num_tokens": 6471529.0, |
| "step": 1775 |
| }, |
| { |
| "epoch": 13.692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.110500000000001e-05, |
| "loss": 1.7422, |
| "mean_token_accuracy": 0.6522348582744598, |
| "num_tokens": 6489689.0, |
| "step": 1780 |
| }, |
| { |
| "epoch": 13.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.108e-05, |
| "loss": 1.764, |
| "mean_token_accuracy": 0.6411303877830505, |
| "num_tokens": 6508258.0, |
| "step": 1785 |
| }, |
| { |
| "epoch": 13.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1055000000000006e-05, |
| "loss": 1.733, |
| "mean_token_accuracy": 0.6555010586977005, |
| "num_tokens": 6526939.0, |
| "step": 1790 |
| }, |
| { |
| "epoch": 13.807692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.103e-05, |
| "loss": 1.7877, |
| "mean_token_accuracy": 0.6444415628910065, |
| "num_tokens": 6544917.0, |
| "step": 1795 |
| }, |
| { |
| "epoch": 13.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1005000000000005e-05, |
| "loss": 1.6953, |
| "mean_token_accuracy": 0.6559826284646988, |
| "num_tokens": 6563694.0, |
| "step": 1800 |
| }, |
| { |
| "epoch": 13.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0980000000000004e-05, |
| "loss": 1.7663, |
| "mean_token_accuracy": 0.6459847629070282, |
| "num_tokens": 6581448.0, |
| "step": 1805 |
| }, |
| { |
| "epoch": 13.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0955000000000003e-05, |
| "loss": 1.7447, |
| "mean_token_accuracy": 0.6494243443012238, |
| "num_tokens": 6600126.0, |
| "step": 1810 |
| }, |
| { |
| "epoch": 13.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.093e-05, |
| "loss": 1.7329, |
| "mean_token_accuracy": 0.6540934264659881, |
| "num_tokens": 6618707.0, |
| "step": 1815 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0905e-05, |
| "loss": 1.7251, |
| "mean_token_accuracy": 0.6513684660196304, |
| "num_tokens": 6637470.0, |
| "step": 1820 |
| }, |
| { |
| "epoch": 14.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.088e-05, |
| "loss": 1.6674, |
| "mean_token_accuracy": 0.656906321644783, |
| "num_tokens": 6657126.0, |
| "step": 1825 |
| }, |
| { |
| "epoch": 14.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0855e-05, |
| "loss": 1.7146, |
| "mean_token_accuracy": 0.6526720136404037, |
| "num_tokens": 6675746.0, |
| "step": 1830 |
| }, |
| { |
| "epoch": 14.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.083e-05, |
| "loss": 1.6588, |
| "mean_token_accuracy": 0.663616544008255, |
| "num_tokens": 6695210.0, |
| "step": 1835 |
| }, |
| { |
| "epoch": 14.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0805000000000007e-05, |
| "loss": 1.7722, |
| "mean_token_accuracy": 0.6440047711133957, |
| "num_tokens": 6713034.0, |
| "step": 1840 |
| }, |
| { |
| "epoch": 14.192307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.078e-05, |
| "loss": 1.7022, |
| "mean_token_accuracy": 0.6584948360919952, |
| "num_tokens": 6731872.0, |
| "step": 1845 |
| }, |
| { |
| "epoch": 14.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0755000000000005e-05, |
| "loss": 1.7393, |
| "mean_token_accuracy": 0.6503253132104874, |
| "num_tokens": 6750060.0, |
| "step": 1850 |
| }, |
| { |
| "epoch": 14.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0730000000000005e-05, |
| "loss": 1.7848, |
| "mean_token_accuracy": 0.6464446872472763, |
| "num_tokens": 6767696.0, |
| "step": 1855 |
| }, |
| { |
| "epoch": 14.307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0705000000000004e-05, |
| "loss": 1.7344, |
| "mean_token_accuracy": 0.6560231685638428, |
| "num_tokens": 6785928.0, |
| "step": 1860 |
| }, |
| { |
| "epoch": 14.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0680000000000004e-05, |
| "loss": 1.7642, |
| "mean_token_accuracy": 0.6425097972154618, |
| "num_tokens": 6804181.0, |
| "step": 1865 |
| }, |
| { |
| "epoch": 14.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0655e-05, |
| "loss": 1.7797, |
| "mean_token_accuracy": 0.6466529428958893, |
| "num_tokens": 6821867.0, |
| "step": 1870 |
| }, |
| { |
| "epoch": 14.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.063e-05, |
| "loss": 1.8023, |
| "mean_token_accuracy": 0.6425188779830933, |
| "num_tokens": 6839252.0, |
| "step": 1875 |
| }, |
| { |
| "epoch": 14.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0605e-05, |
| "loss": 1.7618, |
| "mean_token_accuracy": 0.6461942523717881, |
| "num_tokens": 6857208.0, |
| "step": 1880 |
| }, |
| { |
| "epoch": 14.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.058e-05, |
| "loss": 1.7463, |
| "mean_token_accuracy": 0.6480780661106109, |
| "num_tokens": 6875091.0, |
| "step": 1885 |
| }, |
| { |
| "epoch": 14.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.055500000000001e-05, |
| "loss": 1.745, |
| "mean_token_accuracy": 0.6500824391841888, |
| "num_tokens": 6893171.0, |
| "step": 1890 |
| }, |
| { |
| "epoch": 14.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.053e-05, |
| "loss": 1.6749, |
| "mean_token_accuracy": 0.6628648489713669, |
| "num_tokens": 6911806.0, |
| "step": 1895 |
| }, |
| { |
| "epoch": 14.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0505000000000006e-05, |
| "loss": 1.7282, |
| "mean_token_accuracy": 0.6511125653982163, |
| "num_tokens": 6930030.0, |
| "step": 1900 |
| }, |
| { |
| "epoch": 14.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.048e-05, |
| "loss": 1.738, |
| "mean_token_accuracy": 0.6520918369293213, |
| "num_tokens": 6948110.0, |
| "step": 1905 |
| }, |
| { |
| "epoch": 14.692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0455000000000005e-05, |
| "loss": 1.764, |
| "mean_token_accuracy": 0.6516493141651154, |
| "num_tokens": 6966319.0, |
| "step": 1910 |
| }, |
| { |
| "epoch": 14.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0430000000000004e-05, |
| "loss": 1.7763, |
| "mean_token_accuracy": 0.6418019294738769, |
| "num_tokens": 6984403.0, |
| "step": 1915 |
| }, |
| { |
| "epoch": 14.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0405000000000004e-05, |
| "loss": 1.8325, |
| "mean_token_accuracy": 0.6336044192314148, |
| "num_tokens": 7001378.0, |
| "step": 1920 |
| }, |
| { |
| "epoch": 14.807692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.038e-05, |
| "loss": 1.7307, |
| "mean_token_accuracy": 0.6568962961435318, |
| "num_tokens": 7019494.0, |
| "step": 1925 |
| }, |
| { |
| "epoch": 14.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0355e-05, |
| "loss": 1.7421, |
| "mean_token_accuracy": 0.6525963991880417, |
| "num_tokens": 7037790.0, |
| "step": 1930 |
| }, |
| { |
| "epoch": 14.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.033e-05, |
| "loss": 1.7614, |
| "mean_token_accuracy": 0.6488069415092468, |
| "num_tokens": 7055472.0, |
| "step": 1935 |
| }, |
| { |
| "epoch": 14.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0305e-05, |
| "loss": 1.7207, |
| "mean_token_accuracy": 0.6501289933919907, |
| "num_tokens": 7074488.0, |
| "step": 1940 |
| }, |
| { |
| "epoch": 14.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 4.028e-05, |
| "loss": 1.7577, |
| "mean_token_accuracy": 0.6519861459732056, |
| "num_tokens": 7092711.0, |
| "step": 1945 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.025500000000001e-05, |
| "loss": 1.6968, |
| "mean_token_accuracy": 0.6624333202838898, |
| "num_tokens": 7111575.0, |
| "step": 1950 |
| }, |
| { |
| "epoch": 15.038461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 4.023e-05, |
| "loss": 1.7382, |
| "mean_token_accuracy": 0.6494399189949036, |
| "num_tokens": 7130012.0, |
| "step": 1955 |
| }, |
| { |
| "epoch": 15.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0205000000000006e-05, |
| "loss": 1.7673, |
| "mean_token_accuracy": 0.6474148839712143, |
| "num_tokens": 7147764.0, |
| "step": 1960 |
| }, |
| { |
| "epoch": 15.115384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 4.018e-05, |
| "loss": 1.7608, |
| "mean_token_accuracy": 0.6430147528648377, |
| "num_tokens": 7165688.0, |
| "step": 1965 |
| }, |
| { |
| "epoch": 15.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0155000000000004e-05, |
| "loss": 1.768, |
| "mean_token_accuracy": 0.6483392864465714, |
| "num_tokens": 7183723.0, |
| "step": 1970 |
| }, |
| { |
| "epoch": 15.192307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0130000000000004e-05, |
| "loss": 1.7712, |
| "mean_token_accuracy": 0.6451886177062989, |
| "num_tokens": 7200954.0, |
| "step": 1975 |
| }, |
| { |
| "epoch": 15.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0105e-05, |
| "loss": 1.7852, |
| "mean_token_accuracy": 0.6437583029270172, |
| "num_tokens": 7218897.0, |
| "step": 1980 |
| }, |
| { |
| "epoch": 15.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.008e-05, |
| "loss": 1.7186, |
| "mean_token_accuracy": 0.6547761648893357, |
| "num_tokens": 7236557.0, |
| "step": 1985 |
| }, |
| { |
| "epoch": 15.307692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0055e-05, |
| "loss": 1.7277, |
| "mean_token_accuracy": 0.6466970533132553, |
| "num_tokens": 7255192.0, |
| "step": 1990 |
| }, |
| { |
| "epoch": 15.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.003e-05, |
| "loss": 1.817, |
| "mean_token_accuracy": 0.6392653495073318, |
| "num_tokens": 7272870.0, |
| "step": 1995 |
| }, |
| { |
| "epoch": 15.384615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 4.0005e-05, |
| "loss": 1.7256, |
| "mean_token_accuracy": 0.6513147324323654, |
| "num_tokens": 7291754.0, |
| "step": 2000 |
| }, |
| { |
| "epoch": 15.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.998e-05, |
| "loss": 1.7946, |
| "mean_token_accuracy": 0.6427055537700653, |
| "num_tokens": 7309133.0, |
| "step": 2005 |
| }, |
| { |
| "epoch": 15.461538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9955000000000006e-05, |
| "loss": 1.7461, |
| "mean_token_accuracy": 0.6472333878278732, |
| "num_tokens": 7328009.0, |
| "step": 2010 |
| }, |
| { |
| "epoch": 15.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.993e-05, |
| "loss": 1.713, |
| "mean_token_accuracy": 0.6539407223463058, |
| "num_tokens": 7346790.0, |
| "step": 2015 |
| }, |
| { |
| "epoch": 15.538461538461538, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9905000000000005e-05, |
| "loss": 1.7869, |
| "mean_token_accuracy": 0.6394775450229645, |
| "num_tokens": 7364128.0, |
| "step": 2020 |
| }, |
| { |
| "epoch": 15.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.988e-05, |
| "loss": 1.7062, |
| "mean_token_accuracy": 0.6575020164251327, |
| "num_tokens": 7382926.0, |
| "step": 2025 |
| }, |
| { |
| "epoch": 15.615384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9855000000000004e-05, |
| "loss": 1.6781, |
| "mean_token_accuracy": 0.6595021516084671, |
| "num_tokens": 7402054.0, |
| "step": 2030 |
| }, |
| { |
| "epoch": 15.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.983e-05, |
| "loss": 1.7171, |
| "mean_token_accuracy": 0.6517434686422348, |
| "num_tokens": 7420489.0, |
| "step": 2035 |
| }, |
| { |
| "epoch": 15.692307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9805e-05, |
| "loss": 1.7047, |
| "mean_token_accuracy": 0.6589993953704834, |
| "num_tokens": 7438755.0, |
| "step": 2040 |
| }, |
| { |
| "epoch": 15.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.978e-05, |
| "loss": 1.741, |
| "mean_token_accuracy": 0.6524647355079651, |
| "num_tokens": 7456837.0, |
| "step": 2045 |
| }, |
| { |
| "epoch": 15.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9755e-05, |
| "loss": 1.8081, |
| "mean_token_accuracy": 0.6410402476787567, |
| "num_tokens": 7473972.0, |
| "step": 2050 |
| }, |
| { |
| "epoch": 15.807692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 3.973e-05, |
| "loss": 1.6997, |
| "mean_token_accuracy": 0.6587257742881775, |
| "num_tokens": 7493430.0, |
| "step": 2055 |
| }, |
| { |
| "epoch": 15.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9705e-05, |
| "loss": 1.7487, |
| "mean_token_accuracy": 0.648233824968338, |
| "num_tokens": 7511123.0, |
| "step": 2060 |
| }, |
| { |
| "epoch": 15.884615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 3.968e-05, |
| "loss": 1.6579, |
| "mean_token_accuracy": 0.6663879871368408, |
| "num_tokens": 7530727.0, |
| "step": 2065 |
| }, |
| { |
| "epoch": 15.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9655000000000006e-05, |
| "loss": 1.7077, |
| "mean_token_accuracy": 0.6618927121162415, |
| "num_tokens": 7549141.0, |
| "step": 2070 |
| }, |
| { |
| "epoch": 15.961538461538462, |
| "grad_norm": 0.0, |
| "learning_rate": 3.963e-05, |
| "loss": 1.6903, |
| "mean_token_accuracy": 0.6613986879587174, |
| "num_tokens": 7567995.0, |
| "step": 2075 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9605000000000005e-05, |
| "loss": 1.7907, |
| "mean_token_accuracy": 0.6406063586473465, |
| "num_tokens": 7585680.0, |
| "step": 2080 |
| }, |
| { |
| "epoch": 16.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.958e-05, |
| "loss": 1.7458, |
| "mean_token_accuracy": 0.6477556735277176, |
| "num_tokens": 7604192.0, |
| "step": 2085 |
| }, |
| { |
| "epoch": 16.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9555e-05, |
| "loss": 1.67, |
| "mean_token_accuracy": 0.6667103677988052, |
| "num_tokens": 7622647.0, |
| "step": 2090 |
| }, |
| { |
| "epoch": 16.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.953e-05, |
| "loss": 1.7433, |
| "mean_token_accuracy": 0.652290990948677, |
| "num_tokens": 7640570.0, |
| "step": 2095 |
| }, |
| { |
| "epoch": 16.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9505e-05, |
| "loss": 1.6936, |
| "mean_token_accuracy": 0.6566350907087326, |
| "num_tokens": 7658920.0, |
| "step": 2100 |
| }, |
| { |
| "epoch": 16.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.948e-05, |
| "loss": 1.8121, |
| "mean_token_accuracy": 0.6355432808399201, |
| "num_tokens": 7676608.0, |
| "step": 2105 |
| }, |
| { |
| "epoch": 16.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9455e-05, |
| "loss": 1.7127, |
| "mean_token_accuracy": 0.65393525660038, |
| "num_tokens": 7695148.0, |
| "step": 2110 |
| }, |
| { |
| "epoch": 16.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.943e-05, |
| "loss": 1.7343, |
| "mean_token_accuracy": 0.6496871590614319, |
| "num_tokens": 7713440.0, |
| "step": 2115 |
| }, |
| { |
| "epoch": 16.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9405e-05, |
| "loss": 1.7737, |
| "mean_token_accuracy": 0.6332459360361099, |
| "num_tokens": 7731634.0, |
| "step": 2120 |
| }, |
| { |
| "epoch": 16.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.938e-05, |
| "loss": 1.7469, |
| "mean_token_accuracy": 0.6482865303754807, |
| "num_tokens": 7749952.0, |
| "step": 2125 |
| }, |
| { |
| "epoch": 16.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9355000000000005e-05, |
| "loss": 1.7426, |
| "mean_token_accuracy": 0.6485352158546448, |
| "num_tokens": 7768389.0, |
| "step": 2130 |
| }, |
| { |
| "epoch": 16.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.933e-05, |
| "loss": 1.6815, |
| "mean_token_accuracy": 0.6569307982921601, |
| "num_tokens": 7787655.0, |
| "step": 2135 |
| }, |
| { |
| "epoch": 16.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9305000000000004e-05, |
| "loss": 1.7333, |
| "mean_token_accuracy": 0.6505914777517319, |
| "num_tokens": 7805096.0, |
| "step": 2140 |
| }, |
| { |
| "epoch": 16.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9280000000000003e-05, |
| "loss": 1.7297, |
| "mean_token_accuracy": 0.6588895499706269, |
| "num_tokens": 7823436.0, |
| "step": 2145 |
| }, |
| { |
| "epoch": 16.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9255e-05, |
| "loss": 1.6984, |
| "mean_token_accuracy": 0.6600228071212768, |
| "num_tokens": 7842178.0, |
| "step": 2150 |
| }, |
| { |
| "epoch": 16.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.923e-05, |
| "loss": 1.7287, |
| "mean_token_accuracy": 0.6550074636936187, |
| "num_tokens": 7860260.0, |
| "step": 2155 |
| }, |
| { |
| "epoch": 16.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9205e-05, |
| "loss": 1.7764, |
| "mean_token_accuracy": 0.6480809897184372, |
| "num_tokens": 7877839.0, |
| "step": 2160 |
| }, |
| { |
| "epoch": 16.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.918e-05, |
| "loss": 1.66, |
| "mean_token_accuracy": 0.6657153934240341, |
| "num_tokens": 7896808.0, |
| "step": 2165 |
| }, |
| { |
| "epoch": 16.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9155e-05, |
| "loss": 1.6743, |
| "mean_token_accuracy": 0.6643134742975235, |
| "num_tokens": 7915776.0, |
| "step": 2170 |
| }, |
| { |
| "epoch": 16.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.913e-05, |
| "loss": 1.7643, |
| "mean_token_accuracy": 0.6498584270477294, |
| "num_tokens": 7933578.0, |
| "step": 2175 |
| }, |
| { |
| "epoch": 16.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9105000000000006e-05, |
| "loss": 1.8256, |
| "mean_token_accuracy": 0.638142678141594, |
| "num_tokens": 7950816.0, |
| "step": 2180 |
| }, |
| { |
| "epoch": 16.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.908e-05, |
| "loss": 1.7457, |
| "mean_token_accuracy": 0.6490698546171189, |
| "num_tokens": 7969482.0, |
| "step": 2185 |
| }, |
| { |
| "epoch": 16.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9055000000000005e-05, |
| "loss": 1.7675, |
| "mean_token_accuracy": 0.6475545018911362, |
| "num_tokens": 7987748.0, |
| "step": 2190 |
| }, |
| { |
| "epoch": 16.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.903e-05, |
| "loss": 1.7839, |
| "mean_token_accuracy": 0.639941617846489, |
| "num_tokens": 8005598.0, |
| "step": 2195 |
| }, |
| { |
| "epoch": 16.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9005000000000003e-05, |
| "loss": 1.7498, |
| "mean_token_accuracy": 0.6494361340999604, |
| "num_tokens": 8023661.0, |
| "step": 2200 |
| }, |
| { |
| "epoch": 16.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.898e-05, |
| "loss": 1.7785, |
| "mean_token_accuracy": 0.6451682835817337, |
| "num_tokens": 8042112.0, |
| "step": 2205 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8955e-05, |
| "loss": 1.7905, |
| "mean_token_accuracy": 0.6438369482755661, |
| "num_tokens": 8059785.0, |
| "step": 2210 |
| }, |
| { |
| "epoch": 17.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.893e-05, |
| "loss": 1.6973, |
| "mean_token_accuracy": 0.6598532944917679, |
| "num_tokens": 8078152.0, |
| "step": 2215 |
| }, |
| { |
| "epoch": 17.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8905e-05, |
| "loss": 1.7373, |
| "mean_token_accuracy": 0.6510017067193985, |
| "num_tokens": 8096780.0, |
| "step": 2220 |
| }, |
| { |
| "epoch": 17.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.888e-05, |
| "loss": 1.7209, |
| "mean_token_accuracy": 0.6596309930086136, |
| "num_tokens": 8114668.0, |
| "step": 2225 |
| }, |
| { |
| "epoch": 17.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8855e-05, |
| "loss": 1.6939, |
| "mean_token_accuracy": 0.6576298594474792, |
| "num_tokens": 8133880.0, |
| "step": 2230 |
| }, |
| { |
| "epoch": 17.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.883e-05, |
| "loss": 1.78, |
| "mean_token_accuracy": 0.6426860511302948, |
| "num_tokens": 8151540.0, |
| "step": 2235 |
| }, |
| { |
| "epoch": 17.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8805000000000005e-05, |
| "loss": 1.7545, |
| "mean_token_accuracy": 0.6476239621639251, |
| "num_tokens": 8170218.0, |
| "step": 2240 |
| }, |
| { |
| "epoch": 17.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.878e-05, |
| "loss": 1.7575, |
| "mean_token_accuracy": 0.6513453900814057, |
| "num_tokens": 8188203.0, |
| "step": 2245 |
| }, |
| { |
| "epoch": 17.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8755000000000004e-05, |
| "loss": 1.794, |
| "mean_token_accuracy": 0.6376719444990158, |
| "num_tokens": 8206190.0, |
| "step": 2250 |
| }, |
| { |
| "epoch": 17.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.873e-05, |
| "loss": 1.7844, |
| "mean_token_accuracy": 0.6456444442272187, |
| "num_tokens": 8224005.0, |
| "step": 2255 |
| }, |
| { |
| "epoch": 17.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8705e-05, |
| "loss": 1.7172, |
| "mean_token_accuracy": 0.6534038037061691, |
| "num_tokens": 8242421.0, |
| "step": 2260 |
| }, |
| { |
| "epoch": 17.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.868e-05, |
| "loss": 1.6841, |
| "mean_token_accuracy": 0.659113222360611, |
| "num_tokens": 8261508.0, |
| "step": 2265 |
| }, |
| { |
| "epoch": 17.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8655e-05, |
| "loss": 1.7875, |
| "mean_token_accuracy": 0.6438043415546417, |
| "num_tokens": 8279321.0, |
| "step": 2270 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.863e-05, |
| "loss": 1.7198, |
| "mean_token_accuracy": 0.6564956575632095, |
| "num_tokens": 8298176.0, |
| "step": 2275 |
| }, |
| { |
| "epoch": 17.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8605e-05, |
| "loss": 1.7513, |
| "mean_token_accuracy": 0.6444843232631683, |
| "num_tokens": 8316630.0, |
| "step": 2280 |
| }, |
| { |
| "epoch": 17.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.858e-05, |
| "loss": 1.6235, |
| "mean_token_accuracy": 0.6704483658075333, |
| "num_tokens": 8335880.0, |
| "step": 2285 |
| }, |
| { |
| "epoch": 17.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8555e-05, |
| "loss": 1.7432, |
| "mean_token_accuracy": 0.6476155072450638, |
| "num_tokens": 8354016.0, |
| "step": 2290 |
| }, |
| { |
| "epoch": 17.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.853e-05, |
| "loss": 1.7454, |
| "mean_token_accuracy": 0.6534331560134887, |
| "num_tokens": 8371707.0, |
| "step": 2295 |
| }, |
| { |
| "epoch": 17.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8505000000000005e-05, |
| "loss": 1.7394, |
| "mean_token_accuracy": 0.6537044525146485, |
| "num_tokens": 8389636.0, |
| "step": 2300 |
| }, |
| { |
| "epoch": 17.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.848e-05, |
| "loss": 1.7573, |
| "mean_token_accuracy": 0.6542636543512345, |
| "num_tokens": 8407513.0, |
| "step": 2305 |
| }, |
| { |
| "epoch": 17.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8455000000000004e-05, |
| "loss": 1.7613, |
| "mean_token_accuracy": 0.641442459821701, |
| "num_tokens": 8425390.0, |
| "step": 2310 |
| }, |
| { |
| "epoch": 17.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8429999999999996e-05, |
| "loss": 1.7467, |
| "mean_token_accuracy": 0.6489367455244064, |
| "num_tokens": 8443863.0, |
| "step": 2315 |
| }, |
| { |
| "epoch": 17.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8405e-05, |
| "loss": 1.8034, |
| "mean_token_accuracy": 0.6354307472705841, |
| "num_tokens": 8461483.0, |
| "step": 2320 |
| }, |
| { |
| "epoch": 17.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.838e-05, |
| "loss": 1.7575, |
| "mean_token_accuracy": 0.6472324639558792, |
| "num_tokens": 8479006.0, |
| "step": 2325 |
| }, |
| { |
| "epoch": 17.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8355e-05, |
| "loss": 1.7741, |
| "mean_token_accuracy": 0.646085774898529, |
| "num_tokens": 8496986.0, |
| "step": 2330 |
| }, |
| { |
| "epoch": 17.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.833e-05, |
| "loss": 1.7343, |
| "mean_token_accuracy": 0.6486167579889297, |
| "num_tokens": 8515399.0, |
| "step": 2335 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8305e-05, |
| "loss": 1.7085, |
| "mean_token_accuracy": 0.658332034945488, |
| "num_tokens": 8533890.0, |
| "step": 2340 |
| }, |
| { |
| "epoch": 18.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.828e-05, |
| "loss": 1.7182, |
| "mean_token_accuracy": 0.6543307691812515, |
| "num_tokens": 8552119.0, |
| "step": 2345 |
| }, |
| { |
| "epoch": 18.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8255e-05, |
| "loss": 1.721, |
| "mean_token_accuracy": 0.6576748192310333, |
| "num_tokens": 8570610.0, |
| "step": 2350 |
| }, |
| { |
| "epoch": 18.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.823e-05, |
| "loss": 1.7423, |
| "mean_token_accuracy": 0.6538129627704621, |
| "num_tokens": 8588510.0, |
| "step": 2355 |
| }, |
| { |
| "epoch": 18.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8205000000000004e-05, |
| "loss": 1.7196, |
| "mean_token_accuracy": 0.6549855172634125, |
| "num_tokens": 8606513.0, |
| "step": 2360 |
| }, |
| { |
| "epoch": 18.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.818e-05, |
| "loss": 1.7534, |
| "mean_token_accuracy": 0.6542291522026062, |
| "num_tokens": 8624369.0, |
| "step": 2365 |
| }, |
| { |
| "epoch": 18.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8155e-05, |
| "loss": 1.696, |
| "mean_token_accuracy": 0.6589814841747283, |
| "num_tokens": 8643122.0, |
| "step": 2370 |
| }, |
| { |
| "epoch": 18.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8129999999999996e-05, |
| "loss": 1.7568, |
| "mean_token_accuracy": 0.6480594784021377, |
| "num_tokens": 8660823.0, |
| "step": 2375 |
| }, |
| { |
| "epoch": 18.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8105e-05, |
| "loss": 1.7531, |
| "mean_token_accuracy": 0.6460558891296386, |
| "num_tokens": 8678813.0, |
| "step": 2380 |
| }, |
| { |
| "epoch": 18.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.808e-05, |
| "loss": 1.7844, |
| "mean_token_accuracy": 0.6402776807546615, |
| "num_tokens": 8696912.0, |
| "step": 2385 |
| }, |
| { |
| "epoch": 18.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8055e-05, |
| "loss": 1.8192, |
| "mean_token_accuracy": 0.6349106699228286, |
| "num_tokens": 8714150.0, |
| "step": 2390 |
| }, |
| { |
| "epoch": 18.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.803000000000001e-05, |
| "loss": 1.7805, |
| "mean_token_accuracy": 0.6420513331890106, |
| "num_tokens": 8732388.0, |
| "step": 2395 |
| }, |
| { |
| "epoch": 18.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8005e-05, |
| "loss": 1.7583, |
| "mean_token_accuracy": 0.6472231477499009, |
| "num_tokens": 8750733.0, |
| "step": 2400 |
| }, |
| { |
| "epoch": 18.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7980000000000006e-05, |
| "loss": 1.822, |
| "mean_token_accuracy": 0.6343230813741684, |
| "num_tokens": 8768670.0, |
| "step": 2405 |
| }, |
| { |
| "epoch": 18.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7955e-05, |
| "loss": 1.7632, |
| "mean_token_accuracy": 0.6466387540102005, |
| "num_tokens": 8786506.0, |
| "step": 2410 |
| }, |
| { |
| "epoch": 18.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7930000000000004e-05, |
| "loss": 1.7275, |
| "mean_token_accuracy": 0.6533702045679093, |
| "num_tokens": 8804613.0, |
| "step": 2415 |
| }, |
| { |
| "epoch": 18.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7905000000000004e-05, |
| "loss": 1.6716, |
| "mean_token_accuracy": 0.6637963593006134, |
| "num_tokens": 8823585.0, |
| "step": 2420 |
| }, |
| { |
| "epoch": 18.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.788e-05, |
| "loss": 1.7647, |
| "mean_token_accuracy": 0.648153355717659, |
| "num_tokens": 8841533.0, |
| "step": 2425 |
| }, |
| { |
| "epoch": 18.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7855e-05, |
| "loss": 1.7309, |
| "mean_token_accuracy": 0.6484656900167465, |
| "num_tokens": 8860614.0, |
| "step": 2430 |
| }, |
| { |
| "epoch": 18.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.783e-05, |
| "loss": 1.775, |
| "mean_token_accuracy": 0.6378404378890992, |
| "num_tokens": 8879255.0, |
| "step": 2435 |
| }, |
| { |
| "epoch": 18.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7805e-05, |
| "loss": 1.7502, |
| "mean_token_accuracy": 0.6478937238454818, |
| "num_tokens": 8896994.0, |
| "step": 2440 |
| }, |
| { |
| "epoch": 18.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.778000000000001e-05, |
| "loss": 1.7486, |
| "mean_token_accuracy": 0.6547796040773392, |
| "num_tokens": 8915326.0, |
| "step": 2445 |
| }, |
| { |
| "epoch": 18.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7755e-05, |
| "loss": 1.6805, |
| "mean_token_accuracy": 0.6655582249164581, |
| "num_tokens": 8933916.0, |
| "step": 2450 |
| }, |
| { |
| "epoch": 18.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7730000000000006e-05, |
| "loss": 1.7298, |
| "mean_token_accuracy": 0.6513529330492019, |
| "num_tokens": 8952047.0, |
| "step": 2455 |
| }, |
| { |
| "epoch": 18.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7705e-05, |
| "loss": 1.6774, |
| "mean_token_accuracy": 0.6640833884477615, |
| "num_tokens": 8970281.0, |
| "step": 2460 |
| }, |
| { |
| "epoch": 18.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7680000000000005e-05, |
| "loss": 1.6164, |
| "mean_token_accuracy": 0.6714823067188262, |
| "num_tokens": 8989804.0, |
| "step": 2465 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7655000000000005e-05, |
| "loss": 1.8069, |
| "mean_token_accuracy": 0.634514644742012, |
| "num_tokens": 9007995.0, |
| "step": 2470 |
| }, |
| { |
| "epoch": 19.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7630000000000004e-05, |
| "loss": 1.7192, |
| "mean_token_accuracy": 0.6563860654830933, |
| "num_tokens": 9026162.0, |
| "step": 2475 |
| }, |
| { |
| "epoch": 19.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7605e-05, |
| "loss": 1.7035, |
| "mean_token_accuracy": 0.6546828061342239, |
| "num_tokens": 9044982.0, |
| "step": 2480 |
| }, |
| { |
| "epoch": 19.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.758e-05, |
| "loss": 1.7711, |
| "mean_token_accuracy": 0.6455140680074691, |
| "num_tokens": 9062948.0, |
| "step": 2485 |
| }, |
| { |
| "epoch": 19.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7555e-05, |
| "loss": 1.7543, |
| "mean_token_accuracy": 0.6509835839271545, |
| "num_tokens": 9080800.0, |
| "step": 2490 |
| }, |
| { |
| "epoch": 19.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.753e-05, |
| "loss": 1.749, |
| "mean_token_accuracy": 0.6524321138858795, |
| "num_tokens": 9099027.0, |
| "step": 2495 |
| }, |
| { |
| "epoch": 19.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7505e-05, |
| "loss": 1.7379, |
| "mean_token_accuracy": 0.6502025574445724, |
| "num_tokens": 9117027.0, |
| "step": 2500 |
| }, |
| { |
| "epoch": 19.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.748000000000001e-05, |
| "loss": 1.7259, |
| "mean_token_accuracy": 0.6557818174362182, |
| "num_tokens": 9135276.0, |
| "step": 2505 |
| }, |
| { |
| "epoch": 19.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7455e-05, |
| "loss": 1.6784, |
| "mean_token_accuracy": 0.6623701632022858, |
| "num_tokens": 9154044.0, |
| "step": 2510 |
| }, |
| { |
| "epoch": 19.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7430000000000006e-05, |
| "loss": 1.7474, |
| "mean_token_accuracy": 0.6527136623859405, |
| "num_tokens": 9172242.0, |
| "step": 2515 |
| }, |
| { |
| "epoch": 19.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7405e-05, |
| "loss": 1.7387, |
| "mean_token_accuracy": 0.647335684299469, |
| "num_tokens": 9190090.0, |
| "step": 2520 |
| }, |
| { |
| "epoch": 19.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7380000000000005e-05, |
| "loss": 1.824, |
| "mean_token_accuracy": 0.6351722240447998, |
| "num_tokens": 9207627.0, |
| "step": 2525 |
| }, |
| { |
| "epoch": 19.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7355000000000004e-05, |
| "loss": 1.8204, |
| "mean_token_accuracy": 0.6388478010892868, |
| "num_tokens": 9224815.0, |
| "step": 2530 |
| }, |
| { |
| "epoch": 19.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7330000000000003e-05, |
| "loss": 1.6962, |
| "mean_token_accuracy": 0.6541598588228226, |
| "num_tokens": 9244225.0, |
| "step": 2535 |
| }, |
| { |
| "epoch": 19.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7305e-05, |
| "loss": 1.7581, |
| "mean_token_accuracy": 0.6506271123886108, |
| "num_tokens": 9262267.0, |
| "step": 2540 |
| }, |
| { |
| "epoch": 19.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.728e-05, |
| "loss": 1.7494, |
| "mean_token_accuracy": 0.6438189834356308, |
| "num_tokens": 9280916.0, |
| "step": 2545 |
| }, |
| { |
| "epoch": 19.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7255e-05, |
| "loss": 1.7621, |
| "mean_token_accuracy": 0.6443462133407593, |
| "num_tokens": 9299236.0, |
| "step": 2550 |
| }, |
| { |
| "epoch": 19.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.723e-05, |
| "loss": 1.7594, |
| "mean_token_accuracy": 0.6466510325670243, |
| "num_tokens": 9317391.0, |
| "step": 2555 |
| }, |
| { |
| "epoch": 19.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7205e-05, |
| "loss": 1.6792, |
| "mean_token_accuracy": 0.6581139594316483, |
| "num_tokens": 9336860.0, |
| "step": 2560 |
| }, |
| { |
| "epoch": 19.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7180000000000007e-05, |
| "loss": 1.7497, |
| "mean_token_accuracy": 0.6539162546396255, |
| "num_tokens": 9354743.0, |
| "step": 2565 |
| }, |
| { |
| "epoch": 19.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7155e-05, |
| "loss": 1.7125, |
| "mean_token_accuracy": 0.6545383244752884, |
| "num_tokens": 9373351.0, |
| "step": 2570 |
| }, |
| { |
| "epoch": 19.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7130000000000005e-05, |
| "loss": 1.7363, |
| "mean_token_accuracy": 0.6508906304836273, |
| "num_tokens": 9391237.0, |
| "step": 2575 |
| }, |
| { |
| "epoch": 19.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7105e-05, |
| "loss": 1.6926, |
| "mean_token_accuracy": 0.6579229325056076, |
| "num_tokens": 9409658.0, |
| "step": 2580 |
| }, |
| { |
| "epoch": 19.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7080000000000004e-05, |
| "loss": 1.7471, |
| "mean_token_accuracy": 0.6523754239082337, |
| "num_tokens": 9427268.0, |
| "step": 2585 |
| }, |
| { |
| "epoch": 19.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7055000000000004e-05, |
| "loss": 1.7494, |
| "mean_token_accuracy": 0.6500475823879241, |
| "num_tokens": 9445373.0, |
| "step": 2590 |
| }, |
| { |
| "epoch": 19.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.703e-05, |
| "loss": 1.763, |
| "mean_token_accuracy": 0.6419122099876404, |
| "num_tokens": 9463346.0, |
| "step": 2595 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7005e-05, |
| "loss": 1.7377, |
| "mean_token_accuracy": 0.6534196078777313, |
| "num_tokens": 9482100.0, |
| "step": 2600 |
| }, |
| { |
| "epoch": 20.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.698e-05, |
| "loss": 1.7222, |
| "mean_token_accuracy": 0.6527847439050675, |
| "num_tokens": 9500755.0, |
| "step": 2605 |
| }, |
| { |
| "epoch": 20.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6955e-05, |
| "loss": 1.8368, |
| "mean_token_accuracy": 0.6314696133136749, |
| "num_tokens": 9518008.0, |
| "step": 2610 |
| }, |
| { |
| "epoch": 20.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.693e-05, |
| "loss": 1.7166, |
| "mean_token_accuracy": 0.656936526298523, |
| "num_tokens": 9536514.0, |
| "step": 2615 |
| }, |
| { |
| "epoch": 20.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6905e-05, |
| "loss": 1.7997, |
| "mean_token_accuracy": 0.6421493351459503, |
| "num_tokens": 9554049.0, |
| "step": 2620 |
| }, |
| { |
| "epoch": 20.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6880000000000006e-05, |
| "loss": 1.7075, |
| "mean_token_accuracy": 0.6625353038311005, |
| "num_tokens": 9572125.0, |
| "step": 2625 |
| }, |
| { |
| "epoch": 20.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6855e-05, |
| "loss": 1.7367, |
| "mean_token_accuracy": 0.6516992777585984, |
| "num_tokens": 9590370.0, |
| "step": 2630 |
| }, |
| { |
| "epoch": 20.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6830000000000005e-05, |
| "loss": 1.7493, |
| "mean_token_accuracy": 0.6508445411920547, |
| "num_tokens": 9608387.0, |
| "step": 2635 |
| }, |
| { |
| "epoch": 20.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6805e-05, |
| "loss": 1.754, |
| "mean_token_accuracy": 0.6476915091276169, |
| "num_tokens": 9626593.0, |
| "step": 2640 |
| }, |
| { |
| "epoch": 20.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6780000000000004e-05, |
| "loss": 1.7698, |
| "mean_token_accuracy": 0.6473258316516877, |
| "num_tokens": 9644649.0, |
| "step": 2645 |
| }, |
| { |
| "epoch": 20.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6755e-05, |
| "loss": 1.7206, |
| "mean_token_accuracy": 0.6517573058605194, |
| "num_tokens": 9663339.0, |
| "step": 2650 |
| }, |
| { |
| "epoch": 20.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.673e-05, |
| "loss": 1.8082, |
| "mean_token_accuracy": 0.6340971022844315, |
| "num_tokens": 9681407.0, |
| "step": 2655 |
| }, |
| { |
| "epoch": 20.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6705e-05, |
| "loss": 1.76, |
| "mean_token_accuracy": 0.6423662751913071, |
| "num_tokens": 9698951.0, |
| "step": 2660 |
| }, |
| { |
| "epoch": 20.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.668e-05, |
| "loss": 1.7479, |
| "mean_token_accuracy": 0.6455884456634522, |
| "num_tokens": 9716798.0, |
| "step": 2665 |
| }, |
| { |
| "epoch": 20.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6655e-05, |
| "loss": 1.6967, |
| "mean_token_accuracy": 0.6592382907867431, |
| "num_tokens": 9735601.0, |
| "step": 2670 |
| }, |
| { |
| "epoch": 20.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.663e-05, |
| "loss": 1.7629, |
| "mean_token_accuracy": 0.6453653991222381, |
| "num_tokens": 9753360.0, |
| "step": 2675 |
| }, |
| { |
| "epoch": 20.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6605e-05, |
| "loss": 1.7007, |
| "mean_token_accuracy": 0.660412722826004, |
| "num_tokens": 9771848.0, |
| "step": 2680 |
| }, |
| { |
| "epoch": 20.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6580000000000006e-05, |
| "loss": 1.7466, |
| "mean_token_accuracy": 0.6478160947561264, |
| "num_tokens": 9789801.0, |
| "step": 2685 |
| }, |
| { |
| "epoch": 20.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6555e-05, |
| "loss": 1.7047, |
| "mean_token_accuracy": 0.6545427888631821, |
| "num_tokens": 9808227.0, |
| "step": 2690 |
| }, |
| { |
| "epoch": 20.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6530000000000004e-05, |
| "loss": 1.7437, |
| "mean_token_accuracy": 0.6440727055072785, |
| "num_tokens": 9826666.0, |
| "step": 2695 |
| }, |
| { |
| "epoch": 20.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6505e-05, |
| "loss": 1.6243, |
| "mean_token_accuracy": 0.6773371279239655, |
| "num_tokens": 9845929.0, |
| "step": 2700 |
| }, |
| { |
| "epoch": 20.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.648e-05, |
| "loss": 1.7548, |
| "mean_token_accuracy": 0.6442576110363006, |
| "num_tokens": 9864387.0, |
| "step": 2705 |
| }, |
| { |
| "epoch": 20.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6455e-05, |
| "loss": 1.7623, |
| "mean_token_accuracy": 0.6505689769983292, |
| "num_tokens": 9881953.0, |
| "step": 2710 |
| }, |
| { |
| "epoch": 20.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.643e-05, |
| "loss": 1.7447, |
| "mean_token_accuracy": 0.650253239274025, |
| "num_tokens": 9900170.0, |
| "step": 2715 |
| }, |
| { |
| "epoch": 20.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6405e-05, |
| "loss": 1.7563, |
| "mean_token_accuracy": 0.6442853361368179, |
| "num_tokens": 9918308.0, |
| "step": 2720 |
| }, |
| { |
| "epoch": 20.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.638e-05, |
| "loss": 1.7022, |
| "mean_token_accuracy": 0.6568427920341492, |
| "num_tokens": 9937402.0, |
| "step": 2725 |
| }, |
| { |
| "epoch": 21.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6355e-05, |
| "loss": 1.7716, |
| "mean_token_accuracy": 0.6476004511117935, |
| "num_tokens": 9956205.0, |
| "step": 2730 |
| }, |
| { |
| "epoch": 21.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6330000000000006e-05, |
| "loss": 1.7709, |
| "mean_token_accuracy": 0.6501421749591827, |
| "num_tokens": 9973863.0, |
| "step": 2735 |
| }, |
| { |
| "epoch": 21.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6305e-05, |
| "loss": 1.7418, |
| "mean_token_accuracy": 0.6446869194507598, |
| "num_tokens": 9991767.0, |
| "step": 2740 |
| }, |
| { |
| "epoch": 21.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6280000000000005e-05, |
| "loss": 1.7733, |
| "mean_token_accuracy": 0.6450621813535691, |
| "num_tokens": 10009914.0, |
| "step": 2745 |
| }, |
| { |
| "epoch": 21.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6255e-05, |
| "loss": 1.744, |
| "mean_token_accuracy": 0.6536423653364182, |
| "num_tokens": 10027897.0, |
| "step": 2750 |
| }, |
| { |
| "epoch": 21.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6230000000000004e-05, |
| "loss": 1.7402, |
| "mean_token_accuracy": 0.6494254291057586, |
| "num_tokens": 10046965.0, |
| "step": 2755 |
| }, |
| { |
| "epoch": 21.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6205e-05, |
| "loss": 1.758, |
| "mean_token_accuracy": 0.6502227276563645, |
| "num_tokens": 10065041.0, |
| "step": 2760 |
| }, |
| { |
| "epoch": 21.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.618e-05, |
| "loss": 1.805, |
| "mean_token_accuracy": 0.6402064859867096, |
| "num_tokens": 10082396.0, |
| "step": 2765 |
| }, |
| { |
| "epoch": 21.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6155e-05, |
| "loss": 1.7417, |
| "mean_token_accuracy": 0.6509008765220642, |
| "num_tokens": 10100522.0, |
| "step": 2770 |
| }, |
| { |
| "epoch": 21.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.613e-05, |
| "loss": 1.7474, |
| "mean_token_accuracy": 0.6551948994398117, |
| "num_tokens": 10118325.0, |
| "step": 2775 |
| }, |
| { |
| "epoch": 21.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6105e-05, |
| "loss": 1.6998, |
| "mean_token_accuracy": 0.6569103240966797, |
| "num_tokens": 10137261.0, |
| "step": 2780 |
| }, |
| { |
| "epoch": 21.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.608e-05, |
| "loss": 1.7573, |
| "mean_token_accuracy": 0.6484821021556855, |
| "num_tokens": 10155636.0, |
| "step": 2785 |
| }, |
| { |
| "epoch": 21.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6055e-05, |
| "loss": 1.7989, |
| "mean_token_accuracy": 0.6373539805412293, |
| "num_tokens": 10173181.0, |
| "step": 2790 |
| }, |
| { |
| "epoch": 21.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6030000000000006e-05, |
| "loss": 1.7508, |
| "mean_token_accuracy": 0.6406475752592087, |
| "num_tokens": 10191767.0, |
| "step": 2795 |
| }, |
| { |
| "epoch": 21.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6005e-05, |
| "loss": 1.7051, |
| "mean_token_accuracy": 0.6602169930934906, |
| "num_tokens": 10210631.0, |
| "step": 2800 |
| }, |
| { |
| "epoch": 21.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5980000000000004e-05, |
| "loss": 1.6777, |
| "mean_token_accuracy": 0.6621813416481018, |
| "num_tokens": 10229408.0, |
| "step": 2805 |
| }, |
| { |
| "epoch": 21.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5955e-05, |
| "loss": 1.7332, |
| "mean_token_accuracy": 0.6526377439498902, |
| "num_tokens": 10247835.0, |
| "step": 2810 |
| }, |
| { |
| "epoch": 21.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.593e-05, |
| "loss": 1.7106, |
| "mean_token_accuracy": 0.6549345046281815, |
| "num_tokens": 10266504.0, |
| "step": 2815 |
| }, |
| { |
| "epoch": 21.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5905e-05, |
| "loss": 1.7201, |
| "mean_token_accuracy": 0.6562414228916168, |
| "num_tokens": 10284706.0, |
| "step": 2820 |
| }, |
| { |
| "epoch": 21.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.588e-05, |
| "loss": 1.7226, |
| "mean_token_accuracy": 0.6576842248439789, |
| "num_tokens": 10302875.0, |
| "step": 2825 |
| }, |
| { |
| "epoch": 21.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5855e-05, |
| "loss": 1.6721, |
| "mean_token_accuracy": 0.6636721462011337, |
| "num_tokens": 10321523.0, |
| "step": 2830 |
| }, |
| { |
| "epoch": 21.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.583e-05, |
| "loss": 1.7523, |
| "mean_token_accuracy": 0.6470031648874283, |
| "num_tokens": 10339297.0, |
| "step": 2835 |
| }, |
| { |
| "epoch": 21.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5805e-05, |
| "loss": 1.7419, |
| "mean_token_accuracy": 0.6513529509305954, |
| "num_tokens": 10357839.0, |
| "step": 2840 |
| }, |
| { |
| "epoch": 21.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.578e-05, |
| "loss": 1.7623, |
| "mean_token_accuracy": 0.6404614865779876, |
| "num_tokens": 10375584.0, |
| "step": 2845 |
| }, |
| { |
| "epoch": 21.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5755e-05, |
| "loss": 1.737, |
| "mean_token_accuracy": 0.6452168464660645, |
| "num_tokens": 10394319.0, |
| "step": 2850 |
| }, |
| { |
| "epoch": 21.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5730000000000005e-05, |
| "loss": 1.6929, |
| "mean_token_accuracy": 0.6594237118959427, |
| "num_tokens": 10412963.0, |
| "step": 2855 |
| }, |
| { |
| "epoch": 22.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5705e-05, |
| "loss": 1.786, |
| "mean_token_accuracy": 0.6423673510551453, |
| "num_tokens": 10430310.0, |
| "step": 2860 |
| }, |
| { |
| "epoch": 22.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5680000000000004e-05, |
| "loss": 1.7693, |
| "mean_token_accuracy": 0.6458660453557968, |
| "num_tokens": 10448139.0, |
| "step": 2865 |
| }, |
| { |
| "epoch": 22.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5654999999999997e-05, |
| "loss": 1.7493, |
| "mean_token_accuracy": 0.6492699027061463, |
| "num_tokens": 10466020.0, |
| "step": 2870 |
| }, |
| { |
| "epoch": 22.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.563e-05, |
| "loss": 1.7669, |
| "mean_token_accuracy": 0.6457598328590393, |
| "num_tokens": 10483856.0, |
| "step": 2875 |
| }, |
| { |
| "epoch": 22.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5605e-05, |
| "loss": 1.6918, |
| "mean_token_accuracy": 0.6608535885810852, |
| "num_tokens": 10502391.0, |
| "step": 2880 |
| }, |
| { |
| "epoch": 22.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.558e-05, |
| "loss": 1.7783, |
| "mean_token_accuracy": 0.6480051964521408, |
| "num_tokens": 10520184.0, |
| "step": 2885 |
| }, |
| { |
| "epoch": 22.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5555e-05, |
| "loss": 1.7946, |
| "mean_token_accuracy": 0.6426310211420059, |
| "num_tokens": 10537625.0, |
| "step": 2890 |
| }, |
| { |
| "epoch": 22.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.553e-05, |
| "loss": 1.8393, |
| "mean_token_accuracy": 0.6324712634086609, |
| "num_tokens": 10554726.0, |
| "step": 2895 |
| }, |
| { |
| "epoch": 22.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5505e-05, |
| "loss": 1.7677, |
| "mean_token_accuracy": 0.6496516615152359, |
| "num_tokens": 10572379.0, |
| "step": 2900 |
| }, |
| { |
| "epoch": 22.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.548e-05, |
| "loss": 1.7381, |
| "mean_token_accuracy": 0.6457451343536377, |
| "num_tokens": 10591449.0, |
| "step": 2905 |
| }, |
| { |
| "epoch": 22.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5455e-05, |
| "loss": 1.6608, |
| "mean_token_accuracy": 0.6606644958257675, |
| "num_tokens": 10611295.0, |
| "step": 2910 |
| }, |
| { |
| "epoch": 22.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5430000000000005e-05, |
| "loss": 1.6849, |
| "mean_token_accuracy": 0.6587489366531372, |
| "num_tokens": 10629917.0, |
| "step": 2915 |
| }, |
| { |
| "epoch": 22.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5405e-05, |
| "loss": 1.7484, |
| "mean_token_accuracy": 0.6465423703193665, |
| "num_tokens": 10647956.0, |
| "step": 2920 |
| }, |
| { |
| "epoch": 22.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5380000000000003e-05, |
| "loss": 1.7415, |
| "mean_token_accuracy": 0.6542441755533218, |
| "num_tokens": 10666004.0, |
| "step": 2925 |
| }, |
| { |
| "epoch": 22.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5354999999999996e-05, |
| "loss": 1.7098, |
| "mean_token_accuracy": 0.654433760046959, |
| "num_tokens": 10684772.0, |
| "step": 2930 |
| }, |
| { |
| "epoch": 22.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.533e-05, |
| "loss": 1.8088, |
| "mean_token_accuracy": 0.63780497610569, |
| "num_tokens": 10702520.0, |
| "step": 2935 |
| }, |
| { |
| "epoch": 22.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5305e-05, |
| "loss": 1.7072, |
| "mean_token_accuracy": 0.659371867775917, |
| "num_tokens": 10721196.0, |
| "step": 2940 |
| }, |
| { |
| "epoch": 22.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.528e-05, |
| "loss": 1.7689, |
| "mean_token_accuracy": 0.647321754693985, |
| "num_tokens": 10738660.0, |
| "step": 2945 |
| }, |
| { |
| "epoch": 22.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5255e-05, |
| "loss": 1.7366, |
| "mean_token_accuracy": 0.6515435039997101, |
| "num_tokens": 10757158.0, |
| "step": 2950 |
| }, |
| { |
| "epoch": 22.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.523e-05, |
| "loss": 1.7007, |
| "mean_token_accuracy": 0.6563295513391495, |
| "num_tokens": 10775636.0, |
| "step": 2955 |
| }, |
| { |
| "epoch": 22.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5205e-05, |
| "loss": 1.7159, |
| "mean_token_accuracy": 0.6521429270505905, |
| "num_tokens": 10794398.0, |
| "step": 2960 |
| }, |
| { |
| "epoch": 22.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.518e-05, |
| "loss": 1.667, |
| "mean_token_accuracy": 0.6644465386867523, |
| "num_tokens": 10813475.0, |
| "step": 2965 |
| }, |
| { |
| "epoch": 22.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5155e-05, |
| "loss": 1.7503, |
| "mean_token_accuracy": 0.6510158330202103, |
| "num_tokens": 10831125.0, |
| "step": 2970 |
| }, |
| { |
| "epoch": 22.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5130000000000004e-05, |
| "loss": 1.7005, |
| "mean_token_accuracy": 0.662487056851387, |
| "num_tokens": 10849387.0, |
| "step": 2975 |
| }, |
| { |
| "epoch": 22.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5105e-05, |
| "loss": 1.7911, |
| "mean_token_accuracy": 0.6427533149719238, |
| "num_tokens": 10867773.0, |
| "step": 2980 |
| }, |
| { |
| "epoch": 22.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.508e-05, |
| "loss": 1.7629, |
| "mean_token_accuracy": 0.6471167922019958, |
| "num_tokens": 10885829.0, |
| "step": 2985 |
| }, |
| { |
| "epoch": 23.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5055e-05, |
| "loss": 1.7349, |
| "mean_token_accuracy": 0.65148167014122, |
| "num_tokens": 10904415.0, |
| "step": 2990 |
| }, |
| { |
| "epoch": 23.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.503e-05, |
| "loss": 1.7296, |
| "mean_token_accuracy": 0.6501170575618744, |
| "num_tokens": 10922228.0, |
| "step": 2995 |
| }, |
| { |
| "epoch": 23.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5005e-05, |
| "loss": 1.7787, |
| "mean_token_accuracy": 0.6366728276014328, |
| "num_tokens": 10940239.0, |
| "step": 3000 |
| }, |
| { |
| "epoch": 23.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.498e-05, |
| "loss": 1.7362, |
| "mean_token_accuracy": 0.6528029084205628, |
| "num_tokens": 10958391.0, |
| "step": 3005 |
| }, |
| { |
| "epoch": 23.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.495500000000001e-05, |
| "loss": 1.7442, |
| "mean_token_accuracy": 0.6534345805644989, |
| "num_tokens": 10976257.0, |
| "step": 3010 |
| }, |
| { |
| "epoch": 23.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.493e-05, |
| "loss": 1.7464, |
| "mean_token_accuracy": 0.6423383712768554, |
| "num_tokens": 10995098.0, |
| "step": 3015 |
| }, |
| { |
| "epoch": 23.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4905000000000005e-05, |
| "loss": 1.7095, |
| "mean_token_accuracy": 0.6603679537773133, |
| "num_tokens": 11013198.0, |
| "step": 3020 |
| }, |
| { |
| "epoch": 23.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4880000000000005e-05, |
| "loss": 1.6675, |
| "mean_token_accuracy": 0.6619115889072418, |
| "num_tokens": 11032529.0, |
| "step": 3025 |
| }, |
| { |
| "epoch": 23.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4855000000000004e-05, |
| "loss": 1.7215, |
| "mean_token_accuracy": 0.6535635858774185, |
| "num_tokens": 11051447.0, |
| "step": 3030 |
| }, |
| { |
| "epoch": 23.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4830000000000004e-05, |
| "loss": 1.7505, |
| "mean_token_accuracy": 0.6513790518045426, |
| "num_tokens": 11069725.0, |
| "step": 3035 |
| }, |
| { |
| "epoch": 23.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4805e-05, |
| "loss": 1.7062, |
| "mean_token_accuracy": 0.660124909877777, |
| "num_tokens": 11088473.0, |
| "step": 3040 |
| }, |
| { |
| "epoch": 23.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.478e-05, |
| "loss": 1.7365, |
| "mean_token_accuracy": 0.6506242454051971, |
| "num_tokens": 11106701.0, |
| "step": 3045 |
| }, |
| { |
| "epoch": 23.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4755e-05, |
| "loss": 1.747, |
| "mean_token_accuracy": 0.6486080348491668, |
| "num_tokens": 11124657.0, |
| "step": 3050 |
| }, |
| { |
| "epoch": 23.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.473e-05, |
| "loss": 1.7576, |
| "mean_token_accuracy": 0.6459647357463837, |
| "num_tokens": 11142612.0, |
| "step": 3055 |
| }, |
| { |
| "epoch": 23.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.470500000000001e-05, |
| "loss": 1.7698, |
| "mean_token_accuracy": 0.6444110184907913, |
| "num_tokens": 11160572.0, |
| "step": 3060 |
| }, |
| { |
| "epoch": 23.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.468e-05, |
| "loss": 1.7671, |
| "mean_token_accuracy": 0.6471373438835144, |
| "num_tokens": 11178814.0, |
| "step": 3065 |
| }, |
| { |
| "epoch": 23.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4655000000000006e-05, |
| "loss": 1.7604, |
| "mean_token_accuracy": 0.650464779138565, |
| "num_tokens": 11196788.0, |
| "step": 3070 |
| }, |
| { |
| "epoch": 23.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.463e-05, |
| "loss": 1.7563, |
| "mean_token_accuracy": 0.6483624398708343, |
| "num_tokens": 11214549.0, |
| "step": 3075 |
| }, |
| { |
| "epoch": 23.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4605000000000005e-05, |
| "loss": 1.6635, |
| "mean_token_accuracy": 0.6658029884099961, |
| "num_tokens": 11233473.0, |
| "step": 3080 |
| }, |
| { |
| "epoch": 23.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4580000000000004e-05, |
| "loss": 1.8208, |
| "mean_token_accuracy": 0.6367295324802399, |
| "num_tokens": 11250907.0, |
| "step": 3085 |
| }, |
| { |
| "epoch": 23.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4555000000000004e-05, |
| "loss": 1.7758, |
| "mean_token_accuracy": 0.6431278616189957, |
| "num_tokens": 11268887.0, |
| "step": 3090 |
| }, |
| { |
| "epoch": 23.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.453e-05, |
| "loss": 1.6927, |
| "mean_token_accuracy": 0.6590600997209549, |
| "num_tokens": 11287728.0, |
| "step": 3095 |
| }, |
| { |
| "epoch": 23.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4505e-05, |
| "loss": 1.6928, |
| "mean_token_accuracy": 0.6589304953813553, |
| "num_tokens": 11306559.0, |
| "step": 3100 |
| }, |
| { |
| "epoch": 23.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.448e-05, |
| "loss": 1.7559, |
| "mean_token_accuracy": 0.647815215587616, |
| "num_tokens": 11324582.0, |
| "step": 3105 |
| }, |
| { |
| "epoch": 23.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4455e-05, |
| "loss": 1.7362, |
| "mean_token_accuracy": 0.6531584694981575, |
| "num_tokens": 11343111.0, |
| "step": 3110 |
| }, |
| { |
| "epoch": 23.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.443e-05, |
| "loss": 1.7569, |
| "mean_token_accuracy": 0.6495847851037979, |
| "num_tokens": 11361415.0, |
| "step": 3115 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.440500000000001e-05, |
| "loss": 1.8106, |
| "mean_token_accuracy": 0.6377548843622207, |
| "num_tokens": 11378520.0, |
| "step": 3120 |
| }, |
| { |
| "epoch": 24.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.438e-05, |
| "loss": 1.7197, |
| "mean_token_accuracy": 0.6562131911516189, |
| "num_tokens": 11397124.0, |
| "step": 3125 |
| }, |
| { |
| "epoch": 24.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4355000000000006e-05, |
| "loss": 1.7154, |
| "mean_token_accuracy": 0.6540484815835953, |
| "num_tokens": 11415431.0, |
| "step": 3130 |
| }, |
| { |
| "epoch": 24.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.433e-05, |
| "loss": 1.7791, |
| "mean_token_accuracy": 0.6443376511335372, |
| "num_tokens": 11433445.0, |
| "step": 3135 |
| }, |
| { |
| "epoch": 24.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4305000000000004e-05, |
| "loss": 1.7501, |
| "mean_token_accuracy": 0.6464499235153198, |
| "num_tokens": 11451759.0, |
| "step": 3140 |
| }, |
| { |
| "epoch": 24.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4280000000000004e-05, |
| "loss": 1.7251, |
| "mean_token_accuracy": 0.6546833992004395, |
| "num_tokens": 11470163.0, |
| "step": 3145 |
| }, |
| { |
| "epoch": 24.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4255e-05, |
| "loss": 1.7204, |
| "mean_token_accuracy": 0.6526606291532516, |
| "num_tokens": 11488656.0, |
| "step": 3150 |
| }, |
| { |
| "epoch": 24.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.423e-05, |
| "loss": 1.8343, |
| "mean_token_accuracy": 0.6308804154396057, |
| "num_tokens": 11505937.0, |
| "step": 3155 |
| }, |
| { |
| "epoch": 24.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4205e-05, |
| "loss": 1.7237, |
| "mean_token_accuracy": 0.6544101685285568, |
| "num_tokens": 11524471.0, |
| "step": 3160 |
| }, |
| { |
| "epoch": 24.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.418e-05, |
| "loss": 1.7619, |
| "mean_token_accuracy": 0.6497153013944625, |
| "num_tokens": 11542302.0, |
| "step": 3165 |
| }, |
| { |
| "epoch": 24.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4155e-05, |
| "loss": 1.7937, |
| "mean_token_accuracy": 0.6424742221832276, |
| "num_tokens": 11560367.0, |
| "step": 3170 |
| }, |
| { |
| "epoch": 24.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.413e-05, |
| "loss": 1.7211, |
| "mean_token_accuracy": 0.6575414210557937, |
| "num_tokens": 11578662.0, |
| "step": 3175 |
| }, |
| { |
| "epoch": 24.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4105000000000006e-05, |
| "loss": 1.7301, |
| "mean_token_accuracy": 0.6548975586891175, |
| "num_tokens": 11597057.0, |
| "step": 3180 |
| }, |
| { |
| "epoch": 24.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.408e-05, |
| "loss": 1.8287, |
| "mean_token_accuracy": 0.6332879096269608, |
| "num_tokens": 11614420.0, |
| "step": 3185 |
| }, |
| { |
| "epoch": 24.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4055000000000005e-05, |
| "loss": 1.6783, |
| "mean_token_accuracy": 0.6583501130342484, |
| "num_tokens": 11633485.0, |
| "step": 3190 |
| }, |
| { |
| "epoch": 24.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.403e-05, |
| "loss": 1.7974, |
| "mean_token_accuracy": 0.6381134271621705, |
| "num_tokens": 11651379.0, |
| "step": 3195 |
| }, |
| { |
| "epoch": 24.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4005000000000004e-05, |
| "loss": 1.7224, |
| "mean_token_accuracy": 0.6523280829191208, |
| "num_tokens": 11669811.0, |
| "step": 3200 |
| }, |
| { |
| "epoch": 24.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.398e-05, |
| "loss": 1.71, |
| "mean_token_accuracy": 0.6583969056606293, |
| "num_tokens": 11687820.0, |
| "step": 3205 |
| }, |
| { |
| "epoch": 24.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3955e-05, |
| "loss": 1.7925, |
| "mean_token_accuracy": 0.636654618382454, |
| "num_tokens": 11705825.0, |
| "step": 3210 |
| }, |
| { |
| "epoch": 24.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.393e-05, |
| "loss": 1.7292, |
| "mean_token_accuracy": 0.6524395048618317, |
| "num_tokens": 11723774.0, |
| "step": 3215 |
| }, |
| { |
| "epoch": 24.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3905e-05, |
| "loss": 1.7275, |
| "mean_token_accuracy": 0.6529559135437012, |
| "num_tokens": 11741969.0, |
| "step": 3220 |
| }, |
| { |
| "epoch": 24.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.388e-05, |
| "loss": 1.7514, |
| "mean_token_accuracy": 0.6474628210067749, |
| "num_tokens": 11759857.0, |
| "step": 3225 |
| }, |
| { |
| "epoch": 24.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3855e-05, |
| "loss": 1.696, |
| "mean_token_accuracy": 0.6536848098039627, |
| "num_tokens": 11778672.0, |
| "step": 3230 |
| }, |
| { |
| "epoch": 24.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.383e-05, |
| "loss": 1.6997, |
| "mean_token_accuracy": 0.6613029778003693, |
| "num_tokens": 11797888.0, |
| "step": 3235 |
| }, |
| { |
| "epoch": 24.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3805000000000006e-05, |
| "loss": 1.6657, |
| "mean_token_accuracy": 0.6732263565063477, |
| "num_tokens": 11816590.0, |
| "step": 3240 |
| }, |
| { |
| "epoch": 24.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.378e-05, |
| "loss": 1.8309, |
| "mean_token_accuracy": 0.634924265742302, |
| "num_tokens": 11833413.0, |
| "step": 3245 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3755000000000005e-05, |
| "loss": 1.6911, |
| "mean_token_accuracy": 0.6547602891921998, |
| "num_tokens": 11852625.0, |
| "step": 3250 |
| }, |
| { |
| "epoch": 25.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.373e-05, |
| "loss": 1.7518, |
| "mean_token_accuracy": 0.6456693172454834, |
| "num_tokens": 11870498.0, |
| "step": 3255 |
| }, |
| { |
| "epoch": 25.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3705000000000003e-05, |
| "loss": 1.734, |
| "mean_token_accuracy": 0.6480823725461959, |
| "num_tokens": 11888806.0, |
| "step": 3260 |
| }, |
| { |
| "epoch": 25.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.368e-05, |
| "loss": 1.7296, |
| "mean_token_accuracy": 0.6544734388589859, |
| "num_tokens": 11907007.0, |
| "step": 3265 |
| }, |
| { |
| "epoch": 25.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3655e-05, |
| "loss": 1.7, |
| "mean_token_accuracy": 0.661970067024231, |
| "num_tokens": 11925855.0, |
| "step": 3270 |
| }, |
| { |
| "epoch": 25.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.363e-05, |
| "loss": 1.7531, |
| "mean_token_accuracy": 0.6502303391695022, |
| "num_tokens": 11944217.0, |
| "step": 3275 |
| }, |
| { |
| "epoch": 25.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3605e-05, |
| "loss": 1.7867, |
| "mean_token_accuracy": 0.6456191658973693, |
| "num_tokens": 11961900.0, |
| "step": 3280 |
| }, |
| { |
| "epoch": 25.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.358e-05, |
| "loss": 1.7118, |
| "mean_token_accuracy": 0.6574157625436783, |
| "num_tokens": 11980478.0, |
| "step": 3285 |
| }, |
| { |
| "epoch": 25.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3555e-05, |
| "loss": 1.752, |
| "mean_token_accuracy": 0.6447003424167633, |
| "num_tokens": 11998539.0, |
| "step": 3290 |
| }, |
| { |
| "epoch": 25.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.353e-05, |
| "loss": 1.7875, |
| "mean_token_accuracy": 0.6442469358444214, |
| "num_tokens": 12016433.0, |
| "step": 3295 |
| }, |
| { |
| "epoch": 25.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3505000000000005e-05, |
| "loss": 1.7792, |
| "mean_token_accuracy": 0.6442890495061875, |
| "num_tokens": 12034074.0, |
| "step": 3300 |
| }, |
| { |
| "epoch": 25.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.348e-05, |
| "loss": 1.7701, |
| "mean_token_accuracy": 0.6484797149896622, |
| "num_tokens": 12051813.0, |
| "step": 3305 |
| }, |
| { |
| "epoch": 25.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3455000000000004e-05, |
| "loss": 1.8468, |
| "mean_token_accuracy": 0.632660773396492, |
| "num_tokens": 12068538.0, |
| "step": 3310 |
| }, |
| { |
| "epoch": 25.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3430000000000003e-05, |
| "loss": 1.6571, |
| "mean_token_accuracy": 0.6714397668838501, |
| "num_tokens": 12087276.0, |
| "step": 3315 |
| }, |
| { |
| "epoch": 25.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3405e-05, |
| "loss": 1.7512, |
| "mean_token_accuracy": 0.6499413967132568, |
| "num_tokens": 12105620.0, |
| "step": 3320 |
| }, |
| { |
| "epoch": 25.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.338e-05, |
| "loss": 1.7522, |
| "mean_token_accuracy": 0.6516470074653625, |
| "num_tokens": 12123634.0, |
| "step": 3325 |
| }, |
| { |
| "epoch": 25.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3355e-05, |
| "loss": 1.6871, |
| "mean_token_accuracy": 0.6579142987728119, |
| "num_tokens": 12142945.0, |
| "step": 3330 |
| }, |
| { |
| "epoch": 25.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.333e-05, |
| "loss": 1.6238, |
| "mean_token_accuracy": 0.6688612520694732, |
| "num_tokens": 12162962.0, |
| "step": 3335 |
| }, |
| { |
| "epoch": 25.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3305e-05, |
| "loss": 1.8432, |
| "mean_token_accuracy": 0.6307004660367965, |
| "num_tokens": 12180342.0, |
| "step": 3340 |
| }, |
| { |
| "epoch": 25.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.328e-05, |
| "loss": 1.7708, |
| "mean_token_accuracy": 0.6448087066411972, |
| "num_tokens": 12198282.0, |
| "step": 3345 |
| }, |
| { |
| "epoch": 25.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3255000000000006e-05, |
| "loss": 1.7765, |
| "mean_token_accuracy": 0.6405314236879349, |
| "num_tokens": 12215896.0, |
| "step": 3350 |
| }, |
| { |
| "epoch": 25.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.323e-05, |
| "loss": 1.7424, |
| "mean_token_accuracy": 0.6420567870140076, |
| "num_tokens": 12234723.0, |
| "step": 3355 |
| }, |
| { |
| "epoch": 25.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3205000000000005e-05, |
| "loss": 1.6715, |
| "mean_token_accuracy": 0.6686410069465637, |
| "num_tokens": 12253662.0, |
| "step": 3360 |
| }, |
| { |
| "epoch": 25.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.318e-05, |
| "loss": 1.7287, |
| "mean_token_accuracy": 0.653440797328949, |
| "num_tokens": 12271662.0, |
| "step": 3365 |
| }, |
| { |
| "epoch": 25.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3155000000000004e-05, |
| "loss": 1.7753, |
| "mean_token_accuracy": 0.6468665450811386, |
| "num_tokens": 12289329.0, |
| "step": 3370 |
| }, |
| { |
| "epoch": 25.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.313e-05, |
| "loss": 1.764, |
| "mean_token_accuracy": 0.6430284142494201, |
| "num_tokens": 12307193.0, |
| "step": 3375 |
| }, |
| { |
| "epoch": 26.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3105e-05, |
| "loss": 1.6559, |
| "mean_token_accuracy": 0.6661966532468796, |
| "num_tokens": 12326730.0, |
| "step": 3380 |
| }, |
| { |
| "epoch": 26.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.308e-05, |
| "loss": 1.7868, |
| "mean_token_accuracy": 0.6423007547855377, |
| "num_tokens": 12344701.0, |
| "step": 3385 |
| }, |
| { |
| "epoch": 26.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3055e-05, |
| "loss": 1.7293, |
| "mean_token_accuracy": 0.6523230075836182, |
| "num_tokens": 12362936.0, |
| "step": 3390 |
| }, |
| { |
| "epoch": 26.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.303e-05, |
| "loss": 1.7103, |
| "mean_token_accuracy": 0.6539131790399552, |
| "num_tokens": 12381863.0, |
| "step": 3395 |
| }, |
| { |
| "epoch": 26.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3005e-05, |
| "loss": 1.765, |
| "mean_token_accuracy": 0.6502564907073974, |
| "num_tokens": 12399802.0, |
| "step": 3400 |
| }, |
| { |
| "epoch": 26.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.298e-05, |
| "loss": 1.7704, |
| "mean_token_accuracy": 0.6401029378175735, |
| "num_tokens": 12417656.0, |
| "step": 3405 |
| }, |
| { |
| "epoch": 26.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2955000000000006e-05, |
| "loss": 1.6876, |
| "mean_token_accuracy": 0.6598240792751312, |
| "num_tokens": 12436367.0, |
| "step": 3410 |
| }, |
| { |
| "epoch": 26.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.293e-05, |
| "loss": 1.7437, |
| "mean_token_accuracy": 0.6541768878698349, |
| "num_tokens": 12454372.0, |
| "step": 3415 |
| }, |
| { |
| "epoch": 26.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2905000000000004e-05, |
| "loss": 1.7304, |
| "mean_token_accuracy": 0.6505888134241105, |
| "num_tokens": 12472148.0, |
| "step": 3420 |
| }, |
| { |
| "epoch": 26.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.288e-05, |
| "loss": 1.7421, |
| "mean_token_accuracy": 0.6458885490894317, |
| "num_tokens": 12491225.0, |
| "step": 3425 |
| }, |
| { |
| "epoch": 26.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2855e-05, |
| "loss": 1.7056, |
| "mean_token_accuracy": 0.6600462704896927, |
| "num_tokens": 12510093.0, |
| "step": 3430 |
| }, |
| { |
| "epoch": 26.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.283e-05, |
| "loss": 1.7006, |
| "mean_token_accuracy": 0.6607132166624069, |
| "num_tokens": 12529121.0, |
| "step": 3435 |
| }, |
| { |
| "epoch": 26.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2805e-05, |
| "loss": 1.7692, |
| "mean_token_accuracy": 0.6444113343954087, |
| "num_tokens": 12546721.0, |
| "step": 3440 |
| }, |
| { |
| "epoch": 26.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.278e-05, |
| "loss": 1.7363, |
| "mean_token_accuracy": 0.6494855612516404, |
| "num_tokens": 12564745.0, |
| "step": 3445 |
| }, |
| { |
| "epoch": 26.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2755e-05, |
| "loss": 1.7475, |
| "mean_token_accuracy": 0.6525735288858414, |
| "num_tokens": 12583018.0, |
| "step": 3450 |
| }, |
| { |
| "epoch": 26.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.273e-05, |
| "loss": 1.7463, |
| "mean_token_accuracy": 0.6488917529582977, |
| "num_tokens": 12600537.0, |
| "step": 3455 |
| }, |
| { |
| "epoch": 26.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2705e-05, |
| "loss": 1.7124, |
| "mean_token_accuracy": 0.6540741354227066, |
| "num_tokens": 12619514.0, |
| "step": 3460 |
| }, |
| { |
| "epoch": 26.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.268e-05, |
| "loss": 1.7322, |
| "mean_token_accuracy": 0.6489483207464218, |
| "num_tokens": 12637542.0, |
| "step": 3465 |
| }, |
| { |
| "epoch": 26.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2655000000000005e-05, |
| "loss": 1.6812, |
| "mean_token_accuracy": 0.6622325748205184, |
| "num_tokens": 12656523.0, |
| "step": 3470 |
| }, |
| { |
| "epoch": 26.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.263e-05, |
| "loss": 1.7123, |
| "mean_token_accuracy": 0.654995933175087, |
| "num_tokens": 12675577.0, |
| "step": 3475 |
| }, |
| { |
| "epoch": 26.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2605000000000004e-05, |
| "loss": 1.7554, |
| "mean_token_accuracy": 0.6486168265342712, |
| "num_tokens": 12693239.0, |
| "step": 3480 |
| }, |
| { |
| "epoch": 26.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2579999999999996e-05, |
| "loss": 1.6658, |
| "mean_token_accuracy": 0.6676429510116577, |
| "num_tokens": 12712177.0, |
| "step": 3485 |
| }, |
| { |
| "epoch": 26.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2555e-05, |
| "loss": 1.789, |
| "mean_token_accuracy": 0.6442992717027665, |
| "num_tokens": 12729550.0, |
| "step": 3490 |
| }, |
| { |
| "epoch": 26.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.253e-05, |
| "loss": 1.7931, |
| "mean_token_accuracy": 0.6460472613573074, |
| "num_tokens": 12747450.0, |
| "step": 3495 |
| }, |
| { |
| "epoch": 26.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2505e-05, |
| "loss": 1.7583, |
| "mean_token_accuracy": 0.6475391507148742, |
| "num_tokens": 12765847.0, |
| "step": 3500 |
| }, |
| { |
| "epoch": 26.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.248e-05, |
| "loss": 1.7987, |
| "mean_token_accuracy": 0.6450091898441315, |
| "num_tokens": 12783426.0, |
| "step": 3505 |
| }, |
| { |
| "epoch": 27.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2455e-05, |
| "loss": 1.8088, |
| "mean_token_accuracy": 0.6360502719879151, |
| "num_tokens": 12800835.0, |
| "step": 3510 |
| }, |
| { |
| "epoch": 27.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.243e-05, |
| "loss": 1.6903, |
| "mean_token_accuracy": 0.6623231947422028, |
| "num_tokens": 12819454.0, |
| "step": 3515 |
| }, |
| { |
| "epoch": 27.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2405e-05, |
| "loss": 1.7798, |
| "mean_token_accuracy": 0.6476831644773483, |
| "num_tokens": 12837657.0, |
| "step": 3520 |
| }, |
| { |
| "epoch": 27.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.238e-05, |
| "loss": 1.7351, |
| "mean_token_accuracy": 0.6554979294538498, |
| "num_tokens": 12855426.0, |
| "step": 3525 |
| }, |
| { |
| "epoch": 27.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2355000000000004e-05, |
| "loss": 1.6828, |
| "mean_token_accuracy": 0.661261597275734, |
| "num_tokens": 12873752.0, |
| "step": 3530 |
| }, |
| { |
| "epoch": 27.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.233e-05, |
| "loss": 1.769, |
| "mean_token_accuracy": 0.6420103222131729, |
| "num_tokens": 12891459.0, |
| "step": 3535 |
| }, |
| { |
| "epoch": 27.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2305e-05, |
| "loss": 1.7324, |
| "mean_token_accuracy": 0.6513606965541839, |
| "num_tokens": 12910141.0, |
| "step": 3540 |
| }, |
| { |
| "epoch": 27.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2279999999999996e-05, |
| "loss": 1.7847, |
| "mean_token_accuracy": 0.6459431618452072, |
| "num_tokens": 12927928.0, |
| "step": 3545 |
| }, |
| { |
| "epoch": 27.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2255e-05, |
| "loss": 1.745, |
| "mean_token_accuracy": 0.6481869876384735, |
| "num_tokens": 12945933.0, |
| "step": 3550 |
| }, |
| { |
| "epoch": 27.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.223e-05, |
| "loss": 1.7767, |
| "mean_token_accuracy": 0.6434827774763108, |
| "num_tokens": 12963760.0, |
| "step": 3555 |
| }, |
| { |
| "epoch": 27.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2205e-05, |
| "loss": 1.7383, |
| "mean_token_accuracy": 0.6473019540309906, |
| "num_tokens": 12982243.0, |
| "step": 3560 |
| }, |
| { |
| "epoch": 27.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.218e-05, |
| "loss": 1.7827, |
| "mean_token_accuracy": 0.6399312824010849, |
| "num_tokens": 13000474.0, |
| "step": 3565 |
| }, |
| { |
| "epoch": 27.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2155e-05, |
| "loss": 1.7396, |
| "mean_token_accuracy": 0.6523656696081161, |
| "num_tokens": 13019062.0, |
| "step": 3570 |
| }, |
| { |
| "epoch": 27.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.213e-05, |
| "loss": 1.7927, |
| "mean_token_accuracy": 0.6346830785274505, |
| "num_tokens": 13036934.0, |
| "step": 3575 |
| }, |
| { |
| "epoch": 27.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2105e-05, |
| "loss": 1.7951, |
| "mean_token_accuracy": 0.6394420713186264, |
| "num_tokens": 13054793.0, |
| "step": 3580 |
| }, |
| { |
| "epoch": 27.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.208e-05, |
| "loss": 1.771, |
| "mean_token_accuracy": 0.6480452120304108, |
| "num_tokens": 13072483.0, |
| "step": 3585 |
| }, |
| { |
| "epoch": 27.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2055000000000004e-05, |
| "loss": 1.614, |
| "mean_token_accuracy": 0.6677554935216904, |
| "num_tokens": 13092135.0, |
| "step": 3590 |
| }, |
| { |
| "epoch": 27.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2029999999999997e-05, |
| "loss": 1.7139, |
| "mean_token_accuracy": 0.6528209656476974, |
| "num_tokens": 13110720.0, |
| "step": 3595 |
| }, |
| { |
| "epoch": 27.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2005e-05, |
| "loss": 1.6911, |
| "mean_token_accuracy": 0.6594361692667008, |
| "num_tokens": 13128968.0, |
| "step": 3600 |
| }, |
| { |
| "epoch": 27.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.198e-05, |
| "loss": 1.6991, |
| "mean_token_accuracy": 0.6626447290182114, |
| "num_tokens": 13147908.0, |
| "step": 3605 |
| }, |
| { |
| "epoch": 27.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1955e-05, |
| "loss": 1.7859, |
| "mean_token_accuracy": 0.6439048200845718, |
| "num_tokens": 13165594.0, |
| "step": 3610 |
| }, |
| { |
| "epoch": 27.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.193e-05, |
| "loss": 1.6987, |
| "mean_token_accuracy": 0.6610940158367157, |
| "num_tokens": 13183929.0, |
| "step": 3615 |
| }, |
| { |
| "epoch": 27.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1905e-05, |
| "loss": 1.8453, |
| "mean_token_accuracy": 0.6301412254571914, |
| "num_tokens": 13200878.0, |
| "step": 3620 |
| }, |
| { |
| "epoch": 27.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.188e-05, |
| "loss": 1.7052, |
| "mean_token_accuracy": 0.6592441231012345, |
| "num_tokens": 13219674.0, |
| "step": 3625 |
| }, |
| { |
| "epoch": 27.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1855e-05, |
| "loss": 1.7817, |
| "mean_token_accuracy": 0.6449559360742569, |
| "num_tokens": 13238217.0, |
| "step": 3630 |
| }, |
| { |
| "epoch": 27.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1830000000000005e-05, |
| "loss": 1.7167, |
| "mean_token_accuracy": 0.6618435323238373, |
| "num_tokens": 13256740.0, |
| "step": 3635 |
| }, |
| { |
| "epoch": 28.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1805000000000005e-05, |
| "loss": 1.723, |
| "mean_token_accuracy": 0.6585237294435501, |
| "num_tokens": 13274940.0, |
| "step": 3640 |
| }, |
| { |
| "epoch": 28.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1780000000000004e-05, |
| "loss": 1.7343, |
| "mean_token_accuracy": 0.6495951384305954, |
| "num_tokens": 13293159.0, |
| "step": 3645 |
| }, |
| { |
| "epoch": 28.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1755000000000003e-05, |
| "loss": 1.7448, |
| "mean_token_accuracy": 0.6506305515766144, |
| "num_tokens": 13310902.0, |
| "step": 3650 |
| }, |
| { |
| "epoch": 28.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.173e-05, |
| "loss": 1.7557, |
| "mean_token_accuracy": 0.6501852482557297, |
| "num_tokens": 13329266.0, |
| "step": 3655 |
| }, |
| { |
| "epoch": 28.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1705e-05, |
| "loss": 1.7799, |
| "mean_token_accuracy": 0.6411935687065125, |
| "num_tokens": 13346803.0, |
| "step": 3660 |
| }, |
| { |
| "epoch": 28.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.168e-05, |
| "loss": 1.6598, |
| "mean_token_accuracy": 0.6662024825811386, |
| "num_tokens": 13366046.0, |
| "step": 3665 |
| }, |
| { |
| "epoch": 28.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1655e-05, |
| "loss": 1.7028, |
| "mean_token_accuracy": 0.6530542701482773, |
| "num_tokens": 13384925.0, |
| "step": 3670 |
| }, |
| { |
| "epoch": 28.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.163000000000001e-05, |
| "loss": 1.77, |
| "mean_token_accuracy": 0.6439873456954956, |
| "num_tokens": 13402589.0, |
| "step": 3675 |
| }, |
| { |
| "epoch": 28.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1605e-05, |
| "loss": 1.7013, |
| "mean_token_accuracy": 0.6571666300296783, |
| "num_tokens": 13421123.0, |
| "step": 3680 |
| }, |
| { |
| "epoch": 28.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1580000000000006e-05, |
| "loss": 1.7788, |
| "mean_token_accuracy": 0.6432044029235839, |
| "num_tokens": 13439431.0, |
| "step": 3685 |
| }, |
| { |
| "epoch": 28.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1555e-05, |
| "loss": 1.7207, |
| "mean_token_accuracy": 0.6578160703182221, |
| "num_tokens": 13457841.0, |
| "step": 3690 |
| }, |
| { |
| "epoch": 28.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1530000000000005e-05, |
| "loss": 1.7291, |
| "mean_token_accuracy": 0.6534811556339264, |
| "num_tokens": 13476489.0, |
| "step": 3695 |
| }, |
| { |
| "epoch": 28.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1505000000000004e-05, |
| "loss": 1.7142, |
| "mean_token_accuracy": 0.6544462114572525, |
| "num_tokens": 13494620.0, |
| "step": 3700 |
| }, |
| { |
| "epoch": 28.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1480000000000004e-05, |
| "loss": 1.7707, |
| "mean_token_accuracy": 0.6433985292911529, |
| "num_tokens": 13512210.0, |
| "step": 3705 |
| }, |
| { |
| "epoch": 28.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1455e-05, |
| "loss": 1.7642, |
| "mean_token_accuracy": 0.6466316103935241, |
| "num_tokens": 13529925.0, |
| "step": 3710 |
| }, |
| { |
| "epoch": 28.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.143e-05, |
| "loss": 1.6938, |
| "mean_token_accuracy": 0.663212212920189, |
| "num_tokens": 13548438.0, |
| "step": 3715 |
| }, |
| { |
| "epoch": 28.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1405e-05, |
| "loss": 1.7596, |
| "mean_token_accuracy": 0.6495319962501526, |
| "num_tokens": 13566473.0, |
| "step": 3720 |
| }, |
| { |
| "epoch": 28.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.138e-05, |
| "loss": 1.7426, |
| "mean_token_accuracy": 0.6494800269603729, |
| "num_tokens": 13584724.0, |
| "step": 3725 |
| }, |
| { |
| "epoch": 28.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1355e-05, |
| "loss": 1.7341, |
| "mean_token_accuracy": 0.6527552962303161, |
| "num_tokens": 13603304.0, |
| "step": 3730 |
| }, |
| { |
| "epoch": 28.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.133000000000001e-05, |
| "loss": 1.7222, |
| "mean_token_accuracy": 0.6519874900579452, |
| "num_tokens": 13621839.0, |
| "step": 3735 |
| }, |
| { |
| "epoch": 28.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1305e-05, |
| "loss": 1.6808, |
| "mean_token_accuracy": 0.6618104815483093, |
| "num_tokens": 13640943.0, |
| "step": 3740 |
| }, |
| { |
| "epoch": 28.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1280000000000005e-05, |
| "loss": 1.8519, |
| "mean_token_accuracy": 0.6293092161417008, |
| "num_tokens": 13658372.0, |
| "step": 3745 |
| }, |
| { |
| "epoch": 28.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1255e-05, |
| "loss": 1.7723, |
| "mean_token_accuracy": 0.6427496522665024, |
| "num_tokens": 13676355.0, |
| "step": 3750 |
| }, |
| { |
| "epoch": 28.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1230000000000004e-05, |
| "loss": 1.7997, |
| "mean_token_accuracy": 0.6400014579296112, |
| "num_tokens": 13693987.0, |
| "step": 3755 |
| }, |
| { |
| "epoch": 28.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1205000000000004e-05, |
| "loss": 1.698, |
| "mean_token_accuracy": 0.6581391155719757, |
| "num_tokens": 13712942.0, |
| "step": 3760 |
| }, |
| { |
| "epoch": 28.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.118e-05, |
| "loss": 1.7421, |
| "mean_token_accuracy": 0.655744206905365, |
| "num_tokens": 13731216.0, |
| "step": 3765 |
| }, |
| { |
| "epoch": 29.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1155e-05, |
| "loss": 1.7639, |
| "mean_token_accuracy": 0.6510610312223435, |
| "num_tokens": 13749045.0, |
| "step": 3770 |
| }, |
| { |
| "epoch": 29.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.113e-05, |
| "loss": 1.6943, |
| "mean_token_accuracy": 0.658639770746231, |
| "num_tokens": 13767612.0, |
| "step": 3775 |
| }, |
| { |
| "epoch": 29.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1105e-05, |
| "loss": 1.7511, |
| "mean_token_accuracy": 0.645610973238945, |
| "num_tokens": 13785732.0, |
| "step": 3780 |
| }, |
| { |
| "epoch": 29.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.108e-05, |
| "loss": 1.7157, |
| "mean_token_accuracy": 0.6572709828615189, |
| "num_tokens": 13804149.0, |
| "step": 3785 |
| }, |
| { |
| "epoch": 29.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1055e-05, |
| "loss": 1.7041, |
| "mean_token_accuracy": 0.6564319849014282, |
| "num_tokens": 13822922.0, |
| "step": 3790 |
| }, |
| { |
| "epoch": 29.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1030000000000006e-05, |
| "loss": 1.7008, |
| "mean_token_accuracy": 0.6606447160243988, |
| "num_tokens": 13840985.0, |
| "step": 3795 |
| }, |
| { |
| "epoch": 29.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1005e-05, |
| "loss": 1.7041, |
| "mean_token_accuracy": 0.6543154448270798, |
| "num_tokens": 13859478.0, |
| "step": 3800 |
| }, |
| { |
| "epoch": 29.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0980000000000005e-05, |
| "loss": 1.743, |
| "mean_token_accuracy": 0.6480233430862427, |
| "num_tokens": 13877895.0, |
| "step": 3805 |
| }, |
| { |
| "epoch": 29.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0955e-05, |
| "loss": 1.6918, |
| "mean_token_accuracy": 0.6589089602231979, |
| "num_tokens": 13896941.0, |
| "step": 3810 |
| }, |
| { |
| "epoch": 29.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0930000000000004e-05, |
| "loss": 1.7573, |
| "mean_token_accuracy": 0.64599030315876, |
| "num_tokens": 13915563.0, |
| "step": 3815 |
| }, |
| { |
| "epoch": 29.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0905e-05, |
| "loss": 1.7548, |
| "mean_token_accuracy": 0.6496599853038788, |
| "num_tokens": 13933531.0, |
| "step": 3820 |
| }, |
| { |
| "epoch": 29.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.088e-05, |
| "loss": 1.7523, |
| "mean_token_accuracy": 0.6545566976070404, |
| "num_tokens": 13951615.0, |
| "step": 3825 |
| }, |
| { |
| "epoch": 29.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0855e-05, |
| "loss": 1.8148, |
| "mean_token_accuracy": 0.6359264075756073, |
| "num_tokens": 13969103.0, |
| "step": 3830 |
| }, |
| { |
| "epoch": 29.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.083e-05, |
| "loss": 1.7199, |
| "mean_token_accuracy": 0.6523585051298142, |
| "num_tokens": 13987413.0, |
| "step": 3835 |
| }, |
| { |
| "epoch": 29.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0805e-05, |
| "loss": 1.8455, |
| "mean_token_accuracy": 0.6313353180885315, |
| "num_tokens": 14004497.0, |
| "step": 3840 |
| }, |
| { |
| "epoch": 29.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.078e-05, |
| "loss": 1.6469, |
| "mean_token_accuracy": 0.6732534170150757, |
| "num_tokens": 14023651.0, |
| "step": 3845 |
| }, |
| { |
| "epoch": 29.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0755e-05, |
| "loss": 1.7665, |
| "mean_token_accuracy": 0.6485314697027207, |
| "num_tokens": 14041204.0, |
| "step": 3850 |
| }, |
| { |
| "epoch": 29.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0730000000000006e-05, |
| "loss": 1.7689, |
| "mean_token_accuracy": 0.6438219338655472, |
| "num_tokens": 14058902.0, |
| "step": 3855 |
| }, |
| { |
| "epoch": 29.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0705e-05, |
| "loss": 1.7326, |
| "mean_token_accuracy": 0.6497067272663116, |
| "num_tokens": 14077655.0, |
| "step": 3860 |
| }, |
| { |
| "epoch": 29.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0680000000000004e-05, |
| "loss": 1.6854, |
| "mean_token_accuracy": 0.6631190478801727, |
| "num_tokens": 14096779.0, |
| "step": 3865 |
| }, |
| { |
| "epoch": 29.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0655e-05, |
| "loss": 1.7354, |
| "mean_token_accuracy": 0.6524263739585876, |
| "num_tokens": 14114433.0, |
| "step": 3870 |
| }, |
| { |
| "epoch": 29.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.063e-05, |
| "loss": 1.7631, |
| "mean_token_accuracy": 0.6457651436328888, |
| "num_tokens": 14132433.0, |
| "step": 3875 |
| }, |
| { |
| "epoch": 29.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0605e-05, |
| "loss": 1.7735, |
| "mean_token_accuracy": 0.6389037370681763, |
| "num_tokens": 14150572.0, |
| "step": 3880 |
| }, |
| { |
| "epoch": 29.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.058e-05, |
| "loss": 1.786, |
| "mean_token_accuracy": 0.6433692693710327, |
| "num_tokens": 14168515.0, |
| "step": 3885 |
| }, |
| { |
| "epoch": 29.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0555e-05, |
| "loss": 1.8269, |
| "mean_token_accuracy": 0.6312504172325134, |
| "num_tokens": 14185905.0, |
| "step": 3890 |
| }, |
| { |
| "epoch": 29.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.053e-05, |
| "loss": 1.7589, |
| "mean_token_accuracy": 0.6496811449527741, |
| "num_tokens": 14203685.0, |
| "step": 3895 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0505e-05, |
| "loss": 1.6956, |
| "mean_token_accuracy": 0.6565341949462891, |
| "num_tokens": 14223150.0, |
| "step": 3900 |
| }, |
| { |
| "epoch": 30.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0480000000000003e-05, |
| "loss": 1.7284, |
| "mean_token_accuracy": 0.6489845454692841, |
| "num_tokens": 14241996.0, |
| "step": 3905 |
| }, |
| { |
| "epoch": 30.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0455e-05, |
| "loss": 1.6848, |
| "mean_token_accuracy": 0.6612500458955765, |
| "num_tokens": 14260761.0, |
| "step": 3910 |
| }, |
| { |
| "epoch": 30.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0430000000000002e-05, |
| "loss": 1.7619, |
| "mean_token_accuracy": 0.6461090564727783, |
| "num_tokens": 14278765.0, |
| "step": 3915 |
| }, |
| { |
| "epoch": 30.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0405e-05, |
| "loss": 1.7833, |
| "mean_token_accuracy": 0.6493801504373551, |
| "num_tokens": 14296313.0, |
| "step": 3920 |
| }, |
| { |
| "epoch": 30.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0380000000000004e-05, |
| "loss": 1.7384, |
| "mean_token_accuracy": 0.6529599964618683, |
| "num_tokens": 14314228.0, |
| "step": 3925 |
| }, |
| { |
| "epoch": 30.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0355e-05, |
| "loss": 1.736, |
| "mean_token_accuracy": 0.6538343548774719, |
| "num_tokens": 14332966.0, |
| "step": 3930 |
| }, |
| { |
| "epoch": 30.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0330000000000003e-05, |
| "loss": 1.7853, |
| "mean_token_accuracy": 0.6408940821886062, |
| "num_tokens": 14350343.0, |
| "step": 3935 |
| }, |
| { |
| "epoch": 30.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0305e-05, |
| "loss": 1.6214, |
| "mean_token_accuracy": 0.6724200338125229, |
| "num_tokens": 14369664.0, |
| "step": 3940 |
| }, |
| { |
| "epoch": 30.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.028e-05, |
| "loss": 1.7748, |
| "mean_token_accuracy": 0.6431576639413834, |
| "num_tokens": 14388275.0, |
| "step": 3945 |
| }, |
| { |
| "epoch": 30.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0255e-05, |
| "loss": 1.7376, |
| "mean_token_accuracy": 0.6552271574735642, |
| "num_tokens": 14406379.0, |
| "step": 3950 |
| }, |
| { |
| "epoch": 30.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0230000000000004e-05, |
| "loss": 1.7319, |
| "mean_token_accuracy": 0.6521833807229995, |
| "num_tokens": 14424792.0, |
| "step": 3955 |
| }, |
| { |
| "epoch": 30.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0205e-05, |
| "loss": 1.68, |
| "mean_token_accuracy": 0.6599725693464279, |
| "num_tokens": 14443837.0, |
| "step": 3960 |
| }, |
| { |
| "epoch": 30.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0180000000000002e-05, |
| "loss": 1.8198, |
| "mean_token_accuracy": 0.6380291700363159, |
| "num_tokens": 14461559.0, |
| "step": 3965 |
| }, |
| { |
| "epoch": 30.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0155e-05, |
| "loss": 1.7347, |
| "mean_token_accuracy": 0.6505941689014435, |
| "num_tokens": 14479670.0, |
| "step": 3970 |
| }, |
| { |
| "epoch": 30.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.013e-05, |
| "loss": 1.7947, |
| "mean_token_accuracy": 0.6404198050498963, |
| "num_tokens": 14497368.0, |
| "step": 3975 |
| }, |
| { |
| "epoch": 30.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0105e-05, |
| "loss": 1.7593, |
| "mean_token_accuracy": 0.644034668803215, |
| "num_tokens": 14515634.0, |
| "step": 3980 |
| }, |
| { |
| "epoch": 30.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0080000000000003e-05, |
| "loss": 1.6789, |
| "mean_token_accuracy": 0.6613237649202347, |
| "num_tokens": 14534370.0, |
| "step": 3985 |
| }, |
| { |
| "epoch": 30.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0055e-05, |
| "loss": 1.7542, |
| "mean_token_accuracy": 0.645700478553772, |
| "num_tokens": 14552442.0, |
| "step": 3990 |
| }, |
| { |
| "epoch": 30.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0030000000000002e-05, |
| "loss": 1.8189, |
| "mean_token_accuracy": 0.6313192546367645, |
| "num_tokens": 14569871.0, |
| "step": 3995 |
| }, |
| { |
| "epoch": 30.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0004999999999998e-05, |
| "loss": 1.6918, |
| "mean_token_accuracy": 0.6627134591341018, |
| "num_tokens": 14588659.0, |
| "step": 4000 |
| }, |
| { |
| "epoch": 30.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 2.998e-05, |
| "loss": 1.7661, |
| "mean_token_accuracy": 0.6492778122425079, |
| "num_tokens": 14606860.0, |
| "step": 4005 |
| }, |
| { |
| "epoch": 30.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9955e-05, |
| "loss": 1.771, |
| "mean_token_accuracy": 0.6434893846511841, |
| "num_tokens": 14624861.0, |
| "step": 4010 |
| }, |
| { |
| "epoch": 30.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9930000000000003e-05, |
| "loss": 1.6634, |
| "mean_token_accuracy": 0.6664941430091857, |
| "num_tokens": 14643597.0, |
| "step": 4015 |
| }, |
| { |
| "epoch": 30.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9905e-05, |
| "loss": 1.7814, |
| "mean_token_accuracy": 0.6440500050783158, |
| "num_tokens": 14661469.0, |
| "step": 4020 |
| }, |
| { |
| "epoch": 30.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9880000000000002e-05, |
| "loss": 1.7175, |
| "mean_token_accuracy": 0.6576481223106384, |
| "num_tokens": 14679555.0, |
| "step": 4025 |
| }, |
| { |
| "epoch": 31.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9855e-05, |
| "loss": 1.7706, |
| "mean_token_accuracy": 0.6471602261066437, |
| "num_tokens": 14697255.0, |
| "step": 4030 |
| }, |
| { |
| "epoch": 31.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9830000000000004e-05, |
| "loss": 1.7567, |
| "mean_token_accuracy": 0.6573283642530441, |
| "num_tokens": 14715135.0, |
| "step": 4035 |
| }, |
| { |
| "epoch": 31.076923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9805e-05, |
| "loss": 1.649, |
| "mean_token_accuracy": 0.6654754310846329, |
| "num_tokens": 14734385.0, |
| "step": 4040 |
| }, |
| { |
| "epoch": 31.115384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9780000000000003e-05, |
| "loss": 1.7331, |
| "mean_token_accuracy": 0.6555071473121643, |
| "num_tokens": 14752659.0, |
| "step": 4045 |
| }, |
| { |
| "epoch": 31.153846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9755e-05, |
| "loss": 1.7449, |
| "mean_token_accuracy": 0.6513841986656189, |
| "num_tokens": 14770947.0, |
| "step": 4050 |
| }, |
| { |
| "epoch": 31.192307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 2.973e-05, |
| "loss": 1.8619, |
| "mean_token_accuracy": 0.6317902356386185, |
| "num_tokens": 14787485.0, |
| "step": 4055 |
| }, |
| { |
| "epoch": 31.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9705e-05, |
| "loss": 1.7178, |
| "mean_token_accuracy": 0.6520997792482376, |
| "num_tokens": 14806506.0, |
| "step": 4060 |
| }, |
| { |
| "epoch": 31.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9680000000000004e-05, |
| "loss": 1.749, |
| "mean_token_accuracy": 0.6490054994821548, |
| "num_tokens": 14824796.0, |
| "step": 4065 |
| }, |
| { |
| "epoch": 31.307692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9655e-05, |
| "loss": 1.7548, |
| "mean_token_accuracy": 0.6469120174646378, |
| "num_tokens": 14842766.0, |
| "step": 4070 |
| }, |
| { |
| "epoch": 31.346153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9630000000000003e-05, |
| "loss": 1.7272, |
| "mean_token_accuracy": 0.6515872448682785, |
| "num_tokens": 14861282.0, |
| "step": 4075 |
| }, |
| { |
| "epoch": 31.384615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9605e-05, |
| "loss": 1.6966, |
| "mean_token_accuracy": 0.6624060094356536, |
| "num_tokens": 14879361.0, |
| "step": 4080 |
| }, |
| { |
| "epoch": 31.423076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.958e-05, |
| "loss": 1.735, |
| "mean_token_accuracy": 0.6479102671146393, |
| "num_tokens": 14897412.0, |
| "step": 4085 |
| }, |
| { |
| "epoch": 31.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9555e-05, |
| "loss": 1.7283, |
| "mean_token_accuracy": 0.6495876848697663, |
| "num_tokens": 14915712.0, |
| "step": 4090 |
| }, |
| { |
| "epoch": 31.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9530000000000004e-05, |
| "loss": 1.7165, |
| "mean_token_accuracy": 0.6608868330717087, |
| "num_tokens": 14933929.0, |
| "step": 4095 |
| }, |
| { |
| "epoch": 31.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9505e-05, |
| "loss": 1.7196, |
| "mean_token_accuracy": 0.6539542943239212, |
| "num_tokens": 14952726.0, |
| "step": 4100 |
| }, |
| { |
| "epoch": 31.576923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9480000000000002e-05, |
| "loss": 1.7294, |
| "mean_token_accuracy": 0.6499251574277878, |
| "num_tokens": 14971349.0, |
| "step": 4105 |
| }, |
| { |
| "epoch": 31.615384615384617, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9455e-05, |
| "loss": 1.7617, |
| "mean_token_accuracy": 0.6461383640766144, |
| "num_tokens": 14989504.0, |
| "step": 4110 |
| }, |
| { |
| "epoch": 31.653846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 2.943e-05, |
| "loss": 1.83, |
| "mean_token_accuracy": 0.6284946590662003, |
| "num_tokens": 15006774.0, |
| "step": 4115 |
| }, |
| { |
| "epoch": 31.692307692307693, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9405e-05, |
| "loss": 1.7556, |
| "mean_token_accuracy": 0.646930256485939, |
| "num_tokens": 15025098.0, |
| "step": 4120 |
| }, |
| { |
| "epoch": 31.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9380000000000003e-05, |
| "loss": 1.7464, |
| "mean_token_accuracy": 0.6496236711740494, |
| "num_tokens": 15043402.0, |
| "step": 4125 |
| }, |
| { |
| "epoch": 31.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9355e-05, |
| "loss": 1.7743, |
| "mean_token_accuracy": 0.6452437072992325, |
| "num_tokens": 15061031.0, |
| "step": 4130 |
| }, |
| { |
| "epoch": 31.807692307692307, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9330000000000002e-05, |
| "loss": 1.7517, |
| "mean_token_accuracy": 0.6447321742773056, |
| "num_tokens": 15079238.0, |
| "step": 4135 |
| }, |
| { |
| "epoch": 31.846153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9304999999999998e-05, |
| "loss": 1.6618, |
| "mean_token_accuracy": 0.6659055262804031, |
| "num_tokens": 15098047.0, |
| "step": 4140 |
| }, |
| { |
| "epoch": 31.884615384615383, |
| "grad_norm": 0.0, |
| "learning_rate": 2.928e-05, |
| "loss": 1.7187, |
| "mean_token_accuracy": 0.6543934553861618, |
| "num_tokens": 15116367.0, |
| "step": 4145 |
| }, |
| { |
| "epoch": 31.923076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9255e-05, |
| "loss": 1.7361, |
| "mean_token_accuracy": 0.6480780899524688, |
| "num_tokens": 15135132.0, |
| "step": 4150 |
| }, |
| { |
| "epoch": 31.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9230000000000003e-05, |
| "loss": 1.7548, |
| "mean_token_accuracy": 0.6506118953227997, |
| "num_tokens": 15152655.0, |
| "step": 4155 |
| }, |
| { |
| "epoch": 32.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9205e-05, |
| "loss": 1.7498, |
| "mean_token_accuracy": 0.6500423729419709, |
| "num_tokens": 15171360.0, |
| "step": 4160 |
| }, |
| { |
| "epoch": 32.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9180000000000002e-05, |
| "loss": 1.7394, |
| "mean_token_accuracy": 0.6556807518005371, |
| "num_tokens": 15189772.0, |
| "step": 4165 |
| }, |
| { |
| "epoch": 32.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9154999999999998e-05, |
| "loss": 1.6884, |
| "mean_token_accuracy": 0.6571320801973343, |
| "num_tokens": 15208356.0, |
| "step": 4170 |
| }, |
| { |
| "epoch": 32.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.913e-05, |
| "loss": 1.7331, |
| "mean_token_accuracy": 0.6546037912368774, |
| "num_tokens": 15225973.0, |
| "step": 4175 |
| }, |
| { |
| "epoch": 32.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9105e-05, |
| "loss": 1.7188, |
| "mean_token_accuracy": 0.6566130489110946, |
| "num_tokens": 15244498.0, |
| "step": 4180 |
| }, |
| { |
| "epoch": 32.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9080000000000003e-05, |
| "loss": 1.6926, |
| "mean_token_accuracy": 0.6630068510770798, |
| "num_tokens": 15263404.0, |
| "step": 4185 |
| }, |
| { |
| "epoch": 32.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9055e-05, |
| "loss": 1.8139, |
| "mean_token_accuracy": 0.6341224372386932, |
| "num_tokens": 15281071.0, |
| "step": 4190 |
| }, |
| { |
| "epoch": 32.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.903e-05, |
| "loss": 1.7784, |
| "mean_token_accuracy": 0.6433619648218155, |
| "num_tokens": 15299529.0, |
| "step": 4195 |
| }, |
| { |
| "epoch": 32.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9004999999999998e-05, |
| "loss": 1.7868, |
| "mean_token_accuracy": 0.642947056889534, |
| "num_tokens": 15317120.0, |
| "step": 4200 |
| }, |
| { |
| "epoch": 32.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.898e-05, |
| "loss": 1.7569, |
| "mean_token_accuracy": 0.648267537355423, |
| "num_tokens": 15335733.0, |
| "step": 4205 |
| }, |
| { |
| "epoch": 32.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8955e-05, |
| "loss": 1.6314, |
| "mean_token_accuracy": 0.6678825885057449, |
| "num_tokens": 15355227.0, |
| "step": 4210 |
| }, |
| { |
| "epoch": 32.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8930000000000003e-05, |
| "loss": 1.7637, |
| "mean_token_accuracy": 0.6434868663549423, |
| "num_tokens": 15373422.0, |
| "step": 4215 |
| }, |
| { |
| "epoch": 32.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8905e-05, |
| "loss": 1.7577, |
| "mean_token_accuracy": 0.6474568367004394, |
| "num_tokens": 15391589.0, |
| "step": 4220 |
| }, |
| { |
| "epoch": 32.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.888e-05, |
| "loss": 1.7877, |
| "mean_token_accuracy": 0.637940239906311, |
| "num_tokens": 15409383.0, |
| "step": 4225 |
| }, |
| { |
| "epoch": 32.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8854999999999997e-05, |
| "loss": 1.6901, |
| "mean_token_accuracy": 0.6617184728384018, |
| "num_tokens": 15428238.0, |
| "step": 4230 |
| }, |
| { |
| "epoch": 32.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.883e-05, |
| "loss": 1.6847, |
| "mean_token_accuracy": 0.660372993350029, |
| "num_tokens": 15446651.0, |
| "step": 4235 |
| }, |
| { |
| "epoch": 32.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8805e-05, |
| "loss": 1.8002, |
| "mean_token_accuracy": 0.634194228053093, |
| "num_tokens": 15464761.0, |
| "step": 4240 |
| }, |
| { |
| "epoch": 32.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8780000000000002e-05, |
| "loss": 1.7199, |
| "mean_token_accuracy": 0.6551119357347488, |
| "num_tokens": 15483259.0, |
| "step": 4245 |
| }, |
| { |
| "epoch": 32.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8754999999999998e-05, |
| "loss": 1.718, |
| "mean_token_accuracy": 0.6517602205276489, |
| "num_tokens": 15502102.0, |
| "step": 4250 |
| }, |
| { |
| "epoch": 32.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.873e-05, |
| "loss": 1.7375, |
| "mean_token_accuracy": 0.6485500365495682, |
| "num_tokens": 15520198.0, |
| "step": 4255 |
| }, |
| { |
| "epoch": 32.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8705000000000004e-05, |
| "loss": 1.7885, |
| "mean_token_accuracy": 0.641147717833519, |
| "num_tokens": 15538006.0, |
| "step": 4260 |
| }, |
| { |
| "epoch": 32.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.868e-05, |
| "loss": 1.6918, |
| "mean_token_accuracy": 0.662942835688591, |
| "num_tokens": 15555826.0, |
| "step": 4265 |
| }, |
| { |
| "epoch": 32.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8655000000000003e-05, |
| "loss": 1.7834, |
| "mean_token_accuracy": 0.6468859910964966, |
| "num_tokens": 15573396.0, |
| "step": 4270 |
| }, |
| { |
| "epoch": 32.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8630000000000002e-05, |
| "loss": 1.7352, |
| "mean_token_accuracy": 0.654278576374054, |
| "num_tokens": 15592035.0, |
| "step": 4275 |
| }, |
| { |
| "epoch": 32.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8605000000000005e-05, |
| "loss": 1.7305, |
| "mean_token_accuracy": 0.6594868719577789, |
| "num_tokens": 15610531.0, |
| "step": 4280 |
| }, |
| { |
| "epoch": 32.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.858e-05, |
| "loss": 1.8267, |
| "mean_token_accuracy": 0.6349624991416931, |
| "num_tokens": 15627607.0, |
| "step": 4285 |
| }, |
| { |
| "epoch": 33.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8555000000000004e-05, |
| "loss": 1.743, |
| "mean_token_accuracy": 0.6526326090097427, |
| "num_tokens": 15645465.0, |
| "step": 4290 |
| }, |
| { |
| "epoch": 33.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.853e-05, |
| "loss": 1.7299, |
| "mean_token_accuracy": 0.6534471154212952, |
| "num_tokens": 15663771.0, |
| "step": 4295 |
| }, |
| { |
| "epoch": 33.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8505000000000002e-05, |
| "loss": 1.7432, |
| "mean_token_accuracy": 0.6503149479627609, |
| "num_tokens": 15682167.0, |
| "step": 4300 |
| }, |
| { |
| "epoch": 33.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8480000000000002e-05, |
| "loss": 1.7905, |
| "mean_token_accuracy": 0.6410009592771531, |
| "num_tokens": 15699294.0, |
| "step": 4305 |
| }, |
| { |
| "epoch": 33.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8455000000000005e-05, |
| "loss": 1.7323, |
| "mean_token_accuracy": 0.6517443567514419, |
| "num_tokens": 15718054.0, |
| "step": 4310 |
| }, |
| { |
| "epoch": 33.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.843e-05, |
| "loss": 1.7484, |
| "mean_token_accuracy": 0.6458596080541611, |
| "num_tokens": 15736292.0, |
| "step": 4315 |
| }, |
| { |
| "epoch": 33.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8405000000000003e-05, |
| "loss": 1.715, |
| "mean_token_accuracy": 0.6541249454021454, |
| "num_tokens": 15755018.0, |
| "step": 4320 |
| }, |
| { |
| "epoch": 33.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8380000000000003e-05, |
| "loss": 1.6412, |
| "mean_token_accuracy": 0.665298554301262, |
| "num_tokens": 15774342.0, |
| "step": 4325 |
| }, |
| { |
| "epoch": 33.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8355000000000002e-05, |
| "loss": 1.7192, |
| "mean_token_accuracy": 0.6550895661115647, |
| "num_tokens": 15792876.0, |
| "step": 4330 |
| }, |
| { |
| "epoch": 33.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.833e-05, |
| "loss": 1.6975, |
| "mean_token_accuracy": 0.6590204060077667, |
| "num_tokens": 15811378.0, |
| "step": 4335 |
| }, |
| { |
| "epoch": 33.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8305000000000004e-05, |
| "loss": 1.7203, |
| "mean_token_accuracy": 0.6554653912782669, |
| "num_tokens": 15829656.0, |
| "step": 4340 |
| }, |
| { |
| "epoch": 33.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.828e-05, |
| "loss": 1.7643, |
| "mean_token_accuracy": 0.6414321035146713, |
| "num_tokens": 15847703.0, |
| "step": 4345 |
| }, |
| { |
| "epoch": 33.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8255000000000003e-05, |
| "loss": 1.6511, |
| "mean_token_accuracy": 0.667979073524475, |
| "num_tokens": 15867134.0, |
| "step": 4350 |
| }, |
| { |
| "epoch": 33.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8230000000000002e-05, |
| "loss": 1.7647, |
| "mean_token_accuracy": 0.6481159001588821, |
| "num_tokens": 15885242.0, |
| "step": 4355 |
| }, |
| { |
| "epoch": 33.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8205000000000005e-05, |
| "loss": 1.8012, |
| "mean_token_accuracy": 0.6423881828784943, |
| "num_tokens": 15902772.0, |
| "step": 4360 |
| }, |
| { |
| "epoch": 33.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.818e-05, |
| "loss": 1.7589, |
| "mean_token_accuracy": 0.6487757861614227, |
| "num_tokens": 15921138.0, |
| "step": 4365 |
| }, |
| { |
| "epoch": 33.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8155000000000004e-05, |
| "loss": 1.7236, |
| "mean_token_accuracy": 0.6539973527193069, |
| "num_tokens": 15939494.0, |
| "step": 4370 |
| }, |
| { |
| "epoch": 33.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.813e-05, |
| "loss": 1.7232, |
| "mean_token_accuracy": 0.649171131849289, |
| "num_tokens": 15958194.0, |
| "step": 4375 |
| }, |
| { |
| "epoch": 33.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8105000000000003e-05, |
| "loss": 1.7117, |
| "mean_token_accuracy": 0.6549121081829071, |
| "num_tokens": 15976927.0, |
| "step": 4380 |
| }, |
| { |
| "epoch": 33.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8080000000000002e-05, |
| "loss": 1.8074, |
| "mean_token_accuracy": 0.6450524151325225, |
| "num_tokens": 15994035.0, |
| "step": 4385 |
| }, |
| { |
| "epoch": 33.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8055000000000005e-05, |
| "loss": 1.7169, |
| "mean_token_accuracy": 0.6550534397363663, |
| "num_tokens": 16012525.0, |
| "step": 4390 |
| }, |
| { |
| "epoch": 33.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.803e-05, |
| "loss": 1.8682, |
| "mean_token_accuracy": 0.6267781972885131, |
| "num_tokens": 16029494.0, |
| "step": 4395 |
| }, |
| { |
| "epoch": 33.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8005000000000004e-05, |
| "loss": 1.7787, |
| "mean_token_accuracy": 0.6424099951982498, |
| "num_tokens": 16047811.0, |
| "step": 4400 |
| }, |
| { |
| "epoch": 33.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.798e-05, |
| "loss": 1.737, |
| "mean_token_accuracy": 0.6514830589294434, |
| "num_tokens": 16065304.0, |
| "step": 4405 |
| }, |
| { |
| "epoch": 33.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7955000000000003e-05, |
| "loss": 1.7418, |
| "mean_token_accuracy": 0.652796396613121, |
| "num_tokens": 16083292.0, |
| "step": 4410 |
| }, |
| { |
| "epoch": 33.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7930000000000002e-05, |
| "loss": 1.7136, |
| "mean_token_accuracy": 0.654367059469223, |
| "num_tokens": 16101807.0, |
| "step": 4415 |
| }, |
| { |
| "epoch": 34.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7905000000000005e-05, |
| "loss": 1.7799, |
| "mean_token_accuracy": 0.644026267528534, |
| "num_tokens": 16119570.0, |
| "step": 4420 |
| }, |
| { |
| "epoch": 34.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.788e-05, |
| "loss": 1.7598, |
| "mean_token_accuracy": 0.6437584400177002, |
| "num_tokens": 16137834.0, |
| "step": 4425 |
| }, |
| { |
| "epoch": 34.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7855000000000004e-05, |
| "loss": 1.6805, |
| "mean_token_accuracy": 0.6666524916887283, |
| "num_tokens": 16156663.0, |
| "step": 4430 |
| }, |
| { |
| "epoch": 34.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.783e-05, |
| "loss": 1.6866, |
| "mean_token_accuracy": 0.6584878832101821, |
| "num_tokens": 16175788.0, |
| "step": 4435 |
| }, |
| { |
| "epoch": 34.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7805000000000002e-05, |
| "loss": 1.7121, |
| "mean_token_accuracy": 0.6575721591711045, |
| "num_tokens": 16194451.0, |
| "step": 4440 |
| }, |
| { |
| "epoch": 34.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.778e-05, |
| "loss": 1.751, |
| "mean_token_accuracy": 0.6504880547523498, |
| "num_tokens": 16212789.0, |
| "step": 4445 |
| }, |
| { |
| "epoch": 34.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7755000000000004e-05, |
| "loss": 1.8498, |
| "mean_token_accuracy": 0.6282713234424591, |
| "num_tokens": 16230046.0, |
| "step": 4450 |
| }, |
| { |
| "epoch": 34.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.773e-05, |
| "loss": 1.75, |
| "mean_token_accuracy": 0.6498936504125595, |
| "num_tokens": 16247898.0, |
| "step": 4455 |
| }, |
| { |
| "epoch": 34.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7705000000000003e-05, |
| "loss": 1.7994, |
| "mean_token_accuracy": 0.6431075096130371, |
| "num_tokens": 16265073.0, |
| "step": 4460 |
| }, |
| { |
| "epoch": 34.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.768e-05, |
| "loss": 1.6605, |
| "mean_token_accuracy": 0.6640267461538315, |
| "num_tokens": 16283741.0, |
| "step": 4465 |
| }, |
| { |
| "epoch": 34.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7655000000000002e-05, |
| "loss": 1.7335, |
| "mean_token_accuracy": 0.6550962030887604, |
| "num_tokens": 16301795.0, |
| "step": 4470 |
| }, |
| { |
| "epoch": 34.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.763e-05, |
| "loss": 1.7215, |
| "mean_token_accuracy": 0.6581933110952377, |
| "num_tokens": 16320399.0, |
| "step": 4475 |
| }, |
| { |
| "epoch": 34.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7605000000000004e-05, |
| "loss": 1.7164, |
| "mean_token_accuracy": 0.6563991248607636, |
| "num_tokens": 16338805.0, |
| "step": 4480 |
| }, |
| { |
| "epoch": 34.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.758e-05, |
| "loss": 1.718, |
| "mean_token_accuracy": 0.6518977075815201, |
| "num_tokens": 16357675.0, |
| "step": 4485 |
| }, |
| { |
| "epoch": 34.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7555000000000003e-05, |
| "loss": 1.7261, |
| "mean_token_accuracy": 0.6472561746835709, |
| "num_tokens": 16376357.0, |
| "step": 4490 |
| }, |
| { |
| "epoch": 34.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.753e-05, |
| "loss": 1.7656, |
| "mean_token_accuracy": 0.6474886327981949, |
| "num_tokens": 16394142.0, |
| "step": 4495 |
| }, |
| { |
| "epoch": 34.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7505000000000002e-05, |
| "loss": 1.7131, |
| "mean_token_accuracy": 0.6552159339189529, |
| "num_tokens": 16412619.0, |
| "step": 4500 |
| }, |
| { |
| "epoch": 34.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.748e-05, |
| "loss": 1.8421, |
| "mean_token_accuracy": 0.6334175139665603, |
| "num_tokens": 16429807.0, |
| "step": 4505 |
| }, |
| { |
| "epoch": 34.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7455000000000004e-05, |
| "loss": 1.7888, |
| "mean_token_accuracy": 0.6399717807769776, |
| "num_tokens": 16447829.0, |
| "step": 4510 |
| }, |
| { |
| "epoch": 34.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.743e-05, |
| "loss": 1.6926, |
| "mean_token_accuracy": 0.6622339367866517, |
| "num_tokens": 16466534.0, |
| "step": 4515 |
| }, |
| { |
| "epoch": 34.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7405000000000003e-05, |
| "loss": 1.7331, |
| "mean_token_accuracy": 0.6530486464500427, |
| "num_tokens": 16484278.0, |
| "step": 4520 |
| }, |
| { |
| "epoch": 34.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.738e-05, |
| "loss": 1.7237, |
| "mean_token_accuracy": 0.6497306555509568, |
| "num_tokens": 16502702.0, |
| "step": 4525 |
| }, |
| { |
| "epoch": 34.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7355e-05, |
| "loss": 1.6833, |
| "mean_token_accuracy": 0.6642057120800018, |
| "num_tokens": 16521906.0, |
| "step": 4530 |
| }, |
| { |
| "epoch": 34.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.733e-05, |
| "loss": 1.8207, |
| "mean_token_accuracy": 0.6355377316474915, |
| "num_tokens": 16539170.0, |
| "step": 4535 |
| }, |
| { |
| "epoch": 34.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7305000000000004e-05, |
| "loss": 1.7629, |
| "mean_token_accuracy": 0.6472138792276383, |
| "num_tokens": 16556927.0, |
| "step": 4540 |
| }, |
| { |
| "epoch": 34.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.728e-05, |
| "loss": 1.7334, |
| "mean_token_accuracy": 0.6492251008749008, |
| "num_tokens": 16575521.0, |
| "step": 4545 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7255000000000002e-05, |
| "loss": 1.7436, |
| "mean_token_accuracy": 0.6502075582742691, |
| "num_tokens": 16593675.0, |
| "step": 4550 |
| }, |
| { |
| "epoch": 35.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.723e-05, |
| "loss": 1.7475, |
| "mean_token_accuracy": 0.6530224084854126, |
| "num_tokens": 16612033.0, |
| "step": 4555 |
| }, |
| { |
| "epoch": 35.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7205e-05, |
| "loss": 1.7608, |
| "mean_token_accuracy": 0.6467309683561325, |
| "num_tokens": 16630046.0, |
| "step": 4560 |
| }, |
| { |
| "epoch": 35.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.718e-05, |
| "loss": 1.6749, |
| "mean_token_accuracy": 0.6611016511917114, |
| "num_tokens": 16648965.0, |
| "step": 4565 |
| }, |
| { |
| "epoch": 35.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7155000000000003e-05, |
| "loss": 1.6678, |
| "mean_token_accuracy": 0.6659534931182861, |
| "num_tokens": 16668179.0, |
| "step": 4570 |
| }, |
| { |
| "epoch": 35.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.713e-05, |
| "loss": 1.7469, |
| "mean_token_accuracy": 0.6515377521514892, |
| "num_tokens": 16686202.0, |
| "step": 4575 |
| }, |
| { |
| "epoch": 35.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7105000000000002e-05, |
| "loss": 1.758, |
| "mean_token_accuracy": 0.6510407030582428, |
| "num_tokens": 16704566.0, |
| "step": 4580 |
| }, |
| { |
| "epoch": 35.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7079999999999998e-05, |
| "loss": 1.7879, |
| "mean_token_accuracy": 0.6449306428432464, |
| "num_tokens": 16721669.0, |
| "step": 4585 |
| }, |
| { |
| "epoch": 35.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7055e-05, |
| "loss": 1.7399, |
| "mean_token_accuracy": 0.6478658020496368, |
| "num_tokens": 16739999.0, |
| "step": 4590 |
| }, |
| { |
| "epoch": 35.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.703e-05, |
| "loss": 1.7316, |
| "mean_token_accuracy": 0.647733348608017, |
| "num_tokens": 16758454.0, |
| "step": 4595 |
| }, |
| { |
| "epoch": 35.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7005000000000003e-05, |
| "loss": 1.7179, |
| "mean_token_accuracy": 0.6545091897249222, |
| "num_tokens": 16776775.0, |
| "step": 4600 |
| }, |
| { |
| "epoch": 35.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.698e-05, |
| "loss": 1.7509, |
| "mean_token_accuracy": 0.6458514422178269, |
| "num_tokens": 16795006.0, |
| "step": 4605 |
| }, |
| { |
| "epoch": 35.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6955000000000002e-05, |
| "loss": 1.8091, |
| "mean_token_accuracy": 0.6382217675447464, |
| "num_tokens": 16812133.0, |
| "step": 4610 |
| }, |
| { |
| "epoch": 35.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.693e-05, |
| "loss": 1.7595, |
| "mean_token_accuracy": 0.6477942883968353, |
| "num_tokens": 16829745.0, |
| "step": 4615 |
| }, |
| { |
| "epoch": 35.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6905e-05, |
| "loss": 1.7319, |
| "mean_token_accuracy": 0.6578234076499939, |
| "num_tokens": 16848720.0, |
| "step": 4620 |
| }, |
| { |
| "epoch": 35.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.688e-05, |
| "loss": 1.6978, |
| "mean_token_accuracy": 0.653961855173111, |
| "num_tokens": 16867266.0, |
| "step": 4625 |
| }, |
| { |
| "epoch": 35.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6855000000000003e-05, |
| "loss": 1.7421, |
| "mean_token_accuracy": 0.6451707929372787, |
| "num_tokens": 16885165.0, |
| "step": 4630 |
| }, |
| { |
| "epoch": 35.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.683e-05, |
| "loss": 1.6848, |
| "mean_token_accuracy": 0.660153791308403, |
| "num_tokens": 16904043.0, |
| "step": 4635 |
| }, |
| { |
| "epoch": 35.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6805000000000002e-05, |
| "loss": 1.7564, |
| "mean_token_accuracy": 0.6473292171955108, |
| "num_tokens": 16921974.0, |
| "step": 4640 |
| }, |
| { |
| "epoch": 35.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.678e-05, |
| "loss": 1.758, |
| "mean_token_accuracy": 0.6488713294267654, |
| "num_tokens": 16939451.0, |
| "step": 4645 |
| }, |
| { |
| "epoch": 35.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6755000000000004e-05, |
| "loss": 1.7267, |
| "mean_token_accuracy": 0.653421950340271, |
| "num_tokens": 16958067.0, |
| "step": 4650 |
| }, |
| { |
| "epoch": 35.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.673e-05, |
| "loss": 1.7234, |
| "mean_token_accuracy": 0.6553035944700241, |
| "num_tokens": 16976533.0, |
| "step": 4655 |
| }, |
| { |
| "epoch": 35.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6705000000000003e-05, |
| "loss": 1.7566, |
| "mean_token_accuracy": 0.6490527987480164, |
| "num_tokens": 16994749.0, |
| "step": 4660 |
| }, |
| { |
| "epoch": 35.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.668e-05, |
| "loss": 1.7622, |
| "mean_token_accuracy": 0.6474152833223343, |
| "num_tokens": 17012449.0, |
| "step": 4665 |
| }, |
| { |
| "epoch": 35.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6655e-05, |
| "loss": 1.746, |
| "mean_token_accuracy": 0.644548413157463, |
| "num_tokens": 17031126.0, |
| "step": 4670 |
| }, |
| { |
| "epoch": 35.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.663e-05, |
| "loss": 1.7045, |
| "mean_token_accuracy": 0.6576229840517044, |
| "num_tokens": 17050110.0, |
| "step": 4675 |
| }, |
| { |
| "epoch": 36.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6605000000000004e-05, |
| "loss": 1.8172, |
| "mean_token_accuracy": 0.6355714172124862, |
| "num_tokens": 17067780.0, |
| "step": 4680 |
| }, |
| { |
| "epoch": 36.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.658e-05, |
| "loss": 1.8764, |
| "mean_token_accuracy": 0.6272640764713288, |
| "num_tokens": 17084927.0, |
| "step": 4685 |
| }, |
| { |
| "epoch": 36.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6555000000000002e-05, |
| "loss": 1.754, |
| "mean_token_accuracy": 0.6480523020029068, |
| "num_tokens": 17102872.0, |
| "step": 4690 |
| }, |
| { |
| "epoch": 36.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.653e-05, |
| "loss": 1.733, |
| "mean_token_accuracy": 0.6500560373067856, |
| "num_tokens": 17120645.0, |
| "step": 4695 |
| }, |
| { |
| "epoch": 36.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6505e-05, |
| "loss": 1.7193, |
| "mean_token_accuracy": 0.6540112674236298, |
| "num_tokens": 17139263.0, |
| "step": 4700 |
| }, |
| { |
| "epoch": 36.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.648e-05, |
| "loss": 1.6614, |
| "mean_token_accuracy": 0.6684265941381454, |
| "num_tokens": 17158319.0, |
| "step": 4705 |
| }, |
| { |
| "epoch": 36.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6455000000000003e-05, |
| "loss": 1.7377, |
| "mean_token_accuracy": 0.6504531025886535, |
| "num_tokens": 17176907.0, |
| "step": 4710 |
| }, |
| { |
| "epoch": 36.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.643e-05, |
| "loss": 1.751, |
| "mean_token_accuracy": 0.6483948498964309, |
| "num_tokens": 17195007.0, |
| "step": 4715 |
| }, |
| { |
| "epoch": 36.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6405000000000002e-05, |
| "loss": 1.6963, |
| "mean_token_accuracy": 0.6590126276016235, |
| "num_tokens": 17213782.0, |
| "step": 4720 |
| }, |
| { |
| "epoch": 36.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6379999999999998e-05, |
| "loss": 1.7659, |
| "mean_token_accuracy": 0.6419188559055329, |
| "num_tokens": 17231953.0, |
| "step": 4725 |
| }, |
| { |
| "epoch": 36.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6355e-05, |
| "loss": 1.7443, |
| "mean_token_accuracy": 0.6517761409282684, |
| "num_tokens": 17250069.0, |
| "step": 4730 |
| }, |
| { |
| "epoch": 36.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.633e-05, |
| "loss": 1.7795, |
| "mean_token_accuracy": 0.63814417719841, |
| "num_tokens": 17267793.0, |
| "step": 4735 |
| }, |
| { |
| "epoch": 36.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6305000000000003e-05, |
| "loss": 1.7513, |
| "mean_token_accuracy": 0.6479008972644806, |
| "num_tokens": 17285956.0, |
| "step": 4740 |
| }, |
| { |
| "epoch": 36.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.628e-05, |
| "loss": 1.7722, |
| "mean_token_accuracy": 0.6436032384634018, |
| "num_tokens": 17303770.0, |
| "step": 4745 |
| }, |
| { |
| "epoch": 36.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6255000000000002e-05, |
| "loss": 1.7904, |
| "mean_token_accuracy": 0.6443703442811965, |
| "num_tokens": 17321120.0, |
| "step": 4750 |
| }, |
| { |
| "epoch": 36.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6229999999999998e-05, |
| "loss": 1.7634, |
| "mean_token_accuracy": 0.6405331075191498, |
| "num_tokens": 17338787.0, |
| "step": 4755 |
| }, |
| { |
| "epoch": 36.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6205e-05, |
| "loss": 1.6876, |
| "mean_token_accuracy": 0.6604384332895279, |
| "num_tokens": 17356798.0, |
| "step": 4760 |
| }, |
| { |
| "epoch": 36.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.618e-05, |
| "loss": 1.7414, |
| "mean_token_accuracy": 0.6457662045955658, |
| "num_tokens": 17375411.0, |
| "step": 4765 |
| }, |
| { |
| "epoch": 36.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6155000000000003e-05, |
| "loss": 1.7765, |
| "mean_token_accuracy": 0.6472298890352249, |
| "num_tokens": 17392790.0, |
| "step": 4770 |
| }, |
| { |
| "epoch": 36.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.613e-05, |
| "loss": 1.7773, |
| "mean_token_accuracy": 0.6443253785371781, |
| "num_tokens": 17411334.0, |
| "step": 4775 |
| }, |
| { |
| "epoch": 36.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6105e-05, |
| "loss": 1.6968, |
| "mean_token_accuracy": 0.6606687515974045, |
| "num_tokens": 17429786.0, |
| "step": 4780 |
| }, |
| { |
| "epoch": 36.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6079999999999998e-05, |
| "loss": 1.6995, |
| "mean_token_accuracy": 0.6591976523399353, |
| "num_tokens": 17448074.0, |
| "step": 4785 |
| }, |
| { |
| "epoch": 36.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6055e-05, |
| "loss": 1.7353, |
| "mean_token_accuracy": 0.6497946470975876, |
| "num_tokens": 17466677.0, |
| "step": 4790 |
| }, |
| { |
| "epoch": 36.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.603e-05, |
| "loss": 1.7532, |
| "mean_token_accuracy": 0.6486219108104706, |
| "num_tokens": 17485904.0, |
| "step": 4795 |
| }, |
| { |
| "epoch": 36.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6005000000000003e-05, |
| "loss": 1.7336, |
| "mean_token_accuracy": 0.6513243049383164, |
| "num_tokens": 17503928.0, |
| "step": 4800 |
| }, |
| { |
| "epoch": 36.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.598e-05, |
| "loss": 1.7178, |
| "mean_token_accuracy": 0.6609677881002426, |
| "num_tokens": 17522780.0, |
| "step": 4805 |
| }, |
| { |
| "epoch": 37.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5955e-05, |
| "loss": 1.6678, |
| "mean_token_accuracy": 0.6652211785316468, |
| "num_tokens": 17541885.0, |
| "step": 4810 |
| }, |
| { |
| "epoch": 37.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5929999999999997e-05, |
| "loss": 1.7167, |
| "mean_token_accuracy": 0.6583490580320358, |
| "num_tokens": 17560348.0, |
| "step": 4815 |
| }, |
| { |
| "epoch": 37.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5905e-05, |
| "loss": 1.7755, |
| "mean_token_accuracy": 0.6465405881404876, |
| "num_tokens": 17578412.0, |
| "step": 4820 |
| }, |
| { |
| "epoch": 37.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.588e-05, |
| "loss": 1.667, |
| "mean_token_accuracy": 0.6637176364660263, |
| "num_tokens": 17597158.0, |
| "step": 4825 |
| }, |
| { |
| "epoch": 37.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5855000000000002e-05, |
| "loss": 1.6931, |
| "mean_token_accuracy": 0.6589970797300339, |
| "num_tokens": 17616052.0, |
| "step": 4830 |
| }, |
| { |
| "epoch": 37.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.583e-05, |
| "loss": 1.7732, |
| "mean_token_accuracy": 0.6425283402204514, |
| "num_tokens": 17633798.0, |
| "step": 4835 |
| }, |
| { |
| "epoch": 37.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5805e-05, |
| "loss": 1.7705, |
| "mean_token_accuracy": 0.6458623111248016, |
| "num_tokens": 17651657.0, |
| "step": 4840 |
| }, |
| { |
| "epoch": 37.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5779999999999997e-05, |
| "loss": 1.7854, |
| "mean_token_accuracy": 0.6410915940999985, |
| "num_tokens": 17669527.0, |
| "step": 4845 |
| }, |
| { |
| "epoch": 37.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5755e-05, |
| "loss": 1.7162, |
| "mean_token_accuracy": 0.6470267534255981, |
| "num_tokens": 17688130.0, |
| "step": 4850 |
| }, |
| { |
| "epoch": 37.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.573e-05, |
| "loss": 1.7409, |
| "mean_token_accuracy": 0.6474620312452316, |
| "num_tokens": 17706679.0, |
| "step": 4855 |
| }, |
| { |
| "epoch": 37.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5705000000000002e-05, |
| "loss": 1.7944, |
| "mean_token_accuracy": 0.6404423266649246, |
| "num_tokens": 17724627.0, |
| "step": 4860 |
| }, |
| { |
| "epoch": 37.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5679999999999998e-05, |
| "loss": 1.7607, |
| "mean_token_accuracy": 0.649592487514019, |
| "num_tokens": 17742357.0, |
| "step": 4865 |
| }, |
| { |
| "epoch": 37.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5655e-05, |
| "loss": 1.7116, |
| "mean_token_accuracy": 0.6577517062425613, |
| "num_tokens": 17761170.0, |
| "step": 4870 |
| }, |
| { |
| "epoch": 37.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5629999999999997e-05, |
| "loss": 1.7832, |
| "mean_token_accuracy": 0.6425192266702652, |
| "num_tokens": 17778997.0, |
| "step": 4875 |
| }, |
| { |
| "epoch": 37.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5605e-05, |
| "loss": 1.7689, |
| "mean_token_accuracy": 0.6469813168048859, |
| "num_tokens": 17796705.0, |
| "step": 4880 |
| }, |
| { |
| "epoch": 37.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5580000000000002e-05, |
| "loss": 1.7045, |
| "mean_token_accuracy": 0.6609047919511795, |
| "num_tokens": 17815465.0, |
| "step": 4885 |
| }, |
| { |
| "epoch": 37.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5555000000000002e-05, |
| "loss": 1.7773, |
| "mean_token_accuracy": 0.641741332411766, |
| "num_tokens": 17833805.0, |
| "step": 4890 |
| }, |
| { |
| "epoch": 37.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5530000000000005e-05, |
| "loss": 1.7906, |
| "mean_token_accuracy": 0.6406534284353256, |
| "num_tokens": 17851648.0, |
| "step": 4895 |
| }, |
| { |
| "epoch": 37.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5505e-05, |
| "loss": 1.7484, |
| "mean_token_accuracy": 0.650345778465271, |
| "num_tokens": 17869722.0, |
| "step": 4900 |
| }, |
| { |
| "epoch": 37.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5480000000000003e-05, |
| "loss": 1.6724, |
| "mean_token_accuracy": 0.661380472779274, |
| "num_tokens": 17888580.0, |
| "step": 4905 |
| }, |
| { |
| "epoch": 37.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5455e-05, |
| "loss": 1.7188, |
| "mean_token_accuracy": 0.6543963342905045, |
| "num_tokens": 17907123.0, |
| "step": 4910 |
| }, |
| { |
| "epoch": 37.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5430000000000002e-05, |
| "loss": 1.7679, |
| "mean_token_accuracy": 0.6461872398853302, |
| "num_tokens": 17924870.0, |
| "step": 4915 |
| }, |
| { |
| "epoch": 37.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5405e-05, |
| "loss": 1.7487, |
| "mean_token_accuracy": 0.6498663574457169, |
| "num_tokens": 17942295.0, |
| "step": 4920 |
| }, |
| { |
| "epoch": 37.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5380000000000004e-05, |
| "loss": 1.7363, |
| "mean_token_accuracy": 0.6530733823776245, |
| "num_tokens": 17960859.0, |
| "step": 4925 |
| }, |
| { |
| "epoch": 37.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5355e-05, |
| "loss": 1.6919, |
| "mean_token_accuracy": 0.6597955763339997, |
| "num_tokens": 17979359.0, |
| "step": 4930 |
| }, |
| { |
| "epoch": 37.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5330000000000003e-05, |
| "loss": 1.7047, |
| "mean_token_accuracy": 0.6595474272966385, |
| "num_tokens": 17997968.0, |
| "step": 4935 |
| }, |
| { |
| "epoch": 38.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5305000000000003e-05, |
| "loss": 1.7505, |
| "mean_token_accuracy": 0.6506702989339829, |
| "num_tokens": 18015990.0, |
| "step": 4940 |
| }, |
| { |
| "epoch": 38.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5280000000000005e-05, |
| "loss": 1.7561, |
| "mean_token_accuracy": 0.6481132537126542, |
| "num_tokens": 18034580.0, |
| "step": 4945 |
| }, |
| { |
| "epoch": 38.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5255e-05, |
| "loss": 1.7617, |
| "mean_token_accuracy": 0.6524750530719757, |
| "num_tokens": 18052519.0, |
| "step": 4950 |
| }, |
| { |
| "epoch": 38.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5230000000000004e-05, |
| "loss": 1.7571, |
| "mean_token_accuracy": 0.6480519741773605, |
| "num_tokens": 18070400.0, |
| "step": 4955 |
| }, |
| { |
| "epoch": 38.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5205e-05, |
| "loss": 1.6892, |
| "mean_token_accuracy": 0.6499431103467941, |
| "num_tokens": 18089731.0, |
| "step": 4960 |
| }, |
| { |
| "epoch": 38.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5180000000000003e-05, |
| "loss": 1.7363, |
| "mean_token_accuracy": 0.652138090133667, |
| "num_tokens": 18107784.0, |
| "step": 4965 |
| }, |
| { |
| "epoch": 38.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5155000000000002e-05, |
| "loss": 1.6622, |
| "mean_token_accuracy": 0.6630479246377945, |
| "num_tokens": 18126899.0, |
| "step": 4970 |
| }, |
| { |
| "epoch": 38.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5130000000000005e-05, |
| "loss": 1.6931, |
| "mean_token_accuracy": 0.6576372981071472, |
| "num_tokens": 18145576.0, |
| "step": 4975 |
| }, |
| { |
| "epoch": 38.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5105e-05, |
| "loss": 1.7881, |
| "mean_token_accuracy": 0.6391725331544876, |
| "num_tokens": 18162910.0, |
| "step": 4980 |
| }, |
| { |
| "epoch": 38.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5080000000000004e-05, |
| "loss": 1.7223, |
| "mean_token_accuracy": 0.656527328491211, |
| "num_tokens": 18181680.0, |
| "step": 4985 |
| }, |
| { |
| "epoch": 38.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5055e-05, |
| "loss": 1.7574, |
| "mean_token_accuracy": 0.6486213862895965, |
| "num_tokens": 18199714.0, |
| "step": 4990 |
| }, |
| { |
| "epoch": 38.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5030000000000003e-05, |
| "loss": 1.7484, |
| "mean_token_accuracy": 0.6542579799890518, |
| "num_tokens": 18217074.0, |
| "step": 4995 |
| }, |
| { |
| "epoch": 38.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5005000000000002e-05, |
| "loss": 1.7554, |
| "mean_token_accuracy": 0.642228040099144, |
| "num_tokens": 18235182.0, |
| "step": 5000 |
| }, |
| { |
| "epoch": 38.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.498e-05, |
| "loss": 1.7976, |
| "mean_token_accuracy": 0.6354389518499375, |
| "num_tokens": 18252871.0, |
| "step": 5005 |
| }, |
| { |
| "epoch": 38.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4955e-05, |
| "loss": 1.7793, |
| "mean_token_accuracy": 0.6435254126787185, |
| "num_tokens": 18270734.0, |
| "step": 5010 |
| }, |
| { |
| "epoch": 38.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.493e-05, |
| "loss": 1.7659, |
| "mean_token_accuracy": 0.6490629822015762, |
| "num_tokens": 18288300.0, |
| "step": 5015 |
| }, |
| { |
| "epoch": 38.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4905e-05, |
| "loss": 1.7339, |
| "mean_token_accuracy": 0.6582322597503663, |
| "num_tokens": 18306234.0, |
| "step": 5020 |
| }, |
| { |
| "epoch": 38.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.488e-05, |
| "loss": 1.6772, |
| "mean_token_accuracy": 0.6635013937950134, |
| "num_tokens": 18325571.0, |
| "step": 5025 |
| }, |
| { |
| "epoch": 38.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4855000000000002e-05, |
| "loss": 1.7387, |
| "mean_token_accuracy": 0.6517604231834412, |
| "num_tokens": 18344113.0, |
| "step": 5030 |
| }, |
| { |
| "epoch": 38.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.483e-05, |
| "loss": 1.6998, |
| "mean_token_accuracy": 0.6589053481817245, |
| "num_tokens": 18362753.0, |
| "step": 5035 |
| }, |
| { |
| "epoch": 38.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4805e-05, |
| "loss": 1.7194, |
| "mean_token_accuracy": 0.654488143324852, |
| "num_tokens": 18381290.0, |
| "step": 5040 |
| }, |
| { |
| "epoch": 38.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.478e-05, |
| "loss": 1.7901, |
| "mean_token_accuracy": 0.6406182050704956, |
| "num_tokens": 18398780.0, |
| "step": 5045 |
| }, |
| { |
| "epoch": 38.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4755e-05, |
| "loss": 1.7715, |
| "mean_token_accuracy": 0.6461145460605622, |
| "num_tokens": 18416605.0, |
| "step": 5050 |
| }, |
| { |
| "epoch": 38.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.473e-05, |
| "loss": 1.7961, |
| "mean_token_accuracy": 0.6413571745157242, |
| "num_tokens": 18434448.0, |
| "step": 5055 |
| }, |
| { |
| "epoch": 38.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4705e-05, |
| "loss": 1.7132, |
| "mean_token_accuracy": 0.6524721384048462, |
| "num_tokens": 18453025.0, |
| "step": 5060 |
| }, |
| { |
| "epoch": 38.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.468e-05, |
| "loss": 1.7789, |
| "mean_token_accuracy": 0.6459534347057343, |
| "num_tokens": 18470888.0, |
| "step": 5065 |
| }, |
| { |
| "epoch": 39.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4655e-05, |
| "loss": 1.6775, |
| "mean_token_accuracy": 0.6582199364900589, |
| "num_tokens": 18490095.0, |
| "step": 5070 |
| }, |
| { |
| "epoch": 39.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.463e-05, |
| "loss": 1.7668, |
| "mean_token_accuracy": 0.6507688373327255, |
| "num_tokens": 18507912.0, |
| "step": 5075 |
| }, |
| { |
| "epoch": 39.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4605e-05, |
| "loss": 1.7214, |
| "mean_token_accuracy": 0.650596770644188, |
| "num_tokens": 18526534.0, |
| "step": 5080 |
| }, |
| { |
| "epoch": 39.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4580000000000002e-05, |
| "loss": 1.7074, |
| "mean_token_accuracy": 0.6542585700750351, |
| "num_tokens": 18544958.0, |
| "step": 5085 |
| }, |
| { |
| "epoch": 39.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4555e-05, |
| "loss": 1.7504, |
| "mean_token_accuracy": 0.6499609500169754, |
| "num_tokens": 18562850.0, |
| "step": 5090 |
| }, |
| { |
| "epoch": 39.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.453e-05, |
| "loss": 1.7253, |
| "mean_token_accuracy": 0.6546373665332794, |
| "num_tokens": 18581257.0, |
| "step": 5095 |
| }, |
| { |
| "epoch": 39.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4505e-05, |
| "loss": 1.7325, |
| "mean_token_accuracy": 0.6537098169326783, |
| "num_tokens": 18599186.0, |
| "step": 5100 |
| }, |
| { |
| "epoch": 39.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.448e-05, |
| "loss": 1.817, |
| "mean_token_accuracy": 0.6370946735143661, |
| "num_tokens": 18616131.0, |
| "step": 5105 |
| }, |
| { |
| "epoch": 39.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4455e-05, |
| "loss": 1.6847, |
| "mean_token_accuracy": 0.6567441165447235, |
| "num_tokens": 18635229.0, |
| "step": 5110 |
| }, |
| { |
| "epoch": 39.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.443e-05, |
| "loss": 1.6926, |
| "mean_token_accuracy": 0.6508637487888336, |
| "num_tokens": 18654889.0, |
| "step": 5115 |
| }, |
| { |
| "epoch": 39.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4405e-05, |
| "loss": 1.7283, |
| "mean_token_accuracy": 0.6576891779899597, |
| "num_tokens": 18672794.0, |
| "step": 5120 |
| }, |
| { |
| "epoch": 39.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.438e-05, |
| "loss": 1.671, |
| "mean_token_accuracy": 0.6607532262802124, |
| "num_tokens": 18692648.0, |
| "step": 5125 |
| }, |
| { |
| "epoch": 39.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4355e-05, |
| "loss": 1.7717, |
| "mean_token_accuracy": 0.6461830377578736, |
| "num_tokens": 18710735.0, |
| "step": 5130 |
| }, |
| { |
| "epoch": 39.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.433e-05, |
| "loss": 1.7019, |
| "mean_token_accuracy": 0.657170632481575, |
| "num_tokens": 18729163.0, |
| "step": 5135 |
| }, |
| { |
| "epoch": 39.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4305e-05, |
| "loss": 1.7227, |
| "mean_token_accuracy": 0.6536213010549545, |
| "num_tokens": 18747486.0, |
| "step": 5140 |
| }, |
| { |
| "epoch": 39.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.428e-05, |
| "loss": 1.7075, |
| "mean_token_accuracy": 0.6612475931644439, |
| "num_tokens": 18766026.0, |
| "step": 5145 |
| }, |
| { |
| "epoch": 39.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4255e-05, |
| "loss": 1.6588, |
| "mean_token_accuracy": 0.6661661475896835, |
| "num_tokens": 18785291.0, |
| "step": 5150 |
| }, |
| { |
| "epoch": 39.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.423e-05, |
| "loss": 1.7663, |
| "mean_token_accuracy": 0.6468746066093445, |
| "num_tokens": 18803265.0, |
| "step": 5155 |
| }, |
| { |
| "epoch": 39.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4205e-05, |
| "loss": 1.7807, |
| "mean_token_accuracy": 0.6388770550489425, |
| "num_tokens": 18821103.0, |
| "step": 5160 |
| }, |
| { |
| "epoch": 39.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.418e-05, |
| "loss": 1.8602, |
| "mean_token_accuracy": 0.6244511514902115, |
| "num_tokens": 18838884.0, |
| "step": 5165 |
| }, |
| { |
| "epoch": 39.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4154999999999998e-05, |
| "loss": 1.7841, |
| "mean_token_accuracy": 0.6422644227743148, |
| "num_tokens": 18856713.0, |
| "step": 5170 |
| }, |
| { |
| "epoch": 39.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.413e-05, |
| "loss": 1.7265, |
| "mean_token_accuracy": 0.6520240396261215, |
| "num_tokens": 18874743.0, |
| "step": 5175 |
| }, |
| { |
| "epoch": 39.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4105e-05, |
| "loss": 1.8197, |
| "mean_token_accuracy": 0.6371575862169265, |
| "num_tokens": 18891917.0, |
| "step": 5180 |
| }, |
| { |
| "epoch": 39.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.408e-05, |
| "loss": 1.7642, |
| "mean_token_accuracy": 0.6434910297393799, |
| "num_tokens": 18909826.0, |
| "step": 5185 |
| }, |
| { |
| "epoch": 39.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4055000000000003e-05, |
| "loss": 1.7481, |
| "mean_token_accuracy": 0.6525659114122391, |
| "num_tokens": 18928215.0, |
| "step": 5190 |
| }, |
| { |
| "epoch": 39.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4030000000000002e-05, |
| "loss": 1.705, |
| "mean_token_accuracy": 0.6562006801366806, |
| "num_tokens": 18946708.0, |
| "step": 5195 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4005e-05, |
| "loss": 1.7572, |
| "mean_token_accuracy": 0.6535427123308182, |
| "num_tokens": 18964200.0, |
| "step": 5200 |
| }, |
| { |
| "epoch": 40.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.398e-05, |
| "loss": 1.6565, |
| "mean_token_accuracy": 0.6654837459325791, |
| "num_tokens": 18982763.0, |
| "step": 5205 |
| }, |
| { |
| "epoch": 40.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3955000000000004e-05, |
| "loss": 1.7472, |
| "mean_token_accuracy": 0.6510910272598267, |
| "num_tokens": 19000451.0, |
| "step": 5210 |
| }, |
| { |
| "epoch": 40.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3930000000000003e-05, |
| "loss": 1.7149, |
| "mean_token_accuracy": 0.6571291297674179, |
| "num_tokens": 19018812.0, |
| "step": 5215 |
| }, |
| { |
| "epoch": 40.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3905000000000002e-05, |
| "loss": 1.7426, |
| "mean_token_accuracy": 0.6531620174646378, |
| "num_tokens": 19036736.0, |
| "step": 5220 |
| }, |
| { |
| "epoch": 40.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3880000000000002e-05, |
| "loss": 1.7962, |
| "mean_token_accuracy": 0.6403325825929642, |
| "num_tokens": 19054193.0, |
| "step": 5225 |
| }, |
| { |
| "epoch": 40.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3855e-05, |
| "loss": 1.8235, |
| "mean_token_accuracy": 0.634826073050499, |
| "num_tokens": 19071438.0, |
| "step": 5230 |
| }, |
| { |
| "epoch": 40.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3830000000000004e-05, |
| "loss": 1.7791, |
| "mean_token_accuracy": 0.641825932264328, |
| "num_tokens": 19089581.0, |
| "step": 5235 |
| }, |
| { |
| "epoch": 40.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3805000000000003e-05, |
| "loss": 1.7551, |
| "mean_token_accuracy": 0.651043924689293, |
| "num_tokens": 19107710.0, |
| "step": 5240 |
| }, |
| { |
| "epoch": 40.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3780000000000003e-05, |
| "loss": 1.7209, |
| "mean_token_accuracy": 0.6562368720769882, |
| "num_tokens": 19125352.0, |
| "step": 5245 |
| }, |
| { |
| "epoch": 40.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3755000000000002e-05, |
| "loss": 1.7579, |
| "mean_token_accuracy": 0.6454941630363464, |
| "num_tokens": 19142899.0, |
| "step": 5250 |
| }, |
| { |
| "epoch": 40.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.373e-05, |
| "loss": 1.7409, |
| "mean_token_accuracy": 0.6469170600175858, |
| "num_tokens": 19161441.0, |
| "step": 5255 |
| }, |
| { |
| "epoch": 40.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3705e-05, |
| "loss": 1.7284, |
| "mean_token_accuracy": 0.6500023663043976, |
| "num_tokens": 19179916.0, |
| "step": 5260 |
| }, |
| { |
| "epoch": 40.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3680000000000004e-05, |
| "loss": 1.7584, |
| "mean_token_accuracy": 0.6516165018081665, |
| "num_tokens": 19197579.0, |
| "step": 5265 |
| }, |
| { |
| "epoch": 40.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3655000000000003e-05, |
| "loss": 1.6656, |
| "mean_token_accuracy": 0.6603842228651047, |
| "num_tokens": 19217073.0, |
| "step": 5270 |
| }, |
| { |
| "epoch": 40.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3630000000000002e-05, |
| "loss": 1.7213, |
| "mean_token_accuracy": 0.6537799417972565, |
| "num_tokens": 19235609.0, |
| "step": 5275 |
| }, |
| { |
| "epoch": 40.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3605000000000002e-05, |
| "loss": 1.6618, |
| "mean_token_accuracy": 0.664988386631012, |
| "num_tokens": 19254738.0, |
| "step": 5280 |
| }, |
| { |
| "epoch": 40.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.358e-05, |
| "loss": 1.7453, |
| "mean_token_accuracy": 0.6468230724334717, |
| "num_tokens": 19273467.0, |
| "step": 5285 |
| }, |
| { |
| "epoch": 40.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3555e-05, |
| "loss": 1.7231, |
| "mean_token_accuracy": 0.6521205425262451, |
| "num_tokens": 19292138.0, |
| "step": 5290 |
| }, |
| { |
| "epoch": 40.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3530000000000003e-05, |
| "loss": 1.7605, |
| "mean_token_accuracy": 0.652343025803566, |
| "num_tokens": 19310319.0, |
| "step": 5295 |
| }, |
| { |
| "epoch": 40.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3505000000000003e-05, |
| "loss": 1.7581, |
| "mean_token_accuracy": 0.6503111094236373, |
| "num_tokens": 19328485.0, |
| "step": 5300 |
| }, |
| { |
| "epoch": 40.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3480000000000002e-05, |
| "loss": 1.8109, |
| "mean_token_accuracy": 0.6344301998615265, |
| "num_tokens": 19346027.0, |
| "step": 5305 |
| }, |
| { |
| "epoch": 40.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3455e-05, |
| "loss": 1.7801, |
| "mean_token_accuracy": 0.6480203717947006, |
| "num_tokens": 19364659.0, |
| "step": 5310 |
| }, |
| { |
| "epoch": 40.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.343e-05, |
| "loss": 1.7486, |
| "mean_token_accuracy": 0.6470009624958039, |
| "num_tokens": 19383460.0, |
| "step": 5315 |
| }, |
| { |
| "epoch": 40.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3405e-05, |
| "loss": 1.6961, |
| "mean_token_accuracy": 0.6605125904083252, |
| "num_tokens": 19401413.0, |
| "step": 5320 |
| }, |
| { |
| "epoch": 40.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3380000000000003e-05, |
| "loss": 1.7979, |
| "mean_token_accuracy": 0.6404047667980194, |
| "num_tokens": 19419042.0, |
| "step": 5325 |
| }, |
| { |
| "epoch": 41.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3355000000000003e-05, |
| "loss": 1.6808, |
| "mean_token_accuracy": 0.6540407299995422, |
| "num_tokens": 19438305.0, |
| "step": 5330 |
| }, |
| { |
| "epoch": 41.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3330000000000002e-05, |
| "loss": 1.7928, |
| "mean_token_accuracy": 0.6409297794103622, |
| "num_tokens": 19456136.0, |
| "step": 5335 |
| }, |
| { |
| "epoch": 41.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3305e-05, |
| "loss": 1.6898, |
| "mean_token_accuracy": 0.6670808821916581, |
| "num_tokens": 19474660.0, |
| "step": 5340 |
| }, |
| { |
| "epoch": 41.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.328e-05, |
| "loss": 1.7063, |
| "mean_token_accuracy": 0.6591937988996506, |
| "num_tokens": 19493457.0, |
| "step": 5345 |
| }, |
| { |
| "epoch": 41.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3255e-05, |
| "loss": 1.7488, |
| "mean_token_accuracy": 0.6491418421268463, |
| "num_tokens": 19512018.0, |
| "step": 5350 |
| }, |
| { |
| "epoch": 41.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3230000000000003e-05, |
| "loss": 1.757, |
| "mean_token_accuracy": 0.6469480991363525, |
| "num_tokens": 19529637.0, |
| "step": 5355 |
| }, |
| { |
| "epoch": 41.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3205000000000002e-05, |
| "loss": 1.7163, |
| "mean_token_accuracy": 0.6563492953777313, |
| "num_tokens": 19548075.0, |
| "step": 5360 |
| }, |
| { |
| "epoch": 41.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.318e-05, |
| "loss": 1.8052, |
| "mean_token_accuracy": 0.6417810708284378, |
| "num_tokens": 19565579.0, |
| "step": 5365 |
| }, |
| { |
| "epoch": 41.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3155e-05, |
| "loss": 1.703, |
| "mean_token_accuracy": 0.6617189884185791, |
| "num_tokens": 19584102.0, |
| "step": 5370 |
| }, |
| { |
| "epoch": 41.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.313e-05, |
| "loss": 1.6773, |
| "mean_token_accuracy": 0.6627373963594436, |
| "num_tokens": 19602734.0, |
| "step": 5375 |
| }, |
| { |
| "epoch": 41.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3105000000000003e-05, |
| "loss": 1.7655, |
| "mean_token_accuracy": 0.6403976768255234, |
| "num_tokens": 19620846.0, |
| "step": 5380 |
| }, |
| { |
| "epoch": 41.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3080000000000003e-05, |
| "loss": 1.7217, |
| "mean_token_accuracy": 0.6528929799795151, |
| "num_tokens": 19639574.0, |
| "step": 5385 |
| }, |
| { |
| "epoch": 41.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3055000000000002e-05, |
| "loss": 1.8022, |
| "mean_token_accuracy": 0.641742405295372, |
| "num_tokens": 19657464.0, |
| "step": 5390 |
| }, |
| { |
| "epoch": 41.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.303e-05, |
| "loss": 1.7207, |
| "mean_token_accuracy": 0.6533408045768738, |
| "num_tokens": 19676214.0, |
| "step": 5395 |
| }, |
| { |
| "epoch": 41.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3005e-05, |
| "loss": 1.7098, |
| "mean_token_accuracy": 0.6526803016662598, |
| "num_tokens": 19694850.0, |
| "step": 5400 |
| }, |
| { |
| "epoch": 41.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.298e-05, |
| "loss": 1.767, |
| "mean_token_accuracy": 0.6512164533138275, |
| "num_tokens": 19712732.0, |
| "step": 5405 |
| }, |
| { |
| "epoch": 41.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2955000000000003e-05, |
| "loss": 1.7149, |
| "mean_token_accuracy": 0.6531447738409042, |
| "num_tokens": 19731634.0, |
| "step": 5410 |
| }, |
| { |
| "epoch": 41.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2930000000000002e-05, |
| "loss": 1.7672, |
| "mean_token_accuracy": 0.6508445739746094, |
| "num_tokens": 19748890.0, |
| "step": 5415 |
| }, |
| { |
| "epoch": 41.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2905000000000002e-05, |
| "loss": 1.7294, |
| "mean_token_accuracy": 0.6476221710443497, |
| "num_tokens": 19767058.0, |
| "step": 5420 |
| }, |
| { |
| "epoch": 41.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.288e-05, |
| "loss": 1.713, |
| "mean_token_accuracy": 0.6579526603221894, |
| "num_tokens": 19785185.0, |
| "step": 5425 |
| }, |
| { |
| "epoch": 41.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2855e-05, |
| "loss": 1.6993, |
| "mean_token_accuracy": 0.6542178004980087, |
| "num_tokens": 19803873.0, |
| "step": 5430 |
| }, |
| { |
| "epoch": 41.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.283e-05, |
| "loss": 1.7208, |
| "mean_token_accuracy": 0.6572512924671173, |
| "num_tokens": 19822419.0, |
| "step": 5435 |
| }, |
| { |
| "epoch": 41.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2805000000000003e-05, |
| "loss": 1.7922, |
| "mean_token_accuracy": 0.6356712460517884, |
| "num_tokens": 19840582.0, |
| "step": 5440 |
| }, |
| { |
| "epoch": 41.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2780000000000002e-05, |
| "loss": 1.7735, |
| "mean_token_accuracy": 0.6435914099216461, |
| "num_tokens": 19858971.0, |
| "step": 5445 |
| }, |
| { |
| "epoch": 41.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2755e-05, |
| "loss": 1.7703, |
| "mean_token_accuracy": 0.6400933116674423, |
| "num_tokens": 19876852.0, |
| "step": 5450 |
| }, |
| { |
| "epoch": 41.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.273e-05, |
| "loss": 1.7577, |
| "mean_token_accuracy": 0.6475079268217087, |
| "num_tokens": 19894493.0, |
| "step": 5455 |
| }, |
| { |
| "epoch": 42.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2705e-05, |
| "loss": 1.7518, |
| "mean_token_accuracy": 0.6474265098571778, |
| "num_tokens": 19912410.0, |
| "step": 5460 |
| }, |
| { |
| "epoch": 42.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.268e-05, |
| "loss": 1.7478, |
| "mean_token_accuracy": 0.646801871061325, |
| "num_tokens": 19930455.0, |
| "step": 5465 |
| }, |
| { |
| "epoch": 42.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2655000000000002e-05, |
| "loss": 1.8042, |
| "mean_token_accuracy": 0.6378622591495514, |
| "num_tokens": 19948140.0, |
| "step": 5470 |
| }, |
| { |
| "epoch": 42.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2630000000000002e-05, |
| "loss": 1.7676, |
| "mean_token_accuracy": 0.6440225541591644, |
| "num_tokens": 19965973.0, |
| "step": 5475 |
| }, |
| { |
| "epoch": 42.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2605e-05, |
| "loss": 1.7853, |
| "mean_token_accuracy": 0.6422720283269883, |
| "num_tokens": 19983738.0, |
| "step": 5480 |
| }, |
| { |
| "epoch": 42.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.258e-05, |
| "loss": 1.7484, |
| "mean_token_accuracy": 0.6497463047504425, |
| "num_tokens": 20001740.0, |
| "step": 5485 |
| }, |
| { |
| "epoch": 42.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2555e-05, |
| "loss": 1.7095, |
| "mean_token_accuracy": 0.6572997212409973, |
| "num_tokens": 20019689.0, |
| "step": 5490 |
| }, |
| { |
| "epoch": 42.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.253e-05, |
| "loss": 1.7257, |
| "mean_token_accuracy": 0.6576829224824905, |
| "num_tokens": 20037781.0, |
| "step": 5495 |
| }, |
| { |
| "epoch": 42.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2505000000000002e-05, |
| "loss": 1.7521, |
| "mean_token_accuracy": 0.647400951385498, |
| "num_tokens": 20056196.0, |
| "step": 5500 |
| }, |
| { |
| "epoch": 42.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.248e-05, |
| "loss": 1.6555, |
| "mean_token_accuracy": 0.6666512668132782, |
| "num_tokens": 20075099.0, |
| "step": 5505 |
| }, |
| { |
| "epoch": 42.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2455e-05, |
| "loss": 1.7001, |
| "mean_token_accuracy": 0.6635800451040268, |
| "num_tokens": 20094237.0, |
| "step": 5510 |
| }, |
| { |
| "epoch": 42.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.243e-05, |
| "loss": 1.7268, |
| "mean_token_accuracy": 0.6491197019815445, |
| "num_tokens": 20112703.0, |
| "step": 5515 |
| }, |
| { |
| "epoch": 42.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2405e-05, |
| "loss": 1.7641, |
| "mean_token_accuracy": 0.6478333413600922, |
| "num_tokens": 20130707.0, |
| "step": 5520 |
| }, |
| { |
| "epoch": 42.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2380000000000003e-05, |
| "loss": 1.7299, |
| "mean_token_accuracy": 0.6528987497091293, |
| "num_tokens": 20149020.0, |
| "step": 5525 |
| }, |
| { |
| "epoch": 42.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2355000000000002e-05, |
| "loss": 1.7313, |
| "mean_token_accuracy": 0.6519591569900512, |
| "num_tokens": 20167484.0, |
| "step": 5530 |
| }, |
| { |
| "epoch": 42.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.233e-05, |
| "loss": 1.7865, |
| "mean_token_accuracy": 0.6391574621200562, |
| "num_tokens": 20184976.0, |
| "step": 5535 |
| }, |
| { |
| "epoch": 42.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2305e-05, |
| "loss": 1.6944, |
| "mean_token_accuracy": 0.658808296918869, |
| "num_tokens": 20203744.0, |
| "step": 5540 |
| }, |
| { |
| "epoch": 42.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.228e-05, |
| "loss": 1.6489, |
| "mean_token_accuracy": 0.6704173415899277, |
| "num_tokens": 20222753.0, |
| "step": 5545 |
| }, |
| { |
| "epoch": 42.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2255e-05, |
| "loss": 1.7507, |
| "mean_token_accuracy": 0.6489108324050903, |
| "num_tokens": 20241135.0, |
| "step": 5550 |
| }, |
| { |
| "epoch": 42.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2230000000000002e-05, |
| "loss": 1.722, |
| "mean_token_accuracy": 0.6489439159631729, |
| "num_tokens": 20260178.0, |
| "step": 5555 |
| }, |
| { |
| "epoch": 42.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2205000000000002e-05, |
| "loss": 1.7384, |
| "mean_token_accuracy": 0.6486925899982452, |
| "num_tokens": 20278793.0, |
| "step": 5560 |
| }, |
| { |
| "epoch": 42.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.218e-05, |
| "loss": 1.7011, |
| "mean_token_accuracy": 0.6595232754945755, |
| "num_tokens": 20297915.0, |
| "step": 5565 |
| }, |
| { |
| "epoch": 42.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2155e-05, |
| "loss": 1.756, |
| "mean_token_accuracy": 0.6450414389371872, |
| "num_tokens": 20316235.0, |
| "step": 5570 |
| }, |
| { |
| "epoch": 42.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.213e-05, |
| "loss": 1.8509, |
| "mean_token_accuracy": 0.6292482733726501, |
| "num_tokens": 20333291.0, |
| "step": 5575 |
| }, |
| { |
| "epoch": 42.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2105e-05, |
| "loss": 1.7492, |
| "mean_token_accuracy": 0.6515639036893844, |
| "num_tokens": 20351293.0, |
| "step": 5580 |
| }, |
| { |
| "epoch": 42.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2080000000000002e-05, |
| "loss": 1.7454, |
| "mean_token_accuracy": 0.6521316289901733, |
| "num_tokens": 20368835.0, |
| "step": 5585 |
| }, |
| { |
| "epoch": 43.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2055e-05, |
| "loss": 1.7799, |
| "mean_token_accuracy": 0.6442039728164672, |
| "num_tokens": 20386515.0, |
| "step": 5590 |
| }, |
| { |
| "epoch": 43.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.203e-05, |
| "loss": 1.7707, |
| "mean_token_accuracy": 0.6441226691007614, |
| "num_tokens": 20404313.0, |
| "step": 5595 |
| }, |
| { |
| "epoch": 43.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2005e-05, |
| "loss": 1.7275, |
| "mean_token_accuracy": 0.6514497637748718, |
| "num_tokens": 20422886.0, |
| "step": 5600 |
| }, |
| { |
| "epoch": 43.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.198e-05, |
| "loss": 1.7403, |
| "mean_token_accuracy": 0.6523116052150726, |
| "num_tokens": 20440808.0, |
| "step": 5605 |
| }, |
| { |
| "epoch": 43.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1955e-05, |
| "loss": 1.6582, |
| "mean_token_accuracy": 0.6698237389326096, |
| "num_tokens": 20459519.0, |
| "step": 5610 |
| }, |
| { |
| "epoch": 43.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1930000000000002e-05, |
| "loss": 1.7039, |
| "mean_token_accuracy": 0.6586238622665406, |
| "num_tokens": 20478450.0, |
| "step": 5615 |
| }, |
| { |
| "epoch": 43.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1905e-05, |
| "loss": 1.733, |
| "mean_token_accuracy": 0.650734207034111, |
| "num_tokens": 20497721.0, |
| "step": 5620 |
| }, |
| { |
| "epoch": 43.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.188e-05, |
| "loss": 1.7549, |
| "mean_token_accuracy": 0.6470336526632309, |
| "num_tokens": 20515794.0, |
| "step": 5625 |
| }, |
| { |
| "epoch": 43.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1855e-05, |
| "loss": 1.7582, |
| "mean_token_accuracy": 0.6463449627161026, |
| "num_tokens": 20533916.0, |
| "step": 5630 |
| }, |
| { |
| "epoch": 43.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.183e-05, |
| "loss": 1.6911, |
| "mean_token_accuracy": 0.6570004016160965, |
| "num_tokens": 20552781.0, |
| "step": 5635 |
| }, |
| { |
| "epoch": 43.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1805e-05, |
| "loss": 1.7714, |
| "mean_token_accuracy": 0.6484808444976806, |
| "num_tokens": 20570987.0, |
| "step": 5640 |
| }, |
| { |
| "epoch": 43.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.178e-05, |
| "loss": 1.7396, |
| "mean_token_accuracy": 0.6534300118684768, |
| "num_tokens": 20589119.0, |
| "step": 5645 |
| }, |
| { |
| "epoch": 43.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1755e-05, |
| "loss": 1.7263, |
| "mean_token_accuracy": 0.6558996856212616, |
| "num_tokens": 20607573.0, |
| "step": 5650 |
| }, |
| { |
| "epoch": 43.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.173e-05, |
| "loss": 1.7667, |
| "mean_token_accuracy": 0.6426133215427399, |
| "num_tokens": 20625925.0, |
| "step": 5655 |
| }, |
| { |
| "epoch": 43.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1705e-05, |
| "loss": 1.7072, |
| "mean_token_accuracy": 0.6583232104778289, |
| "num_tokens": 20644265.0, |
| "step": 5660 |
| }, |
| { |
| "epoch": 43.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.168e-05, |
| "loss": 1.7284, |
| "mean_token_accuracy": 0.6528914481401443, |
| "num_tokens": 20662528.0, |
| "step": 5665 |
| }, |
| { |
| "epoch": 43.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1655000000000002e-05, |
| "loss": 1.6837, |
| "mean_token_accuracy": 0.6582222819328308, |
| "num_tokens": 20681100.0, |
| "step": 5670 |
| }, |
| { |
| "epoch": 43.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.163e-05, |
| "loss": 1.7536, |
| "mean_token_accuracy": 0.6478887408971786, |
| "num_tokens": 20699280.0, |
| "step": 5675 |
| }, |
| { |
| "epoch": 43.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1605e-05, |
| "loss": 1.7702, |
| "mean_token_accuracy": 0.6438897788524628, |
| "num_tokens": 20717346.0, |
| "step": 5680 |
| }, |
| { |
| "epoch": 43.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.158e-05, |
| "loss": 1.7388, |
| "mean_token_accuracy": 0.6525939971208572, |
| "num_tokens": 20735654.0, |
| "step": 5685 |
| }, |
| { |
| "epoch": 43.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1555e-05, |
| "loss": 1.7583, |
| "mean_token_accuracy": 0.649162980914116, |
| "num_tokens": 20753638.0, |
| "step": 5690 |
| }, |
| { |
| "epoch": 43.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.153e-05, |
| "loss": 1.7555, |
| "mean_token_accuracy": 0.6506625056266785, |
| "num_tokens": 20771873.0, |
| "step": 5695 |
| }, |
| { |
| "epoch": 43.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1505e-05, |
| "loss": 1.7904, |
| "mean_token_accuracy": 0.6409453451633453, |
| "num_tokens": 20789218.0, |
| "step": 5700 |
| }, |
| { |
| "epoch": 43.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.148e-05, |
| "loss": 1.7477, |
| "mean_token_accuracy": 0.6445836216211319, |
| "num_tokens": 20807034.0, |
| "step": 5705 |
| }, |
| { |
| "epoch": 43.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1455e-05, |
| "loss": 1.7596, |
| "mean_token_accuracy": 0.6507911443710327, |
| "num_tokens": 20824903.0, |
| "step": 5710 |
| }, |
| { |
| "epoch": 43.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.143e-05, |
| "loss": 1.7877, |
| "mean_token_accuracy": 0.6402207911014557, |
| "num_tokens": 20842868.0, |
| "step": 5715 |
| }, |
| { |
| "epoch": 44.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1405e-05, |
| "loss": 1.7548, |
| "mean_token_accuracy": 0.6519168674945831, |
| "num_tokens": 20860620.0, |
| "step": 5720 |
| }, |
| { |
| "epoch": 44.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.138e-05, |
| "loss": 1.7833, |
| "mean_token_accuracy": 0.6388791173696518, |
| "num_tokens": 20878837.0, |
| "step": 5725 |
| }, |
| { |
| "epoch": 44.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1355e-05, |
| "loss": 1.7785, |
| "mean_token_accuracy": 0.6488111823797226, |
| "num_tokens": 20896892.0, |
| "step": 5730 |
| }, |
| { |
| "epoch": 44.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.133e-05, |
| "loss": 1.8372, |
| "mean_token_accuracy": 0.630810198187828, |
| "num_tokens": 20914256.0, |
| "step": 5735 |
| }, |
| { |
| "epoch": 44.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1305e-05, |
| "loss": 1.6904, |
| "mean_token_accuracy": 0.6603229939937592, |
| "num_tokens": 20932992.0, |
| "step": 5740 |
| }, |
| { |
| "epoch": 44.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.128e-05, |
| "loss": 1.7463, |
| "mean_token_accuracy": 0.65241838991642, |
| "num_tokens": 20950902.0, |
| "step": 5745 |
| }, |
| { |
| "epoch": 44.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1255e-05, |
| "loss": 1.7803, |
| "mean_token_accuracy": 0.6396133095026016, |
| "num_tokens": 20969175.0, |
| "step": 5750 |
| }, |
| { |
| "epoch": 44.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.123e-05, |
| "loss": 1.7431, |
| "mean_token_accuracy": 0.6489101707935333, |
| "num_tokens": 20987238.0, |
| "step": 5755 |
| }, |
| { |
| "epoch": 44.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1205e-05, |
| "loss": 1.7022, |
| "mean_token_accuracy": 0.658707058429718, |
| "num_tokens": 21005379.0, |
| "step": 5760 |
| }, |
| { |
| "epoch": 44.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.118e-05, |
| "loss": 1.7081, |
| "mean_token_accuracy": 0.65692238509655, |
| "num_tokens": 21023486.0, |
| "step": 5765 |
| }, |
| { |
| "epoch": 44.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1155e-05, |
| "loss": 1.7568, |
| "mean_token_accuracy": 0.6409606844186783, |
| "num_tokens": 21041662.0, |
| "step": 5770 |
| }, |
| { |
| "epoch": 44.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.113e-05, |
| "loss": 1.7182, |
| "mean_token_accuracy": 0.6524233877658844, |
| "num_tokens": 21060206.0, |
| "step": 5775 |
| }, |
| { |
| "epoch": 44.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1105e-05, |
| "loss": 1.7231, |
| "mean_token_accuracy": 0.655006617307663, |
| "num_tokens": 21078736.0, |
| "step": 5780 |
| }, |
| { |
| "epoch": 44.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1079999999999998e-05, |
| "loss": 1.7458, |
| "mean_token_accuracy": 0.648220095038414, |
| "num_tokens": 21097223.0, |
| "step": 5785 |
| }, |
| { |
| "epoch": 44.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1055e-05, |
| "loss": 1.6928, |
| "mean_token_accuracy": 0.6601225137710571, |
| "num_tokens": 21116083.0, |
| "step": 5790 |
| }, |
| { |
| "epoch": 44.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.103e-05, |
| "loss": 1.7913, |
| "mean_token_accuracy": 0.6423264652490616, |
| "num_tokens": 21133615.0, |
| "step": 5795 |
| }, |
| { |
| "epoch": 44.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1005e-05, |
| "loss": 1.6964, |
| "mean_token_accuracy": 0.6554509729146958, |
| "num_tokens": 21152459.0, |
| "step": 5800 |
| }, |
| { |
| "epoch": 44.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.098e-05, |
| "loss": 1.6493, |
| "mean_token_accuracy": 0.6667488932609558, |
| "num_tokens": 21171676.0, |
| "step": 5805 |
| }, |
| { |
| "epoch": 44.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0955e-05, |
| "loss": 1.8191, |
| "mean_token_accuracy": 0.6378259479999542, |
| "num_tokens": 21188876.0, |
| "step": 5810 |
| }, |
| { |
| "epoch": 44.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.093e-05, |
| "loss": 1.7046, |
| "mean_token_accuracy": 0.6647377282381057, |
| "num_tokens": 21207312.0, |
| "step": 5815 |
| }, |
| { |
| "epoch": 44.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0905000000000004e-05, |
| "loss": 1.7338, |
| "mean_token_accuracy": 0.6526555448770524, |
| "num_tokens": 21225855.0, |
| "step": 5820 |
| }, |
| { |
| "epoch": 44.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0880000000000003e-05, |
| "loss": 1.7651, |
| "mean_token_accuracy": 0.6445731610059738, |
| "num_tokens": 21244188.0, |
| "step": 5825 |
| }, |
| { |
| "epoch": 44.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0855000000000003e-05, |
| "loss": 1.7282, |
| "mean_token_accuracy": 0.6482422709465027, |
| "num_tokens": 21262701.0, |
| "step": 5830 |
| }, |
| { |
| "epoch": 44.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0830000000000002e-05, |
| "loss": 1.79, |
| "mean_token_accuracy": 0.6363201081752777, |
| "num_tokens": 21280639.0, |
| "step": 5835 |
| }, |
| { |
| "epoch": 44.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0805e-05, |
| "loss": 1.7229, |
| "mean_token_accuracy": 0.6573170632123947, |
| "num_tokens": 21298845.0, |
| "step": 5840 |
| }, |
| { |
| "epoch": 44.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.078e-05, |
| "loss": 1.6992, |
| "mean_token_accuracy": 0.6632038950920105, |
| "num_tokens": 21317164.0, |
| "step": 5845 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0755000000000004e-05, |
| "loss": 1.7663, |
| "mean_token_accuracy": 0.6449836879968643, |
| "num_tokens": 21334725.0, |
| "step": 5850 |
| }, |
| { |
| "epoch": 45.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0730000000000003e-05, |
| "loss": 1.7384, |
| "mean_token_accuracy": 0.6504657328128814, |
| "num_tokens": 21353119.0, |
| "step": 5855 |
| }, |
| { |
| "epoch": 45.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0705000000000003e-05, |
| "loss": 1.6998, |
| "mean_token_accuracy": 0.6628474831581116, |
| "num_tokens": 21371719.0, |
| "step": 5860 |
| }, |
| { |
| "epoch": 45.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0680000000000002e-05, |
| "loss": 1.7366, |
| "mean_token_accuracy": 0.6500916868448258, |
| "num_tokens": 21390734.0, |
| "step": 5865 |
| }, |
| { |
| "epoch": 45.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0655e-05, |
| "loss": 1.7111, |
| "mean_token_accuracy": 0.6544335097074508, |
| "num_tokens": 21409214.0, |
| "step": 5870 |
| }, |
| { |
| "epoch": 45.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.063e-05, |
| "loss": 1.6598, |
| "mean_token_accuracy": 0.6647601097822189, |
| "num_tokens": 21428341.0, |
| "step": 5875 |
| }, |
| { |
| "epoch": 45.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0605000000000003e-05, |
| "loss": 1.7528, |
| "mean_token_accuracy": 0.6484750181436538, |
| "num_tokens": 21446444.0, |
| "step": 5880 |
| }, |
| { |
| "epoch": 45.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0580000000000003e-05, |
| "loss": 1.7472, |
| "mean_token_accuracy": 0.6514826148748398, |
| "num_tokens": 21464094.0, |
| "step": 5885 |
| }, |
| { |
| "epoch": 45.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0555000000000002e-05, |
| "loss": 1.6722, |
| "mean_token_accuracy": 0.6630917578935623, |
| "num_tokens": 21482239.0, |
| "step": 5890 |
| }, |
| { |
| "epoch": 45.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.053e-05, |
| "loss": 1.7819, |
| "mean_token_accuracy": 0.6405300617218017, |
| "num_tokens": 21499829.0, |
| "step": 5895 |
| }, |
| { |
| "epoch": 45.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0505e-05, |
| "loss": 1.7291, |
| "mean_token_accuracy": 0.6537360846996307, |
| "num_tokens": 21518451.0, |
| "step": 5900 |
| }, |
| { |
| "epoch": 45.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.048e-05, |
| "loss": 1.7311, |
| "mean_token_accuracy": 0.6522845417261124, |
| "num_tokens": 21536579.0, |
| "step": 5905 |
| }, |
| { |
| "epoch": 45.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0455000000000003e-05, |
| "loss": 1.6915, |
| "mean_token_accuracy": 0.658150565624237, |
| "num_tokens": 21555889.0, |
| "step": 5910 |
| }, |
| { |
| "epoch": 45.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0430000000000003e-05, |
| "loss": 1.7755, |
| "mean_token_accuracy": 0.6407826870679856, |
| "num_tokens": 21574267.0, |
| "step": 5915 |
| }, |
| { |
| "epoch": 45.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0405000000000002e-05, |
| "loss": 1.7662, |
| "mean_token_accuracy": 0.6452647089958191, |
| "num_tokens": 21591748.0, |
| "step": 5920 |
| }, |
| { |
| "epoch": 45.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.038e-05, |
| "loss": 1.8003, |
| "mean_token_accuracy": 0.6423941493034363, |
| "num_tokens": 21608855.0, |
| "step": 5925 |
| }, |
| { |
| "epoch": 45.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0355e-05, |
| "loss": 1.7603, |
| "mean_token_accuracy": 0.6519896745681762, |
| "num_tokens": 21626524.0, |
| "step": 5930 |
| }, |
| { |
| "epoch": 45.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.033e-05, |
| "loss": 1.7864, |
| "mean_token_accuracy": 0.6421408951282501, |
| "num_tokens": 21644165.0, |
| "step": 5935 |
| }, |
| { |
| "epoch": 45.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0305000000000003e-05, |
| "loss": 1.8047, |
| "mean_token_accuracy": 0.6421581089496613, |
| "num_tokens": 21661822.0, |
| "step": 5940 |
| }, |
| { |
| "epoch": 45.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0280000000000002e-05, |
| "loss": 1.6908, |
| "mean_token_accuracy": 0.6561397314071655, |
| "num_tokens": 21681068.0, |
| "step": 5945 |
| }, |
| { |
| "epoch": 45.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0255000000000002e-05, |
| "loss": 1.7477, |
| "mean_token_accuracy": 0.6448839485645295, |
| "num_tokens": 21699251.0, |
| "step": 5950 |
| }, |
| { |
| "epoch": 45.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 2.023e-05, |
| "loss": 1.8038, |
| "mean_token_accuracy": 0.6432386726140976, |
| "num_tokens": 21716596.0, |
| "step": 5955 |
| }, |
| { |
| "epoch": 45.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0205e-05, |
| "loss": 1.768, |
| "mean_token_accuracy": 0.6459046006202698, |
| "num_tokens": 21734632.0, |
| "step": 5960 |
| }, |
| { |
| "epoch": 45.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0180000000000003e-05, |
| "loss": 1.7534, |
| "mean_token_accuracy": 0.6509894400835037, |
| "num_tokens": 21753024.0, |
| "step": 5965 |
| }, |
| { |
| "epoch": 45.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0155000000000003e-05, |
| "loss": 1.6896, |
| "mean_token_accuracy": 0.663582494854927, |
| "num_tokens": 21771809.0, |
| "step": 5970 |
| }, |
| { |
| "epoch": 45.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0130000000000002e-05, |
| "loss": 1.7161, |
| "mean_token_accuracy": 0.6539533823728562, |
| "num_tokens": 21790432.0, |
| "step": 5975 |
| }, |
| { |
| "epoch": 46.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0105e-05, |
| "loss": 1.7338, |
| "mean_token_accuracy": 0.6510831654071808, |
| "num_tokens": 21808830.0, |
| "step": 5980 |
| }, |
| { |
| "epoch": 46.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 2.008e-05, |
| "loss": 1.7729, |
| "mean_token_accuracy": 0.6468154609203338, |
| "num_tokens": 21826336.0, |
| "step": 5985 |
| }, |
| { |
| "epoch": 46.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0055e-05, |
| "loss": 1.7439, |
| "mean_token_accuracy": 0.657364284992218, |
| "num_tokens": 21844521.0, |
| "step": 5990 |
| }, |
| { |
| "epoch": 46.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0030000000000003e-05, |
| "loss": 1.7688, |
| "mean_token_accuracy": 0.645710214972496, |
| "num_tokens": 21862800.0, |
| "step": 5995 |
| }, |
| { |
| "epoch": 46.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 2.0005000000000002e-05, |
| "loss": 1.7315, |
| "mean_token_accuracy": 0.6578894317150116, |
| "num_tokens": 21880938.0, |
| "step": 6000 |
| }, |
| { |
| "epoch": 46.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9980000000000002e-05, |
| "loss": 1.7818, |
| "mean_token_accuracy": 0.641998165845871, |
| "num_tokens": 21899058.0, |
| "step": 6005 |
| }, |
| { |
| "epoch": 46.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9955e-05, |
| "loss": 1.7064, |
| "mean_token_accuracy": 0.6609808683395386, |
| "num_tokens": 21917113.0, |
| "step": 6010 |
| }, |
| { |
| "epoch": 46.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.993e-05, |
| "loss": 1.7452, |
| "mean_token_accuracy": 0.6411201417446136, |
| "num_tokens": 21935754.0, |
| "step": 6015 |
| }, |
| { |
| "epoch": 46.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9905e-05, |
| "loss": 1.7566, |
| "mean_token_accuracy": 0.6457954943180084, |
| "num_tokens": 21954031.0, |
| "step": 6020 |
| }, |
| { |
| "epoch": 46.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9880000000000003e-05, |
| "loss": 1.7037, |
| "mean_token_accuracy": 0.6573837339878082, |
| "num_tokens": 21972598.0, |
| "step": 6025 |
| }, |
| { |
| "epoch": 46.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9855000000000002e-05, |
| "loss": 1.7076, |
| "mean_token_accuracy": 0.658588969707489, |
| "num_tokens": 21991074.0, |
| "step": 6030 |
| }, |
| { |
| "epoch": 46.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.983e-05, |
| "loss": 1.7319, |
| "mean_token_accuracy": 0.6501603156328202, |
| "num_tokens": 22009260.0, |
| "step": 6035 |
| }, |
| { |
| "epoch": 46.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9805e-05, |
| "loss": 1.6804, |
| "mean_token_accuracy": 0.65768404006958, |
| "num_tokens": 22028277.0, |
| "step": 6040 |
| }, |
| { |
| "epoch": 46.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.978e-05, |
| "loss": 1.7751, |
| "mean_token_accuracy": 0.6457584232091904, |
| "num_tokens": 22046348.0, |
| "step": 6045 |
| }, |
| { |
| "epoch": 46.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9755e-05, |
| "loss": 1.8084, |
| "mean_token_accuracy": 0.6365677118301392, |
| "num_tokens": 22064065.0, |
| "step": 6050 |
| }, |
| { |
| "epoch": 46.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9730000000000003e-05, |
| "loss": 1.7312, |
| "mean_token_accuracy": 0.6494260609149933, |
| "num_tokens": 22082601.0, |
| "step": 6055 |
| }, |
| { |
| "epoch": 46.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9705000000000002e-05, |
| "loss": 1.7834, |
| "mean_token_accuracy": 0.6389273494482041, |
| "num_tokens": 22100180.0, |
| "step": 6060 |
| }, |
| { |
| "epoch": 46.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.968e-05, |
| "loss": 1.7366, |
| "mean_token_accuracy": 0.6541613578796387, |
| "num_tokens": 22118685.0, |
| "step": 6065 |
| }, |
| { |
| "epoch": 46.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9655e-05, |
| "loss": 1.7336, |
| "mean_token_accuracy": 0.6524038523435592, |
| "num_tokens": 22136815.0, |
| "step": 6070 |
| }, |
| { |
| "epoch": 46.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.963e-05, |
| "loss": 1.7857, |
| "mean_token_accuracy": 0.6441529363393783, |
| "num_tokens": 22154538.0, |
| "step": 6075 |
| }, |
| { |
| "epoch": 46.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9605e-05, |
| "loss": 1.7715, |
| "mean_token_accuracy": 0.6463933974504471, |
| "num_tokens": 22173083.0, |
| "step": 6080 |
| }, |
| { |
| "epoch": 46.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9580000000000002e-05, |
| "loss": 1.6809, |
| "mean_token_accuracy": 0.6644227296113968, |
| "num_tokens": 22191785.0, |
| "step": 6085 |
| }, |
| { |
| "epoch": 46.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9555e-05, |
| "loss": 1.6764, |
| "mean_token_accuracy": 0.6619850039482117, |
| "num_tokens": 22210517.0, |
| "step": 6090 |
| }, |
| { |
| "epoch": 46.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.953e-05, |
| "loss": 1.729, |
| "mean_token_accuracy": 0.6496035814285278, |
| "num_tokens": 22228596.0, |
| "step": 6095 |
| }, |
| { |
| "epoch": 46.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9505e-05, |
| "loss": 1.7949, |
| "mean_token_accuracy": 0.6454124927520752, |
| "num_tokens": 22245784.0, |
| "step": 6100 |
| }, |
| { |
| "epoch": 46.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.948e-05, |
| "loss": 1.6937, |
| "mean_token_accuracy": 0.6616695493459701, |
| "num_tokens": 22263991.0, |
| "step": 6105 |
| }, |
| { |
| "epoch": 47.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9455000000000003e-05, |
| "loss": 1.7195, |
| "mean_token_accuracy": 0.6506162971258164, |
| "num_tokens": 22282935.0, |
| "step": 6110 |
| }, |
| { |
| "epoch": 47.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9430000000000002e-05, |
| "loss": 1.7305, |
| "mean_token_accuracy": 0.6571007430553436, |
| "num_tokens": 22301303.0, |
| "step": 6115 |
| }, |
| { |
| "epoch": 47.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9405e-05, |
| "loss": 1.6624, |
| "mean_token_accuracy": 0.6648898661136627, |
| "num_tokens": 22320863.0, |
| "step": 6120 |
| }, |
| { |
| "epoch": 47.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.938e-05, |
| "loss": 1.7835, |
| "mean_token_accuracy": 0.6378375381231308, |
| "num_tokens": 22338722.0, |
| "step": 6125 |
| }, |
| { |
| "epoch": 47.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9355e-05, |
| "loss": 1.7585, |
| "mean_token_accuracy": 0.6448232650756835, |
| "num_tokens": 22357250.0, |
| "step": 6130 |
| }, |
| { |
| "epoch": 47.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.933e-05, |
| "loss": 1.7201, |
| "mean_token_accuracy": 0.6556304097175598, |
| "num_tokens": 22375225.0, |
| "step": 6135 |
| }, |
| { |
| "epoch": 47.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9305000000000002e-05, |
| "loss": 1.6685, |
| "mean_token_accuracy": 0.6670328795909881, |
| "num_tokens": 22394135.0, |
| "step": 6140 |
| }, |
| { |
| "epoch": 47.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9280000000000002e-05, |
| "loss": 1.6467, |
| "mean_token_accuracy": 0.6665792822837829, |
| "num_tokens": 22413510.0, |
| "step": 6145 |
| }, |
| { |
| "epoch": 47.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9255e-05, |
| "loss": 1.7692, |
| "mean_token_accuracy": 0.6390243858098984, |
| "num_tokens": 22432228.0, |
| "step": 6150 |
| }, |
| { |
| "epoch": 47.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.923e-05, |
| "loss": 1.7202, |
| "mean_token_accuracy": 0.6546848744153977, |
| "num_tokens": 22450356.0, |
| "step": 6155 |
| }, |
| { |
| "epoch": 47.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9205e-05, |
| "loss": 1.8106, |
| "mean_token_accuracy": 0.6357954889535904, |
| "num_tokens": 22467696.0, |
| "step": 6160 |
| }, |
| { |
| "epoch": 47.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.918e-05, |
| "loss": 1.7551, |
| "mean_token_accuracy": 0.6453001827001572, |
| "num_tokens": 22486250.0, |
| "step": 6165 |
| }, |
| { |
| "epoch": 47.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9155000000000002e-05, |
| "loss": 1.7741, |
| "mean_token_accuracy": 0.6439648687839508, |
| "num_tokens": 22504172.0, |
| "step": 6170 |
| }, |
| { |
| "epoch": 47.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.913e-05, |
| "loss": 1.7398, |
| "mean_token_accuracy": 0.6509507864713668, |
| "num_tokens": 22521860.0, |
| "step": 6175 |
| }, |
| { |
| "epoch": 47.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9105e-05, |
| "loss": 1.7535, |
| "mean_token_accuracy": 0.6506534874439239, |
| "num_tokens": 22539795.0, |
| "step": 6180 |
| }, |
| { |
| "epoch": 47.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.908e-05, |
| "loss": 1.6702, |
| "mean_token_accuracy": 0.6614595890045166, |
| "num_tokens": 22558751.0, |
| "step": 6185 |
| }, |
| { |
| "epoch": 47.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9055e-05, |
| "loss": 1.7339, |
| "mean_token_accuracy": 0.6529056757688523, |
| "num_tokens": 22576706.0, |
| "step": 6190 |
| }, |
| { |
| "epoch": 47.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.903e-05, |
| "loss": 1.7892, |
| "mean_token_accuracy": 0.6409416913986206, |
| "num_tokens": 22594485.0, |
| "step": 6195 |
| }, |
| { |
| "epoch": 47.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9005000000000002e-05, |
| "loss": 1.6885, |
| "mean_token_accuracy": 0.6648684412240982, |
| "num_tokens": 22613662.0, |
| "step": 6200 |
| }, |
| { |
| "epoch": 47.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.898e-05, |
| "loss": 1.7837, |
| "mean_token_accuracy": 0.6461777001619339, |
| "num_tokens": 22631498.0, |
| "step": 6205 |
| }, |
| { |
| "epoch": 47.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8955e-05, |
| "loss": 1.8125, |
| "mean_token_accuracy": 0.6402515649795533, |
| "num_tokens": 22648506.0, |
| "step": 6210 |
| }, |
| { |
| "epoch": 47.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.893e-05, |
| "loss": 1.7494, |
| "mean_token_accuracy": 0.6493428587913513, |
| "num_tokens": 22666870.0, |
| "step": 6215 |
| }, |
| { |
| "epoch": 47.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8905e-05, |
| "loss": 1.7876, |
| "mean_token_accuracy": 0.6424293428659439, |
| "num_tokens": 22684255.0, |
| "step": 6220 |
| }, |
| { |
| "epoch": 47.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.888e-05, |
| "loss": 1.7513, |
| "mean_token_accuracy": 0.6502956181764603, |
| "num_tokens": 22702158.0, |
| "step": 6225 |
| }, |
| { |
| "epoch": 47.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8855e-05, |
| "loss": 1.6915, |
| "mean_token_accuracy": 0.6557220876216888, |
| "num_tokens": 22720730.0, |
| "step": 6230 |
| }, |
| { |
| "epoch": 47.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.883e-05, |
| "loss": 1.7548, |
| "mean_token_accuracy": 0.6476509481668472, |
| "num_tokens": 22739451.0, |
| "step": 6235 |
| }, |
| { |
| "epoch": 48.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8805e-05, |
| "loss": 1.7672, |
| "mean_token_accuracy": 0.6477993667125702, |
| "num_tokens": 22757040.0, |
| "step": 6240 |
| }, |
| { |
| "epoch": 48.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.878e-05, |
| "loss": 1.7979, |
| "mean_token_accuracy": 0.6381140261888504, |
| "num_tokens": 22775082.0, |
| "step": 6245 |
| }, |
| { |
| "epoch": 48.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8755e-05, |
| "loss": 1.7172, |
| "mean_token_accuracy": 0.6539857119321824, |
| "num_tokens": 22793573.0, |
| "step": 6250 |
| }, |
| { |
| "epoch": 48.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8730000000000002e-05, |
| "loss": 1.7259, |
| "mean_token_accuracy": 0.652483606338501, |
| "num_tokens": 22811965.0, |
| "step": 6255 |
| }, |
| { |
| "epoch": 48.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8705e-05, |
| "loss": 1.7343, |
| "mean_token_accuracy": 0.6539576113224029, |
| "num_tokens": 22829928.0, |
| "step": 6260 |
| }, |
| { |
| "epoch": 48.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.868e-05, |
| "loss": 1.7318, |
| "mean_token_accuracy": 0.651011449098587, |
| "num_tokens": 22848013.0, |
| "step": 6265 |
| }, |
| { |
| "epoch": 48.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8655e-05, |
| "loss": 1.705, |
| "mean_token_accuracy": 0.6651150584220886, |
| "num_tokens": 22866785.0, |
| "step": 6270 |
| }, |
| { |
| "epoch": 48.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.863e-05, |
| "loss": 1.7495, |
| "mean_token_accuracy": 0.6475239574909211, |
| "num_tokens": 22884516.0, |
| "step": 6275 |
| }, |
| { |
| "epoch": 48.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8605e-05, |
| "loss": 1.7714, |
| "mean_token_accuracy": 0.6414877325296402, |
| "num_tokens": 22902483.0, |
| "step": 6280 |
| }, |
| { |
| "epoch": 48.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.858e-05, |
| "loss": 1.7391, |
| "mean_token_accuracy": 0.6488156080245971, |
| "num_tokens": 22921114.0, |
| "step": 6285 |
| }, |
| { |
| "epoch": 48.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8555e-05, |
| "loss": 1.7538, |
| "mean_token_accuracy": 0.6502360552549362, |
| "num_tokens": 22938888.0, |
| "step": 6290 |
| }, |
| { |
| "epoch": 48.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.853e-05, |
| "loss": 1.7299, |
| "mean_token_accuracy": 0.649975848197937, |
| "num_tokens": 22957254.0, |
| "step": 6295 |
| }, |
| { |
| "epoch": 48.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8505e-05, |
| "loss": 1.7488, |
| "mean_token_accuracy": 0.6526569038629532, |
| "num_tokens": 22975073.0, |
| "step": 6300 |
| }, |
| { |
| "epoch": 48.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.848e-05, |
| "loss": 1.7108, |
| "mean_token_accuracy": 0.6544829219579696, |
| "num_tokens": 22993939.0, |
| "step": 6305 |
| }, |
| { |
| "epoch": 48.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8455e-05, |
| "loss": 1.7625, |
| "mean_token_accuracy": 0.643173098564148, |
| "num_tokens": 23012560.0, |
| "step": 6310 |
| }, |
| { |
| "epoch": 48.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.843e-05, |
| "loss": 1.7734, |
| "mean_token_accuracy": 0.6458878636360168, |
| "num_tokens": 23030129.0, |
| "step": 6315 |
| }, |
| { |
| "epoch": 48.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8405e-05, |
| "loss": 1.7598, |
| "mean_token_accuracy": 0.6463797986507416, |
| "num_tokens": 23047988.0, |
| "step": 6320 |
| }, |
| { |
| "epoch": 48.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.838e-05, |
| "loss": 1.7542, |
| "mean_token_accuracy": 0.6464673429727554, |
| "num_tokens": 23065816.0, |
| "step": 6325 |
| }, |
| { |
| "epoch": 48.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8355e-05, |
| "loss": 1.742, |
| "mean_token_accuracy": 0.6510752677917481, |
| "num_tokens": 23083562.0, |
| "step": 6330 |
| }, |
| { |
| "epoch": 48.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.833e-05, |
| "loss": 1.7406, |
| "mean_token_accuracy": 0.6523085534572601, |
| "num_tokens": 23101646.0, |
| "step": 6335 |
| }, |
| { |
| "epoch": 48.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8305e-05, |
| "loss": 1.7408, |
| "mean_token_accuracy": 0.6524953216314315, |
| "num_tokens": 23120020.0, |
| "step": 6340 |
| }, |
| { |
| "epoch": 48.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.828e-05, |
| "loss": 1.6759, |
| "mean_token_accuracy": 0.6643141090869904, |
| "num_tokens": 23139519.0, |
| "step": 6345 |
| }, |
| { |
| "epoch": 48.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8255e-05, |
| "loss": 1.8242, |
| "mean_token_accuracy": 0.6340841352939606, |
| "num_tokens": 23156832.0, |
| "step": 6350 |
| }, |
| { |
| "epoch": 48.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.823e-05, |
| "loss": 1.6869, |
| "mean_token_accuracy": 0.6609957993030549, |
| "num_tokens": 23175409.0, |
| "step": 6355 |
| }, |
| { |
| "epoch": 48.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8205e-05, |
| "loss": 1.7518, |
| "mean_token_accuracy": 0.6469810694456101, |
| "num_tokens": 23193876.0, |
| "step": 6360 |
| }, |
| { |
| "epoch": 48.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.818e-05, |
| "loss": 1.6923, |
| "mean_token_accuracy": 0.6596918433904648, |
| "num_tokens": 23212532.0, |
| "step": 6365 |
| }, |
| { |
| "epoch": 49.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8154999999999998e-05, |
| "loss": 1.7207, |
| "mean_token_accuracy": 0.6570287615060806, |
| "num_tokens": 23231145.0, |
| "step": 6370 |
| }, |
| { |
| "epoch": 49.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.813e-05, |
| "loss": 1.7494, |
| "mean_token_accuracy": 0.6495041728019715, |
| "num_tokens": 23249429.0, |
| "step": 6375 |
| }, |
| { |
| "epoch": 49.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8105e-05, |
| "loss": 1.6628, |
| "mean_token_accuracy": 0.6711469799280166, |
| "num_tokens": 23268600.0, |
| "step": 6380 |
| }, |
| { |
| "epoch": 49.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.808e-05, |
| "loss": 1.7377, |
| "mean_token_accuracy": 0.6483930766582489, |
| "num_tokens": 23286588.0, |
| "step": 6385 |
| }, |
| { |
| "epoch": 49.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8055e-05, |
| "loss": 1.7444, |
| "mean_token_accuracy": 0.6448217332363129, |
| "num_tokens": 23304489.0, |
| "step": 6390 |
| }, |
| { |
| "epoch": 49.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.803e-05, |
| "loss": 1.7013, |
| "mean_token_accuracy": 0.6568325966596603, |
| "num_tokens": 23323802.0, |
| "step": 6395 |
| }, |
| { |
| "epoch": 49.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8005e-05, |
| "loss": 1.6943, |
| "mean_token_accuracy": 0.6581353217363357, |
| "num_tokens": 23342349.0, |
| "step": 6400 |
| }, |
| { |
| "epoch": 49.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.798e-05, |
| "loss": 1.739, |
| "mean_token_accuracy": 0.6524305224418641, |
| "num_tokens": 23360368.0, |
| "step": 6405 |
| }, |
| { |
| "epoch": 49.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7955e-05, |
| "loss": 1.7507, |
| "mean_token_accuracy": 0.6451392889022827, |
| "num_tokens": 23378793.0, |
| "step": 6410 |
| }, |
| { |
| "epoch": 49.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.793e-05, |
| "loss": 1.7621, |
| "mean_token_accuracy": 0.6497738718986511, |
| "num_tokens": 23396781.0, |
| "step": 6415 |
| }, |
| { |
| "epoch": 49.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7905e-05, |
| "loss": 1.7673, |
| "mean_token_accuracy": 0.6516094326972961, |
| "num_tokens": 23413811.0, |
| "step": 6420 |
| }, |
| { |
| "epoch": 49.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7879999999999998e-05, |
| "loss": 1.7673, |
| "mean_token_accuracy": 0.6500208914279938, |
| "num_tokens": 23431562.0, |
| "step": 6425 |
| }, |
| { |
| "epoch": 49.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7855e-05, |
| "loss": 1.7738, |
| "mean_token_accuracy": 0.6465479761362076, |
| "num_tokens": 23449644.0, |
| "step": 6430 |
| }, |
| { |
| "epoch": 49.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.783e-05, |
| "loss": 1.7867, |
| "mean_token_accuracy": 0.6393932700157166, |
| "num_tokens": 23466835.0, |
| "step": 6435 |
| }, |
| { |
| "epoch": 49.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7805000000000003e-05, |
| "loss": 1.6852, |
| "mean_token_accuracy": 0.6625799834728241, |
| "num_tokens": 23485932.0, |
| "step": 6440 |
| }, |
| { |
| "epoch": 49.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7780000000000003e-05, |
| "loss": 1.7437, |
| "mean_token_accuracy": 0.6509669572114944, |
| "num_tokens": 23504065.0, |
| "step": 6445 |
| }, |
| { |
| "epoch": 49.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7755000000000002e-05, |
| "loss": 1.705, |
| "mean_token_accuracy": 0.6565153568983078, |
| "num_tokens": 23522965.0, |
| "step": 6450 |
| }, |
| { |
| "epoch": 49.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.773e-05, |
| "loss": 1.7125, |
| "mean_token_accuracy": 0.6582383394241333, |
| "num_tokens": 23541321.0, |
| "step": 6455 |
| }, |
| { |
| "epoch": 49.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7705e-05, |
| "loss": 1.7213, |
| "mean_token_accuracy": 0.6557675808668136, |
| "num_tokens": 23559830.0, |
| "step": 6460 |
| }, |
| { |
| "epoch": 49.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7680000000000004e-05, |
| "loss": 1.7611, |
| "mean_token_accuracy": 0.6487846702337265, |
| "num_tokens": 23578070.0, |
| "step": 6465 |
| }, |
| { |
| "epoch": 49.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7655000000000003e-05, |
| "loss": 1.7967, |
| "mean_token_accuracy": 0.639831280708313, |
| "num_tokens": 23596069.0, |
| "step": 6470 |
| }, |
| { |
| "epoch": 49.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7630000000000002e-05, |
| "loss": 1.8093, |
| "mean_token_accuracy": 0.6422061920166016, |
| "num_tokens": 23612951.0, |
| "step": 6475 |
| }, |
| { |
| "epoch": 49.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7605000000000002e-05, |
| "loss": 1.7392, |
| "mean_token_accuracy": 0.6497550249099732, |
| "num_tokens": 23632333.0, |
| "step": 6480 |
| }, |
| { |
| "epoch": 49.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.758e-05, |
| "loss": 1.6955, |
| "mean_token_accuracy": 0.6517337173223495, |
| "num_tokens": 23651103.0, |
| "step": 6485 |
| }, |
| { |
| "epoch": 49.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7555e-05, |
| "loss": 1.7078, |
| "mean_token_accuracy": 0.6560462713241577, |
| "num_tokens": 23669731.0, |
| "step": 6490 |
| }, |
| { |
| "epoch": 49.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7530000000000003e-05, |
| "loss": 1.817, |
| "mean_token_accuracy": 0.6334926426410675, |
| "num_tokens": 23687316.0, |
| "step": 6495 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7505000000000003e-05, |
| "loss": 1.7466, |
| "mean_token_accuracy": 0.6494122266769409, |
| "num_tokens": 23705250.0, |
| "step": 6500 |
| }, |
| { |
| "epoch": 50.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7480000000000002e-05, |
| "loss": 1.7627, |
| "mean_token_accuracy": 0.6493338525295258, |
| "num_tokens": 23722587.0, |
| "step": 6505 |
| }, |
| { |
| "epoch": 50.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7455e-05, |
| "loss": 1.7611, |
| "mean_token_accuracy": 0.6497463405132293, |
| "num_tokens": 23740287.0, |
| "step": 6510 |
| }, |
| { |
| "epoch": 50.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.743e-05, |
| "loss": 1.7443, |
| "mean_token_accuracy": 0.6468771427869797, |
| "num_tokens": 23758657.0, |
| "step": 6515 |
| }, |
| { |
| "epoch": 50.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7405e-05, |
| "loss": 1.7767, |
| "mean_token_accuracy": 0.6426906883716583, |
| "num_tokens": 23776842.0, |
| "step": 6520 |
| }, |
| { |
| "epoch": 50.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7380000000000003e-05, |
| "loss": 1.7058, |
| "mean_token_accuracy": 0.6551610261201859, |
| "num_tokens": 23795678.0, |
| "step": 6525 |
| }, |
| { |
| "epoch": 50.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7355000000000002e-05, |
| "loss": 1.713, |
| "mean_token_accuracy": 0.6565469861030578, |
| "num_tokens": 23814122.0, |
| "step": 6530 |
| }, |
| { |
| "epoch": 50.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7330000000000002e-05, |
| "loss": 1.6964, |
| "mean_token_accuracy": 0.6581017464399338, |
| "num_tokens": 23832733.0, |
| "step": 6535 |
| }, |
| { |
| "epoch": 50.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7305e-05, |
| "loss": 1.7557, |
| "mean_token_accuracy": 0.6457631707191467, |
| "num_tokens": 23851205.0, |
| "step": 6540 |
| }, |
| { |
| "epoch": 50.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.728e-05, |
| "loss": 1.7517, |
| "mean_token_accuracy": 0.6463135719299317, |
| "num_tokens": 23869701.0, |
| "step": 6545 |
| }, |
| { |
| "epoch": 50.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7255000000000003e-05, |
| "loss": 1.7099, |
| "mean_token_accuracy": 0.6558305144309997, |
| "num_tokens": 23887966.0, |
| "step": 6550 |
| }, |
| { |
| "epoch": 50.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7230000000000003e-05, |
| "loss": 1.703, |
| "mean_token_accuracy": 0.6580185920000077, |
| "num_tokens": 23906795.0, |
| "step": 6555 |
| }, |
| { |
| "epoch": 50.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7205000000000002e-05, |
| "loss": 1.7364, |
| "mean_token_accuracy": 0.6480530560016632, |
| "num_tokens": 23925538.0, |
| "step": 6560 |
| }, |
| { |
| "epoch": 50.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.718e-05, |
| "loss": 1.7476, |
| "mean_token_accuracy": 0.6457035690546036, |
| "num_tokens": 23943937.0, |
| "step": 6565 |
| }, |
| { |
| "epoch": 50.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7155e-05, |
| "loss": 1.829, |
| "mean_token_accuracy": 0.6348341315984726, |
| "num_tokens": 23961264.0, |
| "step": 6570 |
| }, |
| { |
| "epoch": 50.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.713e-05, |
| "loss": 1.6595, |
| "mean_token_accuracy": 0.6654309093952179, |
| "num_tokens": 23980824.0, |
| "step": 6575 |
| }, |
| { |
| "epoch": 50.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7105000000000003e-05, |
| "loss": 1.7533, |
| "mean_token_accuracy": 0.6445026308298111, |
| "num_tokens": 23998733.0, |
| "step": 6580 |
| }, |
| { |
| "epoch": 50.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7080000000000002e-05, |
| "loss": 1.7202, |
| "mean_token_accuracy": 0.6478159755468369, |
| "num_tokens": 24017155.0, |
| "step": 6585 |
| }, |
| { |
| "epoch": 50.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7055000000000002e-05, |
| "loss": 1.7928, |
| "mean_token_accuracy": 0.6426284611225128, |
| "num_tokens": 24034300.0, |
| "step": 6590 |
| }, |
| { |
| "epoch": 50.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.703e-05, |
| "loss": 1.7132, |
| "mean_token_accuracy": 0.6564747720956803, |
| "num_tokens": 24052640.0, |
| "step": 6595 |
| }, |
| { |
| "epoch": 50.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7005e-05, |
| "loss": 1.7754, |
| "mean_token_accuracy": 0.6513616561889648, |
| "num_tokens": 24070441.0, |
| "step": 6600 |
| }, |
| { |
| "epoch": 50.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.698e-05, |
| "loss": 1.7644, |
| "mean_token_accuracy": 0.644871911406517, |
| "num_tokens": 24088585.0, |
| "step": 6605 |
| }, |
| { |
| "epoch": 50.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6955000000000003e-05, |
| "loss": 1.7347, |
| "mean_token_accuracy": 0.6584911167621612, |
| "num_tokens": 24106472.0, |
| "step": 6610 |
| }, |
| { |
| "epoch": 50.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6930000000000002e-05, |
| "loss": 1.7469, |
| "mean_token_accuracy": 0.6565795034170151, |
| "num_tokens": 24124773.0, |
| "step": 6615 |
| }, |
| { |
| "epoch": 50.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6905e-05, |
| "loss": 1.7342, |
| "mean_token_accuracy": 0.6567032128572464, |
| "num_tokens": 24142871.0, |
| "step": 6620 |
| }, |
| { |
| "epoch": 50.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.688e-05, |
| "loss": 1.6891, |
| "mean_token_accuracy": 0.6616051197052002, |
| "num_tokens": 24161666.0, |
| "step": 6625 |
| }, |
| { |
| "epoch": 51.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6855e-05, |
| "loss": 1.7768, |
| "mean_token_accuracy": 0.6428048938512803, |
| "num_tokens": 24179355.0, |
| "step": 6630 |
| }, |
| { |
| "epoch": 51.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.683e-05, |
| "loss": 1.6311, |
| "mean_token_accuracy": 0.6698222011327744, |
| "num_tokens": 24198872.0, |
| "step": 6635 |
| }, |
| { |
| "epoch": 51.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6805000000000003e-05, |
| "loss": 1.7177, |
| "mean_token_accuracy": 0.6608267247676849, |
| "num_tokens": 24216830.0, |
| "step": 6640 |
| }, |
| { |
| "epoch": 51.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6780000000000002e-05, |
| "loss": 1.7234, |
| "mean_token_accuracy": 0.6543210685253144, |
| "num_tokens": 24235082.0, |
| "step": 6645 |
| }, |
| { |
| "epoch": 51.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6755e-05, |
| "loss": 1.7839, |
| "mean_token_accuracy": 0.64230475127697, |
| "num_tokens": 24252383.0, |
| "step": 6650 |
| }, |
| { |
| "epoch": 51.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.673e-05, |
| "loss": 1.7651, |
| "mean_token_accuracy": 0.6459890872240066, |
| "num_tokens": 24270356.0, |
| "step": 6655 |
| }, |
| { |
| "epoch": 51.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6705e-05, |
| "loss": 1.7523, |
| "mean_token_accuracy": 0.6558059513568878, |
| "num_tokens": 24288236.0, |
| "step": 6660 |
| }, |
| { |
| "epoch": 51.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.668e-05, |
| "loss": 1.7298, |
| "mean_token_accuracy": 0.6469644725322723, |
| "num_tokens": 24306844.0, |
| "step": 6665 |
| }, |
| { |
| "epoch": 51.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6655000000000002e-05, |
| "loss": 1.7604, |
| "mean_token_accuracy": 0.6506382346153259, |
| "num_tokens": 24325111.0, |
| "step": 6670 |
| }, |
| { |
| "epoch": 51.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6630000000000002e-05, |
| "loss": 1.7607, |
| "mean_token_accuracy": 0.6459864258766175, |
| "num_tokens": 24343133.0, |
| "step": 6675 |
| }, |
| { |
| "epoch": 51.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6605e-05, |
| "loss": 1.7669, |
| "mean_token_accuracy": 0.6461174368858338, |
| "num_tokens": 24361431.0, |
| "step": 6680 |
| }, |
| { |
| "epoch": 51.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.658e-05, |
| "loss": 1.6647, |
| "mean_token_accuracy": 0.6634925454854965, |
| "num_tokens": 24380890.0, |
| "step": 6685 |
| }, |
| { |
| "epoch": 51.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6555e-05, |
| "loss": 1.7547, |
| "mean_token_accuracy": 0.650789025425911, |
| "num_tokens": 24398752.0, |
| "step": 6690 |
| }, |
| { |
| "epoch": 51.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6530000000000003e-05, |
| "loss": 1.8203, |
| "mean_token_accuracy": 0.6345427900552749, |
| "num_tokens": 24416377.0, |
| "step": 6695 |
| }, |
| { |
| "epoch": 51.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6505000000000002e-05, |
| "loss": 1.7334, |
| "mean_token_accuracy": 0.6506275236606598, |
| "num_tokens": 24435235.0, |
| "step": 6700 |
| }, |
| { |
| "epoch": 51.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.648e-05, |
| "loss": 1.7543, |
| "mean_token_accuracy": 0.6435793161392211, |
| "num_tokens": 24453138.0, |
| "step": 6705 |
| }, |
| { |
| "epoch": 51.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6455e-05, |
| "loss": 1.7537, |
| "mean_token_accuracy": 0.644529914855957, |
| "num_tokens": 24471067.0, |
| "step": 6710 |
| }, |
| { |
| "epoch": 51.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.643e-05, |
| "loss": 1.6671, |
| "mean_token_accuracy": 0.6617798268795013, |
| "num_tokens": 24489441.0, |
| "step": 6715 |
| }, |
| { |
| "epoch": 51.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6405e-05, |
| "loss": 1.7789, |
| "mean_token_accuracy": 0.6475713908672333, |
| "num_tokens": 24507517.0, |
| "step": 6720 |
| }, |
| { |
| "epoch": 51.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6380000000000002e-05, |
| "loss": 1.7109, |
| "mean_token_accuracy": 0.6559187889099121, |
| "num_tokens": 24525635.0, |
| "step": 6725 |
| }, |
| { |
| "epoch": 51.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6355000000000002e-05, |
| "loss": 1.6891, |
| "mean_token_accuracy": 0.6629273265600204, |
| "num_tokens": 24544388.0, |
| "step": 6730 |
| }, |
| { |
| "epoch": 51.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.633e-05, |
| "loss": 1.7177, |
| "mean_token_accuracy": 0.653970816731453, |
| "num_tokens": 24563413.0, |
| "step": 6735 |
| }, |
| { |
| "epoch": 51.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6305e-05, |
| "loss": 1.7994, |
| "mean_token_accuracy": 0.6389773488044739, |
| "num_tokens": 24581168.0, |
| "step": 6740 |
| }, |
| { |
| "epoch": 51.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.628e-05, |
| "loss": 1.7425, |
| "mean_token_accuracy": 0.6551438093185424, |
| "num_tokens": 24599489.0, |
| "step": 6745 |
| }, |
| { |
| "epoch": 51.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6255e-05, |
| "loss": 1.735, |
| "mean_token_accuracy": 0.6468292713165283, |
| "num_tokens": 24617539.0, |
| "step": 6750 |
| }, |
| { |
| "epoch": 51.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6230000000000002e-05, |
| "loss": 1.7414, |
| "mean_token_accuracy": 0.653847748041153, |
| "num_tokens": 24635357.0, |
| "step": 6755 |
| }, |
| { |
| "epoch": 52.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6205e-05, |
| "loss": 1.7919, |
| "mean_token_accuracy": 0.6327983260154724, |
| "num_tokens": 24653460.0, |
| "step": 6760 |
| }, |
| { |
| "epoch": 52.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.618e-05, |
| "loss": 1.6387, |
| "mean_token_accuracy": 0.6734457373619079, |
| "num_tokens": 24672969.0, |
| "step": 6765 |
| }, |
| { |
| "epoch": 52.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6155e-05, |
| "loss": 1.7751, |
| "mean_token_accuracy": 0.6482141762971878, |
| "num_tokens": 24690768.0, |
| "step": 6770 |
| }, |
| { |
| "epoch": 52.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.613e-05, |
| "loss": 1.6709, |
| "mean_token_accuracy": 0.6655151665210723, |
| "num_tokens": 24709986.0, |
| "step": 6775 |
| }, |
| { |
| "epoch": 52.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6105e-05, |
| "loss": 1.8157, |
| "mean_token_accuracy": 0.6377092868089675, |
| "num_tokens": 24726980.0, |
| "step": 6780 |
| }, |
| { |
| "epoch": 52.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6080000000000002e-05, |
| "loss": 1.758, |
| "mean_token_accuracy": 0.6477128326892853, |
| "num_tokens": 24744869.0, |
| "step": 6785 |
| }, |
| { |
| "epoch": 52.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6055e-05, |
| "loss": 1.74, |
| "mean_token_accuracy": 0.6518424063920975, |
| "num_tokens": 24763369.0, |
| "step": 6790 |
| }, |
| { |
| "epoch": 52.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.603e-05, |
| "loss": 1.6843, |
| "mean_token_accuracy": 0.662993323802948, |
| "num_tokens": 24782101.0, |
| "step": 6795 |
| }, |
| { |
| "epoch": 52.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6005e-05, |
| "loss": 1.718, |
| "mean_token_accuracy": 0.6558027982711792, |
| "num_tokens": 24800571.0, |
| "step": 6800 |
| }, |
| { |
| "epoch": 52.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.598e-05, |
| "loss": 1.7319, |
| "mean_token_accuracy": 0.6475233376026154, |
| "num_tokens": 24818676.0, |
| "step": 6805 |
| }, |
| { |
| "epoch": 52.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5955e-05, |
| "loss": 1.6884, |
| "mean_token_accuracy": 0.6593423992395401, |
| "num_tokens": 24837669.0, |
| "step": 6810 |
| }, |
| { |
| "epoch": 52.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.593e-05, |
| "loss": 1.7498, |
| "mean_token_accuracy": 0.6527423232793808, |
| "num_tokens": 24855694.0, |
| "step": 6815 |
| }, |
| { |
| "epoch": 52.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5905e-05, |
| "loss": 1.7311, |
| "mean_token_accuracy": 0.6554641664028168, |
| "num_tokens": 24874368.0, |
| "step": 6820 |
| }, |
| { |
| "epoch": 52.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.588e-05, |
| "loss": 1.7152, |
| "mean_token_accuracy": 0.6568386435508728, |
| "num_tokens": 24893164.0, |
| "step": 6825 |
| }, |
| { |
| "epoch": 52.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5855e-05, |
| "loss": 1.7097, |
| "mean_token_accuracy": 0.6543055295944213, |
| "num_tokens": 24911502.0, |
| "step": 6830 |
| }, |
| { |
| "epoch": 52.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.583e-05, |
| "loss": 1.8051, |
| "mean_token_accuracy": 0.6321907073259354, |
| "num_tokens": 24929552.0, |
| "step": 6835 |
| }, |
| { |
| "epoch": 52.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5805000000000002e-05, |
| "loss": 1.7726, |
| "mean_token_accuracy": 0.6486641079187393, |
| "num_tokens": 24947565.0, |
| "step": 6840 |
| }, |
| { |
| "epoch": 52.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.578e-05, |
| "loss": 1.7256, |
| "mean_token_accuracy": 0.6513917237520218, |
| "num_tokens": 24966020.0, |
| "step": 6845 |
| }, |
| { |
| "epoch": 52.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5755e-05, |
| "loss": 1.7481, |
| "mean_token_accuracy": 0.6502928107976913, |
| "num_tokens": 24983952.0, |
| "step": 6850 |
| }, |
| { |
| "epoch": 52.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.573e-05, |
| "loss": 1.6641, |
| "mean_token_accuracy": 0.6641529649496078, |
| "num_tokens": 25002526.0, |
| "step": 6855 |
| }, |
| { |
| "epoch": 52.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5705e-05, |
| "loss": 1.7879, |
| "mean_token_accuracy": 0.6445726931095124, |
| "num_tokens": 25020198.0, |
| "step": 6860 |
| }, |
| { |
| "epoch": 52.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.568e-05, |
| "loss": 1.831, |
| "mean_token_accuracy": 0.6385212212800979, |
| "num_tokens": 25037190.0, |
| "step": 6865 |
| }, |
| { |
| "epoch": 52.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5655000000000002e-05, |
| "loss": 1.7463, |
| "mean_token_accuracy": 0.6501509785652161, |
| "num_tokens": 25055025.0, |
| "step": 6870 |
| }, |
| { |
| "epoch": 52.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.563e-05, |
| "loss": 1.739, |
| "mean_token_accuracy": 0.6465972781181335, |
| "num_tokens": 25073502.0, |
| "step": 6875 |
| }, |
| { |
| "epoch": 52.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5605e-05, |
| "loss": 1.7514, |
| "mean_token_accuracy": 0.6448877215385437, |
| "num_tokens": 25092178.0, |
| "step": 6880 |
| }, |
| { |
| "epoch": 52.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.558e-05, |
| "loss": 1.7896, |
| "mean_token_accuracy": 0.6419251203536988, |
| "num_tokens": 25109978.0, |
| "step": 6885 |
| }, |
| { |
| "epoch": 53.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5555e-05, |
| "loss": 1.8081, |
| "mean_token_accuracy": 0.6363903671503067, |
| "num_tokens": 25127565.0, |
| "step": 6890 |
| }, |
| { |
| "epoch": 53.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.553e-05, |
| "loss": 1.7896, |
| "mean_token_accuracy": 0.6396276503801346, |
| "num_tokens": 25145088.0, |
| "step": 6895 |
| }, |
| { |
| "epoch": 53.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5505e-05, |
| "loss": 1.717, |
| "mean_token_accuracy": 0.6515821665525436, |
| "num_tokens": 25163828.0, |
| "step": 6900 |
| }, |
| { |
| "epoch": 53.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.548e-05, |
| "loss": 1.7229, |
| "mean_token_accuracy": 0.6592198878526687, |
| "num_tokens": 25181797.0, |
| "step": 6905 |
| }, |
| { |
| "epoch": 53.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5455e-05, |
| "loss": 1.7218, |
| "mean_token_accuracy": 0.6529275476932526, |
| "num_tokens": 25200618.0, |
| "step": 6910 |
| }, |
| { |
| "epoch": 53.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.543e-05, |
| "loss": 1.8069, |
| "mean_token_accuracy": 0.6347218960523605, |
| "num_tokens": 25218034.0, |
| "step": 6915 |
| }, |
| { |
| "epoch": 53.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5405e-05, |
| "loss": 1.7021, |
| "mean_token_accuracy": 0.6613812148571014, |
| "num_tokens": 25236653.0, |
| "step": 6920 |
| }, |
| { |
| "epoch": 53.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.538e-05, |
| "loss": 1.6697, |
| "mean_token_accuracy": 0.6670442432165146, |
| "num_tokens": 25255752.0, |
| "step": 6925 |
| }, |
| { |
| "epoch": 53.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5355e-05, |
| "loss": 1.7525, |
| "mean_token_accuracy": 0.6471881836652755, |
| "num_tokens": 25273909.0, |
| "step": 6930 |
| }, |
| { |
| "epoch": 53.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.533e-05, |
| "loss": 1.7417, |
| "mean_token_accuracy": 0.6538171172142029, |
| "num_tokens": 25291903.0, |
| "step": 6935 |
| }, |
| { |
| "epoch": 53.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5305e-05, |
| "loss": 1.7168, |
| "mean_token_accuracy": 0.654968672990799, |
| "num_tokens": 25310540.0, |
| "step": 6940 |
| }, |
| { |
| "epoch": 53.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.528e-05, |
| "loss": 1.726, |
| "mean_token_accuracy": 0.652405110001564, |
| "num_tokens": 25328964.0, |
| "step": 6945 |
| }, |
| { |
| "epoch": 53.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5255e-05, |
| "loss": 1.7646, |
| "mean_token_accuracy": 0.6477054178714752, |
| "num_tokens": 25346432.0, |
| "step": 6950 |
| }, |
| { |
| "epoch": 53.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.523e-05, |
| "loss": 1.7439, |
| "mean_token_accuracy": 0.6488854259252548, |
| "num_tokens": 25364382.0, |
| "step": 6955 |
| }, |
| { |
| "epoch": 53.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5205e-05, |
| "loss": 1.7786, |
| "mean_token_accuracy": 0.6465272456407547, |
| "num_tokens": 25382179.0, |
| "step": 6960 |
| }, |
| { |
| "epoch": 53.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.518e-05, |
| "loss": 1.7934, |
| "mean_token_accuracy": 0.6389431089162827, |
| "num_tokens": 25400797.0, |
| "step": 6965 |
| }, |
| { |
| "epoch": 53.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5155e-05, |
| "loss": 1.71, |
| "mean_token_accuracy": 0.6540043205022812, |
| "num_tokens": 25418880.0, |
| "step": 6970 |
| }, |
| { |
| "epoch": 53.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5129999999999999e-05, |
| "loss": 1.6958, |
| "mean_token_accuracy": 0.6611298769712448, |
| "num_tokens": 25437618.0, |
| "step": 6975 |
| }, |
| { |
| "epoch": 53.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5105e-05, |
| "loss": 1.7339, |
| "mean_token_accuracy": 0.6556006103754044, |
| "num_tokens": 25455960.0, |
| "step": 6980 |
| }, |
| { |
| "epoch": 53.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.508e-05, |
| "loss": 1.7452, |
| "mean_token_accuracy": 0.6496385037899017, |
| "num_tokens": 25474285.0, |
| "step": 6985 |
| }, |
| { |
| "epoch": 53.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5054999999999999e-05, |
| "loss": 1.7495, |
| "mean_token_accuracy": 0.6455465704202652, |
| "num_tokens": 25492111.0, |
| "step": 6990 |
| }, |
| { |
| "epoch": 53.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.503e-05, |
| "loss": 1.7091, |
| "mean_token_accuracy": 0.6587622851133347, |
| "num_tokens": 25510592.0, |
| "step": 6995 |
| }, |
| { |
| "epoch": 53.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5005e-05, |
| "loss": 1.7738, |
| "mean_token_accuracy": 0.646448740363121, |
| "num_tokens": 25528517.0, |
| "step": 7000 |
| }, |
| { |
| "epoch": 53.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4979999999999999e-05, |
| "loss": 1.8031, |
| "mean_token_accuracy": 0.6375834941864014, |
| "num_tokens": 25546409.0, |
| "step": 7005 |
| }, |
| { |
| "epoch": 53.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4955e-05, |
| "loss": 1.7659, |
| "mean_token_accuracy": 0.6465475499629975, |
| "num_tokens": 25564683.0, |
| "step": 7010 |
| }, |
| { |
| "epoch": 53.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.493e-05, |
| "loss": 1.7459, |
| "mean_token_accuracy": 0.6489285141229629, |
| "num_tokens": 25582903.0, |
| "step": 7015 |
| }, |
| { |
| "epoch": 54.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4904999999999999e-05, |
| "loss": 1.6906, |
| "mean_token_accuracy": 0.6570352196693421, |
| "num_tokens": 25601670.0, |
| "step": 7020 |
| }, |
| { |
| "epoch": 54.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.488e-05, |
| "loss": 1.7228, |
| "mean_token_accuracy": 0.6555858552455902, |
| "num_tokens": 25620461.0, |
| "step": 7025 |
| }, |
| { |
| "epoch": 54.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4855e-05, |
| "loss": 1.817, |
| "mean_token_accuracy": 0.6386265754699707, |
| "num_tokens": 25637942.0, |
| "step": 7030 |
| }, |
| { |
| "epoch": 54.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4829999999999999e-05, |
| "loss": 1.6514, |
| "mean_token_accuracy": 0.6691981256008148, |
| "num_tokens": 25656864.0, |
| "step": 7035 |
| }, |
| { |
| "epoch": 54.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4805e-05, |
| "loss": 1.7832, |
| "mean_token_accuracy": 0.6411665648221969, |
| "num_tokens": 25675005.0, |
| "step": 7040 |
| }, |
| { |
| "epoch": 54.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4779999999999999e-05, |
| "loss": 1.6956, |
| "mean_token_accuracy": 0.6606644123792649, |
| "num_tokens": 25694037.0, |
| "step": 7045 |
| }, |
| { |
| "epoch": 54.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4755e-05, |
| "loss": 1.7537, |
| "mean_token_accuracy": 0.6480139315128326, |
| "num_tokens": 25712064.0, |
| "step": 7050 |
| }, |
| { |
| "epoch": 54.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.473e-05, |
| "loss": 1.701, |
| "mean_token_accuracy": 0.6573342353105545, |
| "num_tokens": 25731391.0, |
| "step": 7055 |
| }, |
| { |
| "epoch": 54.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4704999999999999e-05, |
| "loss": 1.7046, |
| "mean_token_accuracy": 0.6552079916000366, |
| "num_tokens": 25750436.0, |
| "step": 7060 |
| }, |
| { |
| "epoch": 54.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4680000000000002e-05, |
| "loss": 1.7022, |
| "mean_token_accuracy": 0.6563819646835327, |
| "num_tokens": 25768998.0, |
| "step": 7065 |
| }, |
| { |
| "epoch": 54.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4655000000000003e-05, |
| "loss": 1.7062, |
| "mean_token_accuracy": 0.6605098664760589, |
| "num_tokens": 25787124.0, |
| "step": 7070 |
| }, |
| { |
| "epoch": 54.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4630000000000002e-05, |
| "loss": 1.7444, |
| "mean_token_accuracy": 0.6469592988491059, |
| "num_tokens": 25805583.0, |
| "step": 7075 |
| }, |
| { |
| "epoch": 54.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4605000000000002e-05, |
| "loss": 1.7117, |
| "mean_token_accuracy": 0.6542593479156494, |
| "num_tokens": 25824061.0, |
| "step": 7080 |
| }, |
| { |
| "epoch": 54.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4580000000000003e-05, |
| "loss": 1.7751, |
| "mean_token_accuracy": 0.6425804078578949, |
| "num_tokens": 25842073.0, |
| "step": 7085 |
| }, |
| { |
| "epoch": 54.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4555000000000002e-05, |
| "loss": 1.7454, |
| "mean_token_accuracy": 0.6489378064870834, |
| "num_tokens": 25860639.0, |
| "step": 7090 |
| }, |
| { |
| "epoch": 54.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4530000000000001e-05, |
| "loss": 1.721, |
| "mean_token_accuracy": 0.6531164467334747, |
| "num_tokens": 25878985.0, |
| "step": 7095 |
| }, |
| { |
| "epoch": 54.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4505000000000003e-05, |
| "loss": 1.7618, |
| "mean_token_accuracy": 0.6497918337583541, |
| "num_tokens": 25896813.0, |
| "step": 7100 |
| }, |
| { |
| "epoch": 54.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4480000000000002e-05, |
| "loss": 1.85, |
| "mean_token_accuracy": 0.6287777543067932, |
| "num_tokens": 25913707.0, |
| "step": 7105 |
| }, |
| { |
| "epoch": 54.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4455000000000001e-05, |
| "loss": 1.739, |
| "mean_token_accuracy": 0.6525210946798324, |
| "num_tokens": 25931516.0, |
| "step": 7110 |
| }, |
| { |
| "epoch": 54.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4430000000000002e-05, |
| "loss": 1.7648, |
| "mean_token_accuracy": 0.6474990725517273, |
| "num_tokens": 25949015.0, |
| "step": 7115 |
| }, |
| { |
| "epoch": 54.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4405000000000002e-05, |
| "loss": 1.7407, |
| "mean_token_accuracy": 0.6535341709852218, |
| "num_tokens": 25967346.0, |
| "step": 7120 |
| }, |
| { |
| "epoch": 54.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4380000000000001e-05, |
| "loss": 1.7919, |
| "mean_token_accuracy": 0.6459776431322097, |
| "num_tokens": 25984246.0, |
| "step": 7125 |
| }, |
| { |
| "epoch": 54.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4355000000000002e-05, |
| "loss": 1.6747, |
| "mean_token_accuracy": 0.6653303891420365, |
| "num_tokens": 26003170.0, |
| "step": 7130 |
| }, |
| { |
| "epoch": 54.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4330000000000002e-05, |
| "loss": 1.7999, |
| "mean_token_accuracy": 0.6397199392318725, |
| "num_tokens": 26020817.0, |
| "step": 7135 |
| }, |
| { |
| "epoch": 54.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4305000000000001e-05, |
| "loss": 1.7184, |
| "mean_token_accuracy": 0.6556466907262802, |
| "num_tokens": 26039316.0, |
| "step": 7140 |
| }, |
| { |
| "epoch": 54.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4280000000000002e-05, |
| "loss": 1.7178, |
| "mean_token_accuracy": 0.6527464896440506, |
| "num_tokens": 26058030.0, |
| "step": 7145 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4255000000000002e-05, |
| "loss": 1.7908, |
| "mean_token_accuracy": 0.6422353565692902, |
| "num_tokens": 26075775.0, |
| "step": 7150 |
| }, |
| { |
| "epoch": 55.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4230000000000001e-05, |
| "loss": 1.7951, |
| "mean_token_accuracy": 0.6395128637552261, |
| "num_tokens": 26093593.0, |
| "step": 7155 |
| }, |
| { |
| "epoch": 55.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4205000000000002e-05, |
| "loss": 1.7587, |
| "mean_token_accuracy": 0.6477168142795563, |
| "num_tokens": 26111565.0, |
| "step": 7160 |
| }, |
| { |
| "epoch": 55.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4180000000000001e-05, |
| "loss": 1.6735, |
| "mean_token_accuracy": 0.660878399014473, |
| "num_tokens": 26130288.0, |
| "step": 7165 |
| }, |
| { |
| "epoch": 55.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4155000000000001e-05, |
| "loss": 1.7429, |
| "mean_token_accuracy": 0.644039872288704, |
| "num_tokens": 26148751.0, |
| "step": 7170 |
| }, |
| { |
| "epoch": 55.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4130000000000002e-05, |
| "loss": 1.743, |
| "mean_token_accuracy": 0.6448181957006455, |
| "num_tokens": 26167250.0, |
| "step": 7175 |
| }, |
| { |
| "epoch": 55.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4105000000000001e-05, |
| "loss": 1.7366, |
| "mean_token_accuracy": 0.6529501020908356, |
| "num_tokens": 26185026.0, |
| "step": 7180 |
| }, |
| { |
| "epoch": 55.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.408e-05, |
| "loss": 1.687, |
| "mean_token_accuracy": 0.6574739754199982, |
| "num_tokens": 26204005.0, |
| "step": 7185 |
| }, |
| { |
| "epoch": 55.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4055000000000002e-05, |
| "loss": 1.8161, |
| "mean_token_accuracy": 0.6342040151357651, |
| "num_tokens": 26221347.0, |
| "step": 7190 |
| }, |
| { |
| "epoch": 55.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4030000000000001e-05, |
| "loss": 1.7202, |
| "mean_token_accuracy": 0.6536721408367157, |
| "num_tokens": 26239690.0, |
| "step": 7195 |
| }, |
| { |
| "epoch": 55.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4005000000000002e-05, |
| "loss": 1.7623, |
| "mean_token_accuracy": 0.6440413445234299, |
| "num_tokens": 26257855.0, |
| "step": 7200 |
| }, |
| { |
| "epoch": 55.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3980000000000002e-05, |
| "loss": 1.7581, |
| "mean_token_accuracy": 0.651086950302124, |
| "num_tokens": 26275173.0, |
| "step": 7205 |
| }, |
| { |
| "epoch": 55.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3955000000000001e-05, |
| "loss": 1.7523, |
| "mean_token_accuracy": 0.6518444687128067, |
| "num_tokens": 26293084.0, |
| "step": 7210 |
| }, |
| { |
| "epoch": 55.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3930000000000002e-05, |
| "loss": 1.7744, |
| "mean_token_accuracy": 0.6451671838760376, |
| "num_tokens": 26310728.0, |
| "step": 7215 |
| }, |
| { |
| "epoch": 55.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3905000000000002e-05, |
| "loss": 1.7131, |
| "mean_token_accuracy": 0.6529065489768981, |
| "num_tokens": 26329504.0, |
| "step": 7220 |
| }, |
| { |
| "epoch": 55.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3880000000000001e-05, |
| "loss": 1.6938, |
| "mean_token_accuracy": 0.6577755719423294, |
| "num_tokens": 26348129.0, |
| "step": 7225 |
| }, |
| { |
| "epoch": 55.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3855000000000002e-05, |
| "loss": 1.7149, |
| "mean_token_accuracy": 0.6628178864717483, |
| "num_tokens": 26366596.0, |
| "step": 7230 |
| }, |
| { |
| "epoch": 55.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3830000000000001e-05, |
| "loss": 1.7266, |
| "mean_token_accuracy": 0.6519777953624726, |
| "num_tokens": 26384852.0, |
| "step": 7235 |
| }, |
| { |
| "epoch": 55.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3805e-05, |
| "loss": 1.7232, |
| "mean_token_accuracy": 0.6492596924304962, |
| "num_tokens": 26403619.0, |
| "step": 7240 |
| }, |
| { |
| "epoch": 55.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3780000000000002e-05, |
| "loss": 1.6343, |
| "mean_token_accuracy": 0.6683441162109375, |
| "num_tokens": 26423343.0, |
| "step": 7245 |
| }, |
| { |
| "epoch": 55.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3755000000000001e-05, |
| "loss": 1.8149, |
| "mean_token_accuracy": 0.6388367056846619, |
| "num_tokens": 26440984.0, |
| "step": 7250 |
| }, |
| { |
| "epoch": 55.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.373e-05, |
| "loss": 1.7501, |
| "mean_token_accuracy": 0.6547516256570816, |
| "num_tokens": 26459173.0, |
| "step": 7255 |
| }, |
| { |
| "epoch": 55.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3705000000000002e-05, |
| "loss": 1.7926, |
| "mean_token_accuracy": 0.6436378836631775, |
| "num_tokens": 26477032.0, |
| "step": 7260 |
| }, |
| { |
| "epoch": 55.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3680000000000001e-05, |
| "loss": 1.7543, |
| "mean_token_accuracy": 0.6499706447124481, |
| "num_tokens": 26495861.0, |
| "step": 7265 |
| }, |
| { |
| "epoch": 55.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3655e-05, |
| "loss": 1.714, |
| "mean_token_accuracy": 0.6544960319995881, |
| "num_tokens": 26514486.0, |
| "step": 7270 |
| }, |
| { |
| "epoch": 55.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3630000000000002e-05, |
| "loss": 1.7599, |
| "mean_token_accuracy": 0.6501589000225068, |
| "num_tokens": 26532126.0, |
| "step": 7275 |
| }, |
| { |
| "epoch": 56.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3605000000000001e-05, |
| "loss": 1.7521, |
| "mean_token_accuracy": 0.6498360931873322, |
| "num_tokens": 26549880.0, |
| "step": 7280 |
| }, |
| { |
| "epoch": 56.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.358e-05, |
| "loss": 1.7991, |
| "mean_token_accuracy": 0.6392385989427567, |
| "num_tokens": 26567257.0, |
| "step": 7285 |
| }, |
| { |
| "epoch": 56.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3555000000000002e-05, |
| "loss": 1.7625, |
| "mean_token_accuracy": 0.6449432462453842, |
| "num_tokens": 26585384.0, |
| "step": 7290 |
| }, |
| { |
| "epoch": 56.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3530000000000001e-05, |
| "loss": 1.7256, |
| "mean_token_accuracy": 0.6524859637022018, |
| "num_tokens": 26604011.0, |
| "step": 7295 |
| }, |
| { |
| "epoch": 56.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3505e-05, |
| "loss": 1.6922, |
| "mean_token_accuracy": 0.6660002678632736, |
| "num_tokens": 26622949.0, |
| "step": 7300 |
| }, |
| { |
| "epoch": 56.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3480000000000001e-05, |
| "loss": 1.7609, |
| "mean_token_accuracy": 0.6495236337184906, |
| "num_tokens": 26640892.0, |
| "step": 7305 |
| }, |
| { |
| "epoch": 56.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3455e-05, |
| "loss": 1.7394, |
| "mean_token_accuracy": 0.6511315196752548, |
| "num_tokens": 26659096.0, |
| "step": 7310 |
| }, |
| { |
| "epoch": 56.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.343e-05, |
| "loss": 1.7669, |
| "mean_token_accuracy": 0.6419103145599365, |
| "num_tokens": 26677122.0, |
| "step": 7315 |
| }, |
| { |
| "epoch": 56.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3405000000000001e-05, |
| "loss": 1.7607, |
| "mean_token_accuracy": 0.6484110444784165, |
| "num_tokens": 26694580.0, |
| "step": 7320 |
| }, |
| { |
| "epoch": 56.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.338e-05, |
| "loss": 1.7041, |
| "mean_token_accuracy": 0.6528016269207001, |
| "num_tokens": 26713265.0, |
| "step": 7325 |
| }, |
| { |
| "epoch": 56.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3355e-05, |
| "loss": 1.7255, |
| "mean_token_accuracy": 0.6498448342084885, |
| "num_tokens": 26731833.0, |
| "step": 7330 |
| }, |
| { |
| "epoch": 56.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3330000000000001e-05, |
| "loss": 1.7544, |
| "mean_token_accuracy": 0.6504902809858322, |
| "num_tokens": 26749542.0, |
| "step": 7335 |
| }, |
| { |
| "epoch": 56.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3305e-05, |
| "loss": 1.7841, |
| "mean_token_accuracy": 0.6441579282283783, |
| "num_tokens": 26767123.0, |
| "step": 7340 |
| }, |
| { |
| "epoch": 56.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3280000000000002e-05, |
| "loss": 1.7293, |
| "mean_token_accuracy": 0.6540816992521286, |
| "num_tokens": 26785021.0, |
| "step": 7345 |
| }, |
| { |
| "epoch": 56.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3255000000000001e-05, |
| "loss": 1.7353, |
| "mean_token_accuracy": 0.6520332425832749, |
| "num_tokens": 26803690.0, |
| "step": 7350 |
| }, |
| { |
| "epoch": 56.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.323e-05, |
| "loss": 1.7765, |
| "mean_token_accuracy": 0.6428980946540832, |
| "num_tokens": 26821866.0, |
| "step": 7355 |
| }, |
| { |
| "epoch": 56.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3205000000000001e-05, |
| "loss": 1.7009, |
| "mean_token_accuracy": 0.6572036474943161, |
| "num_tokens": 26840558.0, |
| "step": 7360 |
| }, |
| { |
| "epoch": 56.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3180000000000001e-05, |
| "loss": 1.7503, |
| "mean_token_accuracy": 0.6493770986795425, |
| "num_tokens": 26859028.0, |
| "step": 7365 |
| }, |
| { |
| "epoch": 56.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3155e-05, |
| "loss": 1.7131, |
| "mean_token_accuracy": 0.6502207726240158, |
| "num_tokens": 26877909.0, |
| "step": 7370 |
| }, |
| { |
| "epoch": 56.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3130000000000001e-05, |
| "loss": 1.7571, |
| "mean_token_accuracy": 0.6498846352100373, |
| "num_tokens": 26895371.0, |
| "step": 7375 |
| }, |
| { |
| "epoch": 56.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3105e-05, |
| "loss": 1.7048, |
| "mean_token_accuracy": 0.6559781163930893, |
| "num_tokens": 26914341.0, |
| "step": 7380 |
| }, |
| { |
| "epoch": 56.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.308e-05, |
| "loss": 1.7642, |
| "mean_token_accuracy": 0.6446814894676208, |
| "num_tokens": 26932597.0, |
| "step": 7385 |
| }, |
| { |
| "epoch": 56.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3055000000000001e-05, |
| "loss": 1.7919, |
| "mean_token_accuracy": 0.6469128876924515, |
| "num_tokens": 26950520.0, |
| "step": 7390 |
| }, |
| { |
| "epoch": 56.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.303e-05, |
| "loss": 1.724, |
| "mean_token_accuracy": 0.6555041134357452, |
| "num_tokens": 26969262.0, |
| "step": 7395 |
| }, |
| { |
| "epoch": 56.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3005e-05, |
| "loss": 1.7015, |
| "mean_token_accuracy": 0.661999249458313, |
| "num_tokens": 26987507.0, |
| "step": 7400 |
| }, |
| { |
| "epoch": 56.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2980000000000001e-05, |
| "loss": 1.6936, |
| "mean_token_accuracy": 0.6539974421262741, |
| "num_tokens": 27005866.0, |
| "step": 7405 |
| }, |
| { |
| "epoch": 57.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2955e-05, |
| "loss": 1.7419, |
| "mean_token_accuracy": 0.6442842125892639, |
| "num_tokens": 27023985.0, |
| "step": 7410 |
| }, |
| { |
| "epoch": 57.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.293e-05, |
| "loss": 1.7412, |
| "mean_token_accuracy": 0.6453804969787598, |
| "num_tokens": 27042588.0, |
| "step": 7415 |
| }, |
| { |
| "epoch": 57.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2905000000000001e-05, |
| "loss": 1.7348, |
| "mean_token_accuracy": 0.6512410253286361, |
| "num_tokens": 27060805.0, |
| "step": 7420 |
| }, |
| { |
| "epoch": 57.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.288e-05, |
| "loss": 1.7818, |
| "mean_token_accuracy": 0.6462918251752854, |
| "num_tokens": 27078281.0, |
| "step": 7425 |
| }, |
| { |
| "epoch": 57.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2855e-05, |
| "loss": 1.7089, |
| "mean_token_accuracy": 0.6547515660524368, |
| "num_tokens": 27097554.0, |
| "step": 7430 |
| }, |
| { |
| "epoch": 57.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.283e-05, |
| "loss": 1.7108, |
| "mean_token_accuracy": 0.6577676206827163, |
| "num_tokens": 27116052.0, |
| "step": 7435 |
| }, |
| { |
| "epoch": 57.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2805e-05, |
| "loss": 1.7134, |
| "mean_token_accuracy": 0.6552876085042953, |
| "num_tokens": 27134390.0, |
| "step": 7440 |
| }, |
| { |
| "epoch": 57.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.278e-05, |
| "loss": 1.7117, |
| "mean_token_accuracy": 0.6523223549127579, |
| "num_tokens": 27153262.0, |
| "step": 7445 |
| }, |
| { |
| "epoch": 57.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2755e-05, |
| "loss": 1.7682, |
| "mean_token_accuracy": 0.645848673582077, |
| "num_tokens": 27171115.0, |
| "step": 7450 |
| }, |
| { |
| "epoch": 57.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.273e-05, |
| "loss": 1.7698, |
| "mean_token_accuracy": 0.6474648177623749, |
| "num_tokens": 27188950.0, |
| "step": 7455 |
| }, |
| { |
| "epoch": 57.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2705e-05, |
| "loss": 1.7511, |
| "mean_token_accuracy": 0.6489328056573868, |
| "num_tokens": 27207207.0, |
| "step": 7460 |
| }, |
| { |
| "epoch": 57.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.268e-05, |
| "loss": 1.7531, |
| "mean_token_accuracy": 0.6518824428319931, |
| "num_tokens": 27225139.0, |
| "step": 7465 |
| }, |
| { |
| "epoch": 57.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2655e-05, |
| "loss": 1.7544, |
| "mean_token_accuracy": 0.6468398660421372, |
| "num_tokens": 27243113.0, |
| "step": 7470 |
| }, |
| { |
| "epoch": 57.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.263e-05, |
| "loss": 1.7591, |
| "mean_token_accuracy": 0.6493138283491134, |
| "num_tokens": 27261095.0, |
| "step": 7475 |
| }, |
| { |
| "epoch": 57.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2605e-05, |
| "loss": 1.7276, |
| "mean_token_accuracy": 0.6510590463876724, |
| "num_tokens": 27279536.0, |
| "step": 7480 |
| }, |
| { |
| "epoch": 57.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.258e-05, |
| "loss": 1.7754, |
| "mean_token_accuracy": 0.6381223618984222, |
| "num_tokens": 27297805.0, |
| "step": 7485 |
| }, |
| { |
| "epoch": 57.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2555000000000001e-05, |
| "loss": 1.7051, |
| "mean_token_accuracy": 0.6624167144298554, |
| "num_tokens": 27316540.0, |
| "step": 7490 |
| }, |
| { |
| "epoch": 57.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.253e-05, |
| "loss": 1.6621, |
| "mean_token_accuracy": 0.6645474463701249, |
| "num_tokens": 27336007.0, |
| "step": 7495 |
| }, |
| { |
| "epoch": 57.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2505e-05, |
| "loss": 1.7167, |
| "mean_token_accuracy": 0.6529757559299469, |
| "num_tokens": 27354439.0, |
| "step": 7500 |
| }, |
| { |
| "epoch": 57.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.248e-05, |
| "loss": 1.7176, |
| "mean_token_accuracy": 0.6583882302045823, |
| "num_tokens": 27372701.0, |
| "step": 7505 |
| }, |
| { |
| "epoch": 57.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2455e-05, |
| "loss": 1.7395, |
| "mean_token_accuracy": 0.6499318182468414, |
| "num_tokens": 27390861.0, |
| "step": 7510 |
| }, |
| { |
| "epoch": 57.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.243e-05, |
| "loss": 1.7021, |
| "mean_token_accuracy": 0.654238548874855, |
| "num_tokens": 27409107.0, |
| "step": 7515 |
| }, |
| { |
| "epoch": 57.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2405e-05, |
| "loss": 1.758, |
| "mean_token_accuracy": 0.6471449792385101, |
| "num_tokens": 27427046.0, |
| "step": 7520 |
| }, |
| { |
| "epoch": 57.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.238e-05, |
| "loss": 1.7179, |
| "mean_token_accuracy": 0.6572539776563644, |
| "num_tokens": 27445469.0, |
| "step": 7525 |
| }, |
| { |
| "epoch": 57.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2355e-05, |
| "loss": 1.7317, |
| "mean_token_accuracy": 0.653878676891327, |
| "num_tokens": 27463877.0, |
| "step": 7530 |
| }, |
| { |
| "epoch": 57.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.233e-05, |
| "loss": 1.836, |
| "mean_token_accuracy": 0.6318351715803147, |
| "num_tokens": 27480728.0, |
| "step": 7535 |
| }, |
| { |
| "epoch": 58.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2305000000000002e-05, |
| "loss": 1.8048, |
| "mean_token_accuracy": 0.6375920951366425, |
| "num_tokens": 27498090.0, |
| "step": 7540 |
| }, |
| { |
| "epoch": 58.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2280000000000001e-05, |
| "loss": 1.7156, |
| "mean_token_accuracy": 0.6615946650505066, |
| "num_tokens": 27516714.0, |
| "step": 7545 |
| }, |
| { |
| "epoch": 58.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2255e-05, |
| "loss": 1.8136, |
| "mean_token_accuracy": 0.6379787772893906, |
| "num_tokens": 27534566.0, |
| "step": 7550 |
| }, |
| { |
| "epoch": 58.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2230000000000001e-05, |
| "loss": 1.7963, |
| "mean_token_accuracy": 0.6373230516910553, |
| "num_tokens": 27552046.0, |
| "step": 7555 |
| }, |
| { |
| "epoch": 58.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2205000000000001e-05, |
| "loss": 1.7564, |
| "mean_token_accuracy": 0.6494860410690307, |
| "num_tokens": 27569697.0, |
| "step": 7560 |
| }, |
| { |
| "epoch": 58.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2180000000000002e-05, |
| "loss": 1.7436, |
| "mean_token_accuracy": 0.6535836279392242, |
| "num_tokens": 27588049.0, |
| "step": 7565 |
| }, |
| { |
| "epoch": 58.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2155000000000001e-05, |
| "loss": 1.7814, |
| "mean_token_accuracy": 0.6435631811618805, |
| "num_tokens": 27605236.0, |
| "step": 7570 |
| }, |
| { |
| "epoch": 58.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.213e-05, |
| "loss": 1.7401, |
| "mean_token_accuracy": 0.6488517135381698, |
| "num_tokens": 27623605.0, |
| "step": 7575 |
| }, |
| { |
| "epoch": 58.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2105000000000002e-05, |
| "loss": 1.7388, |
| "mean_token_accuracy": 0.6520835846662522, |
| "num_tokens": 27641906.0, |
| "step": 7580 |
| }, |
| { |
| "epoch": 58.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2080000000000001e-05, |
| "loss": 1.7482, |
| "mean_token_accuracy": 0.6481060594320297, |
| "num_tokens": 27660198.0, |
| "step": 7585 |
| }, |
| { |
| "epoch": 58.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2055e-05, |
| "loss": 1.8091, |
| "mean_token_accuracy": 0.6401431083679199, |
| "num_tokens": 27677904.0, |
| "step": 7590 |
| }, |
| { |
| "epoch": 58.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2030000000000002e-05, |
| "loss": 1.7091, |
| "mean_token_accuracy": 0.6640697896480561, |
| "num_tokens": 27695801.0, |
| "step": 7595 |
| }, |
| { |
| "epoch": 58.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2005000000000001e-05, |
| "loss": 1.7289, |
| "mean_token_accuracy": 0.6482616752386093, |
| "num_tokens": 27713869.0, |
| "step": 7600 |
| }, |
| { |
| "epoch": 58.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.198e-05, |
| "loss": 1.7355, |
| "mean_token_accuracy": 0.6519585400819778, |
| "num_tokens": 27731955.0, |
| "step": 7605 |
| }, |
| { |
| "epoch": 58.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1955000000000002e-05, |
| "loss": 1.7163, |
| "mean_token_accuracy": 0.6518133997917175, |
| "num_tokens": 27750713.0, |
| "step": 7610 |
| }, |
| { |
| "epoch": 58.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1930000000000001e-05, |
| "loss": 1.7286, |
| "mean_token_accuracy": 0.6503622680902481, |
| "num_tokens": 27769167.0, |
| "step": 7615 |
| }, |
| { |
| "epoch": 58.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1905e-05, |
| "loss": 1.6179, |
| "mean_token_accuracy": 0.676550367474556, |
| "num_tokens": 27788253.0, |
| "step": 7620 |
| }, |
| { |
| "epoch": 58.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1880000000000001e-05, |
| "loss": 1.7219, |
| "mean_token_accuracy": 0.6493042975664138, |
| "num_tokens": 27807507.0, |
| "step": 7625 |
| }, |
| { |
| "epoch": 58.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1855e-05, |
| "loss": 1.7596, |
| "mean_token_accuracy": 0.6421011120080948, |
| "num_tokens": 27825718.0, |
| "step": 7630 |
| }, |
| { |
| "epoch": 58.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.183e-05, |
| "loss": 1.7286, |
| "mean_token_accuracy": 0.6499491780996323, |
| "num_tokens": 27843945.0, |
| "step": 7635 |
| }, |
| { |
| "epoch": 58.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1805000000000001e-05, |
| "loss": 1.7409, |
| "mean_token_accuracy": 0.6533534795045852, |
| "num_tokens": 27862495.0, |
| "step": 7640 |
| }, |
| { |
| "epoch": 58.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.178e-05, |
| "loss": 1.767, |
| "mean_token_accuracy": 0.6454948484897614, |
| "num_tokens": 27880609.0, |
| "step": 7645 |
| }, |
| { |
| "epoch": 58.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1755e-05, |
| "loss": 1.7275, |
| "mean_token_accuracy": 0.6586075276136398, |
| "num_tokens": 27899029.0, |
| "step": 7650 |
| }, |
| { |
| "epoch": 58.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1730000000000001e-05, |
| "loss": 1.7258, |
| "mean_token_accuracy": 0.6555096328258514, |
| "num_tokens": 27917125.0, |
| "step": 7655 |
| }, |
| { |
| "epoch": 58.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1705e-05, |
| "loss": 1.6887, |
| "mean_token_accuracy": 0.6544747292995453, |
| "num_tokens": 27936195.0, |
| "step": 7660 |
| }, |
| { |
| "epoch": 58.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.168e-05, |
| "loss": 1.8034, |
| "mean_token_accuracy": 0.6353619664907455, |
| "num_tokens": 27953877.0, |
| "step": 7665 |
| }, |
| { |
| "epoch": 59.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1655000000000001e-05, |
| "loss": 1.7287, |
| "mean_token_accuracy": 0.6526497393846512, |
| "num_tokens": 27972195.0, |
| "step": 7670 |
| }, |
| { |
| "epoch": 59.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.163e-05, |
| "loss": 1.7736, |
| "mean_token_accuracy": 0.6459016293287277, |
| "num_tokens": 27989703.0, |
| "step": 7675 |
| }, |
| { |
| "epoch": 59.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1605e-05, |
| "loss": 1.7243, |
| "mean_token_accuracy": 0.6570581525564194, |
| "num_tokens": 28007876.0, |
| "step": 7680 |
| }, |
| { |
| "epoch": 59.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1580000000000001e-05, |
| "loss": 1.6764, |
| "mean_token_accuracy": 0.6651537865400314, |
| "num_tokens": 28026168.0, |
| "step": 7685 |
| }, |
| { |
| "epoch": 59.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1555e-05, |
| "loss": 1.7593, |
| "mean_token_accuracy": 0.6441430777311326, |
| "num_tokens": 28044444.0, |
| "step": 7690 |
| }, |
| { |
| "epoch": 59.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.153e-05, |
| "loss": 1.8019, |
| "mean_token_accuracy": 0.637511157989502, |
| "num_tokens": 28062363.0, |
| "step": 7695 |
| }, |
| { |
| "epoch": 59.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1505e-05, |
| "loss": 1.6635, |
| "mean_token_accuracy": 0.665479126572609, |
| "num_tokens": 28081357.0, |
| "step": 7700 |
| }, |
| { |
| "epoch": 59.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.148e-05, |
| "loss": 1.6759, |
| "mean_token_accuracy": 0.6660502076148986, |
| "num_tokens": 28099996.0, |
| "step": 7705 |
| }, |
| { |
| "epoch": 59.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1455000000000001e-05, |
| "loss": 1.7881, |
| "mean_token_accuracy": 0.6390215069055557, |
| "num_tokens": 28117982.0, |
| "step": 7710 |
| }, |
| { |
| "epoch": 59.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.143e-05, |
| "loss": 1.7389, |
| "mean_token_accuracy": 0.6523732364177703, |
| "num_tokens": 28136640.0, |
| "step": 7715 |
| }, |
| { |
| "epoch": 59.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1405e-05, |
| "loss": 1.7337, |
| "mean_token_accuracy": 0.6487939029932022, |
| "num_tokens": 28155122.0, |
| "step": 7720 |
| }, |
| { |
| "epoch": 59.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1380000000000001e-05, |
| "loss": 1.7896, |
| "mean_token_accuracy": 0.6361289143562316, |
| "num_tokens": 28173409.0, |
| "step": 7725 |
| }, |
| { |
| "epoch": 59.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1355e-05, |
| "loss": 1.6714, |
| "mean_token_accuracy": 0.6632986754179001, |
| "num_tokens": 28191671.0, |
| "step": 7730 |
| }, |
| { |
| "epoch": 59.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.133e-05, |
| "loss": 1.7393, |
| "mean_token_accuracy": 0.6471075922250747, |
| "num_tokens": 28210071.0, |
| "step": 7735 |
| }, |
| { |
| "epoch": 59.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1305000000000001e-05, |
| "loss": 1.7713, |
| "mean_token_accuracy": 0.6440162390470505, |
| "num_tokens": 28227999.0, |
| "step": 7740 |
| }, |
| { |
| "epoch": 59.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.128e-05, |
| "loss": 1.7417, |
| "mean_token_accuracy": 0.6511160790920257, |
| "num_tokens": 28246129.0, |
| "step": 7745 |
| }, |
| { |
| "epoch": 59.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1255e-05, |
| "loss": 1.6313, |
| "mean_token_accuracy": 0.6731823951005935, |
| "num_tokens": 28265694.0, |
| "step": 7750 |
| }, |
| { |
| "epoch": 59.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1230000000000001e-05, |
| "loss": 1.7091, |
| "mean_token_accuracy": 0.6587099075317383, |
| "num_tokens": 28283812.0, |
| "step": 7755 |
| }, |
| { |
| "epoch": 59.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1205e-05, |
| "loss": 1.775, |
| "mean_token_accuracy": 0.6434663653373718, |
| "num_tokens": 28301500.0, |
| "step": 7760 |
| }, |
| { |
| "epoch": 59.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.118e-05, |
| "loss": 1.7526, |
| "mean_token_accuracy": 0.6484665781259537, |
| "num_tokens": 28320482.0, |
| "step": 7765 |
| }, |
| { |
| "epoch": 59.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1155e-05, |
| "loss": 1.7621, |
| "mean_token_accuracy": 0.6493111461400985, |
| "num_tokens": 28338521.0, |
| "step": 7770 |
| }, |
| { |
| "epoch": 59.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.113e-05, |
| "loss": 1.7517, |
| "mean_token_accuracy": 0.6488117009401322, |
| "num_tokens": 28356527.0, |
| "step": 7775 |
| }, |
| { |
| "epoch": 59.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1105e-05, |
| "loss": 1.7549, |
| "mean_token_accuracy": 0.6550350069999695, |
| "num_tokens": 28374046.0, |
| "step": 7780 |
| }, |
| { |
| "epoch": 59.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.108e-05, |
| "loss": 1.7463, |
| "mean_token_accuracy": 0.6461563289165497, |
| "num_tokens": 28392669.0, |
| "step": 7785 |
| }, |
| { |
| "epoch": 59.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1055e-05, |
| "loss": 1.7571, |
| "mean_token_accuracy": 0.6455042749643326, |
| "num_tokens": 28410567.0, |
| "step": 7790 |
| }, |
| { |
| "epoch": 59.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.103e-05, |
| "loss": 1.817, |
| "mean_token_accuracy": 0.6328691780567169, |
| "num_tokens": 28427919.0, |
| "step": 7795 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1005e-05, |
| "loss": 1.7394, |
| "mean_token_accuracy": 0.6530990123748779, |
| "num_tokens": 28446300.0, |
| "step": 7800 |
| }, |
| { |
| "epoch": 60.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.098e-05, |
| "loss": 1.7301, |
| "mean_token_accuracy": 0.6500816881656647, |
| "num_tokens": 28464951.0, |
| "step": 7805 |
| }, |
| { |
| "epoch": 60.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0955e-05, |
| "loss": 1.7905, |
| "mean_token_accuracy": 0.6456600069999695, |
| "num_tokens": 28482557.0, |
| "step": 7810 |
| }, |
| { |
| "epoch": 60.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.093e-05, |
| "loss": 1.719, |
| "mean_token_accuracy": 0.6553211659193039, |
| "num_tokens": 28500556.0, |
| "step": 7815 |
| }, |
| { |
| "epoch": 60.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0905e-05, |
| "loss": 1.804, |
| "mean_token_accuracy": 0.635945051908493, |
| "num_tokens": 28517958.0, |
| "step": 7820 |
| }, |
| { |
| "epoch": 60.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.088e-05, |
| "loss": 1.7217, |
| "mean_token_accuracy": 0.6568192034959793, |
| "num_tokens": 28536147.0, |
| "step": 7825 |
| }, |
| { |
| "epoch": 60.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0855e-05, |
| "loss": 1.7808, |
| "mean_token_accuracy": 0.6401394218206405, |
| "num_tokens": 28554578.0, |
| "step": 7830 |
| }, |
| { |
| "epoch": 60.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.083e-05, |
| "loss": 1.7614, |
| "mean_token_accuracy": 0.6466422110795975, |
| "num_tokens": 28572574.0, |
| "step": 7835 |
| }, |
| { |
| "epoch": 60.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0804999999999999e-05, |
| "loss": 1.7444, |
| "mean_token_accuracy": 0.6499844253063202, |
| "num_tokens": 28590779.0, |
| "step": 7840 |
| }, |
| { |
| "epoch": 60.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0780000000000002e-05, |
| "loss": 1.737, |
| "mean_token_accuracy": 0.6540430366992951, |
| "num_tokens": 28609298.0, |
| "step": 7845 |
| }, |
| { |
| "epoch": 60.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0755000000000001e-05, |
| "loss": 1.7464, |
| "mean_token_accuracy": 0.6548949480056763, |
| "num_tokens": 28626786.0, |
| "step": 7850 |
| }, |
| { |
| "epoch": 60.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.073e-05, |
| "loss": 1.7265, |
| "mean_token_accuracy": 0.6512384116649628, |
| "num_tokens": 28645473.0, |
| "step": 7855 |
| }, |
| { |
| "epoch": 60.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0705000000000002e-05, |
| "loss": 1.6828, |
| "mean_token_accuracy": 0.662873387336731, |
| "num_tokens": 28664176.0, |
| "step": 7860 |
| }, |
| { |
| "epoch": 60.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0680000000000001e-05, |
| "loss": 1.7298, |
| "mean_token_accuracy": 0.6527964979410171, |
| "num_tokens": 28682577.0, |
| "step": 7865 |
| }, |
| { |
| "epoch": 60.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0655e-05, |
| "loss": 1.7012, |
| "mean_token_accuracy": 0.6583037704229355, |
| "num_tokens": 28701898.0, |
| "step": 7870 |
| }, |
| { |
| "epoch": 60.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0630000000000002e-05, |
| "loss": 1.7366, |
| "mean_token_accuracy": 0.6519467145204544, |
| "num_tokens": 28720551.0, |
| "step": 7875 |
| }, |
| { |
| "epoch": 60.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0605000000000001e-05, |
| "loss": 1.7621, |
| "mean_token_accuracy": 0.6454852074384689, |
| "num_tokens": 28738693.0, |
| "step": 7880 |
| }, |
| { |
| "epoch": 60.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.058e-05, |
| "loss": 1.7185, |
| "mean_token_accuracy": 0.6532977163791657, |
| "num_tokens": 28757044.0, |
| "step": 7885 |
| }, |
| { |
| "epoch": 60.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0555000000000001e-05, |
| "loss": 1.749, |
| "mean_token_accuracy": 0.651301595568657, |
| "num_tokens": 28775068.0, |
| "step": 7890 |
| }, |
| { |
| "epoch": 60.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.053e-05, |
| "loss": 1.7901, |
| "mean_token_accuracy": 0.6404271066188812, |
| "num_tokens": 28792926.0, |
| "step": 7895 |
| }, |
| { |
| "epoch": 60.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0505e-05, |
| "loss": 1.7377, |
| "mean_token_accuracy": 0.6532931387424469, |
| "num_tokens": 28810975.0, |
| "step": 7900 |
| }, |
| { |
| "epoch": 60.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0480000000000001e-05, |
| "loss": 1.8055, |
| "mean_token_accuracy": 0.6381255328655243, |
| "num_tokens": 28828534.0, |
| "step": 7905 |
| }, |
| { |
| "epoch": 60.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0455e-05, |
| "loss": 1.7612, |
| "mean_token_accuracy": 0.6478750109672546, |
| "num_tokens": 28846473.0, |
| "step": 7910 |
| }, |
| { |
| "epoch": 60.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.043e-05, |
| "loss": 1.7125, |
| "mean_token_accuracy": 0.6560309410095215, |
| "num_tokens": 28864548.0, |
| "step": 7915 |
| }, |
| { |
| "epoch": 60.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0405000000000001e-05, |
| "loss": 1.7297, |
| "mean_token_accuracy": 0.6469286412000657, |
| "num_tokens": 28882849.0, |
| "step": 7920 |
| }, |
| { |
| "epoch": 60.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.038e-05, |
| "loss": 1.6918, |
| "mean_token_accuracy": 0.6562754690647126, |
| "num_tokens": 28902085.0, |
| "step": 7925 |
| }, |
| { |
| "epoch": 61.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0355000000000002e-05, |
| "loss": 1.7195, |
| "mean_token_accuracy": 0.6521394342184067, |
| "num_tokens": 28920405.0, |
| "step": 7930 |
| }, |
| { |
| "epoch": 61.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0330000000000001e-05, |
| "loss": 1.7788, |
| "mean_token_accuracy": 0.6419234901666642, |
| "num_tokens": 28937930.0, |
| "step": 7935 |
| }, |
| { |
| "epoch": 61.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0305e-05, |
| "loss": 1.8234, |
| "mean_token_accuracy": 0.6357664644718171, |
| "num_tokens": 28955347.0, |
| "step": 7940 |
| }, |
| { |
| "epoch": 61.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0280000000000002e-05, |
| "loss": 1.7796, |
| "mean_token_accuracy": 0.6469300240278244, |
| "num_tokens": 28972764.0, |
| "step": 7945 |
| }, |
| { |
| "epoch": 61.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0255000000000001e-05, |
| "loss": 1.7454, |
| "mean_token_accuracy": 0.6496328443288804, |
| "num_tokens": 28991130.0, |
| "step": 7950 |
| }, |
| { |
| "epoch": 61.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 1.023e-05, |
| "loss": 1.7164, |
| "mean_token_accuracy": 0.6583995938301086, |
| "num_tokens": 29009479.0, |
| "step": 7955 |
| }, |
| { |
| "epoch": 61.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0205000000000001e-05, |
| "loss": 1.7438, |
| "mean_token_accuracy": 0.6459792077541351, |
| "num_tokens": 29027531.0, |
| "step": 7960 |
| }, |
| { |
| "epoch": 61.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.018e-05, |
| "loss": 1.7467, |
| "mean_token_accuracy": 0.6560082226991654, |
| "num_tokens": 29045399.0, |
| "step": 7965 |
| }, |
| { |
| "epoch": 61.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0155e-05, |
| "loss": 1.6793, |
| "mean_token_accuracy": 0.6648034691810608, |
| "num_tokens": 29064129.0, |
| "step": 7970 |
| }, |
| { |
| "epoch": 61.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0130000000000001e-05, |
| "loss": 1.7599, |
| "mean_token_accuracy": 0.6455144137144089, |
| "num_tokens": 29082239.0, |
| "step": 7975 |
| }, |
| { |
| "epoch": 61.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0105e-05, |
| "loss": 1.7216, |
| "mean_token_accuracy": 0.6553202778100967, |
| "num_tokens": 29100554.0, |
| "step": 7980 |
| }, |
| { |
| "epoch": 61.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.008e-05, |
| "loss": 1.7646, |
| "mean_token_accuracy": 0.6500458031892776, |
| "num_tokens": 29118533.0, |
| "step": 7985 |
| }, |
| { |
| "epoch": 61.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0055000000000001e-05, |
| "loss": 1.7526, |
| "mean_token_accuracy": 0.6455101191997528, |
| "num_tokens": 29137065.0, |
| "step": 7990 |
| }, |
| { |
| "epoch": 61.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.003e-05, |
| "loss": 1.7622, |
| "mean_token_accuracy": 0.6454584419727325, |
| "num_tokens": 29156036.0, |
| "step": 7995 |
| }, |
| { |
| "epoch": 61.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0005e-05, |
| "loss": 1.7365, |
| "mean_token_accuracy": 0.6461439549922943, |
| "num_tokens": 29174330.0, |
| "step": 8000 |
| }, |
| { |
| "epoch": 61.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 1.7046, |
| "mean_token_accuracy": 0.661187008023262, |
| "num_tokens": 29192595.0, |
| "step": 8005 |
| }, |
| { |
| "epoch": 61.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 9.955e-06, |
| "loss": 1.6992, |
| "mean_token_accuracy": 0.6574147075414658, |
| "num_tokens": 29211106.0, |
| "step": 8010 |
| }, |
| { |
| "epoch": 61.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 9.93e-06, |
| "loss": 1.6859, |
| "mean_token_accuracy": 0.6642335325479507, |
| "num_tokens": 29230108.0, |
| "step": 8015 |
| }, |
| { |
| "epoch": 61.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 9.905000000000001e-06, |
| "loss": 1.8063, |
| "mean_token_accuracy": 0.6333356082439423, |
| "num_tokens": 29247611.0, |
| "step": 8020 |
| }, |
| { |
| "epoch": 61.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 9.88e-06, |
| "loss": 1.7164, |
| "mean_token_accuracy": 0.6516272902488709, |
| "num_tokens": 29265885.0, |
| "step": 8025 |
| }, |
| { |
| "epoch": 61.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 9.855e-06, |
| "loss": 1.7593, |
| "mean_token_accuracy": 0.6486380189657212, |
| "num_tokens": 29284235.0, |
| "step": 8030 |
| }, |
| { |
| "epoch": 61.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 9.83e-06, |
| "loss": 1.7202, |
| "mean_token_accuracy": 0.6530221402645111, |
| "num_tokens": 29302328.0, |
| "step": 8035 |
| }, |
| { |
| "epoch": 61.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 9.805e-06, |
| "loss": 1.7455, |
| "mean_token_accuracy": 0.6459253072738648, |
| "num_tokens": 29320675.0, |
| "step": 8040 |
| }, |
| { |
| "epoch": 61.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 9.78e-06, |
| "loss": 1.7512, |
| "mean_token_accuracy": 0.6454969674348832, |
| "num_tokens": 29339644.0, |
| "step": 8045 |
| }, |
| { |
| "epoch": 61.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 9.755e-06, |
| "loss": 1.7042, |
| "mean_token_accuracy": 0.6602411061525345, |
| "num_tokens": 29357996.0, |
| "step": 8050 |
| }, |
| { |
| "epoch": 61.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 9.73e-06, |
| "loss": 1.6993, |
| "mean_token_accuracy": 0.657492709159851, |
| "num_tokens": 29376683.0, |
| "step": 8055 |
| }, |
| { |
| "epoch": 62.0, |
| "grad_norm": 0.0, |
| "learning_rate": 9.705e-06, |
| "loss": 1.7704, |
| "mean_token_accuracy": 0.6440939038991929, |
| "num_tokens": 29394510.0, |
| "step": 8060 |
| }, |
| { |
| "epoch": 62.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 9.68e-06, |
| "loss": 1.7601, |
| "mean_token_accuracy": 0.6529957592487335, |
| "num_tokens": 29412388.0, |
| "step": 8065 |
| }, |
| { |
| "epoch": 62.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 9.655e-06, |
| "loss": 1.754, |
| "mean_token_accuracy": 0.6488070756196975, |
| "num_tokens": 29430324.0, |
| "step": 8070 |
| }, |
| { |
| "epoch": 62.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 9.630000000000001e-06, |
| "loss": 1.7514, |
| "mean_token_accuracy": 0.6483252078294754, |
| "num_tokens": 29448473.0, |
| "step": 8075 |
| }, |
| { |
| "epoch": 62.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 9.605e-06, |
| "loss": 1.7805, |
| "mean_token_accuracy": 0.6442840725183487, |
| "num_tokens": 29466357.0, |
| "step": 8080 |
| }, |
| { |
| "epoch": 62.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 9.58e-06, |
| "loss": 1.7854, |
| "mean_token_accuracy": 0.6450077176094056, |
| "num_tokens": 29484406.0, |
| "step": 8085 |
| }, |
| { |
| "epoch": 62.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 9.555e-06, |
| "loss": 1.6738, |
| "mean_token_accuracy": 0.6603300988674163, |
| "num_tokens": 29503619.0, |
| "step": 8090 |
| }, |
| { |
| "epoch": 62.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 9.53e-06, |
| "loss": 1.7563, |
| "mean_token_accuracy": 0.6429074674844741, |
| "num_tokens": 29521125.0, |
| "step": 8095 |
| }, |
| { |
| "epoch": 62.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 9.505e-06, |
| "loss": 1.7227, |
| "mean_token_accuracy": 0.6537641167640686, |
| "num_tokens": 29539856.0, |
| "step": 8100 |
| }, |
| { |
| "epoch": 62.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 9.48e-06, |
| "loss": 1.7475, |
| "mean_token_accuracy": 0.6510567903518677, |
| "num_tokens": 29557912.0, |
| "step": 8105 |
| }, |
| { |
| "epoch": 62.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 9.455e-06, |
| "loss": 1.7125, |
| "mean_token_accuracy": 0.6581447750329972, |
| "num_tokens": 29576445.0, |
| "step": 8110 |
| }, |
| { |
| "epoch": 62.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 9.43e-06, |
| "loss": 1.8141, |
| "mean_token_accuracy": 0.634006318449974, |
| "num_tokens": 29594170.0, |
| "step": 8115 |
| }, |
| { |
| "epoch": 62.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 9.405e-06, |
| "loss": 1.6697, |
| "mean_token_accuracy": 0.6640021294355393, |
| "num_tokens": 29612931.0, |
| "step": 8120 |
| }, |
| { |
| "epoch": 62.5, |
| "grad_norm": 0.0, |
| "learning_rate": 9.38e-06, |
| "loss": 1.8029, |
| "mean_token_accuracy": 0.6408674240112304, |
| "num_tokens": 29630058.0, |
| "step": 8125 |
| }, |
| { |
| "epoch": 62.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 9.355e-06, |
| "loss": 1.665, |
| "mean_token_accuracy": 0.6620132178068161, |
| "num_tokens": 29649586.0, |
| "step": 8130 |
| }, |
| { |
| "epoch": 62.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 9.33e-06, |
| "loss": 1.7189, |
| "mean_token_accuracy": 0.6585166305303574, |
| "num_tokens": 29668203.0, |
| "step": 8135 |
| }, |
| { |
| "epoch": 62.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 9.305e-06, |
| "loss": 1.7752, |
| "mean_token_accuracy": 0.6421294629573822, |
| "num_tokens": 29686310.0, |
| "step": 8140 |
| }, |
| { |
| "epoch": 62.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 9.28e-06, |
| "loss": 1.6784, |
| "mean_token_accuracy": 0.6623817592859268, |
| "num_tokens": 29705325.0, |
| "step": 8145 |
| }, |
| { |
| "epoch": 62.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 9.255e-06, |
| "loss": 1.6656, |
| "mean_token_accuracy": 0.6657170474529266, |
| "num_tokens": 29724477.0, |
| "step": 8150 |
| }, |
| { |
| "epoch": 62.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 9.23e-06, |
| "loss": 1.7482, |
| "mean_token_accuracy": 0.6518024027347564, |
| "num_tokens": 29742661.0, |
| "step": 8155 |
| }, |
| { |
| "epoch": 62.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 9.205e-06, |
| "loss": 1.7613, |
| "mean_token_accuracy": 0.6471410870552063, |
| "num_tokens": 29760581.0, |
| "step": 8160 |
| }, |
| { |
| "epoch": 62.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 9.180000000000002e-06, |
| "loss": 1.7447, |
| "mean_token_accuracy": 0.6508060991764069, |
| "num_tokens": 29778883.0, |
| "step": 8165 |
| }, |
| { |
| "epoch": 62.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 9.155000000000001e-06, |
| "loss": 1.7297, |
| "mean_token_accuracy": 0.6586943835020065, |
| "num_tokens": 29796604.0, |
| "step": 8170 |
| }, |
| { |
| "epoch": 62.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 9.13e-06, |
| "loss": 1.8185, |
| "mean_token_accuracy": 0.6354205548763275, |
| "num_tokens": 29813853.0, |
| "step": 8175 |
| }, |
| { |
| "epoch": 62.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 9.105000000000002e-06, |
| "loss": 1.7358, |
| "mean_token_accuracy": 0.6511748641729355, |
| "num_tokens": 29832138.0, |
| "step": 8180 |
| }, |
| { |
| "epoch": 62.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 9.080000000000001e-06, |
| "loss": 1.7322, |
| "mean_token_accuracy": 0.6481782138347626, |
| "num_tokens": 29850755.0, |
| "step": 8185 |
| }, |
| { |
| "epoch": 63.0, |
| "grad_norm": 0.0, |
| "learning_rate": 9.055e-06, |
| "loss": 1.7469, |
| "mean_token_accuracy": 0.6486757427453995, |
| "num_tokens": 29868615.0, |
| "step": 8190 |
| }, |
| { |
| "epoch": 63.03846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 9.030000000000002e-06, |
| "loss": 1.7337, |
| "mean_token_accuracy": 0.648679456114769, |
| "num_tokens": 29886878.0, |
| "step": 8195 |
| }, |
| { |
| "epoch": 63.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 9.005000000000001e-06, |
| "loss": 1.7119, |
| "mean_token_accuracy": 0.658037719130516, |
| "num_tokens": 29905302.0, |
| "step": 8200 |
| }, |
| { |
| "epoch": 63.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 8.98e-06, |
| "loss": 1.7854, |
| "mean_token_accuracy": 0.6415458977222442, |
| "num_tokens": 29923223.0, |
| "step": 8205 |
| }, |
| { |
| "epoch": 63.15384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 8.955000000000002e-06, |
| "loss": 1.6884, |
| "mean_token_accuracy": 0.6621178448200226, |
| "num_tokens": 29941689.0, |
| "step": 8210 |
| }, |
| { |
| "epoch": 63.19230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 8.930000000000001e-06, |
| "loss": 1.7898, |
| "mean_token_accuracy": 0.6401251316070556, |
| "num_tokens": 29959317.0, |
| "step": 8215 |
| }, |
| { |
| "epoch": 63.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 8.905e-06, |
| "loss": 1.6928, |
| "mean_token_accuracy": 0.6512804627418518, |
| "num_tokens": 29978320.0, |
| "step": 8220 |
| }, |
| { |
| "epoch": 63.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 8.880000000000001e-06, |
| "loss": 1.7525, |
| "mean_token_accuracy": 0.6476843535900116, |
| "num_tokens": 29996596.0, |
| "step": 8225 |
| }, |
| { |
| "epoch": 63.30769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 8.855e-06, |
| "loss": 1.6552, |
| "mean_token_accuracy": 0.6682954013347626, |
| "num_tokens": 30015615.0, |
| "step": 8230 |
| }, |
| { |
| "epoch": 63.34615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 8.83e-06, |
| "loss": 1.7519, |
| "mean_token_accuracy": 0.6464314192533493, |
| "num_tokens": 30033857.0, |
| "step": 8235 |
| }, |
| { |
| "epoch": 63.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 8.805000000000001e-06, |
| "loss": 1.8065, |
| "mean_token_accuracy": 0.6360196202993393, |
| "num_tokens": 30051356.0, |
| "step": 8240 |
| }, |
| { |
| "epoch": 63.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 8.78e-06, |
| "loss": 1.7291, |
| "mean_token_accuracy": 0.661040186882019, |
| "num_tokens": 30069584.0, |
| "step": 8245 |
| }, |
| { |
| "epoch": 63.46153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 8.755e-06, |
| "loss": 1.7783, |
| "mean_token_accuracy": 0.6421218931674957, |
| "num_tokens": 30087772.0, |
| "step": 8250 |
| }, |
| { |
| "epoch": 63.5, |
| "grad_norm": 0.0, |
| "learning_rate": 8.730000000000001e-06, |
| "loss": 1.7815, |
| "mean_token_accuracy": 0.6430323421955109, |
| "num_tokens": 30105880.0, |
| "step": 8255 |
| }, |
| { |
| "epoch": 63.53846153846154, |
| "grad_norm": 0.0, |
| "learning_rate": 8.705e-06, |
| "loss": 1.7272, |
| "mean_token_accuracy": 0.6486560940742493, |
| "num_tokens": 30124841.0, |
| "step": 8260 |
| }, |
| { |
| "epoch": 63.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 8.68e-06, |
| "loss": 1.757, |
| "mean_token_accuracy": 0.6492252767086029, |
| "num_tokens": 30142267.0, |
| "step": 8265 |
| }, |
| { |
| "epoch": 63.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 8.655000000000001e-06, |
| "loss": 1.7874, |
| "mean_token_accuracy": 0.6347816586494446, |
| "num_tokens": 30160045.0, |
| "step": 8270 |
| }, |
| { |
| "epoch": 63.65384615384615, |
| "grad_norm": 0.0, |
| "learning_rate": 8.63e-06, |
| "loss": 1.729, |
| "mean_token_accuracy": 0.6490083158016204, |
| "num_tokens": 30178837.0, |
| "step": 8275 |
| }, |
| { |
| "epoch": 63.69230769230769, |
| "grad_norm": 0.0, |
| "learning_rate": 8.605e-06, |
| "loss": 1.7159, |
| "mean_token_accuracy": 0.6582196593284607, |
| "num_tokens": 30197308.0, |
| "step": 8280 |
| }, |
| { |
| "epoch": 63.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 8.580000000000001e-06, |
| "loss": 1.6787, |
| "mean_token_accuracy": 0.6678410351276398, |
| "num_tokens": 30215988.0, |
| "step": 8285 |
| }, |
| { |
| "epoch": 63.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 8.555e-06, |
| "loss": 1.7973, |
| "mean_token_accuracy": 0.637213721871376, |
| "num_tokens": 30233493.0, |
| "step": 8290 |
| }, |
| { |
| "epoch": 63.80769230769231, |
| "grad_norm": 0.0, |
| "learning_rate": 8.53e-06, |
| "loss": 1.7189, |
| "mean_token_accuracy": 0.656983396410942, |
| "num_tokens": 30251845.0, |
| "step": 8295 |
| }, |
| { |
| "epoch": 63.84615384615385, |
| "grad_norm": 0.0, |
| "learning_rate": 8.505e-06, |
| "loss": 1.8205, |
| "mean_token_accuracy": 0.6382214099168777, |
| "num_tokens": 30268899.0, |
| "step": 8300 |
| }, |
| { |
| "epoch": 63.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 8.48e-06, |
| "loss": 1.7196, |
| "mean_token_accuracy": 0.6534265607595444, |
| "num_tokens": 30287324.0, |
| "step": 8305 |
| }, |
| { |
| "epoch": 63.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 8.455000000000001e-06, |
| "loss": 1.755, |
| "mean_token_accuracy": 0.649156528711319, |
| "num_tokens": 30305098.0, |
| "step": 8310 |
| }, |
| { |
| "epoch": 63.96153846153846, |
| "grad_norm": 0.0, |
| "learning_rate": 8.43e-06, |
| "loss": 1.6393, |
| "mean_token_accuracy": 0.6654347360134125, |
| "num_tokens": 30324316.0, |
| "step": 8315 |
| }, |
| { |
| "epoch": 64.0, |
| "grad_norm": 0.0, |
| "learning_rate": 8.405e-06, |
| "loss": 1.7622, |
| "mean_token_accuracy": 0.647731038928032, |
| "num_tokens": 30342720.0, |
| "step": 8320 |
| }, |
| { |
| "epoch": 64.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 8.380000000000001e-06, |
| "loss": 1.7291, |
| "mean_token_accuracy": 0.6533550292253494, |
| "num_tokens": 30360719.0, |
| "step": 8325 |
| }, |
| { |
| "epoch": 64.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 8.355e-06, |
| "loss": 1.7221, |
| "mean_token_accuracy": 0.6565493553876877, |
| "num_tokens": 30379966.0, |
| "step": 8330 |
| }, |
| { |
| "epoch": 64.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 8.33e-06, |
| "loss": 1.6687, |
| "mean_token_accuracy": 0.6646548688411713, |
| "num_tokens": 30398817.0, |
| "step": 8335 |
| }, |
| { |
| "epoch": 64.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 8.305000000000001e-06, |
| "loss": 1.8145, |
| "mean_token_accuracy": 0.6381383389234543, |
| "num_tokens": 30416271.0, |
| "step": 8340 |
| }, |
| { |
| "epoch": 64.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 8.28e-06, |
| "loss": 1.7004, |
| "mean_token_accuracy": 0.6580903172492981, |
| "num_tokens": 30434471.0, |
| "step": 8345 |
| }, |
| { |
| "epoch": 64.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 8.255e-06, |
| "loss": 1.6885, |
| "mean_token_accuracy": 0.6602605700492858, |
| "num_tokens": 30453103.0, |
| "step": 8350 |
| }, |
| { |
| "epoch": 64.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 8.23e-06, |
| "loss": 1.6849, |
| "mean_token_accuracy": 0.6583765029907227, |
| "num_tokens": 30471800.0, |
| "step": 8355 |
| }, |
| { |
| "epoch": 64.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 8.205e-06, |
| "loss": 1.7548, |
| "mean_token_accuracy": 0.648283451795578, |
| "num_tokens": 30490316.0, |
| "step": 8360 |
| }, |
| { |
| "epoch": 64.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 8.18e-06, |
| "loss": 1.7349, |
| "mean_token_accuracy": 0.6548443913459778, |
| "num_tokens": 30509159.0, |
| "step": 8365 |
| }, |
| { |
| "epoch": 64.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 8.155e-06, |
| "loss": 1.788, |
| "mean_token_accuracy": 0.6422168552875519, |
| "num_tokens": 30526994.0, |
| "step": 8370 |
| }, |
| { |
| "epoch": 64.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 8.13e-06, |
| "loss": 1.7371, |
| "mean_token_accuracy": 0.6487582057714463, |
| "num_tokens": 30545290.0, |
| "step": 8375 |
| }, |
| { |
| "epoch": 64.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 8.105e-06, |
| "loss": 1.7529, |
| "mean_token_accuracy": 0.649326092004776, |
| "num_tokens": 30563300.0, |
| "step": 8380 |
| }, |
| { |
| "epoch": 64.5, |
| "grad_norm": 0.0, |
| "learning_rate": 8.08e-06, |
| "loss": 1.7657, |
| "mean_token_accuracy": 0.6501888036727905, |
| "num_tokens": 30581217.0, |
| "step": 8385 |
| }, |
| { |
| "epoch": 64.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 8.055e-06, |
| "loss": 1.6891, |
| "mean_token_accuracy": 0.6601796567440033, |
| "num_tokens": 30600060.0, |
| "step": 8390 |
| }, |
| { |
| "epoch": 64.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 8.03e-06, |
| "loss": 1.7908, |
| "mean_token_accuracy": 0.6441485762596131, |
| "num_tokens": 30617690.0, |
| "step": 8395 |
| }, |
| { |
| "epoch": 64.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 8.005e-06, |
| "loss": 1.7446, |
| "mean_token_accuracy": 0.6539681345224381, |
| "num_tokens": 30636302.0, |
| "step": 8400 |
| }, |
| { |
| "epoch": 64.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 7.98e-06, |
| "loss": 1.732, |
| "mean_token_accuracy": 0.653337499499321, |
| "num_tokens": 30654393.0, |
| "step": 8405 |
| }, |
| { |
| "epoch": 64.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 7.955e-06, |
| "loss": 1.7691, |
| "mean_token_accuracy": 0.649465736746788, |
| "num_tokens": 30672362.0, |
| "step": 8410 |
| }, |
| { |
| "epoch": 64.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 7.93e-06, |
| "loss": 1.7322, |
| "mean_token_accuracy": 0.6514311760663987, |
| "num_tokens": 30690881.0, |
| "step": 8415 |
| }, |
| { |
| "epoch": 64.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 7.905e-06, |
| "loss": 1.7387, |
| "mean_token_accuracy": 0.6512942612171173, |
| "num_tokens": 30708498.0, |
| "step": 8420 |
| }, |
| { |
| "epoch": 64.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 7.879999999999999e-06, |
| "loss": 1.7456, |
| "mean_token_accuracy": 0.644093918800354, |
| "num_tokens": 30726723.0, |
| "step": 8425 |
| }, |
| { |
| "epoch": 64.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 7.855e-06, |
| "loss": 1.7664, |
| "mean_token_accuracy": 0.6423812568187713, |
| "num_tokens": 30745189.0, |
| "step": 8430 |
| }, |
| { |
| "epoch": 64.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 7.83e-06, |
| "loss": 1.7892, |
| "mean_token_accuracy": 0.6339890867471695, |
| "num_tokens": 30762976.0, |
| "step": 8435 |
| }, |
| { |
| "epoch": 64.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 7.805e-06, |
| "loss": 1.7893, |
| "mean_token_accuracy": 0.6393702328205109, |
| "num_tokens": 30779931.0, |
| "step": 8440 |
| }, |
| { |
| "epoch": 64.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 7.78e-06, |
| "loss": 1.6928, |
| "mean_token_accuracy": 0.6551145076751709, |
| "num_tokens": 30798591.0, |
| "step": 8445 |
| }, |
| { |
| "epoch": 65.0, |
| "grad_norm": 0.0, |
| "learning_rate": 7.755e-06, |
| "loss": 1.7471, |
| "mean_token_accuracy": 0.6472322821617127, |
| "num_tokens": 30816825.0, |
| "step": 8450 |
| }, |
| { |
| "epoch": 65.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 7.73e-06, |
| "loss": 1.7898, |
| "mean_token_accuracy": 0.6398821353912354, |
| "num_tokens": 30834858.0, |
| "step": 8455 |
| }, |
| { |
| "epoch": 65.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 7.705e-06, |
| "loss": 1.7869, |
| "mean_token_accuracy": 0.6389808714389801, |
| "num_tokens": 30853070.0, |
| "step": 8460 |
| }, |
| { |
| "epoch": 65.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 7.68e-06, |
| "loss": 1.697, |
| "mean_token_accuracy": 0.6605163842439652, |
| "num_tokens": 30872040.0, |
| "step": 8465 |
| }, |
| { |
| "epoch": 65.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 7.655e-06, |
| "loss": 1.7485, |
| "mean_token_accuracy": 0.6444921791553497, |
| "num_tokens": 30890649.0, |
| "step": 8470 |
| }, |
| { |
| "epoch": 65.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 7.630000000000001e-06, |
| "loss": 1.708, |
| "mean_token_accuracy": 0.6534979492425919, |
| "num_tokens": 30909370.0, |
| "step": 8475 |
| }, |
| { |
| "epoch": 65.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 7.605000000000001e-06, |
| "loss": 1.6808, |
| "mean_token_accuracy": 0.667791223526001, |
| "num_tokens": 30928442.0, |
| "step": 8480 |
| }, |
| { |
| "epoch": 65.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 7.580000000000001e-06, |
| "loss": 1.7272, |
| "mean_token_accuracy": 0.6559307098388671, |
| "num_tokens": 30946632.0, |
| "step": 8485 |
| }, |
| { |
| "epoch": 65.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 7.555000000000001e-06, |
| "loss": 1.7644, |
| "mean_token_accuracy": 0.6492012023925782, |
| "num_tokens": 30964389.0, |
| "step": 8490 |
| }, |
| { |
| "epoch": 65.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 7.530000000000001e-06, |
| "loss": 1.6994, |
| "mean_token_accuracy": 0.6621949762105942, |
| "num_tokens": 30982829.0, |
| "step": 8495 |
| }, |
| { |
| "epoch": 65.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 7.505000000000001e-06, |
| "loss": 1.6986, |
| "mean_token_accuracy": 0.6562123388051987, |
| "num_tokens": 31001822.0, |
| "step": 8500 |
| }, |
| { |
| "epoch": 65.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 7.480000000000001e-06, |
| "loss": 1.7202, |
| "mean_token_accuracy": 0.6557656317949295, |
| "num_tokens": 31020258.0, |
| "step": 8505 |
| }, |
| { |
| "epoch": 65.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 7.455000000000001e-06, |
| "loss": 1.7205, |
| "mean_token_accuracy": 0.6543114900588989, |
| "num_tokens": 31038288.0, |
| "step": 8510 |
| }, |
| { |
| "epoch": 65.5, |
| "grad_norm": 0.0, |
| "learning_rate": 7.430000000000001e-06, |
| "loss": 1.8082, |
| "mean_token_accuracy": 0.6374682754278183, |
| "num_tokens": 31055635.0, |
| "step": 8515 |
| }, |
| { |
| "epoch": 65.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 7.405000000000001e-06, |
| "loss": 1.718, |
| "mean_token_accuracy": 0.6601694732904434, |
| "num_tokens": 31073830.0, |
| "step": 8520 |
| }, |
| { |
| "epoch": 65.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 7.3800000000000005e-06, |
| "loss": 1.6752, |
| "mean_token_accuracy": 0.6580190539360047, |
| "num_tokens": 31092611.0, |
| "step": 8525 |
| }, |
| { |
| "epoch": 65.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 7.355000000000001e-06, |
| "loss": 1.8009, |
| "mean_token_accuracy": 0.6394985377788543, |
| "num_tokens": 31110126.0, |
| "step": 8530 |
| }, |
| { |
| "epoch": 65.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 7.330000000000001e-06, |
| "loss": 1.7496, |
| "mean_token_accuracy": 0.6522016197443008, |
| "num_tokens": 31128158.0, |
| "step": 8535 |
| }, |
| { |
| "epoch": 65.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 7.305e-06, |
| "loss": 1.7608, |
| "mean_token_accuracy": 0.6444296389818192, |
| "num_tokens": 31146338.0, |
| "step": 8540 |
| }, |
| { |
| "epoch": 65.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 7.280000000000001e-06, |
| "loss": 1.7123, |
| "mean_token_accuracy": 0.6542566984891891, |
| "num_tokens": 31165043.0, |
| "step": 8545 |
| }, |
| { |
| "epoch": 65.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 7.255000000000001e-06, |
| "loss": 1.7491, |
| "mean_token_accuracy": 0.6535750329494476, |
| "num_tokens": 31183433.0, |
| "step": 8550 |
| }, |
| { |
| "epoch": 65.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 7.230000000000001e-06, |
| "loss": 1.729, |
| "mean_token_accuracy": 0.6469509840011597, |
| "num_tokens": 31201276.0, |
| "step": 8555 |
| }, |
| { |
| "epoch": 65.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 7.2050000000000005e-06, |
| "loss": 1.7259, |
| "mean_token_accuracy": 0.6532280802726745, |
| "num_tokens": 31219465.0, |
| "step": 8560 |
| }, |
| { |
| "epoch": 65.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 7.180000000000001e-06, |
| "loss": 1.7995, |
| "mean_token_accuracy": 0.6446830004453659, |
| "num_tokens": 31236938.0, |
| "step": 8565 |
| }, |
| { |
| "epoch": 65.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 7.155000000000001e-06, |
| "loss": 1.7361, |
| "mean_token_accuracy": 0.6507874131202698, |
| "num_tokens": 31255303.0, |
| "step": 8570 |
| }, |
| { |
| "epoch": 65.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 7.13e-06, |
| "loss": 1.7875, |
| "mean_token_accuracy": 0.6378930985927582, |
| "num_tokens": 31273030.0, |
| "step": 8575 |
| }, |
| { |
| "epoch": 66.0, |
| "grad_norm": 0.0, |
| "learning_rate": 7.105000000000001e-06, |
| "loss": 1.7661, |
| "mean_token_accuracy": 0.6479190230369568, |
| "num_tokens": 31290930.0, |
| "step": 8580 |
| }, |
| { |
| "epoch": 66.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 7.080000000000001e-06, |
| "loss": 1.694, |
| "mean_token_accuracy": 0.6604873329401016, |
| "num_tokens": 31309688.0, |
| "step": 8585 |
| }, |
| { |
| "epoch": 66.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 7.055e-06, |
| "loss": 1.768, |
| "mean_token_accuracy": 0.6487796038389206, |
| "num_tokens": 31327646.0, |
| "step": 8590 |
| }, |
| { |
| "epoch": 66.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 7.0300000000000005e-06, |
| "loss": 1.7308, |
| "mean_token_accuracy": 0.6545611709356308, |
| "num_tokens": 31346349.0, |
| "step": 8595 |
| }, |
| { |
| "epoch": 66.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 7.005000000000001e-06, |
| "loss": 1.7115, |
| "mean_token_accuracy": 0.6564001739025116, |
| "num_tokens": 31364569.0, |
| "step": 8600 |
| }, |
| { |
| "epoch": 66.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 6.98e-06, |
| "loss": 1.7538, |
| "mean_token_accuracy": 0.6477825731039047, |
| "num_tokens": 31382670.0, |
| "step": 8605 |
| }, |
| { |
| "epoch": 66.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 6.955e-06, |
| "loss": 1.7799, |
| "mean_token_accuracy": 0.6439489036798477, |
| "num_tokens": 31400493.0, |
| "step": 8610 |
| }, |
| { |
| "epoch": 66.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 6.9300000000000006e-06, |
| "loss": 1.7762, |
| "mean_token_accuracy": 0.6380701899528504, |
| "num_tokens": 31419137.0, |
| "step": 8615 |
| }, |
| { |
| "epoch": 66.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 6.905e-06, |
| "loss": 1.5876, |
| "mean_token_accuracy": 0.6766410171985626, |
| "num_tokens": 31439373.0, |
| "step": 8620 |
| }, |
| { |
| "epoch": 66.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 6.88e-06, |
| "loss": 1.7701, |
| "mean_token_accuracy": 0.6451247215270997, |
| "num_tokens": 31457394.0, |
| "step": 8625 |
| }, |
| { |
| "epoch": 66.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 6.8550000000000004e-06, |
| "loss": 1.7894, |
| "mean_token_accuracy": 0.6450399339199067, |
| "num_tokens": 31474686.0, |
| "step": 8630 |
| }, |
| { |
| "epoch": 66.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 6.830000000000001e-06, |
| "loss": 1.7736, |
| "mean_token_accuracy": 0.6471458494663238, |
| "num_tokens": 31492406.0, |
| "step": 8635 |
| }, |
| { |
| "epoch": 66.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 6.805e-06, |
| "loss": 1.6584, |
| "mean_token_accuracy": 0.6657146126031875, |
| "num_tokens": 31511342.0, |
| "step": 8640 |
| }, |
| { |
| "epoch": 66.5, |
| "grad_norm": 0.0, |
| "learning_rate": 6.78e-06, |
| "loss": 1.7357, |
| "mean_token_accuracy": 0.6563478767871856, |
| "num_tokens": 31529627.0, |
| "step": 8645 |
| }, |
| { |
| "epoch": 66.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 6.7550000000000005e-06, |
| "loss": 1.7731, |
| "mean_token_accuracy": 0.6471589595079422, |
| "num_tokens": 31546956.0, |
| "step": 8650 |
| }, |
| { |
| "epoch": 66.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 6.73e-06, |
| "loss": 1.7086, |
| "mean_token_accuracy": 0.6558182656764984, |
| "num_tokens": 31565836.0, |
| "step": 8655 |
| }, |
| { |
| "epoch": 66.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 6.705e-06, |
| "loss": 1.7636, |
| "mean_token_accuracy": 0.6479879409074784, |
| "num_tokens": 31583762.0, |
| "step": 8660 |
| }, |
| { |
| "epoch": 66.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 6.68e-06, |
| "loss": 1.7374, |
| "mean_token_accuracy": 0.6501346677541733, |
| "num_tokens": 31601864.0, |
| "step": 8665 |
| }, |
| { |
| "epoch": 66.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 6.655e-06, |
| "loss": 1.7447, |
| "mean_token_accuracy": 0.6492434620857239, |
| "num_tokens": 31619969.0, |
| "step": 8670 |
| }, |
| { |
| "epoch": 66.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 6.63e-06, |
| "loss": 1.7562, |
| "mean_token_accuracy": 0.6430635213851928, |
| "num_tokens": 31638170.0, |
| "step": 8675 |
| }, |
| { |
| "epoch": 66.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 6.605e-06, |
| "loss": 1.6433, |
| "mean_token_accuracy": 0.6654472947120667, |
| "num_tokens": 31657320.0, |
| "step": 8680 |
| }, |
| { |
| "epoch": 66.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 6.58e-06, |
| "loss": 1.7904, |
| "mean_token_accuracy": 0.6401758790016174, |
| "num_tokens": 31674990.0, |
| "step": 8685 |
| }, |
| { |
| "epoch": 66.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 6.555e-06, |
| "loss": 1.7889, |
| "mean_token_accuracy": 0.6432503432035446, |
| "num_tokens": 31692632.0, |
| "step": 8690 |
| }, |
| { |
| "epoch": 66.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 6.53e-06, |
| "loss": 1.8144, |
| "mean_token_accuracy": 0.6369650483131408, |
| "num_tokens": 31710059.0, |
| "step": 8695 |
| }, |
| { |
| "epoch": 66.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 6.505e-06, |
| "loss": 1.6865, |
| "mean_token_accuracy": 0.6562155693769455, |
| "num_tokens": 31729323.0, |
| "step": 8700 |
| }, |
| { |
| "epoch": 66.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 6.48e-06, |
| "loss": 1.8384, |
| "mean_token_accuracy": 0.6272630810737609, |
| "num_tokens": 31746601.0, |
| "step": 8705 |
| }, |
| { |
| "epoch": 67.0, |
| "grad_norm": 0.0, |
| "learning_rate": 6.455e-06, |
| "loss": 1.6929, |
| "mean_token_accuracy": 0.6641561448574066, |
| "num_tokens": 31765035.0, |
| "step": 8710 |
| }, |
| { |
| "epoch": 67.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 6.43e-06, |
| "loss": 1.754, |
| "mean_token_accuracy": 0.6518040865659713, |
| "num_tokens": 31783454.0, |
| "step": 8715 |
| }, |
| { |
| "epoch": 67.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 6.405e-06, |
| "loss": 1.7343, |
| "mean_token_accuracy": 0.6550649493932724, |
| "num_tokens": 31801583.0, |
| "step": 8720 |
| }, |
| { |
| "epoch": 67.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 6.38e-06, |
| "loss": 1.7363, |
| "mean_token_accuracy": 0.6507348865270615, |
| "num_tokens": 31819899.0, |
| "step": 8725 |
| }, |
| { |
| "epoch": 67.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 6.355e-06, |
| "loss": 1.7462, |
| "mean_token_accuracy": 0.6472515076398849, |
| "num_tokens": 31838133.0, |
| "step": 8730 |
| }, |
| { |
| "epoch": 67.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 6.3299999999999995e-06, |
| "loss": 1.7469, |
| "mean_token_accuracy": 0.6492879390716553, |
| "num_tokens": 31856487.0, |
| "step": 8735 |
| }, |
| { |
| "epoch": 67.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 6.305e-06, |
| "loss": 1.7255, |
| "mean_token_accuracy": 0.6533631831407547, |
| "num_tokens": 31874474.0, |
| "step": 8740 |
| }, |
| { |
| "epoch": 67.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 6.28e-06, |
| "loss": 1.6755, |
| "mean_token_accuracy": 0.6652740865945816, |
| "num_tokens": 31893418.0, |
| "step": 8745 |
| }, |
| { |
| "epoch": 67.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 6.254999999999999e-06, |
| "loss": 1.7896, |
| "mean_token_accuracy": 0.639714989066124, |
| "num_tokens": 31911351.0, |
| "step": 8750 |
| }, |
| { |
| "epoch": 67.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 6.2300000000000005e-06, |
| "loss": 1.7045, |
| "mean_token_accuracy": 0.653382807970047, |
| "num_tokens": 31930240.0, |
| "step": 8755 |
| }, |
| { |
| "epoch": 67.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 6.205000000000001e-06, |
| "loss": 1.8023, |
| "mean_token_accuracy": 0.6487904995679855, |
| "num_tokens": 31947197.0, |
| "step": 8760 |
| }, |
| { |
| "epoch": 67.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 6.18e-06, |
| "loss": 1.7365, |
| "mean_token_accuracy": 0.6566553086042404, |
| "num_tokens": 31965045.0, |
| "step": 8765 |
| }, |
| { |
| "epoch": 67.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 6.155e-06, |
| "loss": 1.7982, |
| "mean_token_accuracy": 0.6392051458358765, |
| "num_tokens": 31982476.0, |
| "step": 8770 |
| }, |
| { |
| "epoch": 67.5, |
| "grad_norm": 0.0, |
| "learning_rate": 6.130000000000001e-06, |
| "loss": 1.7112, |
| "mean_token_accuracy": 0.6520894914865494, |
| "num_tokens": 32001093.0, |
| "step": 8775 |
| }, |
| { |
| "epoch": 67.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 6.105e-06, |
| "loss": 1.7363, |
| "mean_token_accuracy": 0.651617681980133, |
| "num_tokens": 32019131.0, |
| "step": 8780 |
| }, |
| { |
| "epoch": 67.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 6.08e-06, |
| "loss": 1.7092, |
| "mean_token_accuracy": 0.6570148169994354, |
| "num_tokens": 32037428.0, |
| "step": 8785 |
| }, |
| { |
| "epoch": 67.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 6.0550000000000005e-06, |
| "loss": 1.7594, |
| "mean_token_accuracy": 0.6468034237623215, |
| "num_tokens": 32055491.0, |
| "step": 8790 |
| }, |
| { |
| "epoch": 67.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 6.03e-06, |
| "loss": 1.7737, |
| "mean_token_accuracy": 0.6413617432117462, |
| "num_tokens": 32073407.0, |
| "step": 8795 |
| }, |
| { |
| "epoch": 67.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 6.005e-06, |
| "loss": 1.733, |
| "mean_token_accuracy": 0.6513085424900055, |
| "num_tokens": 32092096.0, |
| "step": 8800 |
| }, |
| { |
| "epoch": 67.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 5.98e-06, |
| "loss": 1.6929, |
| "mean_token_accuracy": 0.6587874621152878, |
| "num_tokens": 32110873.0, |
| "step": 8805 |
| }, |
| { |
| "epoch": 67.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 5.955000000000001e-06, |
| "loss": 1.6503, |
| "mean_token_accuracy": 0.6717524290084839, |
| "num_tokens": 32129582.0, |
| "step": 8810 |
| }, |
| { |
| "epoch": 67.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 5.93e-06, |
| "loss": 1.724, |
| "mean_token_accuracy": 0.6568674236536026, |
| "num_tokens": 32148017.0, |
| "step": 8815 |
| }, |
| { |
| "epoch": 67.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 5.905e-06, |
| "loss": 1.7912, |
| "mean_token_accuracy": 0.642418372631073, |
| "num_tokens": 32165982.0, |
| "step": 8820 |
| }, |
| { |
| "epoch": 67.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 5.8800000000000005e-06, |
| "loss": 1.6831, |
| "mean_token_accuracy": 0.6590377599000931, |
| "num_tokens": 32185226.0, |
| "step": 8825 |
| }, |
| { |
| "epoch": 67.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 5.855e-06, |
| "loss": 1.8126, |
| "mean_token_accuracy": 0.6364179074764251, |
| "num_tokens": 32202532.0, |
| "step": 8830 |
| }, |
| { |
| "epoch": 67.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 5.83e-06, |
| "loss": 1.842, |
| "mean_token_accuracy": 0.629499414563179, |
| "num_tokens": 32219939.0, |
| "step": 8835 |
| }, |
| { |
| "epoch": 68.0, |
| "grad_norm": 0.0, |
| "learning_rate": 5.805e-06, |
| "loss": 1.7138, |
| "mean_token_accuracy": 0.651302108168602, |
| "num_tokens": 32239140.0, |
| "step": 8840 |
| }, |
| { |
| "epoch": 68.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 5.78e-06, |
| "loss": 1.7806, |
| "mean_token_accuracy": 0.6400546163320542, |
| "num_tokens": 32257043.0, |
| "step": 8845 |
| }, |
| { |
| "epoch": 68.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 5.755e-06, |
| "loss": 1.7155, |
| "mean_token_accuracy": 0.6579635113477706, |
| "num_tokens": 32275029.0, |
| "step": 8850 |
| }, |
| { |
| "epoch": 68.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 5.73e-06, |
| "loss": 1.7521, |
| "mean_token_accuracy": 0.6502374112606049, |
| "num_tokens": 32292737.0, |
| "step": 8855 |
| }, |
| { |
| "epoch": 68.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 5.705e-06, |
| "loss": 1.7176, |
| "mean_token_accuracy": 0.6563062936067581, |
| "num_tokens": 32311081.0, |
| "step": 8860 |
| }, |
| { |
| "epoch": 68.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 5.680000000000001e-06, |
| "loss": 1.7643, |
| "mean_token_accuracy": 0.6508367985486985, |
| "num_tokens": 32328973.0, |
| "step": 8865 |
| }, |
| { |
| "epoch": 68.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 5.655000000000001e-06, |
| "loss": 1.7741, |
| "mean_token_accuracy": 0.6429946154356003, |
| "num_tokens": 32346455.0, |
| "step": 8870 |
| }, |
| { |
| "epoch": 68.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 5.63e-06, |
| "loss": 1.7832, |
| "mean_token_accuracy": 0.6436864167451859, |
| "num_tokens": 32364893.0, |
| "step": 8875 |
| }, |
| { |
| "epoch": 68.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 5.6050000000000005e-06, |
| "loss": 1.6646, |
| "mean_token_accuracy": 0.66810921728611, |
| "num_tokens": 32383625.0, |
| "step": 8880 |
| }, |
| { |
| "epoch": 68.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 5.580000000000001e-06, |
| "loss": 1.7547, |
| "mean_token_accuracy": 0.6530828505754471, |
| "num_tokens": 32401543.0, |
| "step": 8885 |
| }, |
| { |
| "epoch": 68.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 5.555e-06, |
| "loss": 1.7685, |
| "mean_token_accuracy": 0.6441059678792953, |
| "num_tokens": 32419418.0, |
| "step": 8890 |
| }, |
| { |
| "epoch": 68.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 5.53e-06, |
| "loss": 1.8152, |
| "mean_token_accuracy": 0.6348958969116211, |
| "num_tokens": 32436930.0, |
| "step": 8895 |
| }, |
| { |
| "epoch": 68.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 5.505000000000001e-06, |
| "loss": 1.7356, |
| "mean_token_accuracy": 0.6481510013341903, |
| "num_tokens": 32455486.0, |
| "step": 8900 |
| }, |
| { |
| "epoch": 68.5, |
| "grad_norm": 0.0, |
| "learning_rate": 5.48e-06, |
| "loss": 1.6469, |
| "mean_token_accuracy": 0.6719613999128342, |
| "num_tokens": 32475172.0, |
| "step": 8905 |
| }, |
| { |
| "epoch": 68.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 5.455e-06, |
| "loss": 1.7171, |
| "mean_token_accuracy": 0.6506960332393646, |
| "num_tokens": 32493791.0, |
| "step": 8910 |
| }, |
| { |
| "epoch": 68.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 5.4300000000000005e-06, |
| "loss": 1.7171, |
| "mean_token_accuracy": 0.6513607323169708, |
| "num_tokens": 32512069.0, |
| "step": 8915 |
| }, |
| { |
| "epoch": 68.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 5.405e-06, |
| "loss": 1.6989, |
| "mean_token_accuracy": 0.6555921375751496, |
| "num_tokens": 32530853.0, |
| "step": 8920 |
| }, |
| { |
| "epoch": 68.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 5.38e-06, |
| "loss": 1.7921, |
| "mean_token_accuracy": 0.6404555082321167, |
| "num_tokens": 32547929.0, |
| "step": 8925 |
| }, |
| { |
| "epoch": 68.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 5.355e-06, |
| "loss": 1.7402, |
| "mean_token_accuracy": 0.6483048588037491, |
| "num_tokens": 32566393.0, |
| "step": 8930 |
| }, |
| { |
| "epoch": 68.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 5.330000000000001e-06, |
| "loss": 1.7599, |
| "mean_token_accuracy": 0.6482443422079086, |
| "num_tokens": 32584222.0, |
| "step": 8935 |
| }, |
| { |
| "epoch": 68.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 5.305e-06, |
| "loss": 1.7298, |
| "mean_token_accuracy": 0.6541435539722442, |
| "num_tokens": 32602652.0, |
| "step": 8940 |
| }, |
| { |
| "epoch": 68.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 5.28e-06, |
| "loss": 1.8005, |
| "mean_token_accuracy": 0.6381476581096649, |
| "num_tokens": 32620436.0, |
| "step": 8945 |
| }, |
| { |
| "epoch": 68.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 5.2550000000000005e-06, |
| "loss": 1.7707, |
| "mean_token_accuracy": 0.6429592072963715, |
| "num_tokens": 32638580.0, |
| "step": 8950 |
| }, |
| { |
| "epoch": 68.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 5.23e-06, |
| "loss": 1.7375, |
| "mean_token_accuracy": 0.6491723597049713, |
| "num_tokens": 32657574.0, |
| "step": 8955 |
| }, |
| { |
| "epoch": 68.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 5.205e-06, |
| "loss": 1.6892, |
| "mean_token_accuracy": 0.6607306629419327, |
| "num_tokens": 32676715.0, |
| "step": 8960 |
| }, |
| { |
| "epoch": 68.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 5.18e-06, |
| "loss": 1.7511, |
| "mean_token_accuracy": 0.6513434231281281, |
| "num_tokens": 32694775.0, |
| "step": 8965 |
| }, |
| { |
| "epoch": 69.0, |
| "grad_norm": 0.0, |
| "learning_rate": 5.155e-06, |
| "loss": 1.7027, |
| "mean_token_accuracy": 0.6560029089450836, |
| "num_tokens": 32713245.0, |
| "step": 8970 |
| }, |
| { |
| "epoch": 69.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 5.13e-06, |
| "loss": 1.7155, |
| "mean_token_accuracy": 0.6513414025306702, |
| "num_tokens": 32732025.0, |
| "step": 8975 |
| }, |
| { |
| "epoch": 69.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 5.105e-06, |
| "loss": 1.7302, |
| "mean_token_accuracy": 0.6497060090303421, |
| "num_tokens": 32751198.0, |
| "step": 8980 |
| }, |
| { |
| "epoch": 69.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 5.08e-06, |
| "loss": 1.7842, |
| "mean_token_accuracy": 0.6456437230110168, |
| "num_tokens": 32768638.0, |
| "step": 8985 |
| }, |
| { |
| "epoch": 69.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 5.055e-06, |
| "loss": 1.7089, |
| "mean_token_accuracy": 0.6585129857063293, |
| "num_tokens": 32786796.0, |
| "step": 8990 |
| }, |
| { |
| "epoch": 69.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 5.03e-06, |
| "loss": 1.8112, |
| "mean_token_accuracy": 0.6334796369075775, |
| "num_tokens": 32804348.0, |
| "step": 8995 |
| }, |
| { |
| "epoch": 69.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 5.005e-06, |
| "loss": 1.7595, |
| "mean_token_accuracy": 0.6397767275571823, |
| "num_tokens": 32822708.0, |
| "step": 9000 |
| }, |
| { |
| "epoch": 69.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.98e-06, |
| "loss": 1.724, |
| "mean_token_accuracy": 0.6544425517320633, |
| "num_tokens": 32841355.0, |
| "step": 9005 |
| }, |
| { |
| "epoch": 69.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.955e-06, |
| "loss": 1.7819, |
| "mean_token_accuracy": 0.6387849062681198, |
| "num_tokens": 32859210.0, |
| "step": 9010 |
| }, |
| { |
| "epoch": 69.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 4.93e-06, |
| "loss": 1.717, |
| "mean_token_accuracy": 0.6556219637393952, |
| "num_tokens": 32877341.0, |
| "step": 9015 |
| }, |
| { |
| "epoch": 69.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9050000000000005e-06, |
| "loss": 1.7134, |
| "mean_token_accuracy": 0.6564109534025192, |
| "num_tokens": 32895282.0, |
| "step": 9020 |
| }, |
| { |
| "epoch": 69.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.880000000000001e-06, |
| "loss": 1.7273, |
| "mean_token_accuracy": 0.6536680698394776, |
| "num_tokens": 32912961.0, |
| "step": 9025 |
| }, |
| { |
| "epoch": 69.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.855e-06, |
| "loss": 1.7511, |
| "mean_token_accuracy": 0.645021739602089, |
| "num_tokens": 32931470.0, |
| "step": 9030 |
| }, |
| { |
| "epoch": 69.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.83e-06, |
| "loss": 1.8157, |
| "mean_token_accuracy": 0.6354044616222382, |
| "num_tokens": 32949088.0, |
| "step": 9035 |
| }, |
| { |
| "epoch": 69.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.805000000000001e-06, |
| "loss": 1.7142, |
| "mean_token_accuracy": 0.6553671360015869, |
| "num_tokens": 32967668.0, |
| "step": 9040 |
| }, |
| { |
| "epoch": 69.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.780000000000001e-06, |
| "loss": 1.7314, |
| "mean_token_accuracy": 0.6554368674755097, |
| "num_tokens": 32985426.0, |
| "step": 9045 |
| }, |
| { |
| "epoch": 69.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 4.755e-06, |
| "loss": 1.7828, |
| "mean_token_accuracy": 0.6435191184282303, |
| "num_tokens": 33003912.0, |
| "step": 9050 |
| }, |
| { |
| "epoch": 69.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7300000000000005e-06, |
| "loss": 1.6889, |
| "mean_token_accuracy": 0.6613710403442383, |
| "num_tokens": 33022739.0, |
| "step": 9055 |
| }, |
| { |
| "epoch": 69.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.705000000000001e-06, |
| "loss": 1.7765, |
| "mean_token_accuracy": 0.6455245047807694, |
| "num_tokens": 33040528.0, |
| "step": 9060 |
| }, |
| { |
| "epoch": 69.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.68e-06, |
| "loss": 1.734, |
| "mean_token_accuracy": 0.6510042399168015, |
| "num_tokens": 33058790.0, |
| "step": 9065 |
| }, |
| { |
| "epoch": 69.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.655e-06, |
| "loss": 1.7801, |
| "mean_token_accuracy": 0.6483240693807601, |
| "num_tokens": 33076529.0, |
| "step": 9070 |
| }, |
| { |
| "epoch": 69.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6300000000000006e-06, |
| "loss": 1.706, |
| "mean_token_accuracy": 0.6595020055770874, |
| "num_tokens": 33095203.0, |
| "step": 9075 |
| }, |
| { |
| "epoch": 69.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 4.605e-06, |
| "loss": 1.7031, |
| "mean_token_accuracy": 0.6580096065998078, |
| "num_tokens": 33113524.0, |
| "step": 9080 |
| }, |
| { |
| "epoch": 69.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 4.58e-06, |
| "loss": 1.7544, |
| "mean_token_accuracy": 0.6489035278558731, |
| "num_tokens": 33131870.0, |
| "step": 9085 |
| }, |
| { |
| "epoch": 69.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5550000000000004e-06, |
| "loss": 1.7172, |
| "mean_token_accuracy": 0.6561585962772369, |
| "num_tokens": 33150722.0, |
| "step": 9090 |
| }, |
| { |
| "epoch": 69.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.53e-06, |
| "loss": 1.6798, |
| "mean_token_accuracy": 0.6648322910070419, |
| "num_tokens": 33169488.0, |
| "step": 9095 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 0.0, |
| "learning_rate": 4.505e-06, |
| "loss": 1.7484, |
| "mean_token_accuracy": 0.6524565041065216, |
| "num_tokens": 33187350.0, |
| "step": 9100 |
| }, |
| { |
| "epoch": 70.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.48e-06, |
| "loss": 1.7404, |
| "mean_token_accuracy": 0.6531502634286881, |
| "num_tokens": 33205235.0, |
| "step": 9105 |
| }, |
| { |
| "epoch": 70.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4550000000000005e-06, |
| "loss": 1.6612, |
| "mean_token_accuracy": 0.6699670910835266, |
| "num_tokens": 33224500.0, |
| "step": 9110 |
| }, |
| { |
| "epoch": 70.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 4.43e-06, |
| "loss": 1.7034, |
| "mean_token_accuracy": 0.6562402963638305, |
| "num_tokens": 33243151.0, |
| "step": 9115 |
| }, |
| { |
| "epoch": 70.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 4.405e-06, |
| "loss": 1.7118, |
| "mean_token_accuracy": 0.6556406348943711, |
| "num_tokens": 33261667.0, |
| "step": 9120 |
| }, |
| { |
| "epoch": 70.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.38e-06, |
| "loss": 1.7115, |
| "mean_token_accuracy": 0.6550081551074982, |
| "num_tokens": 33280058.0, |
| "step": 9125 |
| }, |
| { |
| "epoch": 70.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.355e-06, |
| "loss": 1.6983, |
| "mean_token_accuracy": 0.6546464413404465, |
| "num_tokens": 33299469.0, |
| "step": 9130 |
| }, |
| { |
| "epoch": 70.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.33e-06, |
| "loss": 1.7084, |
| "mean_token_accuracy": 0.6555704712867737, |
| "num_tokens": 33317456.0, |
| "step": 9135 |
| }, |
| { |
| "epoch": 70.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.305e-06, |
| "loss": 1.7392, |
| "mean_token_accuracy": 0.649817219376564, |
| "num_tokens": 33336143.0, |
| "step": 9140 |
| }, |
| { |
| "epoch": 70.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 4.28e-06, |
| "loss": 1.7933, |
| "mean_token_accuracy": 0.6456594437360763, |
| "num_tokens": 33353477.0, |
| "step": 9145 |
| }, |
| { |
| "epoch": 70.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 4.255e-06, |
| "loss": 1.7628, |
| "mean_token_accuracy": 0.6442059248685836, |
| "num_tokens": 33371827.0, |
| "step": 9150 |
| }, |
| { |
| "epoch": 70.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 4.23e-06, |
| "loss": 1.784, |
| "mean_token_accuracy": 0.6425058543682098, |
| "num_tokens": 33390073.0, |
| "step": 9155 |
| }, |
| { |
| "epoch": 70.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 4.2049999999999996e-06, |
| "loss": 1.8011, |
| "mean_token_accuracy": 0.6376978397369385, |
| "num_tokens": 33408396.0, |
| "step": 9160 |
| }, |
| { |
| "epoch": 70.5, |
| "grad_norm": 0.0, |
| "learning_rate": 4.18e-06, |
| "loss": 1.7991, |
| "mean_token_accuracy": 0.6374391674995422, |
| "num_tokens": 33425771.0, |
| "step": 9165 |
| }, |
| { |
| "epoch": 70.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 4.155e-06, |
| "loss": 1.8244, |
| "mean_token_accuracy": 0.6361591160297394, |
| "num_tokens": 33443218.0, |
| "step": 9170 |
| }, |
| { |
| "epoch": 70.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 4.13e-06, |
| "loss": 1.7396, |
| "mean_token_accuracy": 0.6503497928380966, |
| "num_tokens": 33462100.0, |
| "step": 9175 |
| }, |
| { |
| "epoch": 70.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1050000000000005e-06, |
| "loss": 1.7632, |
| "mean_token_accuracy": 0.6452891588211059, |
| "num_tokens": 33479921.0, |
| "step": 9180 |
| }, |
| { |
| "epoch": 70.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 4.080000000000001e-06, |
| "loss": 1.7462, |
| "mean_token_accuracy": 0.6508474260568619, |
| "num_tokens": 33497623.0, |
| "step": 9185 |
| }, |
| { |
| "epoch": 70.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.055e-06, |
| "loss": 1.7899, |
| "mean_token_accuracy": 0.6407651424407959, |
| "num_tokens": 33515029.0, |
| "step": 9190 |
| }, |
| { |
| "epoch": 70.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.03e-06, |
| "loss": 1.677, |
| "mean_token_accuracy": 0.6583020657300949, |
| "num_tokens": 33533481.0, |
| "step": 9195 |
| }, |
| { |
| "epoch": 70.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.005000000000001e-06, |
| "loss": 1.7266, |
| "mean_token_accuracy": 0.6561136662960052, |
| "num_tokens": 33551333.0, |
| "step": 9200 |
| }, |
| { |
| "epoch": 70.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.98e-06, |
| "loss": 1.793, |
| "mean_token_accuracy": 0.6400943785905838, |
| "num_tokens": 33569017.0, |
| "step": 9205 |
| }, |
| { |
| "epoch": 70.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 3.955e-06, |
| "loss": 1.7658, |
| "mean_token_accuracy": 0.6454756885766983, |
| "num_tokens": 33587052.0, |
| "step": 9210 |
| }, |
| { |
| "epoch": 70.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 3.9300000000000005e-06, |
| "loss": 1.7387, |
| "mean_token_accuracy": 0.6554303973913193, |
| "num_tokens": 33604732.0, |
| "step": 9215 |
| }, |
| { |
| "epoch": 70.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 3.905000000000001e-06, |
| "loss": 1.7101, |
| "mean_token_accuracy": 0.6562773436307907, |
| "num_tokens": 33623715.0, |
| "step": 9220 |
| }, |
| { |
| "epoch": 70.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.88e-06, |
| "loss": 1.6681, |
| "mean_token_accuracy": 0.6675117045640946, |
| "num_tokens": 33642936.0, |
| "step": 9225 |
| }, |
| { |
| "epoch": 71.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.855e-06, |
| "loss": 1.7163, |
| "mean_token_accuracy": 0.6542115420103073, |
| "num_tokens": 33661455.0, |
| "step": 9230 |
| }, |
| { |
| "epoch": 71.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.830000000000001e-06, |
| "loss": 1.7668, |
| "mean_token_accuracy": 0.6549015700817108, |
| "num_tokens": 33679273.0, |
| "step": 9235 |
| }, |
| { |
| "epoch": 71.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 3.8050000000000004e-06, |
| "loss": 1.7505, |
| "mean_token_accuracy": 0.6491267591714859, |
| "num_tokens": 33697022.0, |
| "step": 9240 |
| }, |
| { |
| "epoch": 71.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 1.6273, |
| "mean_token_accuracy": 0.6743998020887375, |
| "num_tokens": 33715961.0, |
| "step": 9245 |
| }, |
| { |
| "epoch": 71.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 3.755e-06, |
| "loss": 1.722, |
| "mean_token_accuracy": 0.657853615283966, |
| "num_tokens": 33734029.0, |
| "step": 9250 |
| }, |
| { |
| "epoch": 71.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.7300000000000003e-06, |
| "loss": 1.7182, |
| "mean_token_accuracy": 0.653509646654129, |
| "num_tokens": 33752377.0, |
| "step": 9255 |
| }, |
| { |
| "epoch": 71.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.705e-06, |
| "loss": 1.7218, |
| "mean_token_accuracy": 0.6558929115533829, |
| "num_tokens": 33770568.0, |
| "step": 9260 |
| }, |
| { |
| "epoch": 71.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.68e-06, |
| "loss": 1.6921, |
| "mean_token_accuracy": 0.6597426235675812, |
| "num_tokens": 33789676.0, |
| "step": 9265 |
| }, |
| { |
| "epoch": 71.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.655e-06, |
| "loss": 1.7497, |
| "mean_token_accuracy": 0.6486911535263061, |
| "num_tokens": 33807428.0, |
| "step": 9270 |
| }, |
| { |
| "epoch": 71.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 3.63e-06, |
| "loss": 1.7888, |
| "mean_token_accuracy": 0.6441873848438263, |
| "num_tokens": 33825382.0, |
| "step": 9275 |
| }, |
| { |
| "epoch": 71.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 3.6050000000000002e-06, |
| "loss": 1.7271, |
| "mean_token_accuracy": 0.6498310655355454, |
| "num_tokens": 33844179.0, |
| "step": 9280 |
| }, |
| { |
| "epoch": 71.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 3.58e-06, |
| "loss": 1.7322, |
| "mean_token_accuracy": 0.6521621882915497, |
| "num_tokens": 33862565.0, |
| "step": 9285 |
| }, |
| { |
| "epoch": 71.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.555e-06, |
| "loss": 1.7472, |
| "mean_token_accuracy": 0.6527814954519272, |
| "num_tokens": 33881172.0, |
| "step": 9290 |
| }, |
| { |
| "epoch": 71.5, |
| "grad_norm": 0.0, |
| "learning_rate": 3.53e-06, |
| "loss": 1.6943, |
| "mean_token_accuracy": 0.661613667011261, |
| "num_tokens": 33899656.0, |
| "step": 9295 |
| }, |
| { |
| "epoch": 71.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.505e-06, |
| "loss": 1.7055, |
| "mean_token_accuracy": 0.647359648346901, |
| "num_tokens": 33918686.0, |
| "step": 9300 |
| }, |
| { |
| "epoch": 71.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4799999999999997e-06, |
| "loss": 1.6996, |
| "mean_token_accuracy": 0.6571532785892487, |
| "num_tokens": 33937081.0, |
| "step": 9305 |
| }, |
| { |
| "epoch": 71.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 3.455e-06, |
| "loss": 1.7333, |
| "mean_token_accuracy": 0.6503713637590408, |
| "num_tokens": 33954828.0, |
| "step": 9310 |
| }, |
| { |
| "epoch": 71.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 3.4299999999999998e-06, |
| "loss": 1.6752, |
| "mean_token_accuracy": 0.6633023738861084, |
| "num_tokens": 33973326.0, |
| "step": 9315 |
| }, |
| { |
| "epoch": 71.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.405e-06, |
| "loss": 1.757, |
| "mean_token_accuracy": 0.6501501977443696, |
| "num_tokens": 33991262.0, |
| "step": 9320 |
| }, |
| { |
| "epoch": 71.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.38e-06, |
| "loss": 1.6826, |
| "mean_token_accuracy": 0.6632235527038575, |
| "num_tokens": 34010592.0, |
| "step": 9325 |
| }, |
| { |
| "epoch": 71.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3550000000000005e-06, |
| "loss": 1.7825, |
| "mean_token_accuracy": 0.6419746041297912, |
| "num_tokens": 34028492.0, |
| "step": 9330 |
| }, |
| { |
| "epoch": 71.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3300000000000003e-06, |
| "loss": 1.8112, |
| "mean_token_accuracy": 0.6337680786848068, |
| "num_tokens": 34045754.0, |
| "step": 9335 |
| }, |
| { |
| "epoch": 71.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3050000000000005e-06, |
| "loss": 1.7796, |
| "mean_token_accuracy": 0.6412538975477219, |
| "num_tokens": 34063747.0, |
| "step": 9340 |
| }, |
| { |
| "epoch": 71.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2800000000000004e-06, |
| "loss": 1.8211, |
| "mean_token_accuracy": 0.6337336391210556, |
| "num_tokens": 34080973.0, |
| "step": 9345 |
| }, |
| { |
| "epoch": 71.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2550000000000006e-06, |
| "loss": 1.7922, |
| "mean_token_accuracy": 0.6401098728179931, |
| "num_tokens": 34099093.0, |
| "step": 9350 |
| }, |
| { |
| "epoch": 71.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2300000000000004e-06, |
| "loss": 1.8057, |
| "mean_token_accuracy": 0.6371114939451218, |
| "num_tokens": 34117109.0, |
| "step": 9355 |
| }, |
| { |
| "epoch": 72.0, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2050000000000002e-06, |
| "loss": 1.7633, |
| "mean_token_accuracy": 0.6458366394042969, |
| "num_tokens": 34135560.0, |
| "step": 9360 |
| }, |
| { |
| "epoch": 72.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1800000000000005e-06, |
| "loss": 1.7497, |
| "mean_token_accuracy": 0.6472752332687378, |
| "num_tokens": 34153754.0, |
| "step": 9365 |
| }, |
| { |
| "epoch": 72.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1550000000000003e-06, |
| "loss": 1.7499, |
| "mean_token_accuracy": 0.6482025504112243, |
| "num_tokens": 34172147.0, |
| "step": 9370 |
| }, |
| { |
| "epoch": 72.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 3.13e-06, |
| "loss": 1.7281, |
| "mean_token_accuracy": 0.6510056018829345, |
| "num_tokens": 34191430.0, |
| "step": 9375 |
| }, |
| { |
| "epoch": 72.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1050000000000003e-06, |
| "loss": 1.725, |
| "mean_token_accuracy": 0.6562888383865356, |
| "num_tokens": 34209348.0, |
| "step": 9380 |
| }, |
| { |
| "epoch": 72.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.08e-06, |
| "loss": 1.6941, |
| "mean_token_accuracy": 0.6613337188959122, |
| "num_tokens": 34227848.0, |
| "step": 9385 |
| }, |
| { |
| "epoch": 72.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0550000000000004e-06, |
| "loss": 1.6769, |
| "mean_token_accuracy": 0.6630650132894516, |
| "num_tokens": 34246818.0, |
| "step": 9390 |
| }, |
| { |
| "epoch": 72.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 3.0300000000000002e-06, |
| "loss": 1.75, |
| "mean_token_accuracy": 0.6472353667020798, |
| "num_tokens": 34264967.0, |
| "step": 9395 |
| }, |
| { |
| "epoch": 72.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 3.005e-06, |
| "loss": 1.7359, |
| "mean_token_accuracy": 0.6492182224988937, |
| "num_tokens": 34283311.0, |
| "step": 9400 |
| }, |
| { |
| "epoch": 72.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 1.7515, |
| "mean_token_accuracy": 0.6472180843353271, |
| "num_tokens": 34301877.0, |
| "step": 9405 |
| }, |
| { |
| "epoch": 72.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.955e-06, |
| "loss": 1.7565, |
| "mean_token_accuracy": 0.6518394500017166, |
| "num_tokens": 34319958.0, |
| "step": 9410 |
| }, |
| { |
| "epoch": 72.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.93e-06, |
| "loss": 1.7295, |
| "mean_token_accuracy": 0.6590041428804397, |
| "num_tokens": 34338437.0, |
| "step": 9415 |
| }, |
| { |
| "epoch": 72.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 2.905e-06, |
| "loss": 1.8063, |
| "mean_token_accuracy": 0.6363181412220001, |
| "num_tokens": 34356326.0, |
| "step": 9420 |
| }, |
| { |
| "epoch": 72.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.88e-06, |
| "loss": 1.811, |
| "mean_token_accuracy": 0.6379002064466477, |
| "num_tokens": 34373863.0, |
| "step": 9425 |
| }, |
| { |
| "epoch": 72.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 2.855e-06, |
| "loss": 1.7939, |
| "mean_token_accuracy": 0.64038887321949, |
| "num_tokens": 34391352.0, |
| "step": 9430 |
| }, |
| { |
| "epoch": 72.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.83e-06, |
| "loss": 1.7532, |
| "mean_token_accuracy": 0.6454617887735367, |
| "num_tokens": 34409347.0, |
| "step": 9435 |
| }, |
| { |
| "epoch": 72.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.805e-06, |
| "loss": 1.7184, |
| "mean_token_accuracy": 0.6529299259185791, |
| "num_tokens": 34427248.0, |
| "step": 9440 |
| }, |
| { |
| "epoch": 72.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 2.78e-06, |
| "loss": 1.7713, |
| "mean_token_accuracy": 0.6492305964231491, |
| "num_tokens": 34445017.0, |
| "step": 9445 |
| }, |
| { |
| "epoch": 72.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7550000000000003e-06, |
| "loss": 1.7775, |
| "mean_token_accuracy": 0.6450568825006485, |
| "num_tokens": 34462759.0, |
| "step": 9450 |
| }, |
| { |
| "epoch": 72.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.73e-06, |
| "loss": 1.791, |
| "mean_token_accuracy": 0.6397794485092163, |
| "num_tokens": 34480097.0, |
| "step": 9455 |
| }, |
| { |
| "epoch": 72.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.7050000000000004e-06, |
| "loss": 1.7267, |
| "mean_token_accuracy": 0.6520882248878479, |
| "num_tokens": 34498104.0, |
| "step": 9460 |
| }, |
| { |
| "epoch": 72.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.68e-06, |
| "loss": 1.7094, |
| "mean_token_accuracy": 0.6541548132896423, |
| "num_tokens": 34516905.0, |
| "step": 9465 |
| }, |
| { |
| "epoch": 72.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 2.655e-06, |
| "loss": 1.6942, |
| "mean_token_accuracy": 0.6577578127384186, |
| "num_tokens": 34535704.0, |
| "step": 9470 |
| }, |
| { |
| "epoch": 72.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6300000000000002e-06, |
| "loss": 1.7822, |
| "mean_token_accuracy": 0.6439197540283204, |
| "num_tokens": 34553487.0, |
| "step": 9475 |
| }, |
| { |
| "epoch": 72.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.605e-06, |
| "loss": 1.6994, |
| "mean_token_accuracy": 0.6599182695150375, |
| "num_tokens": 34572061.0, |
| "step": 9480 |
| }, |
| { |
| "epoch": 72.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5800000000000003e-06, |
| "loss": 1.6874, |
| "mean_token_accuracy": 0.6625257313251496, |
| "num_tokens": 34590845.0, |
| "step": 9485 |
| }, |
| { |
| "epoch": 73.0, |
| "grad_norm": 0.0, |
| "learning_rate": 2.555e-06, |
| "loss": 1.6979, |
| "mean_token_accuracy": 0.6583450496196747, |
| "num_tokens": 34609665.0, |
| "step": 9490 |
| }, |
| { |
| "epoch": 73.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 2.53e-06, |
| "loss": 1.7997, |
| "mean_token_accuracy": 0.6400972425937652, |
| "num_tokens": 34627692.0, |
| "step": 9495 |
| }, |
| { |
| "epoch": 73.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.505e-06, |
| "loss": 1.7601, |
| "mean_token_accuracy": 0.6496747404336929, |
| "num_tokens": 34645368.0, |
| "step": 9500 |
| }, |
| { |
| "epoch": 73.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.48e-06, |
| "loss": 1.7188, |
| "mean_token_accuracy": 0.6584489345550537, |
| "num_tokens": 34663574.0, |
| "step": 9505 |
| }, |
| { |
| "epoch": 73.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4550000000000002e-06, |
| "loss": 1.6701, |
| "mean_token_accuracy": 0.664915868639946, |
| "num_tokens": 34682190.0, |
| "step": 9510 |
| }, |
| { |
| "epoch": 73.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.43e-06, |
| "loss": 1.718, |
| "mean_token_accuracy": 0.6526754826307297, |
| "num_tokens": 34700323.0, |
| "step": 9515 |
| }, |
| { |
| "epoch": 73.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.405e-06, |
| "loss": 1.7598, |
| "mean_token_accuracy": 0.648136830329895, |
| "num_tokens": 34718585.0, |
| "step": 9520 |
| }, |
| { |
| "epoch": 73.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.38e-06, |
| "loss": 1.8001, |
| "mean_token_accuracy": 0.6407902717590332, |
| "num_tokens": 34736230.0, |
| "step": 9525 |
| }, |
| { |
| "epoch": 73.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3550000000000003e-06, |
| "loss": 1.7617, |
| "mean_token_accuracy": 0.6459957540035248, |
| "num_tokens": 34754509.0, |
| "step": 9530 |
| }, |
| { |
| "epoch": 73.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 2.33e-06, |
| "loss": 1.6889, |
| "mean_token_accuracy": 0.6600727260112762, |
| "num_tokens": 34772888.0, |
| "step": 9535 |
| }, |
| { |
| "epoch": 73.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3050000000000004e-06, |
| "loss": 1.7754, |
| "mean_token_accuracy": 0.6485264748334885, |
| "num_tokens": 34790801.0, |
| "step": 9540 |
| }, |
| { |
| "epoch": 73.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 2.28e-06, |
| "loss": 1.7553, |
| "mean_token_accuracy": 0.6497362434864045, |
| "num_tokens": 34808604.0, |
| "step": 9545 |
| }, |
| { |
| "epoch": 73.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 2.255e-06, |
| "loss": 1.7909, |
| "mean_token_accuracy": 0.639983183145523, |
| "num_tokens": 34826362.0, |
| "step": 9550 |
| }, |
| { |
| "epoch": 73.5, |
| "grad_norm": 0.0, |
| "learning_rate": 2.2300000000000002e-06, |
| "loss": 1.7264, |
| "mean_token_accuracy": 0.6542590230703353, |
| "num_tokens": 34844689.0, |
| "step": 9555 |
| }, |
| { |
| "epoch": 73.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 2.205e-06, |
| "loss": 1.7352, |
| "mean_token_accuracy": 0.6505734384059906, |
| "num_tokens": 34863598.0, |
| "step": 9560 |
| }, |
| { |
| "epoch": 73.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1800000000000003e-06, |
| "loss": 1.7534, |
| "mean_token_accuracy": 0.644753509759903, |
| "num_tokens": 34881884.0, |
| "step": 9565 |
| }, |
| { |
| "epoch": 73.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 2.155e-06, |
| "loss": 1.7672, |
| "mean_token_accuracy": 0.646284231543541, |
| "num_tokens": 34900082.0, |
| "step": 9570 |
| }, |
| { |
| "epoch": 73.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 2.13e-06, |
| "loss": 1.7086, |
| "mean_token_accuracy": 0.6528146833181381, |
| "num_tokens": 34919228.0, |
| "step": 9575 |
| }, |
| { |
| "epoch": 73.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.105e-06, |
| "loss": 1.7914, |
| "mean_token_accuracy": 0.6391745179891586, |
| "num_tokens": 34936386.0, |
| "step": 9580 |
| }, |
| { |
| "epoch": 73.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.08e-06, |
| "loss": 1.729, |
| "mean_token_accuracy": 0.6509798347949982, |
| "num_tokens": 34954716.0, |
| "step": 9585 |
| }, |
| { |
| "epoch": 73.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 2.055e-06, |
| "loss": 1.6906, |
| "mean_token_accuracy": 0.6624141722917557, |
| "num_tokens": 34973992.0, |
| "step": 9590 |
| }, |
| { |
| "epoch": 73.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 2.03e-06, |
| "loss": 1.7145, |
| "mean_token_accuracy": 0.6552868157625198, |
| "num_tokens": 34992491.0, |
| "step": 9595 |
| }, |
| { |
| "epoch": 73.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 2.005e-06, |
| "loss": 1.763, |
| "mean_token_accuracy": 0.6461809724569321, |
| "num_tokens": 35010375.0, |
| "step": 9600 |
| }, |
| { |
| "epoch": 73.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.98e-06, |
| "loss": 1.7652, |
| "mean_token_accuracy": 0.6461312264204025, |
| "num_tokens": 35028006.0, |
| "step": 9605 |
| }, |
| { |
| "epoch": 73.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9550000000000003e-06, |
| "loss": 1.7037, |
| "mean_token_accuracy": 0.656490358710289, |
| "num_tokens": 35046822.0, |
| "step": 9610 |
| }, |
| { |
| "epoch": 73.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 1.93e-06, |
| "loss": 1.7048, |
| "mean_token_accuracy": 0.6573001593351364, |
| "num_tokens": 35065193.0, |
| "step": 9615 |
| }, |
| { |
| "epoch": 74.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9050000000000002e-06, |
| "loss": 1.7038, |
| "mean_token_accuracy": 0.6570831030607224, |
| "num_tokens": 35083770.0, |
| "step": 9620 |
| }, |
| { |
| "epoch": 74.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8800000000000002e-06, |
| "loss": 1.841, |
| "mean_token_accuracy": 0.6277540296316146, |
| "num_tokens": 35101050.0, |
| "step": 9625 |
| }, |
| { |
| "epoch": 74.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8550000000000002e-06, |
| "loss": 1.7552, |
| "mean_token_accuracy": 0.6447629004716873, |
| "num_tokens": 35119255.0, |
| "step": 9630 |
| }, |
| { |
| "epoch": 74.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.83e-06, |
| "loss": 1.7651, |
| "mean_token_accuracy": 0.6467320770025253, |
| "num_tokens": 35137450.0, |
| "step": 9635 |
| }, |
| { |
| "epoch": 74.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 1.805e-06, |
| "loss": 1.7862, |
| "mean_token_accuracy": 0.6392817258834839, |
| "num_tokens": 35155463.0, |
| "step": 9640 |
| }, |
| { |
| "epoch": 74.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7800000000000001e-06, |
| "loss": 1.7609, |
| "mean_token_accuracy": 0.6492180019617081, |
| "num_tokens": 35173695.0, |
| "step": 9645 |
| }, |
| { |
| "epoch": 74.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7550000000000001e-06, |
| "loss": 1.7774, |
| "mean_token_accuracy": 0.6455636918544769, |
| "num_tokens": 35191312.0, |
| "step": 9650 |
| }, |
| { |
| "epoch": 74.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.73e-06, |
| "loss": 1.7323, |
| "mean_token_accuracy": 0.656083607673645, |
| "num_tokens": 35209464.0, |
| "step": 9655 |
| }, |
| { |
| "epoch": 74.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.705e-06, |
| "loss": 1.6952, |
| "mean_token_accuracy": 0.6594295680522919, |
| "num_tokens": 35227948.0, |
| "step": 9660 |
| }, |
| { |
| "epoch": 74.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 1.68e-06, |
| "loss": 1.7291, |
| "mean_token_accuracy": 0.6519060641527176, |
| "num_tokens": 35246383.0, |
| "step": 9665 |
| }, |
| { |
| "epoch": 74.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.655e-06, |
| "loss": 1.7533, |
| "mean_token_accuracy": 0.6488317787647248, |
| "num_tokens": 35264529.0, |
| "step": 9670 |
| }, |
| { |
| "epoch": 74.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6299999999999999e-06, |
| "loss": 1.7144, |
| "mean_token_accuracy": 0.6548575252294541, |
| "num_tokens": 35283055.0, |
| "step": 9675 |
| }, |
| { |
| "epoch": 74.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6049999999999999e-06, |
| "loss": 1.7571, |
| "mean_token_accuracy": 0.6488740295171738, |
| "num_tokens": 35301939.0, |
| "step": 9680 |
| }, |
| { |
| "epoch": 74.5, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5800000000000003e-06, |
| "loss": 1.6961, |
| "mean_token_accuracy": 0.6596014022827148, |
| "num_tokens": 35320606.0, |
| "step": 9685 |
| }, |
| { |
| "epoch": 74.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 1.555e-06, |
| "loss": 1.7047, |
| "mean_token_accuracy": 0.6595968753099442, |
| "num_tokens": 35338994.0, |
| "step": 9690 |
| }, |
| { |
| "epoch": 74.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.53e-06, |
| "loss": 1.7309, |
| "mean_token_accuracy": 0.6514750987291336, |
| "num_tokens": 35357001.0, |
| "step": 9695 |
| }, |
| { |
| "epoch": 74.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.505e-06, |
| "loss": 1.6814, |
| "mean_token_accuracy": 0.6643463402986527, |
| "num_tokens": 35375365.0, |
| "step": 9700 |
| }, |
| { |
| "epoch": 74.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 1.4800000000000002e-06, |
| "loss": 1.7907, |
| "mean_token_accuracy": 0.6419687926769256, |
| "num_tokens": 35393033.0, |
| "step": 9705 |
| }, |
| { |
| "epoch": 74.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.455e-06, |
| "loss": 1.7162, |
| "mean_token_accuracy": 0.6562795639038086, |
| "num_tokens": 35411155.0, |
| "step": 9710 |
| }, |
| { |
| "epoch": 74.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.43e-06, |
| "loss": 1.7366, |
| "mean_token_accuracy": 0.6549755454063415, |
| "num_tokens": 35429377.0, |
| "step": 9715 |
| }, |
| { |
| "epoch": 74.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.405e-06, |
| "loss": 1.6836, |
| "mean_token_accuracy": 0.658492824435234, |
| "num_tokens": 35448518.0, |
| "step": 9720 |
| }, |
| { |
| "epoch": 74.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3800000000000001e-06, |
| "loss": 1.7531, |
| "mean_token_accuracy": 0.6465422600507736, |
| "num_tokens": 35466311.0, |
| "step": 9725 |
| }, |
| { |
| "epoch": 74.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 1.355e-06, |
| "loss": 1.7721, |
| "mean_token_accuracy": 0.6479380965232849, |
| "num_tokens": 35484088.0, |
| "step": 9730 |
| }, |
| { |
| "epoch": 74.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.33e-06, |
| "loss": 1.747, |
| "mean_token_accuracy": 0.6452481150627136, |
| "num_tokens": 35502027.0, |
| "step": 9735 |
| }, |
| { |
| "epoch": 74.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3050000000000002e-06, |
| "loss": 1.7069, |
| "mean_token_accuracy": 0.6560095697641373, |
| "num_tokens": 35521179.0, |
| "step": 9740 |
| }, |
| { |
| "epoch": 74.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 1.28e-06, |
| "loss": 1.7289, |
| "mean_token_accuracy": 0.6569119274616242, |
| "num_tokens": 35539684.0, |
| "step": 9745 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 0.0, |
| "learning_rate": 1.255e-06, |
| "loss": 1.7786, |
| "mean_token_accuracy": 0.6401764988899231, |
| "num_tokens": 35557875.0, |
| "step": 9750 |
| }, |
| { |
| "epoch": 75.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 1.23e-06, |
| "loss": 1.7148, |
| "mean_token_accuracy": 0.6562530130147934, |
| "num_tokens": 35576356.0, |
| "step": 9755 |
| }, |
| { |
| "epoch": 75.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2050000000000001e-06, |
| "loss": 1.6865, |
| "mean_token_accuracy": 0.6659580796957016, |
| "num_tokens": 35594809.0, |
| "step": 9760 |
| }, |
| { |
| "epoch": 75.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 1.18e-06, |
| "loss": 1.7825, |
| "mean_token_accuracy": 0.6393451571464539, |
| "num_tokens": 35612932.0, |
| "step": 9765 |
| }, |
| { |
| "epoch": 75.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 1.155e-06, |
| "loss": 1.8061, |
| "mean_token_accuracy": 0.6380451053380967, |
| "num_tokens": 35630549.0, |
| "step": 9770 |
| }, |
| { |
| "epoch": 75.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.13e-06, |
| "loss": 1.8032, |
| "mean_token_accuracy": 0.6359036058187485, |
| "num_tokens": 35648480.0, |
| "step": 9775 |
| }, |
| { |
| "epoch": 75.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1050000000000002e-06, |
| "loss": 1.763, |
| "mean_token_accuracy": 0.6435405910015106, |
| "num_tokens": 35666156.0, |
| "step": 9780 |
| }, |
| { |
| "epoch": 75.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 1.08e-06, |
| "loss": 1.7269, |
| "mean_token_accuracy": 0.6546905755996704, |
| "num_tokens": 35684367.0, |
| "step": 9785 |
| }, |
| { |
| "epoch": 75.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 1.055e-06, |
| "loss": 1.7454, |
| "mean_token_accuracy": 0.6543306648731232, |
| "num_tokens": 35703139.0, |
| "step": 9790 |
| }, |
| { |
| "epoch": 75.34615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 1.03e-06, |
| "loss": 1.7799, |
| "mean_token_accuracy": 0.6433266043663025, |
| "num_tokens": 35721254.0, |
| "step": 9795 |
| }, |
| { |
| "epoch": 75.38461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0050000000000001e-06, |
| "loss": 1.7613, |
| "mean_token_accuracy": 0.647157472372055, |
| "num_tokens": 35739283.0, |
| "step": 9800 |
| }, |
| { |
| "epoch": 75.42307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 9.8e-07, |
| "loss": 1.7482, |
| "mean_token_accuracy": 0.6474371433258057, |
| "num_tokens": 35758040.0, |
| "step": 9805 |
| }, |
| { |
| "epoch": 75.46153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 9.55e-07, |
| "loss": 1.6272, |
| "mean_token_accuracy": 0.6756436079740524, |
| "num_tokens": 35777534.0, |
| "step": 9810 |
| }, |
| { |
| "epoch": 75.5, |
| "grad_norm": 0.0, |
| "learning_rate": 9.3e-07, |
| "loss": 1.6917, |
| "mean_token_accuracy": 0.6589653193950653, |
| "num_tokens": 35796039.0, |
| "step": 9815 |
| }, |
| { |
| "epoch": 75.53846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 9.050000000000001e-07, |
| "loss": 1.7085, |
| "mean_token_accuracy": 0.6560279041528702, |
| "num_tokens": 35814453.0, |
| "step": 9820 |
| }, |
| { |
| "epoch": 75.57692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 8.8e-07, |
| "loss": 1.717, |
| "mean_token_accuracy": 0.6553375959396363, |
| "num_tokens": 35833053.0, |
| "step": 9825 |
| }, |
| { |
| "epoch": 75.61538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 8.550000000000001e-07, |
| "loss": 1.7881, |
| "mean_token_accuracy": 0.6422079712152481, |
| "num_tokens": 35850712.0, |
| "step": 9830 |
| }, |
| { |
| "epoch": 75.65384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 8.300000000000001e-07, |
| "loss": 1.7786, |
| "mean_token_accuracy": 0.6427565932273864, |
| "num_tokens": 35868743.0, |
| "step": 9835 |
| }, |
| { |
| "epoch": 75.6923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 8.05e-07, |
| "loss": 1.7353, |
| "mean_token_accuracy": 0.657978093624115, |
| "num_tokens": 35886399.0, |
| "step": 9840 |
| }, |
| { |
| "epoch": 75.73076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 7.8e-07, |
| "loss": 1.7547, |
| "mean_token_accuracy": 0.6459228157997131, |
| "num_tokens": 35904125.0, |
| "step": 9845 |
| }, |
| { |
| "epoch": 75.76923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 7.550000000000001e-07, |
| "loss": 1.7782, |
| "mean_token_accuracy": 0.6488775163888931, |
| "num_tokens": 35921829.0, |
| "step": 9850 |
| }, |
| { |
| "epoch": 75.8076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 7.3e-07, |
| "loss": 1.8236, |
| "mean_token_accuracy": 0.6332837879657746, |
| "num_tokens": 35939046.0, |
| "step": 9855 |
| }, |
| { |
| "epoch": 75.84615384615384, |
| "grad_norm": 0.0, |
| "learning_rate": 7.05e-07, |
| "loss": 1.6545, |
| "mean_token_accuracy": 0.6667064487934112, |
| "num_tokens": 35957880.0, |
| "step": 9860 |
| }, |
| { |
| "epoch": 75.88461538461539, |
| "grad_norm": 0.0, |
| "learning_rate": 6.8e-07, |
| "loss": 1.7049, |
| "mean_token_accuracy": 0.6505464226007461, |
| "num_tokens": 35977218.0, |
| "step": 9865 |
| }, |
| { |
| "epoch": 75.92307692307692, |
| "grad_norm": 0.0, |
| "learning_rate": 6.550000000000001e-07, |
| "loss": 1.6894, |
| "mean_token_accuracy": 0.6595763146877289, |
| "num_tokens": 35996459.0, |
| "step": 9870 |
| }, |
| { |
| "epoch": 75.96153846153847, |
| "grad_norm": 0.0, |
| "learning_rate": 6.3e-07, |
| "loss": 1.7751, |
| "mean_token_accuracy": 0.6440996766090393, |
| "num_tokens": 36013795.0, |
| "step": 9875 |
| }, |
| { |
| "epoch": 76.0, |
| "grad_norm": 0.0, |
| "learning_rate": 6.05e-07, |
| "loss": 1.719, |
| "mean_token_accuracy": 0.6536471992731094, |
| "num_tokens": 36031980.0, |
| "step": 9880 |
| }, |
| { |
| "epoch": 76.03846153846153, |
| "grad_norm": 0.0, |
| "learning_rate": 5.8e-07, |
| "loss": 1.7253, |
| "mean_token_accuracy": 0.6511723041534424, |
| "num_tokens": 36050841.0, |
| "step": 9885 |
| }, |
| { |
| "epoch": 76.07692307692308, |
| "grad_norm": 0.0, |
| "learning_rate": 5.550000000000001e-07, |
| "loss": 1.7605, |
| "mean_token_accuracy": 0.6479182183742523, |
| "num_tokens": 36069596.0, |
| "step": 9890 |
| }, |
| { |
| "epoch": 76.11538461538461, |
| "grad_norm": 0.0, |
| "learning_rate": 5.3e-07, |
| "loss": 1.8029, |
| "mean_token_accuracy": 0.6407642930746078, |
| "num_tokens": 36087230.0, |
| "step": 9895 |
| }, |
| { |
| "epoch": 76.15384615384616, |
| "grad_norm": 0.0, |
| "learning_rate": 5.05e-07, |
| "loss": 1.7748, |
| "mean_token_accuracy": 0.6411665916442871, |
| "num_tokens": 36105433.0, |
| "step": 9900 |
| }, |
| { |
| "epoch": 76.1923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.8e-07, |
| "loss": 1.7081, |
| "mean_token_accuracy": 0.658719289302826, |
| "num_tokens": 36123588.0, |
| "step": 9905 |
| }, |
| { |
| "epoch": 76.23076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5500000000000004e-07, |
| "loss": 1.6927, |
| "mean_token_accuracy": 0.6572551965713501, |
| "num_tokens": 36141761.0, |
| "step": 9910 |
| }, |
| { |
| "epoch": 76.26923076923077, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3e-07, |
| "loss": 1.7517, |
| "mean_token_accuracy": 0.6524038553237915, |
| "num_tokens": 36159646.0, |
| "step": 9915 |
| }, |
| { |
| "epoch": 76.3076923076923, |
| "grad_norm": 0.0, |
| "learning_rate": 4.05e-07, |
| "loss": 1.8024, |
| "mean_token_accuracy": 0.6330039739608765, |
| "num_tokens": 36177566.0, |
| "step": 9920 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 77, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.834522347220992e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|