| { | |
| "best_metric": 0.04311952739953995, | |
| "best_model_checkpoint": "t5/checkpoint-58320", | |
| "epoch": 100.0, | |
| "eval_steps": 500, | |
| "global_step": 486000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.14633552730083466, | |
| "learning_rate": 0.00099, | |
| "loss": 0.1231, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6418053052843314, | |
| "eval_loss": 0.0717623308300972, | |
| "eval_runtime": 1333.7775, | |
| "eval_samples_per_second": 89.456, | |
| "eval_steps_per_second": 0.35, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.10493500530719757, | |
| "learning_rate": 0.00098, | |
| "loss": 0.0712, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6884046431714369, | |
| "eval_loss": 0.06004703789949417, | |
| "eval_runtime": 1304.0684, | |
| "eval_samples_per_second": 91.494, | |
| "eval_steps_per_second": 0.358, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.09630604088306427, | |
| "learning_rate": 0.0009699999999999999, | |
| "loss": 0.0593, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7176465658131835, | |
| "eval_loss": 0.05390430614352226, | |
| "eval_runtime": 1308.2854, | |
| "eval_samples_per_second": 91.2, | |
| "eval_steps_per_second": 0.357, | |
| "step": 14580 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.07841313630342484, | |
| "learning_rate": 0.00096, | |
| "loss": 0.0519, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.730679294304991, | |
| "eval_loss": 0.050438590347766876, | |
| "eval_runtime": 1309.4742, | |
| "eval_samples_per_second": 91.117, | |
| "eval_steps_per_second": 0.357, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.07362372428178787, | |
| "learning_rate": 0.00095, | |
| "loss": 0.0464, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7446423333193647, | |
| "eval_loss": 0.04836108162999153, | |
| "eval_runtime": 1308.1745, | |
| "eval_samples_per_second": 91.207, | |
| "eval_steps_per_second": 0.357, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.05613507702946663, | |
| "learning_rate": 0.00094, | |
| "loss": 0.0422, | |
| "step": 29160 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7498721870678456, | |
| "eval_loss": 0.04650866985321045, | |
| "eval_runtime": 1310.787, | |
| "eval_samples_per_second": 91.025, | |
| "eval_steps_per_second": 0.356, | |
| "step": 29160 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.06997396796941757, | |
| "learning_rate": 0.00093, | |
| "loss": 0.0385, | |
| "step": 34020 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7611197250974312, | |
| "eval_loss": 0.04569365829229355, | |
| "eval_runtime": 1301.8483, | |
| "eval_samples_per_second": 91.65, | |
| "eval_steps_per_second": 0.359, | |
| "step": 34020 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.054521311074495316, | |
| "learning_rate": 0.00092, | |
| "loss": 0.0354, | |
| "step": 38880 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7642039978208943, | |
| "eval_loss": 0.04475805535912514, | |
| "eval_runtime": 1307.7292, | |
| "eval_samples_per_second": 91.238, | |
| "eval_steps_per_second": 0.357, | |
| "step": 38880 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.06338842958211899, | |
| "learning_rate": 0.00091, | |
| "loss": 0.0328, | |
| "step": 43740 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.771554289066756, | |
| "eval_loss": 0.0442810133099556, | |
| "eval_runtime": 1304.569, | |
| "eval_samples_per_second": 91.459, | |
| "eval_steps_per_second": 0.358, | |
| "step": 43740 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.054538544267416, | |
| "learning_rate": 0.0009000000000000001, | |
| "loss": 0.0304, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7760130746343712, | |
| "eval_loss": 0.0437050461769104, | |
| "eval_runtime": 1308.322, | |
| "eval_samples_per_second": 91.197, | |
| "eval_steps_per_second": 0.357, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.06947464495897293, | |
| "learning_rate": 0.0008900000000000001, | |
| "loss": 0.0283, | |
| "step": 53460 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7780664627247202, | |
| "eval_loss": 0.04394479840993881, | |
| "eval_runtime": 1311.6831, | |
| "eval_samples_per_second": 90.963, | |
| "eval_steps_per_second": 0.356, | |
| "step": 53460 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.05090058967471123, | |
| "learning_rate": 0.00088, | |
| "loss": 0.0264, | |
| "step": 58320 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7814943636592214, | |
| "eval_loss": 0.04311952739953995, | |
| "eval_runtime": 1303.6986, | |
| "eval_samples_per_second": 91.52, | |
| "eval_steps_per_second": 0.358, | |
| "step": 58320 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.05226626992225647, | |
| "learning_rate": 0.00087, | |
| "loss": 0.0248, | |
| "step": 63180 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.7855257092570087, | |
| "eval_loss": 0.04451437294483185, | |
| "eval_runtime": 1313.511, | |
| "eval_samples_per_second": 90.837, | |
| "eval_steps_per_second": 0.356, | |
| "step": 63180 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.053579073399305344, | |
| "learning_rate": 0.00086, | |
| "loss": 0.0232, | |
| "step": 68040 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7867158362318233, | |
| "eval_loss": 0.04421268403530121, | |
| "eval_runtime": 1304.7705, | |
| "eval_samples_per_second": 91.445, | |
| "eval_steps_per_second": 0.358, | |
| "step": 68040 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.052565447986125946, | |
| "learning_rate": 0.00085, | |
| "loss": 0.0218, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.7881657796588861, | |
| "eval_loss": 0.04461174085736275, | |
| "eval_runtime": 1306.4264, | |
| "eval_samples_per_second": 91.329, | |
| "eval_steps_per_second": 0.357, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.05223050341010094, | |
| "learning_rate": 0.00084, | |
| "loss": 0.0206, | |
| "step": 77760 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7900850689351716, | |
| "eval_loss": 0.046072401106357574, | |
| "eval_runtime": 1314.7849, | |
| "eval_samples_per_second": 90.749, | |
| "eval_steps_per_second": 0.355, | |
| "step": 77760 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.04498209059238434, | |
| "learning_rate": 0.00083, | |
| "loss": 0.0194, | |
| "step": 82620 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7911410970959225, | |
| "eval_loss": 0.04610202834010124, | |
| "eval_runtime": 1308.5405, | |
| "eval_samples_per_second": 91.182, | |
| "eval_steps_per_second": 0.357, | |
| "step": 82620 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.047790784388780594, | |
| "learning_rate": 0.00082, | |
| "loss": 0.0183, | |
| "step": 87480 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7914931064828395, | |
| "eval_loss": 0.04640175402164459, | |
| "eval_runtime": 1301.9032, | |
| "eval_samples_per_second": 91.647, | |
| "eval_steps_per_second": 0.359, | |
| "step": 87480 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.04554256424307823, | |
| "learning_rate": 0.0008100000000000001, | |
| "loss": 0.0173, | |
| "step": 92340 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.7922055064325525, | |
| "eval_loss": 0.046802300959825516, | |
| "eval_runtime": 1318.5946, | |
| "eval_samples_per_second": 90.486, | |
| "eval_steps_per_second": 0.354, | |
| "step": 92340 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.051229629665613174, | |
| "learning_rate": 0.0008, | |
| "loss": 0.0166, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7951556803419519, | |
| "eval_loss": 0.04811061546206474, | |
| "eval_runtime": 1304.3101, | |
| "eval_samples_per_second": 91.477, | |
| "eval_steps_per_second": 0.358, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 0.04701264947652817, | |
| "learning_rate": 0.00079, | |
| "loss": 0.0158, | |
| "step": 102060 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.7942253698193856, | |
| "eval_loss": 0.04858441650867462, | |
| "eval_runtime": 1303.9515, | |
| "eval_samples_per_second": 91.503, | |
| "eval_steps_per_second": 0.358, | |
| "step": 102060 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 0.07426326721906662, | |
| "learning_rate": 0.0007800000000000001, | |
| "loss": 0.015, | |
| "step": 106920 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.7949126262414616, | |
| "eval_loss": 0.048401448875665665, | |
| "eval_runtime": 1304.9706, | |
| "eval_samples_per_second": 91.431, | |
| "eval_steps_per_second": 0.358, | |
| "step": 106920 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 0.05088690295815468, | |
| "learning_rate": 0.0007700000000000001, | |
| "loss": 0.0143, | |
| "step": 111780 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.7971001131458744, | |
| "eval_loss": 0.04964574798941612, | |
| "eval_runtime": 1311.3518, | |
| "eval_samples_per_second": 90.986, | |
| "eval_steps_per_second": 0.356, | |
| "step": 111780 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 0.04760482534766197, | |
| "learning_rate": 0.00076, | |
| "loss": 0.0137, | |
| "step": 116640 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7959937979298496, | |
| "eval_loss": 0.049800001084804535, | |
| "eval_runtime": 1306.2706, | |
| "eval_samples_per_second": 91.34, | |
| "eval_steps_per_second": 0.358, | |
| "step": 116640 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.040201518684625626, | |
| "learning_rate": 0.00075, | |
| "loss": 0.0131, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.7972677366634539, | |
| "eval_loss": 0.0510859489440918, | |
| "eval_runtime": 1307.6635, | |
| "eval_samples_per_second": 91.243, | |
| "eval_steps_per_second": 0.357, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.04697073623538017, | |
| "learning_rate": 0.00074, | |
| "loss": 0.0125, | |
| "step": 126360 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7971336378493903, | |
| "eval_loss": 0.05105246230959892, | |
| "eval_runtime": 1305.0277, | |
| "eval_samples_per_second": 91.427, | |
| "eval_steps_per_second": 0.358, | |
| "step": 126360 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 0.035631682723760605, | |
| "learning_rate": 0.00073, | |
| "loss": 0.012, | |
| "step": 131220 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.7990780706533127, | |
| "eval_loss": 0.051402851939201355, | |
| "eval_runtime": 1309.1883, | |
| "eval_samples_per_second": 91.137, | |
| "eval_steps_per_second": 0.357, | |
| "step": 131220 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 0.053142111748456955, | |
| "learning_rate": 0.0007199999999999999, | |
| "loss": 0.0116, | |
| "step": 136080 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7987009177387587, | |
| "eval_loss": 0.05239921808242798, | |
| "eval_runtime": 1306.5799, | |
| "eval_samples_per_second": 91.319, | |
| "eval_steps_per_second": 0.357, | |
| "step": 136080 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "grad_norm": 0.04080447182059288, | |
| "learning_rate": 0.00071, | |
| "loss": 0.0111, | |
| "step": 140940 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.7965804802413778, | |
| "eval_loss": 0.05250364542007446, | |
| "eval_runtime": 1309.3121, | |
| "eval_samples_per_second": 91.128, | |
| "eval_steps_per_second": 0.357, | |
| "step": 140940 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.045455146580934525, | |
| "learning_rate": 0.0007, | |
| "loss": 0.0107, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7984997695176633, | |
| "eval_loss": 0.0543711818754673, | |
| "eval_runtime": 1310.1708, | |
| "eval_samples_per_second": 91.068, | |
| "eval_steps_per_second": 0.356, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "grad_norm": 0.03871888667345047, | |
| "learning_rate": 0.00069, | |
| "loss": 0.0104, | |
| "step": 150660 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.7982399530654151, | |
| "eval_loss": 0.053769443184137344, | |
| "eval_runtime": 1306.4928, | |
| "eval_samples_per_second": 91.325, | |
| "eval_steps_per_second": 0.357, | |
| "step": 150660 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "grad_norm": 0.048963289707899094, | |
| "learning_rate": 0.00068, | |
| "loss": 0.01, | |
| "step": 155520 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.8012404140300884, | |
| "eval_loss": 0.05514230951666832, | |
| "eval_runtime": 1308.1993, | |
| "eval_samples_per_second": 91.206, | |
| "eval_steps_per_second": 0.357, | |
| "step": 155520 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "grad_norm": 0.03987804055213928, | |
| "learning_rate": 0.00067, | |
| "loss": 0.0097, | |
| "step": 160380 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.8019109081004064, | |
| "eval_loss": 0.055017631500959396, | |
| "eval_runtime": 1312.5155, | |
| "eval_samples_per_second": 90.906, | |
| "eval_steps_per_second": 0.356, | |
| "step": 160380 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "grad_norm": 0.0373803973197937, | |
| "learning_rate": 0.00066, | |
| "loss": 0.0094, | |
| "step": 165240 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.7987009177387587, | |
| "eval_loss": 0.055196575820446014, | |
| "eval_runtime": 1306.0614, | |
| "eval_samples_per_second": 91.355, | |
| "eval_steps_per_second": 0.358, | |
| "step": 165240 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "grad_norm": 0.039517637342214584, | |
| "learning_rate": 0.0006500000000000001, | |
| "loss": 0.0091, | |
| "step": 170100 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.8004777270251017, | |
| "eval_loss": 0.05661753937602043, | |
| "eval_runtime": 1306.965, | |
| "eval_samples_per_second": 91.292, | |
| "eval_steps_per_second": 0.357, | |
| "step": 170100 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "grad_norm": 0.04135722666978836, | |
| "learning_rate": 0.00064, | |
| "loss": 0.0088, | |
| "step": 174960 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.8019025269245275, | |
| "eval_loss": 0.05708213895559311, | |
| "eval_runtime": 1305.0895, | |
| "eval_samples_per_second": 91.423, | |
| "eval_steps_per_second": 0.358, | |
| "step": 174960 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "grad_norm": 0.04340599477291107, | |
| "learning_rate": 0.00063, | |
| "loss": 0.0085, | |
| "step": 179820 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.8015421363617315, | |
| "eval_loss": 0.0565766803920269, | |
| "eval_runtime": 1303.7543, | |
| "eval_samples_per_second": 91.516, | |
| "eval_steps_per_second": 0.358, | |
| "step": 179820 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "grad_norm": 0.042780667543411255, | |
| "learning_rate": 0.00062, | |
| "loss": 0.0082, | |
| "step": 184680 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.7985919624523321, | |
| "eval_loss": 0.05795786902308464, | |
| "eval_runtime": 1306.8337, | |
| "eval_samples_per_second": 91.301, | |
| "eval_steps_per_second": 0.357, | |
| "step": 184680 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "grad_norm": 0.03298887610435486, | |
| "learning_rate": 0.00061, | |
| "loss": 0.008, | |
| "step": 189540 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.8006537317185601, | |
| "eval_loss": 0.05666106194257736, | |
| "eval_runtime": 1303.7751, | |
| "eval_samples_per_second": 91.515, | |
| "eval_steps_per_second": 0.358, | |
| "step": 189540 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 0.03825366497039795, | |
| "learning_rate": 0.0006, | |
| "loss": 0.0077, | |
| "step": 194400 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.8003101035075221, | |
| "eval_loss": 0.05909406766295433, | |
| "eval_runtime": 1304.1065, | |
| "eval_samples_per_second": 91.492, | |
| "eval_steps_per_second": 0.358, | |
| "step": 194400 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "grad_norm": 0.049214523285627365, | |
| "learning_rate": 0.00059, | |
| "loss": 0.0075, | |
| "step": 199260 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_accuracy": 0.8027406445124251, | |
| "eval_loss": 0.0589471310377121, | |
| "eval_runtime": 1305.1945, | |
| "eval_samples_per_second": 91.415, | |
| "eval_steps_per_second": 0.358, | |
| "step": 199260 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "grad_norm": 0.03445366024971008, | |
| "learning_rate": 0.00058, | |
| "loss": 0.0073, | |
| "step": 204120 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.802765788040062, | |
| "eval_loss": 0.05833474174141884, | |
| "eval_runtime": 1304.7043, | |
| "eval_samples_per_second": 91.45, | |
| "eval_steps_per_second": 0.358, | |
| "step": 204120 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "grad_norm": 0.031152933835983276, | |
| "learning_rate": 0.00057, | |
| "loss": 0.007, | |
| "step": 208980 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_accuracy": 0.8017851904622219, | |
| "eval_loss": 0.05955711379647255, | |
| "eval_runtime": 1302.9367, | |
| "eval_samples_per_second": 91.574, | |
| "eval_steps_per_second": 0.358, | |
| "step": 208980 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "grad_norm": 0.028931325301527977, | |
| "learning_rate": 0.0005600000000000001, | |
| "loss": 0.0069, | |
| "step": 213840 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.803469806813896, | |
| "eval_loss": 0.05940761789679527, | |
| "eval_runtime": 1302.3209, | |
| "eval_samples_per_second": 91.617, | |
| "eval_steps_per_second": 0.359, | |
| "step": 213840 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "grad_norm": 0.03164521977305412, | |
| "learning_rate": 0.00055, | |
| "loss": 0.0066, | |
| "step": 218700 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_accuracy": 0.803084272723463, | |
| "eval_loss": 0.0604814775288105, | |
| "eval_runtime": 1299.6461, | |
| "eval_samples_per_second": 91.806, | |
| "eval_steps_per_second": 0.359, | |
| "step": 218700 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "grad_norm": 0.09477687627077103, | |
| "learning_rate": 0.00054, | |
| "loss": 0.0065, | |
| "step": 223560 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.803805053849055, | |
| "eval_loss": 0.05929319187998772, | |
| "eval_runtime": 1297.2788, | |
| "eval_samples_per_second": 91.973, | |
| "eval_steps_per_second": 0.36, | |
| "step": 223560 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "grad_norm": 0.032785411924123764, | |
| "learning_rate": 0.0005300000000000001, | |
| "loss": 0.0063, | |
| "step": 228420 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_accuracy": 0.8049784184721116, | |
| "eval_loss": 0.06024543195962906, | |
| "eval_runtime": 1298.8006, | |
| "eval_samples_per_second": 91.866, | |
| "eval_steps_per_second": 0.36, | |
| "step": 228420 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "grad_norm": 0.03677200525999069, | |
| "learning_rate": 0.0005200000000000001, | |
| "loss": 0.006, | |
| "step": 233280 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.8040145832460294, | |
| "eval_loss": 0.061811413615942, | |
| "eval_runtime": 1297.6127, | |
| "eval_samples_per_second": 91.95, | |
| "eval_steps_per_second": 0.36, | |
| "step": 233280 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "grad_norm": 0.030352266505360603, | |
| "learning_rate": 0.00051, | |
| "loss": 0.0059, | |
| "step": 238140 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_accuracy": 0.8042241126430039, | |
| "eval_loss": 0.061159055680036545, | |
| "eval_runtime": 1299.1406, | |
| "eval_samples_per_second": 91.841, | |
| "eval_steps_per_second": 0.359, | |
| "step": 238140 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "grad_norm": 0.02934379130601883, | |
| "learning_rate": 0.0005, | |
| "loss": 0.0057, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.8055148137283661, | |
| "eval_loss": 0.06327831000089645, | |
| "eval_runtime": 1298.5116, | |
| "eval_samples_per_second": 91.886, | |
| "eval_steps_per_second": 0.36, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "grad_norm": 0.023088792338967323, | |
| "learning_rate": 0.00049, | |
| "loss": 0.0055, | |
| "step": 247860 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_accuracy": 0.8067971336378494, | |
| "eval_loss": 0.06312137842178345, | |
| "eval_runtime": 1302.1135, | |
| "eval_samples_per_second": 91.632, | |
| "eval_steps_per_second": 0.359, | |
| "step": 247860 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "grad_norm": 0.03648848831653595, | |
| "learning_rate": 0.00048, | |
| "loss": 0.0053, | |
| "step": 252720 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.8061936889745631, | |
| "eval_loss": 0.06350181996822357, | |
| "eval_runtime": 1297.9213, | |
| "eval_samples_per_second": 91.928, | |
| "eval_steps_per_second": 0.36, | |
| "step": 252720 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "grad_norm": 0.03157039359211922, | |
| "learning_rate": 0.00047, | |
| "loss": 0.0051, | |
| "step": 257580 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_accuracy": 0.8065121736579642, | |
| "eval_loss": 0.06361949443817139, | |
| "eval_runtime": 1305.9679, | |
| "eval_samples_per_second": 91.361, | |
| "eval_steps_per_second": 0.358, | |
| "step": 257580 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "grad_norm": 0.026564130559563637, | |
| "learning_rate": 0.00046, | |
| "loss": 0.005, | |
| "step": 262440 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.8064535054268114, | |
| "eval_loss": 0.06370926648378372, | |
| "eval_runtime": 1302.6026, | |
| "eval_samples_per_second": 91.597, | |
| "eval_steps_per_second": 0.359, | |
| "step": 262440 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "grad_norm": 0.039359357208013535, | |
| "learning_rate": 0.00045000000000000004, | |
| "loss": 0.0048, | |
| "step": 267300 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_accuracy": 0.8070569500900976, | |
| "eval_loss": 0.0649728775024414, | |
| "eval_runtime": 1301.934, | |
| "eval_samples_per_second": 91.644, | |
| "eval_steps_per_second": 0.359, | |
| "step": 267300 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "grad_norm": 0.02652502991259098, | |
| "learning_rate": 0.00044, | |
| "loss": 0.0047, | |
| "step": 272160 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.80695637597955, | |
| "eval_loss": 0.06507979333400726, | |
| "eval_runtime": 1302.3742, | |
| "eval_samples_per_second": 91.613, | |
| "eval_steps_per_second": 0.359, | |
| "step": 272160 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "grad_norm": 0.04170479625463486, | |
| "learning_rate": 0.00043, | |
| "loss": 0.0045, | |
| "step": 277020 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_accuracy": 0.8077442065121737, | |
| "eval_loss": 0.06572364270687103, | |
| "eval_runtime": 1303.9456, | |
| "eval_samples_per_second": 91.503, | |
| "eval_steps_per_second": 0.358, | |
| "step": 277020 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "grad_norm": 0.02988004870712757, | |
| "learning_rate": 0.00042, | |
| "loss": 0.0044, | |
| "step": 281880 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.8076687759292629, | |
| "eval_loss": 0.06484715640544891, | |
| "eval_runtime": 1299.0165, | |
| "eval_samples_per_second": 91.85, | |
| "eval_steps_per_second": 0.36, | |
| "step": 281880 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "grad_norm": 0.031678713858127594, | |
| "learning_rate": 0.00041, | |
| "loss": 0.0042, | |
| "step": 286740 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_accuracy": 0.8078950676779952, | |
| "eval_loss": 0.06634358316659927, | |
| "eval_runtime": 1304.0645, | |
| "eval_samples_per_second": 91.495, | |
| "eval_steps_per_second": 0.358, | |
| "step": 286740 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 0.020897777751088142, | |
| "learning_rate": 0.0004, | |
| "loss": 0.0041, | |
| "step": 291600 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.8078866865021163, | |
| "eval_loss": 0.0666716918349266, | |
| "eval_runtime": 1302.0029, | |
| "eval_samples_per_second": 91.64, | |
| "eval_steps_per_second": 0.359, | |
| "step": 291600 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "grad_norm": 0.03830067440867424, | |
| "learning_rate": 0.00039000000000000005, | |
| "loss": 0.004, | |
| "step": 296460 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_accuracy": 0.8104597074969618, | |
| "eval_loss": 0.0661536380648613, | |
| "eval_runtime": 1303.993, | |
| "eval_samples_per_second": 91.5, | |
| "eval_steps_per_second": 0.358, | |
| "step": 296460 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "grad_norm": 0.025591198354959488, | |
| "learning_rate": 0.00038, | |
| "loss": 0.0037, | |
| "step": 301320 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_accuracy": 0.809722164019612, | |
| "eval_loss": 0.06793326884508133, | |
| "eval_runtime": 1302.3775, | |
| "eval_samples_per_second": 91.613, | |
| "eval_steps_per_second": 0.359, | |
| "step": 301320 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "grad_norm": 0.029204251244664192, | |
| "learning_rate": 0.00037, | |
| "loss": 0.0036, | |
| "step": 306180 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_accuracy": 0.8103172275070193, | |
| "eval_loss": 0.06969352066516876, | |
| "eval_runtime": 1309.132, | |
| "eval_samples_per_second": 91.141, | |
| "eval_steps_per_second": 0.357, | |
| "step": 306180 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "grad_norm": 0.029880277812480927, | |
| "learning_rate": 0.00035999999999999997, | |
| "loss": 0.0035, | |
| "step": 311040 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.810057411054771, | |
| "eval_loss": 0.06940728425979614, | |
| "eval_runtime": 1308.9186, | |
| "eval_samples_per_second": 91.155, | |
| "eval_steps_per_second": 0.357, | |
| "step": 311040 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "grad_norm": 0.030717821791768074, | |
| "learning_rate": 0.00035, | |
| "loss": 0.0034, | |
| "step": 315900 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_accuracy": 0.810082554582408, | |
| "eval_loss": 0.06925758719444275, | |
| "eval_runtime": 1308.5599, | |
| "eval_samples_per_second": 91.18, | |
| "eval_steps_per_second": 0.357, | |
| "step": 315900 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "grad_norm": 0.02681083045899868, | |
| "learning_rate": 0.00034, | |
| "loss": 0.0032, | |
| "step": 320760 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_accuracy": 0.8104597074969618, | |
| "eval_loss": 0.07081950455904007, | |
| "eval_runtime": 1307.5379, | |
| "eval_samples_per_second": 91.252, | |
| "eval_steps_per_second": 0.357, | |
| "step": 320760 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "grad_norm": 0.023366352543234825, | |
| "learning_rate": 0.00033, | |
| "loss": 0.0031, | |
| "step": 325620 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_accuracy": 0.8130411096676864, | |
| "eval_loss": 0.07085347920656204, | |
| "eval_runtime": 1306.1303, | |
| "eval_samples_per_second": 91.35, | |
| "eval_steps_per_second": 0.358, | |
| "step": 325620 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "grad_norm": 0.018703831359744072, | |
| "learning_rate": 0.00032, | |
| "loss": 0.0029, | |
| "step": 330480 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.8118090768134769, | |
| "eval_loss": 0.07043693959712982, | |
| "eval_runtime": 1308.3789, | |
| "eval_samples_per_second": 91.193, | |
| "eval_steps_per_second": 0.357, | |
| "step": 330480 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "grad_norm": 0.021384961903095245, | |
| "learning_rate": 0.00031, | |
| "loss": 0.0028, | |
| "step": 335340 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_accuracy": 0.8111721074466748, | |
| "eval_loss": 0.0710659921169281, | |
| "eval_runtime": 1306.3675, | |
| "eval_samples_per_second": 91.333, | |
| "eval_steps_per_second": 0.357, | |
| "step": 335340 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "grad_norm": 0.027229884639382362, | |
| "learning_rate": 0.0003, | |
| "loss": 0.0027, | |
| "step": 340200 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_accuracy": 0.811775552109961, | |
| "eval_loss": 0.07277531921863556, | |
| "eval_runtime": 1306.0312, | |
| "eval_samples_per_second": 91.357, | |
| "eval_steps_per_second": 0.358, | |
| "step": 340200 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "grad_norm": 0.024667974561452866, | |
| "learning_rate": 0.00029, | |
| "loss": 0.0025, | |
| "step": 345060 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_accuracy": 0.8127980555671961, | |
| "eval_loss": 0.07439424097537994, | |
| "eval_runtime": 1304.9752, | |
| "eval_samples_per_second": 91.431, | |
| "eval_steps_per_second": 0.358, | |
| "step": 345060 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "grad_norm": 0.019673120230436325, | |
| "learning_rate": 0.00028000000000000003, | |
| "loss": 0.0024, | |
| "step": 349920 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.8129572979088966, | |
| "eval_loss": 0.07484369724988937, | |
| "eval_runtime": 1304.8727, | |
| "eval_samples_per_second": 91.438, | |
| "eval_steps_per_second": 0.358, | |
| "step": 349920 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "grad_norm": 0.023998018354177475, | |
| "learning_rate": 0.00027, | |
| "loss": 0.0023, | |
| "step": 354780 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_accuracy": 0.8130662531953233, | |
| "eval_loss": 0.07576391845941544, | |
| "eval_runtime": 1307.7757, | |
| "eval_samples_per_second": 91.235, | |
| "eval_steps_per_second": 0.357, | |
| "step": 354780 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "grad_norm": 0.04219399765133858, | |
| "learning_rate": 0.00026000000000000003, | |
| "loss": 0.0022, | |
| "step": 359640 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_accuracy": 0.8143485731048066, | |
| "eval_loss": 0.07631613314151764, | |
| "eval_runtime": 1306.0365, | |
| "eval_samples_per_second": 91.357, | |
| "eval_steps_per_second": 0.358, | |
| "step": 359640 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "grad_norm": 0.021946126595139503, | |
| "learning_rate": 0.00025, | |
| "loss": 0.0021, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_accuracy": 0.8144994342706282, | |
| "eval_loss": 0.07692206650972366, | |
| "eval_runtime": 1303.767, | |
| "eval_samples_per_second": 91.516, | |
| "eval_steps_per_second": 0.358, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "grad_norm": 0.023164469748735428, | |
| "learning_rate": 0.00024, | |
| "loss": 0.0019, | |
| "step": 369360 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.8133176884716926, | |
| "eval_loss": 0.0780106782913208, | |
| "eval_runtime": 1305.1212, | |
| "eval_samples_per_second": 91.421, | |
| "eval_steps_per_second": 0.358, | |
| "step": 369360 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "grad_norm": 0.02851826325058937, | |
| "learning_rate": 0.00023, | |
| "loss": 0.0018, | |
| "step": 374220 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_accuracy": 0.8146335330846918, | |
| "eval_loss": 0.0777156725525856, | |
| "eval_runtime": 1304.8456, | |
| "eval_samples_per_second": 91.44, | |
| "eval_steps_per_second": 0.358, | |
| "step": 374220 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "grad_norm": 0.0253597479313612, | |
| "learning_rate": 0.00022, | |
| "loss": 0.0017, | |
| "step": 379080 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_accuracy": 0.8162175753258182, | |
| "eval_loss": 0.07899600267410278, | |
| "eval_runtime": 1303.5402, | |
| "eval_samples_per_second": 91.532, | |
| "eval_steps_per_second": 0.358, | |
| "step": 379080 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "grad_norm": 0.02291404828429222, | |
| "learning_rate": 0.00021, | |
| "loss": 0.0016, | |
| "step": 383940 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_accuracy": 0.8145245777982651, | |
| "eval_loss": 0.08102333545684814, | |
| "eval_runtime": 1304.8976, | |
| "eval_samples_per_second": 91.436, | |
| "eval_steps_per_second": 0.358, | |
| "step": 383940 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "grad_norm": 0.029658950865268707, | |
| "learning_rate": 0.0002, | |
| "loss": 0.0015, | |
| "step": 388800 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.8161170012152705, | |
| "eval_loss": 0.08235077559947968, | |
| "eval_runtime": 1306.0263, | |
| "eval_samples_per_second": 91.357, | |
| "eval_steps_per_second": 0.358, | |
| "step": 388800 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "grad_norm": 0.02459796331822872, | |
| "learning_rate": 0.00019, | |
| "loss": 0.0014, | |
| "step": 393660 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_accuracy": 0.815940996521812, | |
| "eval_loss": 0.08271630853414536, | |
| "eval_runtime": 1306.1311, | |
| "eval_samples_per_second": 91.35, | |
| "eval_steps_per_second": 0.358, | |
| "step": 393660 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "grad_norm": 0.01461075246334076, | |
| "learning_rate": 0.00017999999999999998, | |
| "loss": 0.0013, | |
| "step": 398520 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_accuracy": 0.8159745212253279, | |
| "eval_loss": 0.08540969341993332, | |
| "eval_runtime": 1303.2922, | |
| "eval_samples_per_second": 91.549, | |
| "eval_steps_per_second": 0.358, | |
| "step": 398520 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "grad_norm": 0.011922557838261127, | |
| "learning_rate": 0.00017, | |
| "loss": 0.0012, | |
| "step": 403380 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_accuracy": 0.8165025353057034, | |
| "eval_loss": 0.08608754724264145, | |
| "eval_runtime": 1300.8769, | |
| "eval_samples_per_second": 91.719, | |
| "eval_steps_per_second": 0.359, | |
| "step": 403380 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "grad_norm": 0.006066465750336647, | |
| "learning_rate": 0.00016, | |
| "loss": 0.0011, | |
| "step": 408240 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.8179943846121611, | |
| "eval_loss": 0.0866456851363182, | |
| "eval_runtime": 1297.1839, | |
| "eval_samples_per_second": 91.98, | |
| "eval_steps_per_second": 0.36, | |
| "step": 408240 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "grad_norm": 0.019887538626790047, | |
| "learning_rate": 0.00015, | |
| "loss": 0.001, | |
| "step": 413100 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_accuracy": 0.8174747517076646, | |
| "eval_loss": 0.08994536101818085, | |
| "eval_runtime": 1297.8948, | |
| "eval_samples_per_second": 91.93, | |
| "eval_steps_per_second": 0.36, | |
| "step": 413100 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "grad_norm": 0.019588502123951912, | |
| "learning_rate": 0.00014000000000000001, | |
| "loss": 0.0009, | |
| "step": 417960 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_accuracy": 0.8186648786824792, | |
| "eval_loss": 0.08895347267389297, | |
| "eval_runtime": 1297.8927, | |
| "eval_samples_per_second": 91.93, | |
| "eval_steps_per_second": 0.36, | |
| "step": 417960 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "grad_norm": 0.019738251343369484, | |
| "learning_rate": 0.00013000000000000002, | |
| "loss": 0.0008, | |
| "step": 422820 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_accuracy": 0.8184553492855048, | |
| "eval_loss": 0.09202321618795395, | |
| "eval_runtime": 1297.7904, | |
| "eval_samples_per_second": 91.937, | |
| "eval_steps_per_second": 0.36, | |
| "step": 422820 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "grad_norm": 0.0069356439635157585, | |
| "learning_rate": 0.00012, | |
| "loss": 0.0008, | |
| "step": 427680 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.8189749821900013, | |
| "eval_loss": 0.09301886707544327, | |
| "eval_runtime": 1297.4851, | |
| "eval_samples_per_second": 91.959, | |
| "eval_steps_per_second": 0.36, | |
| "step": 427680 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "grad_norm": 0.0168699249625206, | |
| "learning_rate": 0.00011, | |
| "loss": 0.0007, | |
| "step": 432540 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_accuracy": 0.8196370950844404, | |
| "eval_loss": 0.09475909918546677, | |
| "eval_runtime": 1297.2344, | |
| "eval_samples_per_second": 91.976, | |
| "eval_steps_per_second": 0.36, | |
| "step": 432540 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "grad_norm": 0.00997143518179655, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0006, | |
| "step": 437400 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_accuracy": 0.8189917445417592, | |
| "eval_loss": 0.09576508402824402, | |
| "eval_runtime": 1296.498, | |
| "eval_samples_per_second": 92.029, | |
| "eval_steps_per_second": 0.36, | |
| "step": 437400 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "grad_norm": 0.017046066001057625, | |
| "learning_rate": 8.999999999999999e-05, | |
| "loss": 0.0005, | |
| "step": 442260 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_accuracy": 0.8195365209738926, | |
| "eval_loss": 0.09832222014665604, | |
| "eval_runtime": 1296.9323, | |
| "eval_samples_per_second": 91.998, | |
| "eval_steps_per_second": 0.36, | |
| "step": 442260 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "grad_norm": 0.01709928549826145, | |
| "learning_rate": 8e-05, | |
| "loss": 0.0005, | |
| "step": 447120 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.8208523655868918, | |
| "eval_loss": 0.10070452094078064, | |
| "eval_runtime": 1296.8404, | |
| "eval_samples_per_second": 92.004, | |
| "eval_steps_per_second": 0.36, | |
| "step": 447120 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "grad_norm": 0.014434403739869595, | |
| "learning_rate": 7.000000000000001e-05, | |
| "loss": 0.0004, | |
| "step": 451980 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_accuracy": 0.8202153962200897, | |
| "eval_loss": 0.10308787226676941, | |
| "eval_runtime": 1304.3584, | |
| "eval_samples_per_second": 91.474, | |
| "eval_steps_per_second": 0.358, | |
| "step": 451980 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "grad_norm": 0.014810960739850998, | |
| "learning_rate": 6e-05, | |
| "loss": 0.0004, | |
| "step": 456840 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_accuracy": 0.82111218203914, | |
| "eval_loss": 0.10589364916086197, | |
| "eval_runtime": 1302.0953, | |
| "eval_samples_per_second": 91.633, | |
| "eval_steps_per_second": 0.359, | |
| "step": 456840 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "grad_norm": 0.0034873096738010645, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0003, | |
| "step": 461700 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_accuracy": 0.8215982902401207, | |
| "eval_loss": 0.10970806330442429, | |
| "eval_runtime": 1301.0636, | |
| "eval_samples_per_second": 91.706, | |
| "eval_steps_per_second": 0.359, | |
| "step": 461700 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "grad_norm": 0.002740664640441537, | |
| "learning_rate": 4e-05, | |
| "loss": 0.0003, | |
| "step": 466560 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.8221179231446172, | |
| "eval_loss": 0.11456754803657532, | |
| "eval_runtime": 1302.7083, | |
| "eval_samples_per_second": 91.59, | |
| "eval_steps_per_second": 0.358, | |
| "step": 466560 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "grad_norm": 0.0014008020516484976, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0002, | |
| "step": 471420 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_accuracy": 0.8224364078280183, | |
| "eval_loss": 0.11764019727706909, | |
| "eval_runtime": 1302.3991, | |
| "eval_samples_per_second": 91.612, | |
| "eval_steps_per_second": 0.359, | |
| "step": 471420 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "grad_norm": 0.0054730623960494995, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0002, | |
| "step": 476280 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_accuracy": 0.8226543184008717, | |
| "eval_loss": 0.12025844305753708, | |
| "eval_runtime": 1302.1251, | |
| "eval_samples_per_second": 91.631, | |
| "eval_steps_per_second": 0.359, | |
| "step": 476280 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "grad_norm": 0.0020766761153936386, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0002, | |
| "step": 481140 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_accuracy": 0.8226878431043876, | |
| "eval_loss": 0.1223362609744072, | |
| "eval_runtime": 1301.9742, | |
| "eval_samples_per_second": 91.642, | |
| "eval_steps_per_second": 0.359, | |
| "step": 481140 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "grad_norm": 0.018789879977703094, | |
| "learning_rate": 0.0, | |
| "loss": 0.0001, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.8229895654360306, | |
| "eval_loss": 0.12378211319446564, | |
| "eval_runtime": 1301.6752, | |
| "eval_samples_per_second": 91.663, | |
| "eval_steps_per_second": 0.359, | |
| "step": 486000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 486000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "total_flos": 2.2219508552555553e+19, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |