| { | |
| "best_metric": 0.9534883720930233, | |
| "best_model_checkpoint": "deit-base-distilled-patch16-224-75-fold3/checkpoint-158", | |
| "epoch": 100.0, | |
| "eval_steps": 500, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6744186046511628, | |
| "eval_loss": 0.5697163343429565, | |
| "eval_runtime": 0.6052, | |
| "eval_samples_per_second": 71.053, | |
| "eval_steps_per_second": 3.305, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6976744186046512, | |
| "eval_loss": 0.6347883939743042, | |
| "eval_runtime": 0.6009, | |
| "eval_samples_per_second": 71.563, | |
| "eval_steps_per_second": 3.329, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6976744186046512, | |
| "eval_loss": 0.8029990792274475, | |
| "eval_runtime": 0.6011, | |
| "eval_samples_per_second": 71.536, | |
| "eval_steps_per_second": 3.327, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6976744186046512, | |
| "eval_loss": 0.7091969847679138, | |
| "eval_runtime": 0.6081, | |
| "eval_samples_per_second": 70.714, | |
| "eval_steps_per_second": 3.289, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.5779500007629395, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.5313, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.813953488372093, | |
| "eval_loss": 0.4643900990486145, | |
| "eval_runtime": 0.6018, | |
| "eval_samples_per_second": 71.457, | |
| "eval_steps_per_second": 3.324, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7906976744186046, | |
| "eval_loss": 0.4210609495639801, | |
| "eval_runtime": 0.6243, | |
| "eval_samples_per_second": 68.88, | |
| "eval_steps_per_second": 3.204, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.813953488372093, | |
| "eval_loss": 0.4912910759449005, | |
| "eval_runtime": 0.6059, | |
| "eval_samples_per_second": 70.974, | |
| "eval_steps_per_second": 3.301, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.4787815809249878, | |
| "eval_runtime": 0.6179, | |
| "eval_samples_per_second": 69.587, | |
| "eval_steps_per_second": 3.237, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7906976744186046, | |
| "eval_loss": 0.37165048718452454, | |
| "eval_runtime": 0.5981, | |
| "eval_samples_per_second": 71.89, | |
| "eval_steps_per_second": 3.344, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 2.3730225563049316, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3672, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.813953488372093, | |
| "eval_loss": 0.3762969672679901, | |
| "eval_runtime": 0.6175, | |
| "eval_samples_per_second": 69.636, | |
| "eval_steps_per_second": 3.239, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.5081735849380493, | |
| "eval_runtime": 0.6036, | |
| "eval_samples_per_second": 71.242, | |
| "eval_steps_per_second": 3.314, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.813953488372093, | |
| "eval_loss": 0.36610597372055054, | |
| "eval_runtime": 0.6117, | |
| "eval_samples_per_second": 70.293, | |
| "eval_steps_per_second": 3.269, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.813953488372093, | |
| "eval_loss": 0.369310587644577, | |
| "eval_runtime": 0.6134, | |
| "eval_samples_per_second": 70.105, | |
| "eval_steps_per_second": 3.261, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.8604651162790697, | |
| "eval_loss": 0.38081368803977966, | |
| "eval_runtime": 0.6157, | |
| "eval_samples_per_second": 69.837, | |
| "eval_steps_per_second": 3.248, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 1.9244849681854248, | |
| "learning_rate": 4.722222222222222e-05, | |
| "loss": 0.2837, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.3491571247577667, | |
| "eval_runtime": 0.6062, | |
| "eval_samples_per_second": 70.932, | |
| "eval_steps_per_second": 3.299, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7906976744186046, | |
| "eval_loss": 0.34460702538490295, | |
| "eval_runtime": 0.6137, | |
| "eval_samples_per_second": 70.064, | |
| "eval_steps_per_second": 3.259, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8604651162790697, | |
| "eval_loss": 0.39085254073143005, | |
| "eval_runtime": 0.6016, | |
| "eval_samples_per_second": 71.481, | |
| "eval_steps_per_second": 3.325, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.43787404894828796, | |
| "eval_runtime": 0.605, | |
| "eval_samples_per_second": 71.071, | |
| "eval_steps_per_second": 3.306, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.813953488372093, | |
| "eval_loss": 0.3904644846916199, | |
| "eval_runtime": 0.6045, | |
| "eval_samples_per_second": 71.13, | |
| "eval_steps_per_second": 3.308, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 4.379067897796631, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.2268, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.813953488372093, | |
| "eval_loss": 0.3452938497066498, | |
| "eval_runtime": 0.6069, | |
| "eval_samples_per_second": 70.856, | |
| "eval_steps_per_second": 3.296, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.4144999384880066, | |
| "eval_runtime": 0.6082, | |
| "eval_samples_per_second": 70.704, | |
| "eval_steps_per_second": 3.289, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.3370114862918854, | |
| "eval_runtime": 0.6348, | |
| "eval_samples_per_second": 67.738, | |
| "eval_steps_per_second": 3.151, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.3501792252063751, | |
| "eval_runtime": 0.6156, | |
| "eval_samples_per_second": 69.849, | |
| "eval_steps_per_second": 3.249, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.3295463025569916, | |
| "eval_runtime": 0.6413, | |
| "eval_samples_per_second": 67.05, | |
| "eval_steps_per_second": 3.119, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 3.1536622047424316, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.1735, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.31181007623672485, | |
| "eval_runtime": 0.6158, | |
| "eval_samples_per_second": 69.827, | |
| "eval_steps_per_second": 3.248, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.30499619245529175, | |
| "eval_runtime": 0.633, | |
| "eval_samples_per_second": 67.929, | |
| "eval_steps_per_second": 3.159, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.813953488372093, | |
| "eval_loss": 0.6940184235572815, | |
| "eval_runtime": 0.602, | |
| "eval_samples_per_second": 71.434, | |
| "eval_steps_per_second": 3.323, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.5913228392601013, | |
| "eval_runtime": 0.6136, | |
| "eval_samples_per_second": 70.075, | |
| "eval_steps_per_second": 3.259, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.3189955949783325, | |
| "eval_runtime": 0.5963, | |
| "eval_samples_per_second": 72.111, | |
| "eval_steps_per_second": 3.354, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 2.4453742504119873, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.1221, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.414093554019928, | |
| "eval_runtime": 0.605, | |
| "eval_samples_per_second": 71.071, | |
| "eval_steps_per_second": 3.306, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.4571942687034607, | |
| "eval_runtime": 0.6068, | |
| "eval_samples_per_second": 70.866, | |
| "eval_steps_per_second": 3.296, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.3048303425312042, | |
| "eval_runtime": 0.6071, | |
| "eval_samples_per_second": 70.828, | |
| "eval_steps_per_second": 3.294, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.3138634264469147, | |
| "eval_runtime": 0.609, | |
| "eval_samples_per_second": 70.604, | |
| "eval_steps_per_second": 3.284, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.3089713454246521, | |
| "eval_runtime": 0.6206, | |
| "eval_samples_per_second": 69.284, | |
| "eval_steps_per_second": 3.223, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "grad_norm": 2.5120537281036377, | |
| "learning_rate": 3.611111111111111e-05, | |
| "loss": 0.1158, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.3392648994922638, | |
| "eval_runtime": 0.6092, | |
| "eval_samples_per_second": 70.586, | |
| "eval_steps_per_second": 3.283, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.8604651162790697, | |
| "eval_loss": 0.3035069704055786, | |
| "eval_runtime": 0.6314, | |
| "eval_samples_per_second": 68.107, | |
| "eval_steps_per_second": 3.168, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.813953488372093, | |
| "eval_loss": 0.4730403423309326, | |
| "eval_runtime": 0.6104, | |
| "eval_samples_per_second": 70.447, | |
| "eval_steps_per_second": 3.277, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.8604651162790697, | |
| "eval_loss": 0.3787640333175659, | |
| "eval_runtime": 0.6293, | |
| "eval_samples_per_second": 68.33, | |
| "eval_steps_per_second": 3.178, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.2904440760612488, | |
| "eval_runtime": 0.6163, | |
| "eval_samples_per_second": 69.766, | |
| "eval_steps_per_second": 3.245, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 3.528836488723755, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.1075, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.2749671936035156, | |
| "eval_runtime": 0.6423, | |
| "eval_samples_per_second": 66.944, | |
| "eval_steps_per_second": 3.114, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.33282795548439026, | |
| "eval_runtime": 0.6182, | |
| "eval_samples_per_second": 69.557, | |
| "eval_steps_per_second": 3.235, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.26483291387557983, | |
| "eval_runtime": 0.6241, | |
| "eval_samples_per_second": 68.901, | |
| "eval_steps_per_second": 3.205, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.2517068088054657, | |
| "eval_runtime": 0.6161, | |
| "eval_samples_per_second": 69.789, | |
| "eval_steps_per_second": 3.246, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.4401753544807434, | |
| "eval_runtime": 0.6156, | |
| "eval_samples_per_second": 69.846, | |
| "eval_steps_per_second": 3.249, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "grad_norm": 2.312138557434082, | |
| "learning_rate": 3.055555555555556e-05, | |
| "loss": 0.0925, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.4075532555580139, | |
| "eval_runtime": 0.606, | |
| "eval_samples_per_second": 70.958, | |
| "eval_steps_per_second": 3.3, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.2390284687280655, | |
| "eval_runtime": 0.6114, | |
| "eval_samples_per_second": 70.331, | |
| "eval_steps_per_second": 3.271, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.217637300491333, | |
| "eval_runtime": 0.6053, | |
| "eval_samples_per_second": 71.037, | |
| "eval_steps_per_second": 3.304, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.9302325581395349, | |
| "eval_loss": 0.257964164018631, | |
| "eval_runtime": 0.6148, | |
| "eval_samples_per_second": 69.944, | |
| "eval_steps_per_second": 3.253, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.20485863089561462, | |
| "eval_runtime": 0.6102, | |
| "eval_samples_per_second": 70.467, | |
| "eval_steps_per_second": 3.278, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "grad_norm": 1.734573483467102, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.1085, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.22444705665111542, | |
| "eval_runtime": 0.6167, | |
| "eval_samples_per_second": 69.725, | |
| "eval_steps_per_second": 3.243, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.2377014309167862, | |
| "eval_runtime": 0.609, | |
| "eval_samples_per_second": 70.61, | |
| "eval_steps_per_second": 3.284, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.45913732051849365, | |
| "eval_runtime": 0.6106, | |
| "eval_samples_per_second": 70.42, | |
| "eval_steps_per_second": 3.275, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.5053869485855103, | |
| "eval_runtime": 0.6159, | |
| "eval_samples_per_second": 69.811, | |
| "eval_steps_per_second": 3.247, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.9302325581395349, | |
| "eval_loss": 0.29935717582702637, | |
| "eval_runtime": 0.6099, | |
| "eval_samples_per_second": 70.503, | |
| "eval_steps_per_second": 3.279, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "grad_norm": 1.5581638813018799, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0876, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.2386816143989563, | |
| "eval_runtime": 0.6123, | |
| "eval_samples_per_second": 70.228, | |
| "eval_steps_per_second": 3.266, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.3077695667743683, | |
| "eval_runtime": 0.6135, | |
| "eval_samples_per_second": 70.085, | |
| "eval_steps_per_second": 3.26, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.44697698950767517, | |
| "eval_runtime": 0.6163, | |
| "eval_samples_per_second": 69.768, | |
| "eval_steps_per_second": 3.245, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.34570202231407166, | |
| "eval_runtime": 0.6289, | |
| "eval_samples_per_second": 68.371, | |
| "eval_steps_per_second": 3.18, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.2654927372932434, | |
| "eval_runtime": 0.6174, | |
| "eval_samples_per_second": 69.65, | |
| "eval_steps_per_second": 3.24, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 1.780471682548523, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.0823, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.21497979760169983, | |
| "eval_runtime": 0.6248, | |
| "eval_samples_per_second": 68.825, | |
| "eval_steps_per_second": 3.201, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.21164707839488983, | |
| "eval_runtime": 0.6212, | |
| "eval_samples_per_second": 69.216, | |
| "eval_steps_per_second": 3.219, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_accuracy": 0.9302325581395349, | |
| "eval_loss": 0.23045624792575836, | |
| "eval_runtime": 0.6507, | |
| "eval_samples_per_second": 66.087, | |
| "eval_steps_per_second": 3.074, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_accuracy": 0.9302325581395349, | |
| "eval_loss": 0.20702533423900604, | |
| "eval_runtime": 0.6456, | |
| "eval_samples_per_second": 66.609, | |
| "eval_steps_per_second": 3.098, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.1808479279279709, | |
| "eval_runtime": 0.6272, | |
| "eval_samples_per_second": 68.555, | |
| "eval_steps_per_second": 3.189, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "grad_norm": 1.7751795053482056, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 0.0791, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.1668923795223236, | |
| "eval_runtime": 0.6087, | |
| "eval_samples_per_second": 70.643, | |
| "eval_steps_per_second": 3.286, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.17213645577430725, | |
| "eval_runtime": 0.6145, | |
| "eval_samples_per_second": 69.98, | |
| "eval_steps_per_second": 3.255, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_accuracy": 0.9302325581395349, | |
| "eval_loss": 0.21943524479866028, | |
| "eval_runtime": 0.6151, | |
| "eval_samples_per_second": 69.908, | |
| "eval_steps_per_second": 3.252, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.3453516960144043, | |
| "eval_runtime": 0.6216, | |
| "eval_samples_per_second": 69.175, | |
| "eval_steps_per_second": 3.217, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_accuracy": 0.8372093023255814, | |
| "eval_loss": 0.5415489077568054, | |
| "eval_runtime": 0.6163, | |
| "eval_samples_per_second": 69.777, | |
| "eval_steps_per_second": 3.245, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "grad_norm": 2.149721384048462, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0607, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_accuracy": 0.8604651162790697, | |
| "eval_loss": 0.44572049379348755, | |
| "eval_runtime": 0.6401, | |
| "eval_samples_per_second": 67.176, | |
| "eval_steps_per_second": 3.124, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.24112088978290558, | |
| "eval_runtime": 0.6157, | |
| "eval_samples_per_second": 69.834, | |
| "eval_steps_per_second": 3.248, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.20573778450489044, | |
| "eval_runtime": 0.6124, | |
| "eval_samples_per_second": 70.221, | |
| "eval_steps_per_second": 3.266, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.21995888650417328, | |
| "eval_runtime": 0.6102, | |
| "eval_samples_per_second": 70.468, | |
| "eval_steps_per_second": 3.278, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.26773378252983093, | |
| "eval_runtime": 0.6042, | |
| "eval_samples_per_second": 71.167, | |
| "eval_steps_per_second": 3.31, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "grad_norm": 2.265094041824341, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 0.0715, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.2950317859649658, | |
| "eval_runtime": 0.6015, | |
| "eval_samples_per_second": 71.49, | |
| "eval_steps_per_second": 3.325, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.287355899810791, | |
| "eval_runtime": 0.6156, | |
| "eval_samples_per_second": 69.847, | |
| "eval_steps_per_second": 3.249, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.22357946634292603, | |
| "eval_runtime": 0.629, | |
| "eval_samples_per_second": 68.362, | |
| "eval_steps_per_second": 3.18, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_accuracy": 0.9302325581395349, | |
| "eval_loss": 0.20517852902412415, | |
| "eval_runtime": 0.6341, | |
| "eval_samples_per_second": 67.811, | |
| "eval_steps_per_second": 3.154, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_accuracy": 0.9534883720930233, | |
| "eval_loss": 0.2177267223596573, | |
| "eval_runtime": 0.6148, | |
| "eval_samples_per_second": 69.94, | |
| "eval_steps_per_second": 3.253, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "grad_norm": 1.2989553213119507, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.0644, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.9534883720930233, | |
| "eval_loss": 0.21780182421207428, | |
| "eval_runtime": 0.6298, | |
| "eval_samples_per_second": 68.28, | |
| "eval_steps_per_second": 3.176, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_accuracy": 0.9302325581395349, | |
| "eval_loss": 0.2125912755727768, | |
| "eval_runtime": 0.6219, | |
| "eval_samples_per_second": 69.148, | |
| "eval_steps_per_second": 3.216, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_accuracy": 0.9302325581395349, | |
| "eval_loss": 0.2127012461423874, | |
| "eval_runtime": 0.6331, | |
| "eval_samples_per_second": 67.925, | |
| "eval_steps_per_second": 3.159, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.2215750515460968, | |
| "eval_runtime": 0.6157, | |
| "eval_samples_per_second": 69.836, | |
| "eval_steps_per_second": 3.248, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.24196544289588928, | |
| "eval_runtime": 0.6162, | |
| "eval_samples_per_second": 69.782, | |
| "eval_steps_per_second": 3.246, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "grad_norm": 1.7330666780471802, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0622, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.23046602308750153, | |
| "eval_runtime": 0.6106, | |
| "eval_samples_per_second": 70.418, | |
| "eval_steps_per_second": 3.275, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.22466078400611877, | |
| "eval_runtime": 0.6204, | |
| "eval_samples_per_second": 69.314, | |
| "eval_steps_per_second": 3.224, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_accuracy": 0.9069767441860465, | |
| "eval_loss": 0.2492048740386963, | |
| "eval_runtime": 0.6066, | |
| "eval_samples_per_second": 70.887, | |
| "eval_steps_per_second": 3.297, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.32915806770324707, | |
| "eval_runtime": 0.627, | |
| "eval_samples_per_second": 68.577, | |
| "eval_steps_per_second": 3.19, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.38761720061302185, | |
| "eval_runtime": 0.6158, | |
| "eval_samples_per_second": 69.829, | |
| "eval_steps_per_second": 3.248, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "grad_norm": 1.681682825088501, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.0564, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.388582706451416, | |
| "eval_runtime": 0.6559, | |
| "eval_samples_per_second": 65.561, | |
| "eval_steps_per_second": 3.049, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.3706725239753723, | |
| "eval_runtime": 0.6138, | |
| "eval_samples_per_second": 70.054, | |
| "eval_steps_per_second": 3.258, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.33771568536758423, | |
| "eval_runtime": 0.6168, | |
| "eval_samples_per_second": 69.711, | |
| "eval_steps_per_second": 3.242, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.3185550272464752, | |
| "eval_runtime": 0.612, | |
| "eval_samples_per_second": 70.266, | |
| "eval_steps_per_second": 3.268, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.3037985563278198, | |
| "eval_runtime": 0.6389, | |
| "eval_samples_per_second": 67.304, | |
| "eval_steps_per_second": 3.13, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "grad_norm": 2.513131856918335, | |
| "learning_rate": 2.777777777777778e-06, | |
| "loss": 0.0578, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_accuracy": 0.8604651162790697, | |
| "eval_loss": 0.2818430960178375, | |
| "eval_runtime": 0.6131, | |
| "eval_samples_per_second": 70.132, | |
| "eval_steps_per_second": 3.262, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.27556031942367554, | |
| "eval_runtime": 0.6051, | |
| "eval_samples_per_second": 71.059, | |
| "eval_steps_per_second": 3.305, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.26944613456726074, | |
| "eval_runtime": 0.6128, | |
| "eval_samples_per_second": 70.173, | |
| "eval_steps_per_second": 3.264, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.26979494094848633, | |
| "eval_runtime": 0.6291, | |
| "eval_samples_per_second": 68.356, | |
| "eval_steps_per_second": 3.179, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.27324578166007996, | |
| "eval_runtime": 0.6094, | |
| "eval_samples_per_second": 70.556, | |
| "eval_steps_per_second": 3.282, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "grad_norm": 2.232414722442627, | |
| "learning_rate": 0.0, | |
| "loss": 0.0424, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.8837209302325582, | |
| "eval_loss": 0.2739052474498749, | |
| "eval_runtime": 0.6176, | |
| "eval_samples_per_second": 69.628, | |
| "eval_steps_per_second": 3.238, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "step": 200, | |
| "total_flos": 1.867590382823424e+18, | |
| "train_loss": 0.13966285645961762, | |
| "train_runtime": 1119.8693, | |
| "train_samples_per_second": 21.52, | |
| "train_steps_per_second": 0.179 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.9534883720930233, | |
| "eval_loss": 0.2177267223596573, | |
| "eval_runtime": 1.7678, | |
| "eval_samples_per_second": 24.324, | |
| "eval_steps_per_second": 1.131, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.867590382823424e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |