Image Classification
Transformers
TensorBoard
Safetensors
beit
Generated from Trainer
Eval Results (legacy)
Instructions to use BilalMuftuoglu/beit-base-patch16-224-65-fold2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use BilalMuftuoglu/beit-base-patch16-224-65-fold2 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-classification", model="BilalMuftuoglu/beit-base-patch16-224-65-fold2")
pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png")
```

```python
# Load model directly
from transformers import AutoImageProcessor, AutoModelForImageClassification

processor = AutoImageProcessor.from_pretrained("BilalMuftuoglu/beit-base-patch16-224-65-fold2")
model = AutoModelForImageClassification.from_pretrained("BilalMuftuoglu/beit-base-patch16-224-65-fold2")
```

- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 0.9014084507042254, | |
| "best_model_checkpoint": "beit-base-patch16-224-65-fold2/checkpoint-100", | |
| "epoch": 92.3076923076923, | |
| "eval_steps": 500, | |
| "global_step": 300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.9230769230769231, | |
| "eval_accuracy": 0.5492957746478874, | |
| "eval_loss": 0.6829895973205566, | |
| "eval_runtime": 1.1518, | |
| "eval_samples_per_second": 61.641, | |
| "eval_steps_per_second": 2.605, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "eval_accuracy": 0.647887323943662, | |
| "eval_loss": 0.6407253742218018, | |
| "eval_runtime": 1.1245, | |
| "eval_samples_per_second": 63.139, | |
| "eval_steps_per_second": 2.668, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 2.769230769230769, | |
| "eval_accuracy": 0.5352112676056338, | |
| "eval_loss": 0.6611723303794861, | |
| "eval_runtime": 1.0983, | |
| "eval_samples_per_second": 64.644, | |
| "eval_steps_per_second": 2.731, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 3.076923076923077, | |
| "grad_norm": 13.373871803283691, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.7094, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6901408450704225, | |
| "eval_loss": 0.6174536943435669, | |
| "eval_runtime": 1.0831, | |
| "eval_samples_per_second": 65.553, | |
| "eval_steps_per_second": 2.77, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 4.923076923076923, | |
| "eval_accuracy": 0.676056338028169, | |
| "eval_loss": 0.5914616584777832, | |
| "eval_runtime": 1.1133, | |
| "eval_samples_per_second": 63.775, | |
| "eval_steps_per_second": 2.695, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 5.846153846153846, | |
| "eval_accuracy": 0.704225352112676, | |
| "eval_loss": 0.5677279233932495, | |
| "eval_runtime": 1.1168, | |
| "eval_samples_per_second": 63.574, | |
| "eval_steps_per_second": 2.686, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 6.153846153846154, | |
| "grad_norm": 10.936724662780762, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.6444, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 6.769230769230769, | |
| "eval_accuracy": 0.7746478873239436, | |
| "eval_loss": 0.5177403688430786, | |
| "eval_runtime": 1.0706, | |
| "eval_samples_per_second": 66.315, | |
| "eval_steps_per_second": 2.802, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7887323943661971, | |
| "eval_loss": 0.4927891492843628, | |
| "eval_runtime": 1.0755, | |
| "eval_samples_per_second": 66.014, | |
| "eval_steps_per_second": 2.789, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 8.923076923076923, | |
| "eval_accuracy": 0.6901408450704225, | |
| "eval_loss": 0.5641009211540222, | |
| "eval_runtime": 1.0927, | |
| "eval_samples_per_second": 64.977, | |
| "eval_steps_per_second": 2.745, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 9.23076923076923, | |
| "grad_norm": 10.08945083618164, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5574, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 9.846153846153847, | |
| "eval_accuracy": 0.8169014084507042, | |
| "eval_loss": 0.43716326355934143, | |
| "eval_runtime": 1.0928, | |
| "eval_samples_per_second": 64.973, | |
| "eval_steps_per_second": 2.745, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 10.76923076923077, | |
| "eval_accuracy": 0.7605633802816901, | |
| "eval_loss": 0.5677071809768677, | |
| "eval_runtime": 1.15, | |
| "eval_samples_per_second": 61.738, | |
| "eval_steps_per_second": 2.609, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7605633802816901, | |
| "eval_loss": 0.5032159090042114, | |
| "eval_runtime": 1.1005, | |
| "eval_samples_per_second": 64.518, | |
| "eval_steps_per_second": 2.726, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 12.307692307692308, | |
| "grad_norm": 4.973866939544678, | |
| "learning_rate": 4.814814814814815e-05, | |
| "loss": 0.543, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 12.923076923076923, | |
| "eval_accuracy": 0.8028169014084507, | |
| "eval_loss": 0.47447821497917175, | |
| "eval_runtime": 1.0767, | |
| "eval_samples_per_second": 65.942, | |
| "eval_steps_per_second": 2.786, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 13.846153846153847, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.4056505262851715, | |
| "eval_runtime": 1.1102, | |
| "eval_samples_per_second": 63.953, | |
| "eval_steps_per_second": 2.702, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 14.76923076923077, | |
| "eval_accuracy": 0.7746478873239436, | |
| "eval_loss": 0.4012921154499054, | |
| "eval_runtime": 1.0771, | |
| "eval_samples_per_second": 65.919, | |
| "eval_steps_per_second": 2.785, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 15.384615384615385, | |
| "grad_norm": 5.340551376342773, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 0.4499, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.36697399616241455, | |
| "eval_runtime": 1.1391, | |
| "eval_samples_per_second": 62.328, | |
| "eval_steps_per_second": 2.634, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 16.923076923076923, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.4215153157711029, | |
| "eval_runtime": 1.1192, | |
| "eval_samples_per_second": 63.435, | |
| "eval_steps_per_second": 2.68, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 17.846153846153847, | |
| "eval_accuracy": 0.7746478873239436, | |
| "eval_loss": 0.4862484037876129, | |
| "eval_runtime": 1.0754, | |
| "eval_samples_per_second": 66.023, | |
| "eval_steps_per_second": 2.79, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 18.46153846153846, | |
| "grad_norm": 9.19057846069336, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.3902, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 18.76923076923077, | |
| "eval_accuracy": 0.7323943661971831, | |
| "eval_loss": 0.5781329274177551, | |
| "eval_runtime": 1.1009, | |
| "eval_samples_per_second": 64.491, | |
| "eval_steps_per_second": 2.725, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8028169014084507, | |
| "eval_loss": 0.5248808860778809, | |
| "eval_runtime": 1.1057, | |
| "eval_samples_per_second": 64.211, | |
| "eval_steps_per_second": 2.713, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 20.923076923076923, | |
| "eval_accuracy": 0.8732394366197183, | |
| "eval_loss": 0.3936827778816223, | |
| "eval_runtime": 1.1333, | |
| "eval_samples_per_second": 62.651, | |
| "eval_steps_per_second": 2.647, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 21.53846153846154, | |
| "grad_norm": 3.5906007289886475, | |
| "learning_rate": 4.259259259259259e-05, | |
| "loss": 0.4029, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 21.846153846153847, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.4131520092487335, | |
| "eval_runtime": 1.0935, | |
| "eval_samples_per_second": 64.93, | |
| "eval_steps_per_second": 2.744, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 22.76923076923077, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.41784021258354187, | |
| "eval_runtime": 1.1092, | |
| "eval_samples_per_second": 64.011, | |
| "eval_steps_per_second": 2.705, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7183098591549296, | |
| "eval_loss": 0.7273370027542114, | |
| "eval_runtime": 1.106, | |
| "eval_samples_per_second": 64.194, | |
| "eval_steps_per_second": 2.712, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 24.615384615384617, | |
| "grad_norm": 6.034952640533447, | |
| "learning_rate": 4.074074074074074e-05, | |
| "loss": 0.3163, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 24.923076923076923, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.4221486747264862, | |
| "eval_runtime": 1.0943, | |
| "eval_samples_per_second": 64.883, | |
| "eval_steps_per_second": 2.742, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 25.846153846153847, | |
| "eval_accuracy": 0.8732394366197183, | |
| "eval_loss": 0.4086194336414337, | |
| "eval_runtime": 1.1379, | |
| "eval_samples_per_second": 62.397, | |
| "eval_steps_per_second": 2.637, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 26.76923076923077, | |
| "eval_accuracy": 0.8732394366197183, | |
| "eval_loss": 0.39464449882507324, | |
| "eval_runtime": 1.1089, | |
| "eval_samples_per_second": 64.025, | |
| "eval_steps_per_second": 2.705, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 27.692307692307693, | |
| "grad_norm": 6.2680439949035645, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.2786, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.8028169014084507, | |
| "eval_loss": 0.5320041179656982, | |
| "eval_runtime": 1.1427, | |
| "eval_samples_per_second": 62.134, | |
| "eval_steps_per_second": 2.625, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 28.923076923076923, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.41318005323410034, | |
| "eval_runtime": 1.1147, | |
| "eval_samples_per_second": 63.695, | |
| "eval_steps_per_second": 2.691, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 29.846153846153847, | |
| "eval_accuracy": 0.7746478873239436, | |
| "eval_loss": 0.5542149543762207, | |
| "eval_runtime": 1.0924, | |
| "eval_samples_per_second": 64.994, | |
| "eval_steps_per_second": 2.746, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 30.76923076923077, | |
| "grad_norm": 5.675084114074707, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.2763, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 30.76923076923077, | |
| "eval_accuracy": 0.9014084507042254, | |
| "eval_loss": 0.37337467074394226, | |
| "eval_runtime": 1.1285, | |
| "eval_samples_per_second": 62.913, | |
| "eval_steps_per_second": 2.658, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.44785401225090027, | |
| "eval_runtime": 1.0865, | |
| "eval_samples_per_second": 65.347, | |
| "eval_steps_per_second": 2.761, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 32.92307692307692, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.3481605350971222, | |
| "eval_runtime": 1.0909, | |
| "eval_samples_per_second": 65.081, | |
| "eval_steps_per_second": 2.75, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 33.84615384615385, | |
| "grad_norm": 9.396614074707031, | |
| "learning_rate": 3.518518518518519e-05, | |
| "loss": 0.25, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 33.84615384615385, | |
| "eval_accuracy": 0.7887323943661971, | |
| "eval_loss": 0.5442134737968445, | |
| "eval_runtime": 1.0623, | |
| "eval_samples_per_second": 66.836, | |
| "eval_steps_per_second": 2.824, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 34.76923076923077, | |
| "eval_accuracy": 0.8732394366197183, | |
| "eval_loss": 0.4211238920688629, | |
| "eval_runtime": 1.1024, | |
| "eval_samples_per_second": 64.406, | |
| "eval_steps_per_second": 2.721, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.4860071539878845, | |
| "eval_runtime": 1.1377, | |
| "eval_samples_per_second": 62.405, | |
| "eval_steps_per_second": 2.637, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 36.92307692307692, | |
| "grad_norm": 3.626502513885498, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.2125, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 36.92307692307692, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.46536797285079956, | |
| "eval_runtime": 1.1169, | |
| "eval_samples_per_second": 63.568, | |
| "eval_steps_per_second": 2.686, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 37.84615384615385, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.477935254573822, | |
| "eval_runtime": 1.1116, | |
| "eval_samples_per_second": 63.871, | |
| "eval_steps_per_second": 2.699, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 38.76923076923077, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.5691684484481812, | |
| "eval_runtime": 1.1081, | |
| "eval_samples_per_second": 64.071, | |
| "eval_steps_per_second": 2.707, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 6.315129280090332, | |
| "learning_rate": 3.148148148148148e-05, | |
| "loss": 0.2225, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.4911651611328125, | |
| "eval_runtime": 1.097, | |
| "eval_samples_per_second": 64.719, | |
| "eval_steps_per_second": 2.735, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 40.92307692307692, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.4528195559978485, | |
| "eval_runtime": 1.115, | |
| "eval_samples_per_second": 63.678, | |
| "eval_steps_per_second": 2.691, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 41.84615384615385, | |
| "eval_accuracy": 0.7887323943661971, | |
| "eval_loss": 0.4470233619213104, | |
| "eval_runtime": 1.1058, | |
| "eval_samples_per_second": 64.204, | |
| "eval_steps_per_second": 2.713, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 42.76923076923077, | |
| "eval_accuracy": 0.8028169014084507, | |
| "eval_loss": 0.4251406788825989, | |
| "eval_runtime": 1.0717, | |
| "eval_samples_per_second": 66.249, | |
| "eval_steps_per_second": 2.799, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 43.07692307692308, | |
| "grad_norm": 6.046936988830566, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 0.1991, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.8028169014084507, | |
| "eval_loss": 0.4864138662815094, | |
| "eval_runtime": 1.068, | |
| "eval_samples_per_second": 66.48, | |
| "eval_steps_per_second": 2.809, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 44.92307692307692, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.4651569426059723, | |
| "eval_runtime": 1.0698, | |
| "eval_samples_per_second": 66.368, | |
| "eval_steps_per_second": 2.804, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 45.84615384615385, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.5949256420135498, | |
| "eval_runtime": 1.1229, | |
| "eval_samples_per_second": 63.23, | |
| "eval_steps_per_second": 2.672, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 46.15384615384615, | |
| "grad_norm": 4.972904682159424, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.164, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 46.76923076923077, | |
| "eval_accuracy": 0.7464788732394366, | |
| "eval_loss": 1.0035008192062378, | |
| "eval_runtime": 1.091, | |
| "eval_samples_per_second": 65.076, | |
| "eval_steps_per_second": 2.75, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.6393434405326843, | |
| "eval_runtime": 1.1065, | |
| "eval_samples_per_second": 64.167, | |
| "eval_steps_per_second": 2.711, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 48.92307692307692, | |
| "eval_accuracy": 0.676056338028169, | |
| "eval_loss": 0.9221746921539307, | |
| "eval_runtime": 1.1189, | |
| "eval_samples_per_second": 63.454, | |
| "eval_steps_per_second": 2.681, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 49.23076923076923, | |
| "grad_norm": 31.394847869873047, | |
| "learning_rate": 2.5925925925925925e-05, | |
| "loss": 0.1974, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 49.84615384615385, | |
| "eval_accuracy": 0.6901408450704225, | |
| "eval_loss": 1.0632798671722412, | |
| "eval_runtime": 1.0728, | |
| "eval_samples_per_second": 66.183, | |
| "eval_steps_per_second": 2.796, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 50.76923076923077, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.6050125360488892, | |
| "eval_runtime": 1.1003, | |
| "eval_samples_per_second": 64.529, | |
| "eval_steps_per_second": 2.727, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.7133720517158508, | |
| "eval_runtime": 1.1005, | |
| "eval_samples_per_second": 64.514, | |
| "eval_steps_per_second": 2.726, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 52.30769230769231, | |
| "grad_norm": 4.075202465057373, | |
| "learning_rate": 2.4074074074074074e-05, | |
| "loss": 0.213, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 52.92307692307692, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.6648813486099243, | |
| "eval_runtime": 1.1101, | |
| "eval_samples_per_second": 63.956, | |
| "eval_steps_per_second": 2.702, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 53.84615384615385, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.7125517129898071, | |
| "eval_runtime": 1.097, | |
| "eval_samples_per_second": 64.721, | |
| "eval_steps_per_second": 2.735, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 54.76923076923077, | |
| "eval_accuracy": 0.8169014084507042, | |
| "eval_loss": 0.6906399130821228, | |
| "eval_runtime": 1.0988, | |
| "eval_samples_per_second": 64.617, | |
| "eval_steps_per_second": 2.73, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 55.38461538461539, | |
| "grad_norm": 4.072634220123291, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.1642, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.6955938339233398, | |
| "eval_runtime": 1.0988, | |
| "eval_samples_per_second": 64.617, | |
| "eval_steps_per_second": 2.73, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 56.92307692307692, | |
| "eval_accuracy": 0.8028169014084507, | |
| "eval_loss": 0.5828216075897217, | |
| "eval_runtime": 1.1089, | |
| "eval_samples_per_second": 64.03, | |
| "eval_steps_per_second": 2.705, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 57.84615384615385, | |
| "eval_accuracy": 0.8028169014084507, | |
| "eval_loss": 0.5865649580955505, | |
| "eval_runtime": 1.1049, | |
| "eval_samples_per_second": 64.258, | |
| "eval_steps_per_second": 2.715, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 58.46153846153846, | |
| "grad_norm": 9.487194061279297, | |
| "learning_rate": 2.037037037037037e-05, | |
| "loss": 0.1657, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 58.76923076923077, | |
| "eval_accuracy": 0.8169014084507042, | |
| "eval_loss": 0.6171615123748779, | |
| "eval_runtime": 1.1009, | |
| "eval_samples_per_second": 64.495, | |
| "eval_steps_per_second": 2.725, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.7427834868431091, | |
| "eval_runtime": 1.0942, | |
| "eval_samples_per_second": 64.889, | |
| "eval_steps_per_second": 2.742, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 60.92307692307692, | |
| "eval_accuracy": 0.8169014084507042, | |
| "eval_loss": 0.8981254696846008, | |
| "eval_runtime": 1.0972, | |
| "eval_samples_per_second": 64.708, | |
| "eval_steps_per_second": 2.734, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 61.53846153846154, | |
| "grad_norm": 6.037697792053223, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 0.1347, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 61.84615384615385, | |
| "eval_accuracy": 0.8169014084507042, | |
| "eval_loss": 0.7168216705322266, | |
| "eval_runtime": 1.1259, | |
| "eval_samples_per_second": 63.063, | |
| "eval_steps_per_second": 2.665, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 62.76923076923077, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.8026114106178284, | |
| "eval_runtime": 1.113, | |
| "eval_samples_per_second": 63.789, | |
| "eval_steps_per_second": 2.695, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.8639427423477173, | |
| "eval_runtime": 1.1002, | |
| "eval_samples_per_second": 64.535, | |
| "eval_steps_per_second": 2.727, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 64.61538461538461, | |
| "grad_norm": 7.2083282470703125, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.1335, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 64.92307692307692, | |
| "eval_accuracy": 0.8169014084507042, | |
| "eval_loss": 0.7604219913482666, | |
| "eval_runtime": 1.1013, | |
| "eval_samples_per_second": 64.472, | |
| "eval_steps_per_second": 2.724, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 65.84615384615384, | |
| "eval_accuracy": 0.8169014084507042, | |
| "eval_loss": 0.7992713451385498, | |
| "eval_runtime": 1.0706, | |
| "eval_samples_per_second": 66.321, | |
| "eval_steps_per_second": 2.802, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 66.76923076923077, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.833002507686615, | |
| "eval_runtime": 1.1411, | |
| "eval_samples_per_second": 62.218, | |
| "eval_steps_per_second": 2.629, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 67.6923076923077, | |
| "grad_norm": 4.454225540161133, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 0.145, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.8143167495727539, | |
| "eval_runtime": 1.0982, | |
| "eval_samples_per_second": 64.653, | |
| "eval_steps_per_second": 2.732, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 68.92307692307692, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.751991868019104, | |
| "eval_runtime": 1.1019, | |
| "eval_samples_per_second": 64.432, | |
| "eval_steps_per_second": 2.722, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 69.84615384615384, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.721619725227356, | |
| "eval_runtime": 1.0719, | |
| "eval_samples_per_second": 66.236, | |
| "eval_steps_per_second": 2.799, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 70.76923076923077, | |
| "grad_norm": 5.271942138671875, | |
| "learning_rate": 1.2962962962962962e-05, | |
| "loss": 0.1658, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 70.76923076923077, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.7968146204948425, | |
| "eval_runtime": 1.1157, | |
| "eval_samples_per_second": 63.638, | |
| "eval_steps_per_second": 2.689, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.8732394366197183, | |
| "eval_loss": 0.7730194330215454, | |
| "eval_runtime": 1.1178, | |
| "eval_samples_per_second": 63.517, | |
| "eval_steps_per_second": 2.684, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 72.92307692307692, | |
| "eval_accuracy": 0.8732394366197183, | |
| "eval_loss": 0.7449730634689331, | |
| "eval_runtime": 1.1284, | |
| "eval_samples_per_second": 62.921, | |
| "eval_steps_per_second": 2.659, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 73.84615384615384, | |
| "grad_norm": 4.669654846191406, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.1381, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 73.84615384615384, | |
| "eval_accuracy": 0.8732394366197183, | |
| "eval_loss": 0.7855215072631836, | |
| "eval_runtime": 1.0837, | |
| "eval_samples_per_second": 65.515, | |
| "eval_steps_per_second": 2.768, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 74.76923076923077, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.8253357410430908, | |
| "eval_runtime": 1.1299, | |
| "eval_samples_per_second": 62.836, | |
| "eval_steps_per_second": 2.655, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.8064681887626648, | |
| "eval_runtime": 1.0974, | |
| "eval_samples_per_second": 64.697, | |
| "eval_steps_per_second": 2.734, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 76.92307692307692, | |
| "grad_norm": 4.332691669464111, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 0.1306, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 76.92307692307692, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.7778077721595764, | |
| "eval_runtime": 1.1004, | |
| "eval_samples_per_second": 64.523, | |
| "eval_steps_per_second": 2.726, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 77.84615384615384, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.7814451456069946, | |
| "eval_runtime": 1.1215, | |
| "eval_samples_per_second": 63.309, | |
| "eval_steps_per_second": 2.675, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 78.76923076923077, | |
| "eval_accuracy": 0.8309859154929577, | |
| "eval_loss": 0.733501672744751, | |
| "eval_runtime": 1.1106, | |
| "eval_samples_per_second": 63.928, | |
| "eval_steps_per_second": 2.701, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "grad_norm": 3.209725856781006, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.1027, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.7371585369110107, | |
| "eval_runtime": 1.0987, | |
| "eval_samples_per_second": 64.619, | |
| "eval_steps_per_second": 2.73, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 80.92307692307692, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.7617682814598083, | |
| "eval_runtime": 1.0988, | |
| "eval_samples_per_second": 64.618, | |
| "eval_steps_per_second": 2.73, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 81.84615384615384, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.7891401648521423, | |
| "eval_runtime": 1.0792, | |
| "eval_samples_per_second": 65.789, | |
| "eval_steps_per_second": 2.78, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 82.76923076923077, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.828731119632721, | |
| "eval_runtime": 1.1039, | |
| "eval_samples_per_second": 64.319, | |
| "eval_steps_per_second": 2.718, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 83.07692307692308, | |
| "grad_norm": 9.988916397094727, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.1296, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.8412397503852844, | |
| "eval_runtime": 1.0672, | |
| "eval_samples_per_second": 66.527, | |
| "eval_steps_per_second": 2.811, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 84.92307692307692, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.8014044761657715, | |
| "eval_runtime": 1.1292, | |
| "eval_samples_per_second": 62.877, | |
| "eval_steps_per_second": 2.657, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 85.84615384615384, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.7529923915863037, | |
| "eval_runtime": 1.1018, | |
| "eval_samples_per_second": 64.442, | |
| "eval_steps_per_second": 2.723, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 86.15384615384616, | |
| "grad_norm": 5.536340236663818, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.1162, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 86.76923076923077, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.7242565155029297, | |
| "eval_runtime": 1.096, | |
| "eval_samples_per_second": 64.781, | |
| "eval_steps_per_second": 2.737, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.7246566414833069, | |
| "eval_runtime": 1.0962, | |
| "eval_samples_per_second": 64.771, | |
| "eval_steps_per_second": 2.737, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 88.92307692307692, | |
| "eval_accuracy": 0.8450704225352113, | |
| "eval_loss": 0.735383927822113, | |
| "eval_runtime": 1.0904, | |
| "eval_samples_per_second": 65.113, | |
| "eval_steps_per_second": 2.751, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 89.23076923076923, | |
| "grad_norm": 3.339935064315796, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 0.1166, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 89.84615384615384, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.7390521764755249, | |
| "eval_runtime": 1.0977, | |
| "eval_samples_per_second": 64.682, | |
| "eval_steps_per_second": 2.733, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 90.76923076923077, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.7390065789222717, | |
| "eval_runtime": 1.0658, | |
| "eval_samples_per_second": 66.614, | |
| "eval_steps_per_second": 2.815, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.7374176979064941, | |
| "eval_runtime": 1.0983, | |
| "eval_samples_per_second": 64.643, | |
| "eval_steps_per_second": 2.731, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 92.3076923076923, | |
| "grad_norm": 3.673130512237549, | |
| "learning_rate": 0.0, | |
| "loss": 0.1031, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 92.3076923076923, | |
| "eval_accuracy": 0.8591549295774648, | |
| "eval_loss": 0.7372613549232483, | |
| "eval_runtime": 1.1158, | |
| "eval_samples_per_second": 63.634, | |
| "eval_steps_per_second": 2.689, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 92.3076923076923, | |
| "step": 300, | |
| "total_flos": 2.839022453308834e+18, | |
| "train_loss": 0.25909996310869854, | |
| "train_runtime": 1733.8719, | |
| "train_samples_per_second": 22.897, | |
| "train_steps_per_second": 0.173 | |
| }, | |
| { | |
| "epoch": 92.3076923076923, | |
| "eval_accuracy": 0.9014084507042254, | |
| "eval_loss": 0.37337467074394226, | |
| "eval_runtime": 1.0981, | |
| "eval_samples_per_second": 64.659, | |
| "eval_steps_per_second": 2.732, | |
| "step": 300 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "total_flos": 2.839022453308834e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |