Image Classification
Transformers
TensorBoard
Safetensors
beit
Generated from Trainer
Eval Results (legacy)
Instructions to use BilalMuftuoglu/beit-base-patch16-224-55-fold1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use BilalMuftuoglu/beit-base-patch16-224-55-fold1 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-classification", model="BilalMuftuoglu/beit-base-patch16-224-55-fold1")
pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png")
```

```python
# Load model directly
from transformers import AutoImageProcessor, AutoModelForImageClassification

processor = AutoImageProcessor.from_pretrained("BilalMuftuoglu/beit-base-patch16-224-55-fold1")
model = AutoModelForImageClassification.from_pretrained("BilalMuftuoglu/beit-base-patch16-224-55-fold1")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": 0.8481012658227848, | |
| "best_model_checkpoint": "beit-base-patch16-224-fold1/checkpoint-248", | |
| "epoch": 85.71428571428571, | |
| "eval_steps": 500, | |
| "global_step": 300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.8571428571428571, | |
| "eval_accuracy": 0.45569620253164556, | |
| "eval_loss": 0.8050068020820618, | |
| "eval_runtime": 20.5223, | |
| "eval_samples_per_second": 3.849, | |
| "eval_steps_per_second": 0.146, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.569620253164557, | |
| "eval_loss": 0.7151382565498352, | |
| "eval_runtime": 1.3733, | |
| "eval_samples_per_second": 57.526, | |
| "eval_steps_per_second": 2.185, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 4.684664249420166, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.8103, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "eval_accuracy": 0.5569620253164557, | |
| "eval_loss": 0.6821601390838623, | |
| "eval_runtime": 1.4197, | |
| "eval_samples_per_second": 55.644, | |
| "eval_steps_per_second": 2.113, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.569620253164557, | |
| "eval_loss": 0.640774667263031, | |
| "eval_runtime": 1.4465, | |
| "eval_samples_per_second": 54.616, | |
| "eval_steps_per_second": 2.074, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 4.857142857142857, | |
| "eval_accuracy": 0.6708860759493671, | |
| "eval_loss": 0.6244170069694519, | |
| "eval_runtime": 1.5149, | |
| "eval_samples_per_second": 52.147, | |
| "eval_steps_per_second": 1.98, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 5.714285714285714, | |
| "grad_norm": 2.6553750038146973, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.6583, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.6708860759493671, | |
| "eval_loss": 0.5892533659934998, | |
| "eval_runtime": 1.448, | |
| "eval_samples_per_second": 54.56, | |
| "eval_steps_per_second": 2.072, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 6.857142857142857, | |
| "eval_accuracy": 0.6329113924050633, | |
| "eval_loss": 0.5876858234405518, | |
| "eval_runtime": 1.4555, | |
| "eval_samples_per_second": 54.277, | |
| "eval_steps_per_second": 2.061, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6835443037974683, | |
| "eval_loss": 0.5752159953117371, | |
| "eval_runtime": 1.4455, | |
| "eval_samples_per_second": 54.653, | |
| "eval_steps_per_second": 2.075, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 8.571428571428571, | |
| "grad_norm": 8.61998176574707, | |
| "learning_rate": 5e-05, | |
| "loss": 0.5912, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 8.857142857142858, | |
| "eval_accuracy": 0.6455696202531646, | |
| "eval_loss": 0.5825986862182617, | |
| "eval_runtime": 1.4574, | |
| "eval_samples_per_second": 54.204, | |
| "eval_steps_per_second": 2.058, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.6835443037974683, | |
| "eval_loss": 0.5469183325767517, | |
| "eval_runtime": 1.4759, | |
| "eval_samples_per_second": 53.528, | |
| "eval_steps_per_second": 2.033, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 10.857142857142858, | |
| "eval_accuracy": 0.6582278481012658, | |
| "eval_loss": 0.6173216700553894, | |
| "eval_runtime": 1.4368, | |
| "eval_samples_per_second": 54.985, | |
| "eval_steps_per_second": 2.088, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 11.428571428571429, | |
| "grad_norm": 4.089001655578613, | |
| "learning_rate": 4.814814814814815e-05, | |
| "loss": 0.5301, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.6962025316455697, | |
| "eval_loss": 0.51507169008255, | |
| "eval_runtime": 1.4391, | |
| "eval_samples_per_second": 54.894, | |
| "eval_steps_per_second": 2.085, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 12.857142857142858, | |
| "eval_accuracy": 0.6962025316455697, | |
| "eval_loss": 0.5105239152908325, | |
| "eval_runtime": 1.5017, | |
| "eval_samples_per_second": 52.608, | |
| "eval_steps_per_second": 1.998, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7088607594936709, | |
| "eval_loss": 0.5488570928573608, | |
| "eval_runtime": 1.4299, | |
| "eval_samples_per_second": 55.25, | |
| "eval_steps_per_second": 2.098, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 14.285714285714286, | |
| "grad_norm": 5.854975700378418, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 0.4703, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 14.857142857142858, | |
| "eval_accuracy": 0.6835443037974683, | |
| "eval_loss": 0.5724519491195679, | |
| "eval_runtime": 1.4269, | |
| "eval_samples_per_second": 55.364, | |
| "eval_steps_per_second": 2.102, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.6962025316455697, | |
| "eval_loss": 0.5559752583503723, | |
| "eval_runtime": 1.4698, | |
| "eval_samples_per_second": 53.75, | |
| "eval_steps_per_second": 2.041, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 16.857142857142858, | |
| "eval_accuracy": 0.6708860759493671, | |
| "eval_loss": 0.5824136137962341, | |
| "eval_runtime": 1.4546, | |
| "eval_samples_per_second": 54.312, | |
| "eval_steps_per_second": 2.062, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 17.142857142857142, | |
| "grad_norm": 6.086174964904785, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.4189, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7468354430379747, | |
| "eval_loss": 0.540145754814148, | |
| "eval_runtime": 1.4352, | |
| "eval_samples_per_second": 55.046, | |
| "eval_steps_per_second": 2.09, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 18.857142857142858, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.514731764793396, | |
| "eval_runtime": 1.4503, | |
| "eval_samples_per_second": 54.473, | |
| "eval_steps_per_second": 2.069, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 8.862787246704102, | |
| "learning_rate": 4.259259259259259e-05, | |
| "loss": 0.3741, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.759493670886076, | |
| "eval_loss": 0.48641237616539, | |
| "eval_runtime": 1.4392, | |
| "eval_samples_per_second": 54.891, | |
| "eval_steps_per_second": 2.084, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 20.857142857142858, | |
| "eval_accuracy": 0.7341772151898734, | |
| "eval_loss": 0.5272199511528015, | |
| "eval_runtime": 1.461, | |
| "eval_samples_per_second": 54.072, | |
| "eval_steps_per_second": 2.053, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.7468354430379747, | |
| "eval_loss": 0.49136921763420105, | |
| "eval_runtime": 1.4904, | |
| "eval_samples_per_second": 53.005, | |
| "eval_steps_per_second": 2.013, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 22.857142857142858, | |
| "grad_norm": 8.650327682495117, | |
| "learning_rate": 4.074074074074074e-05, | |
| "loss": 0.387, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 22.857142857142858, | |
| "eval_accuracy": 0.7468354430379747, | |
| "eval_loss": 0.5658156275749207, | |
| "eval_runtime": 1.498, | |
| "eval_samples_per_second": 52.738, | |
| "eval_steps_per_second": 2.003, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.4662097096443176, | |
| "eval_runtime": 1.512, | |
| "eval_samples_per_second": 52.249, | |
| "eval_steps_per_second": 1.984, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 24.857142857142858, | |
| "eval_accuracy": 0.7848101265822784, | |
| "eval_loss": 0.4376372694969177, | |
| "eval_runtime": 1.5044, | |
| "eval_samples_per_second": 52.514, | |
| "eval_steps_per_second": 1.994, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 25.714285714285715, | |
| "grad_norm": 6.057330131530762, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.3502, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.5366873145103455, | |
| "eval_runtime": 1.5039, | |
| "eval_samples_per_second": 52.529, | |
| "eval_steps_per_second": 1.995, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 26.857142857142858, | |
| "eval_accuracy": 0.7341772151898734, | |
| "eval_loss": 0.5490015745162964, | |
| "eval_runtime": 1.4224, | |
| "eval_samples_per_second": 55.541, | |
| "eval_steps_per_second": 2.109, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.7162956595420837, | |
| "eval_runtime": 1.4548, | |
| "eval_samples_per_second": 54.303, | |
| "eval_steps_per_second": 2.062, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 28.571428571428573, | |
| "grad_norm": 6.062076568603516, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.3148, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 28.857142857142858, | |
| "eval_accuracy": 0.7468354430379747, | |
| "eval_loss": 0.6004660129547119, | |
| "eval_runtime": 1.4277, | |
| "eval_samples_per_second": 55.333, | |
| "eval_steps_per_second": 2.101, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.6500609517097473, | |
| "eval_runtime": 1.4701, | |
| "eval_samples_per_second": 53.739, | |
| "eval_steps_per_second": 2.041, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 30.857142857142858, | |
| "eval_accuracy": 0.7974683544303798, | |
| "eval_loss": 0.5312591791152954, | |
| "eval_runtime": 1.4996, | |
| "eval_samples_per_second": 52.68, | |
| "eval_steps_per_second": 2.001, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 31.428571428571427, | |
| "grad_norm": 5.8153252601623535, | |
| "learning_rate": 3.518518518518519e-05, | |
| "loss": 0.2973, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.5466110706329346, | |
| "eval_runtime": 1.5101, | |
| "eval_samples_per_second": 52.314, | |
| "eval_steps_per_second": 1.987, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 32.857142857142854, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.5730607509613037, | |
| "eval_runtime": 1.4879, | |
| "eval_samples_per_second": 53.094, | |
| "eval_steps_per_second": 2.016, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6543712615966797, | |
| "eval_runtime": 1.4649, | |
| "eval_samples_per_second": 53.927, | |
| "eval_steps_per_second": 2.048, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 34.285714285714285, | |
| "grad_norm": 5.931222438812256, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.2474, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 34.857142857142854, | |
| "eval_accuracy": 0.7848101265822784, | |
| "eval_loss": 0.6060739159584045, | |
| "eval_runtime": 1.4417, | |
| "eval_samples_per_second": 54.798, | |
| "eval_steps_per_second": 2.081, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.5815550684928894, | |
| "eval_runtime": 1.4713, | |
| "eval_samples_per_second": 53.693, | |
| "eval_steps_per_second": 2.039, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 36.857142857142854, | |
| "eval_accuracy": 0.759493670886076, | |
| "eval_loss": 0.7160954475402832, | |
| "eval_runtime": 1.5016, | |
| "eval_samples_per_second": 52.612, | |
| "eval_steps_per_second": 1.998, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 37.142857142857146, | |
| "grad_norm": 5.137592792510986, | |
| "learning_rate": 3.148148148148148e-05, | |
| "loss": 0.2033, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.7848101265822784, | |
| "eval_loss": 0.6234713196754456, | |
| "eval_runtime": 1.4848, | |
| "eval_samples_per_second": 53.205, | |
| "eval_steps_per_second": 2.02, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 38.857142857142854, | |
| "eval_accuracy": 0.759493670886076, | |
| "eval_loss": 0.7888889312744141, | |
| "eval_runtime": 1.4207, | |
| "eval_samples_per_second": 55.607, | |
| "eval_steps_per_second": 2.112, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 5.441008567810059, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 0.2338, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.759493670886076, | |
| "eval_loss": 0.5943406224250793, | |
| "eval_runtime": 1.4387, | |
| "eval_samples_per_second": 54.911, | |
| "eval_steps_per_second": 2.085, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 40.857142857142854, | |
| "eval_accuracy": 0.7341772151898734, | |
| "eval_loss": 0.6169915795326233, | |
| "eval_runtime": 1.4387, | |
| "eval_samples_per_second": 54.909, | |
| "eval_steps_per_second": 2.085, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.6962025316455697, | |
| "eval_loss": 0.6963752508163452, | |
| "eval_runtime": 1.4482, | |
| "eval_samples_per_second": 54.55, | |
| "eval_steps_per_second": 2.072, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 42.857142857142854, | |
| "grad_norm": 6.6485161781311035, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.2067, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 42.857142857142854, | |
| "eval_accuracy": 0.7468354430379747, | |
| "eval_loss": 0.7153680324554443, | |
| "eval_runtime": 1.574, | |
| "eval_samples_per_second": 50.19, | |
| "eval_steps_per_second": 1.906, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.767503559589386, | |
| "eval_runtime": 1.4899, | |
| "eval_samples_per_second": 53.025, | |
| "eval_steps_per_second": 2.014, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 44.857142857142854, | |
| "eval_accuracy": 0.7468354430379747, | |
| "eval_loss": 0.7765600681304932, | |
| "eval_runtime": 1.4794, | |
| "eval_samples_per_second": 53.4, | |
| "eval_steps_per_second": 2.028, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 45.714285714285715, | |
| "grad_norm": 6.1349005699157715, | |
| "learning_rate": 2.5925925925925925e-05, | |
| "loss": 0.2133, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.7848101265822784, | |
| "eval_loss": 0.932968258857727, | |
| "eval_runtime": 1.4465, | |
| "eval_samples_per_second": 54.616, | |
| "eval_steps_per_second": 2.074, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 46.857142857142854, | |
| "eval_accuracy": 0.7974683544303798, | |
| "eval_loss": 0.64939284324646, | |
| "eval_runtime": 1.4334, | |
| "eval_samples_per_second": 55.113, | |
| "eval_steps_per_second": 2.093, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.5709493160247803, | |
| "eval_runtime": 1.4722, | |
| "eval_samples_per_second": 53.662, | |
| "eval_steps_per_second": 2.038, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 48.57142857142857, | |
| "grad_norm": 3.4344000816345215, | |
| "learning_rate": 2.4074074074074074e-05, | |
| "loss": 0.2004, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 48.857142857142854, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6462149620056152, | |
| "eval_runtime": 1.5036, | |
| "eval_samples_per_second": 52.54, | |
| "eval_steps_per_second": 1.995, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.7721518987341772, | |
| "eval_loss": 0.6667977571487427, | |
| "eval_runtime": 1.5326, | |
| "eval_samples_per_second": 51.547, | |
| "eval_steps_per_second": 1.957, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 50.857142857142854, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6305052638053894, | |
| "eval_runtime": 1.4376, | |
| "eval_samples_per_second": 54.953, | |
| "eval_steps_per_second": 2.087, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 51.42857142857143, | |
| "grad_norm": 5.206828594207764, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.188, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.8227848101265823, | |
| "eval_loss": 0.7189355492591858, | |
| "eval_runtime": 1.4518, | |
| "eval_samples_per_second": 54.415, | |
| "eval_steps_per_second": 2.066, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 52.857142857142854, | |
| "eval_accuracy": 0.7848101265822784, | |
| "eval_loss": 0.6853471398353577, | |
| "eval_runtime": 1.49, | |
| "eval_samples_per_second": 53.02, | |
| "eval_steps_per_second": 2.013, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.8227848101265823, | |
| "eval_loss": 0.8039994835853577, | |
| "eval_runtime": 1.4908, | |
| "eval_samples_per_second": 52.991, | |
| "eval_steps_per_second": 2.012, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 54.285714285714285, | |
| "grad_norm": 5.863402843475342, | |
| "learning_rate": 2.037037037037037e-05, | |
| "loss": 0.1623, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 54.857142857142854, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.69575035572052, | |
| "eval_runtime": 1.5387, | |
| "eval_samples_per_second": 51.343, | |
| "eval_steps_per_second": 1.95, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6906704902648926, | |
| "eval_runtime": 1.576, | |
| "eval_samples_per_second": 50.126, | |
| "eval_steps_per_second": 1.904, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 56.857142857142854, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6821295619010925, | |
| "eval_runtime": 1.4415, | |
| "eval_samples_per_second": 54.804, | |
| "eval_steps_per_second": 2.081, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 57.142857142857146, | |
| "grad_norm": 4.665853500366211, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 0.1588, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6534023880958557, | |
| "eval_runtime": 1.429, | |
| "eval_samples_per_second": 55.283, | |
| "eval_steps_per_second": 2.099, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 58.857142857142854, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.7192457318305969, | |
| "eval_runtime": 1.4185, | |
| "eval_samples_per_second": 55.694, | |
| "eval_steps_per_second": 2.115, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 6.225094318389893, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.1607, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.8227848101265823, | |
| "eval_loss": 0.7752671837806702, | |
| "eval_runtime": 1.4284, | |
| "eval_samples_per_second": 55.308, | |
| "eval_steps_per_second": 2.1, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 60.857142857142854, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.8949642181396484, | |
| "eval_runtime": 1.5505, | |
| "eval_samples_per_second": 50.951, | |
| "eval_steps_per_second": 1.935, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.7903599739074707, | |
| "eval_runtime": 1.5102, | |
| "eval_samples_per_second": 52.311, | |
| "eval_steps_per_second": 1.986, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 62.857142857142854, | |
| "grad_norm": 4.583127498626709, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 0.1767, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 62.857142857142854, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6972522735595703, | |
| "eval_runtime": 1.4628, | |
| "eval_samples_per_second": 54.005, | |
| "eval_steps_per_second": 2.051, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.7974683544303798, | |
| "eval_loss": 0.6694443225860596, | |
| "eval_runtime": 1.4975, | |
| "eval_samples_per_second": 52.754, | |
| "eval_steps_per_second": 2.003, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 64.85714285714286, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6338869333267212, | |
| "eval_runtime": 1.4504, | |
| "eval_samples_per_second": 54.468, | |
| "eval_steps_per_second": 2.068, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 65.71428571428571, | |
| "grad_norm": 3.7681446075439453, | |
| "learning_rate": 1.2962962962962962e-05, | |
| "loss": 0.1463, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6530351042747498, | |
| "eval_runtime": 1.4628, | |
| "eval_samples_per_second": 54.005, | |
| "eval_steps_per_second": 2.051, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 66.85714285714286, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.6141919493675232, | |
| "eval_runtime": 1.5198, | |
| "eval_samples_per_second": 51.98, | |
| "eval_steps_per_second": 1.974, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.8227848101265823, | |
| "eval_loss": 0.628998339176178, | |
| "eval_runtime": 1.4603, | |
| "eval_samples_per_second": 54.1, | |
| "eval_steps_per_second": 2.054, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 68.57142857142857, | |
| "grad_norm": 5.3702874183654785, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.1287, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 68.85714285714286, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.6333932280540466, | |
| "eval_runtime": 1.437, | |
| "eval_samples_per_second": 54.977, | |
| "eval_steps_per_second": 2.088, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_accuracy": 0.810126582278481, | |
| "eval_loss": 0.8058773279190063, | |
| "eval_runtime": 1.437, | |
| "eval_samples_per_second": 54.974, | |
| "eval_steps_per_second": 2.088, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 70.85714285714286, | |
| "eval_accuracy": 0.8481012658227848, | |
| "eval_loss": 0.7241003513336182, | |
| "eval_runtime": 1.4148, | |
| "eval_samples_per_second": 55.837, | |
| "eval_steps_per_second": 2.12, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 71.42857142857143, | |
| "grad_norm": 4.752800941467285, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 0.1323, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.8481012658227848, | |
| "eval_loss": 0.6835869550704956, | |
| "eval_runtime": 1.4344, | |
| "eval_samples_per_second": 55.075, | |
| "eval_steps_per_second": 2.091, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 72.85714285714286, | |
| "eval_accuracy": 0.8227848101265823, | |
| "eval_loss": 0.6587881445884705, | |
| "eval_runtime": 1.559, | |
| "eval_samples_per_second": 50.672, | |
| "eval_steps_per_second": 1.924, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_accuracy": 0.8481012658227848, | |
| "eval_loss": 0.6597732901573181, | |
| "eval_runtime": 1.5278, | |
| "eval_samples_per_second": 51.709, | |
| "eval_steps_per_second": 1.964, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 74.28571428571429, | |
| "grad_norm": 3.2891921997070312, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.1042, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 74.85714285714286, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.713896632194519, | |
| "eval_runtime": 1.4654, | |
| "eval_samples_per_second": 53.909, | |
| "eval_steps_per_second": 2.047, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.72358638048172, | |
| "eval_runtime": 1.4884, | |
| "eval_samples_per_second": 53.077, | |
| "eval_steps_per_second": 2.016, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 76.85714285714286, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.6918818950653076, | |
| "eval_runtime": 1.4316, | |
| "eval_samples_per_second": 55.184, | |
| "eval_steps_per_second": 2.096, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 77.14285714285714, | |
| "grad_norm": 4.013108730316162, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.1106, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.6568043828010559, | |
| "eval_runtime": 1.5155, | |
| "eval_samples_per_second": 52.128, | |
| "eval_steps_per_second": 1.98, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 78.85714285714286, | |
| "eval_accuracy": 0.8481012658227848, | |
| "eval_loss": 0.6556110382080078, | |
| "eval_runtime": 1.5408, | |
| "eval_samples_per_second": 51.272, | |
| "eval_steps_per_second": 1.947, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "grad_norm": 6.208752632141113, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.1348, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.6612224578857422, | |
| "eval_runtime": 1.4365, | |
| "eval_samples_per_second": 54.993, | |
| "eval_steps_per_second": 2.088, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 80.85714285714286, | |
| "eval_accuracy": 0.8227848101265823, | |
| "eval_loss": 0.6686135530471802, | |
| "eval_runtime": 1.4579, | |
| "eval_samples_per_second": 54.186, | |
| "eval_steps_per_second": 2.058, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_accuracy": 0.8481012658227848, | |
| "eval_loss": 0.6705390214920044, | |
| "eval_runtime": 1.4513, | |
| "eval_samples_per_second": 54.434, | |
| "eval_steps_per_second": 2.067, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 82.85714285714286, | |
| "grad_norm": 4.1432647705078125, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 0.1352, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 82.85714285714286, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.677626371383667, | |
| "eval_runtime": 1.4762, | |
| "eval_samples_per_second": 53.516, | |
| "eval_steps_per_second": 2.032, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.6872657537460327, | |
| "eval_runtime": 1.5716, | |
| "eval_samples_per_second": 50.268, | |
| "eval_steps_per_second": 1.909, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 84.85714285714286, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.6887751817703247, | |
| "eval_runtime": 1.5031, | |
| "eval_samples_per_second": 52.557, | |
| "eval_steps_per_second": 1.996, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 85.71428571428571, | |
| "grad_norm": 3.4207639694213867, | |
| "learning_rate": 0.0, | |
| "loss": 0.1226, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 85.71428571428571, | |
| "eval_accuracy": 0.8354430379746836, | |
| "eval_loss": 0.688024640083313, | |
| "eval_runtime": 1.4114, | |
| "eval_samples_per_second": 55.972, | |
| "eval_steps_per_second": 2.126, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 85.71428571428571, | |
| "step": 300, | |
| "total_flos": 2.9349165326823014e+18, | |
| "train_loss": 0.2789517060915629, | |
| "train_runtime": 2373.8808, | |
| "train_samples_per_second": 18.619, | |
| "train_steps_per_second": 0.126 | |
| }, | |
| { | |
| "epoch": 85.71428571428571, | |
| "eval_accuracy": 0.8481012658227848, | |
| "eval_loss": 0.7241003513336182, | |
| "eval_runtime": 1.4361, | |
| "eval_samples_per_second": 55.011, | |
| "eval_steps_per_second": 2.089, | |
| "step": 300 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "total_flos": 2.9349165326823014e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |