{
  "best_metric": 0.344835102558136,
  "best_model_checkpoint": "./vit-base-pets/checkpoint-235",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 235,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.21,
      "grad_norm": 216203.125,
      "learning_rate": 0.0002872340425531915,
      "loss": 3.3311,
      "step": 10
    },
    {
      "epoch": 0.43,
      "grad_norm": 198156.9375,
      "learning_rate": 0.000274468085106383,
      "loss": 2.5921,
      "step": 20
    },
    {
      "epoch": 0.64,
      "grad_norm": 176661.6875,
      "learning_rate": 0.0002617021276595745,
      "loss": 1.9823,
      "step": 30
    },
    {
      "epoch": 0.85,
      "grad_norm": 142275.75,
      "learning_rate": 0.0002489361702127659,
      "loss": 1.5136,
      "step": 40
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8430311231393776,
      "eval_loss": 1.1030857563018799,
      "eval_runtime": 8.7542,
      "eval_samples_per_second": 84.417,
      "eval_steps_per_second": 5.369,
      "step": 47
    },
    {
      "epoch": 1.06,
      "grad_norm": 124899.421875,
      "learning_rate": 0.00023617021276595742,
      "loss": 1.1858,
      "step": 50
    },
    {
      "epoch": 1.28,
      "grad_norm": 98946.875,
      "learning_rate": 0.0002234042553191489,
      "loss": 0.9313,
      "step": 60
    },
    {
      "epoch": 1.49,
      "grad_norm": 92924.6484375,
      "learning_rate": 0.0002106382978723404,
      "loss": 0.7466,
      "step": 70
    },
    {
      "epoch": 1.7,
      "grad_norm": 90032.1484375,
      "learning_rate": 0.00019787234042553187,
      "loss": 0.6475,
      "step": 80
    },
    {
      "epoch": 1.91,
      "grad_norm": 68696.1875,
      "learning_rate": 0.0001851063829787234,
      "loss": 0.5547,
      "step": 90
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9269282814614344,
      "eval_loss": 0.5232290625572205,
      "eval_runtime": 8.9185,
      "eval_samples_per_second": 82.861,
      "eval_steps_per_second": 5.27,
      "step": 94
    },
    {
      "epoch": 2.13,
      "grad_norm": 67699.609375,
      "learning_rate": 0.0001723404255319149,
      "loss": 0.5311,
      "step": 100
    },
    {
      "epoch": 2.34,
      "grad_norm": 72000.0234375,
      "learning_rate": 0.00015957446808510637,
      "loss": 0.4636,
      "step": 110
    },
    {
      "epoch": 2.55,
      "grad_norm": 54618.05078125,
      "learning_rate": 0.00014680851063829785,
      "loss": 0.4171,
      "step": 120
    },
    {
      "epoch": 2.77,
      "grad_norm": 57285.890625,
      "learning_rate": 0.00013404255319148935,
      "loss": 0.3946,
      "step": 130
    },
    {
      "epoch": 2.98,
      "grad_norm": 73116.6171875,
      "learning_rate": 0.00012127659574468084,
      "loss": 0.4111,
      "step": 140
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9309878213802436,
      "eval_loss": 0.39878538250923157,
      "eval_runtime": 9.1048,
      "eval_samples_per_second": 81.166,
      "eval_steps_per_second": 5.162,
      "step": 141
    },
    {
      "epoch": 3.19,
      "grad_norm": 64126.3828125,
      "learning_rate": 0.00010851063829787234,
      "loss": 0.3607,
      "step": 150
    },
    {
      "epoch": 3.4,
      "grad_norm": 69913.5390625,
      "learning_rate": 9.574468085106382e-05,
      "loss": 0.3387,
      "step": 160
    },
    {
      "epoch": 3.62,
      "grad_norm": 54300.03125,
      "learning_rate": 8.297872340425531e-05,
      "loss": 0.3568,
      "step": 170
    },
    {
      "epoch": 3.83,
      "grad_norm": 60929.75390625,
      "learning_rate": 7.02127659574468e-05,
      "loss": 0.3438,
      "step": 180
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9336941813261164,
      "eval_loss": 0.35527506470680237,
      "eval_runtime": 9.4286,
      "eval_samples_per_second": 78.378,
      "eval_steps_per_second": 4.985,
      "step": 188
    },
    {
      "epoch": 4.04,
      "grad_norm": 69279.328125,
      "learning_rate": 5.7446808510638294e-05,
      "loss": 0.3087,
      "step": 190
    },
    {
      "epoch": 4.26,
      "grad_norm": 49415.69140625,
      "learning_rate": 4.468085106382978e-05,
      "loss": 0.328,
      "step": 200
    },
    {
      "epoch": 4.47,
      "grad_norm": 62788.3359375,
      "learning_rate": 3.1914893617021275e-05,
      "loss": 0.3199,
      "step": 210
    },
    {
      "epoch": 4.68,
      "grad_norm": 61852.7421875,
      "learning_rate": 1.9148936170212762e-05,
      "loss": 0.3244,
      "step": 220
    },
    {
      "epoch": 4.89,
      "grad_norm": 64687.453125,
      "learning_rate": 6.382978723404255e-06,
      "loss": 0.298,
      "step": 230
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9296346414073072,
      "eval_loss": 0.344835102558136,
      "eval_runtime": 9.1923,
      "eval_samples_per_second": 80.393,
      "eval_steps_per_second": 5.113,
      "step": 235
    },
    {
      "epoch": 5.0,
      "step": 235,
      "total_flos": 2.2913817801515827e+18,
      "train_loss": 0.8009341437765892,
      "train_runtime": 407.3253,
      "train_samples_per_second": 72.571,
      "train_steps_per_second": 0.577
    }
  ],
  "logging_steps": 10,
  "max_steps": 235,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 2.2913817801515827e+18,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}