{
  "best_global_step": 120,
  "best_metric": 0.5769068002700806,
  "best_model_checkpoint": "./vit-base-beans-demo-v5/checkpoint-120",
  "epoch": 1.0,
  "eval_steps": 30,
  "global_step": 122,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.040983606557377046,
      "grad_norm": 63320.57421875,
      "learning_rate": 0.00019344262295081967,
      "loss": 0.5881,
      "step": 5
    },
    {
      "epoch": 0.08196721311475409,
      "grad_norm": 20211.603515625,
      "learning_rate": 0.00018524590163934427,
      "loss": 0.6081,
      "step": 10
    },
    {
      "epoch": 0.12295081967213115,
      "grad_norm": 38881.02734375,
      "learning_rate": 0.00017704918032786885,
      "loss": 0.5742,
      "step": 15
    },
    {
      "epoch": 0.16393442622950818,
      "grad_norm": 19475.11328125,
      "learning_rate": 0.00016885245901639346,
      "loss": 0.6071,
      "step": 20
    },
    {
      "epoch": 0.20491803278688525,
      "grad_norm": 9836.5791015625,
      "learning_rate": 0.00016065573770491804,
      "loss": 0.5601,
      "step": 25
    },
    {
      "epoch": 0.2459016393442623,
      "grad_norm": 30543.1875,
      "learning_rate": 0.00015245901639344262,
      "loss": 0.5984,
      "step": 30
    },
    {
      "epoch": 0.2459016393442623,
      "eval_accuracy": 0.6399794713882474,
      "eval_loss": 0.5906035304069519,
      "eval_runtime": 267.3699,
      "eval_samples_per_second": 14.575,
      "eval_steps_per_second": 0.06,
      "step": 30
    },
    {
      "epoch": 0.28688524590163933,
      "grad_norm": 7142.35205078125,
      "learning_rate": 0.00014426229508196722,
      "loss": 0.5782,
      "step": 35
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 20473.416015625,
      "learning_rate": 0.0001360655737704918,
      "loss": 0.5865,
      "step": 40
    },
    {
      "epoch": 0.36885245901639346,
      "grad_norm": 7634.8935546875,
      "learning_rate": 0.0001278688524590164,
      "loss": 0.5969,
      "step": 45
    },
    {
      "epoch": 0.4098360655737705,
      "grad_norm": 13310.08203125,
      "learning_rate": 0.00011967213114754099,
      "loss": 0.5731,
      "step": 50
    },
    {
      "epoch": 0.45081967213114754,
      "grad_norm": 10772.8818359375,
      "learning_rate": 0.00011147540983606557,
      "loss": 0.5825,
      "step": 55
    },
    {
      "epoch": 0.4918032786885246,
      "grad_norm": 9481.56640625,
      "learning_rate": 0.00010327868852459018,
      "loss": 0.6018,
      "step": 60
    },
    {
      "epoch": 0.4918032786885246,
      "eval_accuracy": 0.6579420066717988,
      "eval_loss": 0.5840609073638916,
      "eval_runtime": 276.2712,
      "eval_samples_per_second": 14.106,
      "eval_steps_per_second": 0.058,
      "step": 60
    },
    {
      "epoch": 0.5327868852459017,
      "grad_norm": 19119.552734375,
      "learning_rate": 9.508196721311476e-05,
      "loss": 0.5955,
      "step": 65
    },
    {
      "epoch": 0.5737704918032787,
      "grad_norm": 18152.6171875,
      "learning_rate": 8.688524590163935e-05,
      "loss": 0.5934,
      "step": 70
    },
    {
      "epoch": 0.6147540983606558,
      "grad_norm": 9084.3095703125,
      "learning_rate": 7.868852459016394e-05,
      "loss": 0.602,
      "step": 75
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 18591.34375,
      "learning_rate": 7.049180327868853e-05,
      "loss": 0.6009,
      "step": 80
    },
    {
      "epoch": 0.6967213114754098,
      "grad_norm": 17511.595703125,
      "learning_rate": 6.229508196721313e-05,
      "loss": 0.5888,
      "step": 85
    },
    {
      "epoch": 0.7377049180327869,
      "grad_norm": 9820.2109375,
      "learning_rate": 5.409836065573771e-05,
      "loss": 0.5816,
      "step": 90
    },
    {
      "epoch": 0.7377049180327869,
      "eval_accuracy": 0.6433153707980498,
      "eval_loss": 0.5819421410560608,
      "eval_runtime": 262.5138,
      "eval_samples_per_second": 14.845,
      "eval_steps_per_second": 0.061,
      "step": 90
    },
    {
      "epoch": 0.7786885245901639,
      "grad_norm": 10211.087890625,
      "learning_rate": 4.59016393442623e-05,
      "loss": 0.5958,
      "step": 95
    },
    {
      "epoch": 0.819672131147541,
      "grad_norm": 18134.767578125,
      "learning_rate": 3.7704918032786885e-05,
      "loss": 0.5626,
      "step": 100
    },
    {
      "epoch": 0.860655737704918,
      "grad_norm": 13692.79296875,
      "learning_rate": 2.9508196721311478e-05,
      "loss": 0.5686,
      "step": 105
    },
    {
      "epoch": 0.9016393442622951,
      "grad_norm": 17321.041015625,
      "learning_rate": 2.1311475409836064e-05,
      "loss": 0.5841,
      "step": 110
    },
    {
      "epoch": 0.9426229508196722,
      "grad_norm": 10492.5380859375,
      "learning_rate": 1.3114754098360657e-05,
      "loss": 0.5818,
      "step": 115
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 17490.5,
      "learning_rate": 4.918032786885246e-06,
      "loss": 0.594,
      "step": 120
    },
    {
      "epoch": 0.9836065573770492,
      "eval_accuracy": 0.6584552219656146,
      "eval_loss": 0.5769068002700806,
      "eval_runtime": 263.9515,
      "eval_samples_per_second": 14.764,
      "eval_steps_per_second": 0.061,
      "step": 120
    },
    {
      "epoch": 1.0,
      "step": 122,
      "total_flos": 1.2078676421226455e+18,
      "train_loss": 0.587343445566834,
      "train_runtime": 5701.3536,
      "train_samples_per_second": 2.734,
      "train_steps_per_second": 0.021
    }
  ],
  "logging_steps": 5,
  "max_steps": 122,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 30,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2078676421226455e+18,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}