{ "best_global_step": 100, "best_metric": 0.1291341334581375, "best_model_checkpoint": "./vit_focus/checkpoint-100", "epoch": 18.0, "eval_steps": 500, "global_step": 180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.11766688525676727, "eval_mae": 0.3475768268108368, "eval_mse": 0.1613956242799759, "eval_runtime": 10.0195, "eval_samples_per_second": 7.585, "eval_steps_per_second": 0.998, "step": 10 }, { "epoch": 2.0, "eval_loss": 0.09316065162420273, "eval_mae": 0.32877787947654724, "eval_mse": 0.14106006920337677, "eval_runtime": 9.4019, "eval_samples_per_second": 8.083, "eval_steps_per_second": 1.064, "step": 20 }, { "epoch": 3.0, "eval_loss": 0.09138184040784836, "eval_mae": 0.3351140022277832, "eval_mse": 0.14778681099414825, "eval_runtime": 9.4168, "eval_samples_per_second": 8.071, "eval_steps_per_second": 1.062, "step": 30 }, { "epoch": 4.0, "grad_norm": 3.422793388366699, "learning_rate": 7.833333333333333e-05, "loss": 0.3379, "step": 40 }, { "epoch": 4.0, "eval_loss": 0.07990685850381851, "eval_mae": 0.3216111361980438, "eval_mse": 0.1368674337863922, "eval_runtime": 9.6691, "eval_samples_per_second": 7.86, "eval_steps_per_second": 1.034, "step": 40 }, { "epoch": 5.0, "eval_loss": 0.07730501145124435, "eval_mae": 0.3210676312446594, "eval_mse": 0.1364751160144806, "eval_runtime": 9.6377, "eval_samples_per_second": 7.886, "eval_steps_per_second": 1.038, "step": 50 }, { "epoch": 6.0, "eval_loss": 0.08424032479524612, "eval_mae": 0.3250683844089508, "eval_mse": 0.14131243526935577, "eval_runtime": 9.4489, "eval_samples_per_second": 8.043, "eval_steps_per_second": 1.058, "step": 60 }, { "epoch": 7.0, "eval_loss": 0.08766830712556839, "eval_mae": 0.3169863820075989, "eval_mse": 0.13288582861423492, "eval_runtime": 10.5663, "eval_samples_per_second": 7.193, "eval_steps_per_second": 0.946, "step": 70 }, { "epoch": 8.0, "grad_norm": 3.210268020629883, "learning_rate": 5.6111111111111114e-05, "loss": 0.2829, "step": 80 }, { "epoch": 8.0, "eval_loss": 0.08300431817770004, "eval_mae": 0.3192159831523895, "eval_mse": 0.1370069533586502, "eval_runtime": 9.75, "eval_samples_per_second": 7.795, "eval_steps_per_second": 1.026, "step": 80 }, { "epoch": 9.0, "eval_loss": 0.08274427056312561, "eval_mae": 0.32542383670806885, "eval_mse": 0.140847310423851, "eval_runtime": 9.5472, "eval_samples_per_second": 7.96, "eval_steps_per_second": 1.047, "step": 90 }, { "epoch": 10.0, "eval_loss": 0.08588916808366776, "eval_mae": 0.3119707405567169, "eval_mse": 0.1291341334581375, "eval_runtime": 10.6888, "eval_samples_per_second": 7.11, "eval_steps_per_second": 0.936, "step": 100 }, { "epoch": 11.0, "eval_loss": 0.10085263103246689, "eval_mae": 0.330853134393692, "eval_mse": 0.14782297611236572, "eval_runtime": 9.484, "eval_samples_per_second": 8.013, "eval_steps_per_second": 1.054, "step": 110 }, { "epoch": 12.0, "grad_norm": 2.0312862396240234, "learning_rate": 3.388888888888889e-05, "loss": 0.2461, "step": 120 }, { "epoch": 12.0, "eval_loss": 0.08102226257324219, "eval_mae": 0.3174217641353607, "eval_mse": 0.135166734457016, "eval_runtime": 9.5097, "eval_samples_per_second": 7.992, "eval_steps_per_second": 1.052, "step": 120 }, { "epoch": 13.0, "eval_loss": 0.08370836824178696, "eval_mae": 0.31886667013168335, "eval_mse": 0.13684500753879547, "eval_runtime": 9.4388, "eval_samples_per_second": 8.052, "eval_steps_per_second": 1.059, "step": 130 }, { "epoch": 14.0, "eval_loss": 0.08873885869979858, "eval_mae": 0.32301220297813416, "eval_mse": 0.1409922093153, "eval_runtime": 9.629, "eval_samples_per_second": 7.893, "eval_steps_per_second": 1.039, "step": 140 }, { "epoch": 15.0, "eval_loss": 0.08366208523511887, "eval_mae": 0.31394028663635254, "eval_mse": 0.13212691247463226, "eval_runtime": 9.4772, "eval_samples_per_second": 8.019, "eval_steps_per_second": 1.055, "step": 150 }, { "epoch": 16.0, "grad_norm": 2.5568466186523438, "learning_rate": 1.1666666666666668e-05, "loss": 0.2104, "step": 160 }, { "epoch": 16.0, "eval_loss": 0.09077440947294235, "eval_mae": 0.3225802481174469, "eval_mse": 0.14043577015399933, "eval_runtime": 9.6623, "eval_samples_per_second": 7.866, "eval_steps_per_second": 1.035, "step": 160 }, { "epoch": 17.0, "eval_loss": 0.08574231714010239, "eval_mae": 0.31825631856918335, "eval_mse": 0.13667234778404236, "eval_runtime": 9.4595, "eval_samples_per_second": 8.034, "eval_steps_per_second": 1.057, "step": 170 }, { "epoch": 18.0, "eval_loss": 0.08340632170438766, "eval_mae": 0.3171162009239197, "eval_mse": 0.13509555160999298, "eval_runtime": 9.4982, "eval_samples_per_second": 8.002, "eval_steps_per_second": 1.053, "step": 180 }, { "epoch": 18.0, "step": 180, "total_flos": 0.0, "train_loss": 0.2600840449333191, "train_runtime": 1002.0455, "train_samples_per_second": 6.048, "train_steps_per_second": 0.18 } ], "logging_steps": 40, "max_steps": 180, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }