| { |
| "best_global_step": 120, |
| "best_metric": 0.12533096969127655, |
| "best_model_checkpoint": "./vit_focus/checkpoint-120", |
| "epoch": 27.0, |
| "eval_steps": 500, |
| "global_step": 270, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.0993029847741127, |
| "eval_mae": 0.3380415141582489, |
| "eval_mse": 0.15286438167095184, |
| "eval_runtime": 9.9578, |
| "eval_samples_per_second": 7.632, |
| "eval_steps_per_second": 1.004, |
| "step": 10 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.1050349548459053, |
| "eval_mae": 0.34093156456947327, |
| "eval_mse": 0.15535660088062286, |
| "eval_runtime": 9.7265, |
| "eval_samples_per_second": 7.814, |
| "eval_steps_per_second": 1.028, |
| "step": 20 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.09966066479682922, |
| "eval_mae": 0.3352396786212921, |
| "eval_mse": 0.14932329952716827, |
| "eval_runtime": 9.4644, |
| "eval_samples_per_second": 8.03, |
| "eval_steps_per_second": 1.057, |
| "step": 30 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 6.926674842834473, |
| "learning_rate": 4.277777777777778e-05, |
| "loss": 0.313, |
| "step": 40 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.06556536257266998, |
| "eval_mae": 0.3157392740249634, |
| "eval_mse": 0.13447947800159454, |
| "eval_runtime": 10.1308, |
| "eval_samples_per_second": 7.502, |
| "eval_steps_per_second": 0.987, |
| "step": 40 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.06592569500207901, |
| "eval_mae": 0.3202681839466095, |
| "eval_mse": 0.13659903407096863, |
| "eval_runtime": 9.7043, |
| "eval_samples_per_second": 7.832, |
| "eval_steps_per_second": 1.03, |
| "step": 50 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.0638844296336174, |
| "eval_mae": 0.31192123889923096, |
| "eval_mse": 0.12961846590042114, |
| "eval_runtime": 9.5179, |
| "eval_samples_per_second": 7.985, |
| "eval_steps_per_second": 1.051, |
| "step": 60 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.06389027088880539, |
| "eval_mae": 0.3178236484527588, |
| "eval_mse": 0.13510307669639587, |
| "eval_runtime": 9.5025, |
| "eval_samples_per_second": 7.998, |
| "eval_steps_per_second": 1.052, |
| "step": 70 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 4.451300144195557, |
| "learning_rate": 3.537037037037037e-05, |
| "loss": 0.1742, |
| "step": 80 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.06391099840402603, |
| "eval_mae": 0.3085971772670746, |
| "eval_mse": 0.12736700475215912, |
| "eval_runtime": 10.1592, |
| "eval_samples_per_second": 7.481, |
| "eval_steps_per_second": 0.984, |
| "step": 80 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.07279632240533829, |
| "eval_mae": 0.3096161186695099, |
| "eval_mse": 0.12943950295448303, |
| "eval_runtime": 9.4821, |
| "eval_samples_per_second": 8.015, |
| "eval_steps_per_second": 1.055, |
| "step": 90 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.06712160259485245, |
| "eval_mae": 0.3150458335876465, |
| "eval_mse": 0.13300836086273193, |
| "eval_runtime": 9.7046, |
| "eval_samples_per_second": 7.831, |
| "eval_steps_per_second": 1.03, |
| "step": 100 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 0.06695493310689926, |
| "eval_mae": 0.30665045976638794, |
| "eval_mse": 0.12600918114185333, |
| "eval_runtime": 9.7852, |
| "eval_samples_per_second": 7.767, |
| "eval_steps_per_second": 1.022, |
| "step": 110 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 2.9957473278045654, |
| "learning_rate": 2.7962962962962965e-05, |
| "loss": 0.1284, |
| "step": 120 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.06580791622400284, |
| "eval_mae": 0.3059428930282593, |
| "eval_mse": 0.12533096969127655, |
| "eval_runtime": 9.7135, |
| "eval_samples_per_second": 7.824, |
| "eval_steps_per_second": 1.029, |
| "step": 120 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.06405826658010483, |
| "eval_mae": 0.3104270100593567, |
| "eval_mse": 0.1280805468559265, |
| "eval_runtime": 12.0741, |
| "eval_samples_per_second": 6.294, |
| "eval_steps_per_second": 0.828, |
| "step": 130 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.06428611278533936, |
| "eval_mae": 0.3104848563671112, |
| "eval_mse": 0.12893278896808624, |
| "eval_runtime": 9.5891, |
| "eval_samples_per_second": 7.926, |
| "eval_steps_per_second": 1.043, |
| "step": 140 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 0.06487523764371872, |
| "eval_mae": 0.3171584904193878, |
| "eval_mse": 0.13420797884464264, |
| "eval_runtime": 9.5632, |
| "eval_samples_per_second": 7.947, |
| "eval_steps_per_second": 1.046, |
| "step": 150 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 1.922245740890503, |
| "learning_rate": 2.0555555555555555e-05, |
| "loss": 0.0981, |
| "step": 160 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.06558659672737122, |
| "eval_mae": 0.30849871039390564, |
| "eval_mse": 0.12756428122520447, |
| "eval_runtime": 9.5905, |
| "eval_samples_per_second": 7.924, |
| "eval_steps_per_second": 1.043, |
| "step": 160 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_loss": 0.06274469941854477, |
| "eval_mae": 0.3136182427406311, |
| "eval_mse": 0.13160544633865356, |
| "eval_runtime": 10.0109, |
| "eval_samples_per_second": 7.592, |
| "eval_steps_per_second": 0.999, |
| "step": 170 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_loss": 0.06201491877436638, |
| "eval_mae": 0.3168633282184601, |
| "eval_mse": 0.1343080997467041, |
| "eval_runtime": 9.9918, |
| "eval_samples_per_second": 7.606, |
| "eval_steps_per_second": 1.001, |
| "step": 180 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_loss": 0.0631915032863617, |
| "eval_mae": 0.31292420625686646, |
| "eval_mse": 0.13110676407814026, |
| "eval_runtime": 9.5351, |
| "eval_samples_per_second": 7.971, |
| "eval_steps_per_second": 1.049, |
| "step": 190 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 1.9687647819519043, |
| "learning_rate": 1.3148148148148148e-05, |
| "loss": 0.0767, |
| "step": 200 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_loss": 0.06296339631080627, |
| "eval_mae": 0.3142727017402649, |
| "eval_mse": 0.1326274573802948, |
| "eval_runtime": 9.7999, |
| "eval_samples_per_second": 7.755, |
| "eval_steps_per_second": 1.02, |
| "step": 200 |
| }, |
| { |
| "epoch": 21.0, |
| "eval_loss": 0.06408733129501343, |
| "eval_mae": 0.311717689037323, |
| "eval_mse": 0.12986762821674347, |
| "eval_runtime": 9.6462, |
| "eval_samples_per_second": 7.879, |
| "eval_steps_per_second": 1.037, |
| "step": 210 |
| }, |
| { |
| "epoch": 22.0, |
| "eval_loss": 0.06340750306844711, |
| "eval_mae": 0.3114081621170044, |
| "eval_mse": 0.12940751016139984, |
| "eval_runtime": 9.5394, |
| "eval_samples_per_second": 7.967, |
| "eval_steps_per_second": 1.048, |
| "step": 220 |
| }, |
| { |
| "epoch": 23.0, |
| "eval_loss": 0.06285858899354935, |
| "eval_mae": 0.31304195523262024, |
| "eval_mse": 0.13149800896644592, |
| "eval_runtime": 9.8923, |
| "eval_samples_per_second": 7.683, |
| "eval_steps_per_second": 1.011, |
| "step": 230 |
| }, |
| { |
| "epoch": 24.0, |
| "grad_norm": 1.0159116983413696, |
| "learning_rate": 5.740740740740741e-06, |
| "loss": 0.0615, |
| "step": 240 |
| }, |
| { |
| "epoch": 24.0, |
| "eval_loss": 0.06115531921386719, |
| "eval_mae": 0.3123721480369568, |
| "eval_mse": 0.13078482449054718, |
| "eval_runtime": 9.6638, |
| "eval_samples_per_second": 7.864, |
| "eval_steps_per_second": 1.035, |
| "step": 240 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 0.059913910925388336, |
| "eval_mae": 0.31175902485847473, |
| "eval_mse": 0.13015513122081757, |
| "eval_runtime": 9.6921, |
| "eval_samples_per_second": 7.841, |
| "eval_steps_per_second": 1.032, |
| "step": 250 |
| }, |
| { |
| "epoch": 26.0, |
| "eval_loss": 0.06085545942187309, |
| "eval_mae": 0.313151478767395, |
| "eval_mse": 0.13129989802837372, |
| "eval_runtime": 9.5449, |
| "eval_samples_per_second": 7.962, |
| "eval_steps_per_second": 1.048, |
| "step": 260 |
| }, |
| { |
| "epoch": 27.0, |
| "eval_loss": 0.060885023325681686, |
| "eval_mae": 0.3116842210292816, |
| "eval_mse": 0.13011318445205688, |
| "eval_runtime": 9.6423, |
| "eval_samples_per_second": 7.882, |
| "eval_steps_per_second": 1.037, |
| "step": 270 |
| }, |
| { |
| "epoch": 27.0, |
| "step": 270, |
| "total_flos": 0.0, |
| "train_loss": 0.13240765200720894, |
| "train_runtime": 1534.1197, |
| "train_samples_per_second": 5.925, |
| "train_steps_per_second": 0.176 |
| } |
| ], |
| "logging_steps": 40, |
| "max_steps": 270, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|