{
  "best_global_step": 350,
  "best_metric": 0.19462016224861145,
  "best_model_checkpoint": "./vit-stroke-detector/checkpoint-350",
  "epoch": 3.0,
  "eval_steps": 50,
  "global_step": 375,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "grad_norm": 0.828596830368042,
      "learning_rate": 0.0001952,
      "loss": 0.6849,
      "step": 10
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6220552921295166,
      "learning_rate": 0.00018986666666666668,
      "loss": 0.6632,
      "step": 20
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.931017279624939,
      "learning_rate": 0.00018453333333333334,
      "loss": 0.6737,
      "step": 30
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.5325367450714111,
      "learning_rate": 0.00017920000000000002,
      "loss": 0.6441,
      "step": 40
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20815162360668182,
      "learning_rate": 0.00017386666666666667,
      "loss": 0.6959,
      "step": 50
    },
    {
      "epoch": 0.4,
      "eval_accuracy": 0.6007984031936128,
      "eval_loss": 0.6659401059150696,
      "eval_runtime": 12.6028,
      "eval_samples_per_second": 39.753,
      "eval_steps_per_second": 4.999,
      "step": 50
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.6144075393676758,
      "learning_rate": 0.00016853333333333336,
      "loss": 0.6542,
      "step": 60
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.3622696101665497,
      "learning_rate": 0.0001632,
      "loss": 0.6336,
      "step": 70
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7294650077819824,
      "learning_rate": 0.00015786666666666666,
      "loss": 0.6705,
      "step": 80
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2596920728683472,
      "learning_rate": 0.00015253333333333335,
      "loss": 0.6144,
      "step": 90
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.432494640350342,
      "learning_rate": 0.0001472,
      "loss": 0.6038,
      "step": 100
    },
    {
      "epoch": 0.8,
      "eval_accuracy": 0.624750499001996,
      "eval_loss": 0.6349905133247375,
      "eval_runtime": 4.8798,
      "eval_samples_per_second": 102.668,
      "eval_steps_per_second": 12.91,
      "step": 100
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.8303850293159485,
      "learning_rate": 0.00014186666666666668,
      "loss": 0.5625,
      "step": 110
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7314858436584473,
      "learning_rate": 0.00013653333333333334,
      "loss": 0.4656,
      "step": 120
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2635459899902344,
      "learning_rate": 0.00013120000000000002,
      "loss": 0.401,
      "step": 130
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9528553485870361,
      "learning_rate": 0.00012586666666666667,
      "loss": 0.5874,
      "step": 140
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2214958667755127,
      "learning_rate": 0.00012053333333333334,
      "loss": 0.4041,
      "step": 150
    },
    {
      "epoch": 1.2,
      "eval_accuracy": 0.7904191616766467,
      "eval_loss": 0.5236000418663025,
      "eval_runtime": 6.2426,
      "eval_samples_per_second": 80.255,
      "eval_steps_per_second": 10.092,
      "step": 150
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.578341007232666,
      "learning_rate": 0.0001152,
      "loss": 0.4256,
      "step": 160
    },
    {
      "epoch": 1.3599999999999999,
      "grad_norm": 0.521945059299469,
      "learning_rate": 0.00010986666666666668,
      "loss": 0.3168,
      "step": 170
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.1707364320755005,
      "learning_rate": 0.00010453333333333333,
      "loss": 0.4316,
      "step": 180
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.4519349336624146,
      "learning_rate": 9.92e-05,
      "loss": 0.2944,
      "step": 190
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.0237702131271362,
      "learning_rate": 9.386666666666667e-05,
      "loss": 0.3243,
      "step": 200
    },
    {
      "epoch": 1.6,
      "eval_accuracy": 0.874251497005988,
      "eval_loss": 0.3280556797981262,
      "eval_runtime": 4.9813,
      "eval_samples_per_second": 100.577,
      "eval_steps_per_second": 12.647,
      "step": 200
    },
    {
      "epoch": 1.6800000000000002,
      "grad_norm": 4.305712699890137,
      "learning_rate": 8.853333333333333e-05,
      "loss": 0.3118,
      "step": 210
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.6635700464248657,
      "learning_rate": 8.32e-05,
      "loss": 0.2534,
      "step": 220
    },
    {
      "epoch": 1.8399999999999999,
      "grad_norm": 1.5301051139831543,
      "learning_rate": 7.786666666666667e-05,
      "loss": 0.2702,
      "step": 230
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.7002310156822205,
      "learning_rate": 7.253333333333334e-05,
      "loss": 0.1926,
      "step": 240
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.4949961304664612,
      "learning_rate": 6.720000000000001e-05,
      "loss": 0.2041,
      "step": 250
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8922155688622755,
      "eval_loss": 0.28237032890319824,
      "eval_runtime": 4.9486,
      "eval_samples_per_second": 101.242,
      "eval_steps_per_second": 12.731,
      "step": 250
    },
    {
      "epoch": 2.08,
      "grad_norm": 13.5546236038208,
      "learning_rate": 6.186666666666668e-05,
      "loss": 0.173,
      "step": 260
    },
    {
      "epoch": 2.16,
      "grad_norm": 7.708662986755371,
      "learning_rate": 5.6533333333333336e-05,
      "loss": 0.1206,
      "step": 270
    },
    {
      "epoch": 2.24,
      "grad_norm": 0.44631195068359375,
      "learning_rate": 5.1200000000000004e-05,
      "loss": 0.1615,
      "step": 280
    },
    {
      "epoch": 2.32,
      "grad_norm": 1.9430702924728394,
      "learning_rate": 4.5866666666666666e-05,
      "loss": 0.1196,
      "step": 290
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.0821869373321533,
      "learning_rate": 4.0533333333333334e-05,
      "loss": 0.117,
      "step": 300
    },
    {
      "epoch": 2.4,
      "eval_accuracy": 0.9161676646706587,
      "eval_loss": 0.22871683537960052,
      "eval_runtime": 4.8973,
      "eval_samples_per_second": 102.302,
      "eval_steps_per_second": 12.864,
      "step": 300
    },
    {
      "epoch": 2.48,
      "grad_norm": 9.634779930114746,
      "learning_rate": 3.52e-05,
      "loss": 0.0606,
      "step": 310
    },
    {
      "epoch": 2.56,
      "grad_norm": 6.319236755371094,
      "learning_rate": 2.986666666666667e-05,
      "loss": 0.0774,
      "step": 320
    },
    {
      "epoch": 2.64,
      "grad_norm": 9.121068000793457,
      "learning_rate": 2.4533333333333334e-05,
      "loss": 0.06,
      "step": 330
    },
    {
      "epoch": 2.7199999999999998,
      "grad_norm": 3.4029245376586914,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.0997,
      "step": 340
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.8972381353378296,
      "learning_rate": 1.3866666666666667e-05,
      "loss": 0.0353,
      "step": 350
    },
    {
      "epoch": 2.8,
      "eval_accuracy": 0.9401197604790419,
      "eval_loss": 0.19462016224861145,
      "eval_runtime": 5.5189,
      "eval_samples_per_second": 90.78,
      "eval_steps_per_second": 11.415,
      "step": 350
    },
    {
      "epoch": 2.88,
      "grad_norm": 0.1985252946615219,
      "learning_rate": 8.533333333333334e-06,
      "loss": 0.0833,
      "step": 360
    },
    {
      "epoch": 2.96,
      "grad_norm": 0.129100501537323,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.0775,
      "step": 370
    },
    {
      "epoch": 3.0,
      "step": 375,
      "total_flos": 4.64951937687552e+17,
      "train_loss": 0.3516346867879232,
      "train_runtime": 232.2268,
      "train_samples_per_second": 25.837,
      "train_steps_per_second": 1.615
    }
  ],
  "logging_steps": 10,
  "max_steps": 375,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.64951937687552e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}