| { |
| "best_metric": 0.22952628135681152, |
| "best_model_checkpoint": "./vit5_summary/checkpoint-2500", |
| "epoch": 5.990254466702761, |
| "eval_steps": 100, |
| "global_step": 2766, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2165674066053059, |
| "grad_norm": 109060.0546875, |
| "learning_rate": 7.220216606498196e-06, |
| "loss": 12.7509, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2165674066053059, |
| "eval_loss": 0.5880528688430786, |
| "eval_runtime": 137.2063, |
| "eval_samples_per_second": 23.075, |
| "eval_steps_per_second": 1.924, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4331348132106118, |
| "grad_norm": 125010.6640625, |
| "learning_rate": 1.4440433212996392e-05, |
| "loss": 0.5052, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4331348132106118, |
| "eval_loss": 0.36430519819259644, |
| "eval_runtime": 137.901, |
| "eval_samples_per_second": 22.959, |
| "eval_steps_per_second": 1.914, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6497022198159177, |
| "grad_norm": 65910.375, |
| "learning_rate": 1.9815186822016877e-05, |
| "loss": 0.3734, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6497022198159177, |
| "eval_loss": 0.33203062415122986, |
| "eval_runtime": 138.0962, |
| "eval_samples_per_second": 22.926, |
| "eval_steps_per_second": 1.912, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8662696264212236, |
| "grad_norm": 43025.13671875, |
| "learning_rate": 1.9011651265568504e-05, |
| "loss": 0.3584, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8662696264212236, |
| "eval_loss": 0.3121868371963501, |
| "eval_runtime": 142.7377, |
| "eval_samples_per_second": 22.181, |
| "eval_steps_per_second": 1.85, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0828370330265296, |
| "grad_norm": 58838.125, |
| "learning_rate": 1.820811570912013e-05, |
| "loss": 0.3364, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0828370330265296, |
| "eval_loss": 0.2999902665615082, |
| "eval_runtime": 138.8144, |
| "eval_samples_per_second": 22.807, |
| "eval_steps_per_second": 1.902, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.2994044396318354, |
| "grad_norm": 77278.2890625, |
| "learning_rate": 1.7404580152671757e-05, |
| "loss": 0.317, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.2994044396318354, |
| "eval_loss": 0.2913927137851715, |
| "eval_runtime": 138.1948, |
| "eval_samples_per_second": 22.91, |
| "eval_steps_per_second": 1.91, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.5159718462371412, |
| "grad_norm": 50982.0703125, |
| "learning_rate": 1.6601044596223384e-05, |
| "loss": 0.3006, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.5159718462371412, |
| "eval_loss": 0.2813930809497833, |
| "eval_runtime": 137.5997, |
| "eval_samples_per_second": 23.009, |
| "eval_steps_per_second": 1.919, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7325392528424473, |
| "grad_norm": 63450.98046875, |
| "learning_rate": 1.579750903977501e-05, |
| "loss": 0.2931, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.7325392528424473, |
| "eval_loss": 0.2716469168663025, |
| "eval_runtime": 137.578, |
| "eval_samples_per_second": 23.012, |
| "eval_steps_per_second": 1.919, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.949106659447753, |
| "grad_norm": 46679.9140625, |
| "learning_rate": 1.4993973483326637e-05, |
| "loss": 0.2925, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.949106659447753, |
| "eval_loss": 0.26637548208236694, |
| "eval_runtime": 142.275, |
| "eval_samples_per_second": 22.253, |
| "eval_steps_per_second": 1.856, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.165674066053059, |
| "grad_norm": 51095.6640625, |
| "learning_rate": 1.4190437926878266e-05, |
| "loss": 0.2752, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.165674066053059, |
| "eval_loss": 0.2650776505470276, |
| "eval_runtime": 144.2721, |
| "eval_samples_per_second": 21.945, |
| "eval_steps_per_second": 1.83, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.382241472658365, |
| "grad_norm": 50603.19921875, |
| "learning_rate": 1.3386902370429893e-05, |
| "loss": 0.2675, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.382241472658365, |
| "eval_loss": 0.2589792311191559, |
| "eval_runtime": 137.6329, |
| "eval_samples_per_second": 23.003, |
| "eval_steps_per_second": 1.918, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.5988088792636708, |
| "grad_norm": 53034.58984375, |
| "learning_rate": 1.258336681398152e-05, |
| "loss": 0.259, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.5988088792636708, |
| "eval_loss": 0.2544151544570923, |
| "eval_runtime": 137.1871, |
| "eval_samples_per_second": 23.078, |
| "eval_steps_per_second": 1.924, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.8153762858689766, |
| "grad_norm": 51014.87890625, |
| "learning_rate": 1.1779831257533148e-05, |
| "loss": 0.2639, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.8153762858689766, |
| "eval_loss": 0.2480245977640152, |
| "eval_runtime": 138.2167, |
| "eval_samples_per_second": 22.906, |
| "eval_steps_per_second": 1.91, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.0319436924742824, |
| "grad_norm": 52027.0625, |
| "learning_rate": 1.0976295701084774e-05, |
| "loss": 0.2515, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.0319436924742824, |
| "eval_loss": 0.24613255262374878, |
| "eval_runtime": 138.1819, |
| "eval_samples_per_second": 22.912, |
| "eval_steps_per_second": 1.911, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.2485110990795887, |
| "grad_norm": 64106.3203125, |
| "learning_rate": 1.01727601446364e-05, |
| "loss": 0.2349, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.2485110990795887, |
| "eval_loss": 0.24559645354747772, |
| "eval_runtime": 142.67, |
| "eval_samples_per_second": 22.191, |
| "eval_steps_per_second": 1.85, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.4650785056848945, |
| "grad_norm": 53759.48046875, |
| "learning_rate": 9.369224588188028e-06, |
| "loss": 0.2386, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.4650785056848945, |
| "eval_loss": 0.24301083385944366, |
| "eval_runtime": 138.4069, |
| "eval_samples_per_second": 22.875, |
| "eval_steps_per_second": 1.907, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.6816459122902003, |
| "grad_norm": 53480.16015625, |
| "learning_rate": 8.565689031739656e-06, |
| "loss": 0.2343, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.6816459122902003, |
| "eval_loss": 0.23925070464611053, |
| "eval_runtime": 137.9969, |
| "eval_samples_per_second": 22.943, |
| "eval_steps_per_second": 1.913, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.898213318895506, |
| "grad_norm": 58331.94921875, |
| "learning_rate": 7.762153475291283e-06, |
| "loss": 0.2362, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.898213318895506, |
| "eval_loss": 0.23644807934761047, |
| "eval_runtime": 137.3893, |
| "eval_samples_per_second": 23.044, |
| "eval_steps_per_second": 1.922, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.114780725500812, |
| "grad_norm": 53067.78125, |
| "learning_rate": 6.958617918842909e-06, |
| "loss": 0.2264, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.114780725500812, |
| "eval_loss": 0.23520290851593018, |
| "eval_runtime": 137.4102, |
| "eval_samples_per_second": 23.041, |
| "eval_steps_per_second": 1.921, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.331348132106118, |
| "grad_norm": 63341.34765625, |
| "learning_rate": 6.155082362394537e-06, |
| "loss": 0.2209, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.331348132106118, |
| "eval_loss": 0.23410087823867798, |
| "eval_runtime": 142.6996, |
| "eval_samples_per_second": 22.186, |
| "eval_steps_per_second": 1.85, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.547915538711424, |
| "grad_norm": 59952.515625, |
| "learning_rate": 5.3515468059461635e-06, |
| "loss": 0.2209, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.547915538711424, |
| "eval_loss": 0.23309889435768127, |
| "eval_runtime": 142.5841, |
| "eval_samples_per_second": 22.204, |
| "eval_steps_per_second": 1.852, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.76448294531673, |
| "grad_norm": 61383.71484375, |
| "learning_rate": 4.548011249497791e-06, |
| "loss": 0.2204, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.76448294531673, |
| "eval_loss": 0.23156361281871796, |
| "eval_runtime": 137.9316, |
| "eval_samples_per_second": 22.953, |
| "eval_steps_per_second": 1.914, |
| "step": 2200 |
| }, |
| { |
| "epoch": 4.981050351922036, |
| "grad_norm": 57406.8125, |
| "learning_rate": 3.7444756930494173e-06, |
| "loss": 0.2185, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.981050351922036, |
| "eval_loss": 0.22999364137649536, |
| "eval_runtime": 137.9105, |
| "eval_samples_per_second": 22.957, |
| "eval_steps_per_second": 1.914, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.1976177585273415, |
| "grad_norm": 58135.14453125, |
| "learning_rate": 2.940940136601045e-06, |
| "loss": 0.2136, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.1976177585273415, |
| "eval_loss": 0.23013143241405487, |
| "eval_runtime": 137.4929, |
| "eval_samples_per_second": 23.027, |
| "eval_steps_per_second": 1.92, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.414185165132648, |
| "grad_norm": 54848.12890625, |
| "learning_rate": 2.137404580152672e-06, |
| "loss": 0.2102, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.414185165132648, |
| "eval_loss": 0.22952628135681152, |
| "eval_runtime": 142.5207, |
| "eval_samples_per_second": 22.214, |
| "eval_steps_per_second": 1.852, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.630752571737953, |
| "grad_norm": 62785.00390625, |
| "learning_rate": 1.333869023704299e-06, |
| "loss": 0.2141, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.630752571737953, |
| "eval_loss": 0.2286754995584488, |
| "eval_runtime": 142.4263, |
| "eval_samples_per_second": 22.229, |
| "eval_steps_per_second": 1.854, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.8473199783432595, |
| "grad_norm": 63349.1015625, |
| "learning_rate": 5.303334672559261e-07, |
| "loss": 0.2115, |
| "step": 2700 |
| }, |
| { |
| "epoch": 5.8473199783432595, |
| "eval_loss": 0.22872701287269592, |
| "eval_runtime": 137.9701, |
| "eval_samples_per_second": 22.947, |
| "eval_steps_per_second": 1.913, |
| "step": 2700 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2766, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 3, |
| "early_stopping_threshold": 0.001 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 2 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.084098684551168e+16, |
| "train_batch_size": 12, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|