t5_summary / trainer_state.json
hghaan's picture
upload
f693022 verified
{
"best_metric": 0.22952628135681152,
"best_model_checkpoint": "./vit5_summary/checkpoint-2500",
"epoch": 5.990254466702761,
"eval_steps": 100,
"global_step": 2766,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2165674066053059,
"grad_norm": 109060.0546875,
"learning_rate": 7.220216606498196e-06,
"loss": 12.7509,
"step": 100
},
{
"epoch": 0.2165674066053059,
"eval_loss": 0.5880528688430786,
"eval_runtime": 137.2063,
"eval_samples_per_second": 23.075,
"eval_steps_per_second": 1.924,
"step": 100
},
{
"epoch": 0.4331348132106118,
"grad_norm": 125010.6640625,
"learning_rate": 1.4440433212996392e-05,
"loss": 0.5052,
"step": 200
},
{
"epoch": 0.4331348132106118,
"eval_loss": 0.36430519819259644,
"eval_runtime": 137.901,
"eval_samples_per_second": 22.959,
"eval_steps_per_second": 1.914,
"step": 200
},
{
"epoch": 0.6497022198159177,
"grad_norm": 65910.375,
"learning_rate": 1.9815186822016877e-05,
"loss": 0.3734,
"step": 300
},
{
"epoch": 0.6497022198159177,
"eval_loss": 0.33203062415122986,
"eval_runtime": 138.0962,
"eval_samples_per_second": 22.926,
"eval_steps_per_second": 1.912,
"step": 300
},
{
"epoch": 0.8662696264212236,
"grad_norm": 43025.13671875,
"learning_rate": 1.9011651265568504e-05,
"loss": 0.3584,
"step": 400
},
{
"epoch": 0.8662696264212236,
"eval_loss": 0.3121868371963501,
"eval_runtime": 142.7377,
"eval_samples_per_second": 22.181,
"eval_steps_per_second": 1.85,
"step": 400
},
{
"epoch": 1.0828370330265296,
"grad_norm": 58838.125,
"learning_rate": 1.820811570912013e-05,
"loss": 0.3364,
"step": 500
},
{
"epoch": 1.0828370330265296,
"eval_loss": 0.2999902665615082,
"eval_runtime": 138.8144,
"eval_samples_per_second": 22.807,
"eval_steps_per_second": 1.902,
"step": 500
},
{
"epoch": 1.2994044396318354,
"grad_norm": 77278.2890625,
"learning_rate": 1.7404580152671757e-05,
"loss": 0.317,
"step": 600
},
{
"epoch": 1.2994044396318354,
"eval_loss": 0.2913927137851715,
"eval_runtime": 138.1948,
"eval_samples_per_second": 22.91,
"eval_steps_per_second": 1.91,
"step": 600
},
{
"epoch": 1.5159718462371412,
"grad_norm": 50982.0703125,
"learning_rate": 1.6601044596223384e-05,
"loss": 0.3006,
"step": 700
},
{
"epoch": 1.5159718462371412,
"eval_loss": 0.2813930809497833,
"eval_runtime": 137.5997,
"eval_samples_per_second": 23.009,
"eval_steps_per_second": 1.919,
"step": 700
},
{
"epoch": 1.7325392528424473,
"grad_norm": 63450.98046875,
"learning_rate": 1.579750903977501e-05,
"loss": 0.2931,
"step": 800
},
{
"epoch": 1.7325392528424473,
"eval_loss": 0.2716469168663025,
"eval_runtime": 137.578,
"eval_samples_per_second": 23.012,
"eval_steps_per_second": 1.919,
"step": 800
},
{
"epoch": 1.949106659447753,
"grad_norm": 46679.9140625,
"learning_rate": 1.4993973483326637e-05,
"loss": 0.2925,
"step": 900
},
{
"epoch": 1.949106659447753,
"eval_loss": 0.26637548208236694,
"eval_runtime": 142.275,
"eval_samples_per_second": 22.253,
"eval_steps_per_second": 1.856,
"step": 900
},
{
"epoch": 2.165674066053059,
"grad_norm": 51095.6640625,
"learning_rate": 1.4190437926878266e-05,
"loss": 0.2752,
"step": 1000
},
{
"epoch": 2.165674066053059,
"eval_loss": 0.2650776505470276,
"eval_runtime": 144.2721,
"eval_samples_per_second": 21.945,
"eval_steps_per_second": 1.83,
"step": 1000
},
{
"epoch": 2.382241472658365,
"grad_norm": 50603.19921875,
"learning_rate": 1.3386902370429893e-05,
"loss": 0.2675,
"step": 1100
},
{
"epoch": 2.382241472658365,
"eval_loss": 0.2589792311191559,
"eval_runtime": 137.6329,
"eval_samples_per_second": 23.003,
"eval_steps_per_second": 1.918,
"step": 1100
},
{
"epoch": 2.5988088792636708,
"grad_norm": 53034.58984375,
"learning_rate": 1.258336681398152e-05,
"loss": 0.259,
"step": 1200
},
{
"epoch": 2.5988088792636708,
"eval_loss": 0.2544151544570923,
"eval_runtime": 137.1871,
"eval_samples_per_second": 23.078,
"eval_steps_per_second": 1.924,
"step": 1200
},
{
"epoch": 2.8153762858689766,
"grad_norm": 51014.87890625,
"learning_rate": 1.1779831257533148e-05,
"loss": 0.2639,
"step": 1300
},
{
"epoch": 2.8153762858689766,
"eval_loss": 0.2480245977640152,
"eval_runtime": 138.2167,
"eval_samples_per_second": 22.906,
"eval_steps_per_second": 1.91,
"step": 1300
},
{
"epoch": 3.0319436924742824,
"grad_norm": 52027.0625,
"learning_rate": 1.0976295701084774e-05,
"loss": 0.2515,
"step": 1400
},
{
"epoch": 3.0319436924742824,
"eval_loss": 0.24613255262374878,
"eval_runtime": 138.1819,
"eval_samples_per_second": 22.912,
"eval_steps_per_second": 1.911,
"step": 1400
},
{
"epoch": 3.2485110990795887,
"grad_norm": 64106.3203125,
"learning_rate": 1.01727601446364e-05,
"loss": 0.2349,
"step": 1500
},
{
"epoch": 3.2485110990795887,
"eval_loss": 0.24559645354747772,
"eval_runtime": 142.67,
"eval_samples_per_second": 22.191,
"eval_steps_per_second": 1.85,
"step": 1500
},
{
"epoch": 3.4650785056848945,
"grad_norm": 53759.48046875,
"learning_rate": 9.369224588188028e-06,
"loss": 0.2386,
"step": 1600
},
{
"epoch": 3.4650785056848945,
"eval_loss": 0.24301083385944366,
"eval_runtime": 138.4069,
"eval_samples_per_second": 22.875,
"eval_steps_per_second": 1.907,
"step": 1600
},
{
"epoch": 3.6816459122902003,
"grad_norm": 53480.16015625,
"learning_rate": 8.565689031739656e-06,
"loss": 0.2343,
"step": 1700
},
{
"epoch": 3.6816459122902003,
"eval_loss": 0.23925070464611053,
"eval_runtime": 137.9969,
"eval_samples_per_second": 22.943,
"eval_steps_per_second": 1.913,
"step": 1700
},
{
"epoch": 3.898213318895506,
"grad_norm": 58331.94921875,
"learning_rate": 7.762153475291283e-06,
"loss": 0.2362,
"step": 1800
},
{
"epoch": 3.898213318895506,
"eval_loss": 0.23644807934761047,
"eval_runtime": 137.3893,
"eval_samples_per_second": 23.044,
"eval_steps_per_second": 1.922,
"step": 1800
},
{
"epoch": 4.114780725500812,
"grad_norm": 53067.78125,
"learning_rate": 6.958617918842909e-06,
"loss": 0.2264,
"step": 1900
},
{
"epoch": 4.114780725500812,
"eval_loss": 0.23520290851593018,
"eval_runtime": 137.4102,
"eval_samples_per_second": 23.041,
"eval_steps_per_second": 1.921,
"step": 1900
},
{
"epoch": 4.331348132106118,
"grad_norm": 63341.34765625,
"learning_rate": 6.155082362394537e-06,
"loss": 0.2209,
"step": 2000
},
{
"epoch": 4.331348132106118,
"eval_loss": 0.23410087823867798,
"eval_runtime": 142.6996,
"eval_samples_per_second": 22.186,
"eval_steps_per_second": 1.85,
"step": 2000
},
{
"epoch": 4.547915538711424,
"grad_norm": 59952.515625,
"learning_rate": 5.3515468059461635e-06,
"loss": 0.2209,
"step": 2100
},
{
"epoch": 4.547915538711424,
"eval_loss": 0.23309889435768127,
"eval_runtime": 142.5841,
"eval_samples_per_second": 22.204,
"eval_steps_per_second": 1.852,
"step": 2100
},
{
"epoch": 4.76448294531673,
"grad_norm": 61383.71484375,
"learning_rate": 4.548011249497791e-06,
"loss": 0.2204,
"step": 2200
},
{
"epoch": 4.76448294531673,
"eval_loss": 0.23156361281871796,
"eval_runtime": 137.9316,
"eval_samples_per_second": 22.953,
"eval_steps_per_second": 1.914,
"step": 2200
},
{
"epoch": 4.981050351922036,
"grad_norm": 57406.8125,
"learning_rate": 3.7444756930494173e-06,
"loss": 0.2185,
"step": 2300
},
{
"epoch": 4.981050351922036,
"eval_loss": 0.22999364137649536,
"eval_runtime": 137.9105,
"eval_samples_per_second": 22.957,
"eval_steps_per_second": 1.914,
"step": 2300
},
{
"epoch": 5.1976177585273415,
"grad_norm": 58135.14453125,
"learning_rate": 2.940940136601045e-06,
"loss": 0.2136,
"step": 2400
},
{
"epoch": 5.1976177585273415,
"eval_loss": 0.23013143241405487,
"eval_runtime": 137.4929,
"eval_samples_per_second": 23.027,
"eval_steps_per_second": 1.92,
"step": 2400
},
{
"epoch": 5.414185165132648,
"grad_norm": 54848.12890625,
"learning_rate": 2.137404580152672e-06,
"loss": 0.2102,
"step": 2500
},
{
"epoch": 5.414185165132648,
"eval_loss": 0.22952628135681152,
"eval_runtime": 142.5207,
"eval_samples_per_second": 22.214,
"eval_steps_per_second": 1.852,
"step": 2500
},
{
"epoch": 5.630752571737953,
"grad_norm": 62785.00390625,
"learning_rate": 1.333869023704299e-06,
"loss": 0.2141,
"step": 2600
},
{
"epoch": 5.630752571737953,
"eval_loss": 0.2286754995584488,
"eval_runtime": 142.4263,
"eval_samples_per_second": 22.229,
"eval_steps_per_second": 1.854,
"step": 2600
},
{
"epoch": 5.8473199783432595,
"grad_norm": 63349.1015625,
"learning_rate": 5.303334672559261e-07,
"loss": 0.2115,
"step": 2700
},
{
"epoch": 5.8473199783432595,
"eval_loss": 0.22872701287269592,
"eval_runtime": 137.9701,
"eval_samples_per_second": 22.947,
"eval_steps_per_second": 1.913,
"step": 2700
}
],
"logging_steps": 100,
"max_steps": 2766,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 2
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.084098684551168e+16,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}