Vit_edges / drawing /trainer_state.json
peter9356's picture
Upload folder using huggingface_hub
79a6001
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 73.07692307692308,
"global_step": 3800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.92,
"learning_rate": 9.940384615384614e-07,
"loss": 2.1803,
"step": 100
},
{
"epoch": 1.92,
"eval_loss": 1.6282658576965332,
"eval_runtime": 4.7479,
"eval_samples_per_second": 4.634,
"eval_steps_per_second": 0.632,
"step": 100
},
{
"epoch": 3.85,
"learning_rate": 9.87628205128205e-07,
"loss": 1.9556,
"step": 200
},
{
"epoch": 3.85,
"eval_loss": 1.6041302680969238,
"eval_runtime": 4.2335,
"eval_samples_per_second": 5.197,
"eval_steps_per_second": 0.709,
"step": 200
},
{
"epoch": 5.77,
"learning_rate": 9.812179487179486e-07,
"loss": 1.8415,
"step": 300
},
{
"epoch": 5.77,
"eval_loss": 1.5072983503341675,
"eval_runtime": 4.2682,
"eval_samples_per_second": 5.154,
"eval_steps_per_second": 0.703,
"step": 300
},
{
"epoch": 7.69,
"learning_rate": 9.748076923076923e-07,
"loss": 1.849,
"step": 400
},
{
"epoch": 7.69,
"eval_loss": 1.5139409303665161,
"eval_runtime": 4.1966,
"eval_samples_per_second": 5.242,
"eval_steps_per_second": 0.715,
"step": 400
},
{
"epoch": 9.62,
"learning_rate": 9.683974358974359e-07,
"loss": 1.7466,
"step": 500
},
{
"epoch": 9.62,
"eval_loss": 1.4191316366195679,
"eval_runtime": 4.1991,
"eval_samples_per_second": 5.239,
"eval_steps_per_second": 0.714,
"step": 500
},
{
"epoch": 11.54,
"learning_rate": 9.62051282051282e-07,
"loss": 1.7443,
"step": 600
},
{
"epoch": 11.54,
"eval_loss": 1.4041415452957153,
"eval_runtime": 4.3488,
"eval_samples_per_second": 5.059,
"eval_steps_per_second": 0.69,
"step": 600
},
{
"epoch": 13.46,
"learning_rate": 9.556410256410255e-07,
"loss": 1.6662,
"step": 700
},
{
"epoch": 13.46,
"eval_loss": 1.3958640098571777,
"eval_runtime": 4.2134,
"eval_samples_per_second": 5.221,
"eval_steps_per_second": 0.712,
"step": 700
},
{
"epoch": 15.38,
"learning_rate": 9.492307692307691e-07,
"loss": 1.6388,
"step": 800
},
{
"epoch": 15.38,
"eval_loss": 1.364356517791748,
"eval_runtime": 4.4313,
"eval_samples_per_second": 4.965,
"eval_steps_per_second": 0.677,
"step": 800
},
{
"epoch": 17.31,
"learning_rate": 9.428205128205127e-07,
"loss": 1.5375,
"step": 900
},
{
"epoch": 17.31,
"eval_loss": 1.3546158075332642,
"eval_runtime": 4.2939,
"eval_samples_per_second": 5.124,
"eval_steps_per_second": 0.699,
"step": 900
},
{
"epoch": 19.23,
"learning_rate": 9.364102564102564e-07,
"loss": 1.7777,
"step": 1000
},
{
"epoch": 19.23,
"eval_loss": 1.345603585243225,
"eval_runtime": 4.2959,
"eval_samples_per_second": 5.121,
"eval_steps_per_second": 0.698,
"step": 1000
},
{
"epoch": 21.15,
"learning_rate": 9.3e-07,
"loss": 1.562,
"step": 1100
},
{
"epoch": 21.15,
"eval_loss": 1.3052722215652466,
"eval_runtime": 4.3299,
"eval_samples_per_second": 5.081,
"eval_steps_per_second": 0.693,
"step": 1100
},
{
"epoch": 23.08,
"learning_rate": 9.235897435897435e-07,
"loss": 1.4714,
"step": 1200
},
{
"epoch": 23.08,
"eval_loss": 1.2963144779205322,
"eval_runtime": 4.2895,
"eval_samples_per_second": 5.129,
"eval_steps_per_second": 0.699,
"step": 1200
},
{
"epoch": 25.0,
"learning_rate": 9.172435897435897e-07,
"loss": 1.5037,
"step": 1300
},
{
"epoch": 25.0,
"eval_loss": 1.2800103425979614,
"eval_runtime": 4.2403,
"eval_samples_per_second": 5.188,
"eval_steps_per_second": 0.707,
"step": 1300
},
{
"epoch": 26.92,
"learning_rate": 9.108333333333333e-07,
"loss": 1.3721,
"step": 1400
},
{
"epoch": 26.92,
"eval_loss": 1.276384711265564,
"eval_runtime": 4.3022,
"eval_samples_per_second": 5.114,
"eval_steps_per_second": 0.697,
"step": 1400
},
{
"epoch": 28.85,
"learning_rate": 9.044230769230768e-07,
"loss": 1.4898,
"step": 1500
},
{
"epoch": 28.85,
"eval_loss": 1.29233980178833,
"eval_runtime": 4.2189,
"eval_samples_per_second": 5.215,
"eval_steps_per_second": 0.711,
"step": 1500
},
{
"epoch": 30.77,
"learning_rate": 8.980128205128205e-07,
"loss": 1.4936,
"step": 1600
},
{
"epoch": 30.77,
"eval_loss": 1.2993799448013306,
"eval_runtime": 4.1785,
"eval_samples_per_second": 5.265,
"eval_steps_per_second": 0.718,
"step": 1600
},
{
"epoch": 32.69,
"learning_rate": 8.916025641025641e-07,
"loss": 1.4178,
"step": 1700
},
{
"epoch": 32.69,
"eval_loss": 1.253838062286377,
"eval_runtime": 4.2708,
"eval_samples_per_second": 5.151,
"eval_steps_per_second": 0.702,
"step": 1700
},
{
"epoch": 34.62,
"learning_rate": 8.851923076923077e-07,
"loss": 1.2665,
"step": 1800
},
{
"epoch": 34.62,
"eval_loss": 1.2696892023086548,
"eval_runtime": 4.2383,
"eval_samples_per_second": 5.191,
"eval_steps_per_second": 0.708,
"step": 1800
},
{
"epoch": 36.54,
"learning_rate": 8.787820512820513e-07,
"loss": 1.3008,
"step": 1900
},
{
"epoch": 36.54,
"eval_loss": 1.2657532691955566,
"eval_runtime": 4.3241,
"eval_samples_per_second": 5.088,
"eval_steps_per_second": 0.694,
"step": 1900
},
{
"epoch": 38.46,
"learning_rate": 8.723717948717948e-07,
"loss": 1.4281,
"step": 2000
},
{
"epoch": 38.46,
"eval_loss": 1.2740833759307861,
"eval_runtime": 4.3835,
"eval_samples_per_second": 5.019,
"eval_steps_per_second": 0.684,
"step": 2000
},
{
"epoch": 40.38,
"learning_rate": 8.659615384615384e-07,
"loss": 1.2315,
"step": 2100
},
{
"epoch": 40.38,
"eval_loss": 1.2776740789413452,
"eval_runtime": 4.3172,
"eval_samples_per_second": 5.096,
"eval_steps_per_second": 0.695,
"step": 2100
},
{
"epoch": 42.31,
"learning_rate": 8.59551282051282e-07,
"loss": 1.2442,
"step": 2200
},
{
"epoch": 42.31,
"eval_loss": 1.2726629972457886,
"eval_runtime": 4.2859,
"eval_samples_per_second": 5.133,
"eval_steps_per_second": 0.7,
"step": 2200
},
{
"epoch": 44.23,
"learning_rate": 8.531410256410256e-07,
"loss": 1.2882,
"step": 2300
},
{
"epoch": 44.23,
"eval_loss": 1.2753421068191528,
"eval_runtime": 4.2108,
"eval_samples_per_second": 5.225,
"eval_steps_per_second": 0.712,
"step": 2300
},
{
"epoch": 46.15,
"learning_rate": 8.467307692307691e-07,
"loss": 1.194,
"step": 2400
},
{
"epoch": 46.15,
"eval_loss": 1.2462925910949707,
"eval_runtime": 4.1887,
"eval_samples_per_second": 5.252,
"eval_steps_per_second": 0.716,
"step": 2400
},
{
"epoch": 48.08,
"learning_rate": 8.403205128205128e-07,
"loss": 1.2223,
"step": 2500
},
{
"epoch": 48.08,
"eval_loss": 1.2566279172897339,
"eval_runtime": 4.2232,
"eval_samples_per_second": 5.209,
"eval_steps_per_second": 0.71,
"step": 2500
},
{
"epoch": 50.0,
"learning_rate": 8.339102564102564e-07,
"loss": 1.1984,
"step": 2600
},
{
"epoch": 50.0,
"eval_loss": 1.2548397779464722,
"eval_runtime": 4.2193,
"eval_samples_per_second": 5.214,
"eval_steps_per_second": 0.711,
"step": 2600
},
{
"epoch": 51.92,
"learning_rate": 8.275e-07,
"loss": 1.1202,
"step": 2700
},
{
"epoch": 51.92,
"eval_loss": 1.2726110219955444,
"eval_runtime": 4.197,
"eval_samples_per_second": 5.242,
"eval_steps_per_second": 0.715,
"step": 2700
},
{
"epoch": 53.85,
"learning_rate": 8.210897435897435e-07,
"loss": 1.1128,
"step": 2800
},
{
"epoch": 53.85,
"eval_loss": 1.2732219696044922,
"eval_runtime": 4.189,
"eval_samples_per_second": 5.252,
"eval_steps_per_second": 0.716,
"step": 2800
},
{
"epoch": 55.77,
"learning_rate": 8.146794871794871e-07,
"loss": 1.1649,
"step": 2900
},
{
"epoch": 55.77,
"eval_loss": 1.2652595043182373,
"eval_runtime": 4.2028,
"eval_samples_per_second": 5.235,
"eval_steps_per_second": 0.714,
"step": 2900
},
{
"epoch": 57.69,
"learning_rate": 8.082692307692307e-07,
"loss": 1.1066,
"step": 3000
},
{
"epoch": 57.69,
"eval_loss": 1.254478931427002,
"eval_runtime": 4.2195,
"eval_samples_per_second": 5.214,
"eval_steps_per_second": 0.711,
"step": 3000
},
{
"epoch": 59.62,
"learning_rate": 8.018589743589743e-07,
"loss": 1.0629,
"step": 3100
},
{
"epoch": 59.62,
"eval_loss": 1.2273532152175903,
"eval_runtime": 4.2147,
"eval_samples_per_second": 5.22,
"eval_steps_per_second": 0.712,
"step": 3100
},
{
"epoch": 61.54,
"learning_rate": 7.955128205128204e-07,
"loss": 1.093,
"step": 3200
},
{
"epoch": 61.54,
"eval_loss": 1.2593165636062622,
"eval_runtime": 4.2295,
"eval_samples_per_second": 5.202,
"eval_steps_per_second": 0.709,
"step": 3200
},
{
"epoch": 63.46,
"learning_rate": 7.89102564102564e-07,
"loss": 0.9813,
"step": 3300
},
{
"epoch": 63.46,
"eval_loss": 1.2721446752548218,
"eval_runtime": 4.1692,
"eval_samples_per_second": 5.277,
"eval_steps_per_second": 0.72,
"step": 3300
},
{
"epoch": 65.38,
"learning_rate": 7.826923076923076e-07,
"loss": 1.0538,
"step": 3400
},
{
"epoch": 65.38,
"eval_loss": 1.2700778245925903,
"eval_runtime": 4.2208,
"eval_samples_per_second": 5.212,
"eval_steps_per_second": 0.711,
"step": 3400
},
{
"epoch": 67.31,
"learning_rate": 7.762820512820513e-07,
"loss": 0.9368,
"step": 3500
},
{
"epoch": 67.31,
"eval_loss": 1.2771656513214111,
"eval_runtime": 4.2547,
"eval_samples_per_second": 5.171,
"eval_steps_per_second": 0.705,
"step": 3500
},
{
"epoch": 69.23,
"learning_rate": 7.698717948717949e-07,
"loss": 1.0294,
"step": 3600
},
{
"epoch": 69.23,
"eval_loss": 1.2606873512268066,
"eval_runtime": 4.2609,
"eval_samples_per_second": 5.163,
"eval_steps_per_second": 0.704,
"step": 3600
},
{
"epoch": 71.15,
"learning_rate": 7.634615384615384e-07,
"loss": 0.9596,
"step": 3700
},
{
"epoch": 71.15,
"eval_loss": 1.2618690729141235,
"eval_runtime": 4.2669,
"eval_samples_per_second": 5.156,
"eval_steps_per_second": 0.703,
"step": 3700
},
{
"epoch": 73.08,
"learning_rate": 7.57051282051282e-07,
"loss": 0.9926,
"step": 3800
},
{
"epoch": 73.08,
"eval_loss": 1.2698349952697754,
"eval_runtime": 4.2523,
"eval_samples_per_second": 5.174,
"eval_steps_per_second": 0.705,
"step": 3800
}
],
"max_steps": 15600,
"num_train_epochs": 300,
"total_flos": 9.161067103204147e+18,
"trial_name": null,
"trial_params": null
}