vit_focus_full / trainer_state.json
JulesGo's picture
Fin de l'entraînement
bedeabf verified
{
"best_global_step": 969,
"best_metric": 0.12907913327217102,
"best_model_checkpoint": "./vit_focus_full/checkpoint-969",
"epoch": 29.985507246376812,
"eval_steps": 500,
"global_step": 1530,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7729468599033816,
"grad_norm": 10.6144437789917,
"learning_rate": 4.872549019607843e-05,
"loss": 0.3146,
"step": 40
},
{
"epoch": 0.9855072463768116,
"eval_loss": 0.05953465402126312,
"eval_mae": 0.3265259861946106,
"eval_mse": 0.1403445601463318,
"eval_runtime": 57.1346,
"eval_samples_per_second": 7.246,
"eval_steps_per_second": 0.91,
"step": 51
},
{
"epoch": 1.5603864734299517,
"grad_norm": 5.5437469482421875,
"learning_rate": 4.741830065359477e-05,
"loss": 0.2488,
"step": 80
},
{
"epoch": 1.9855072463768115,
"eval_loss": 0.05661279708147049,
"eval_mae": 0.3253043293952942,
"eval_mse": 0.13950611650943756,
"eval_runtime": 56.4199,
"eval_samples_per_second": 7.338,
"eval_steps_per_second": 0.922,
"step": 102
},
{
"epoch": 2.3478260869565215,
"grad_norm": 4.464972972869873,
"learning_rate": 4.6111111111111115e-05,
"loss": 0.2278,
"step": 120
},
{
"epoch": 2.9855072463768115,
"eval_loss": 0.06113344058394432,
"eval_mae": 0.3287981450557709,
"eval_mse": 0.14261041581630707,
"eval_runtime": 56.9728,
"eval_samples_per_second": 7.267,
"eval_steps_per_second": 0.913,
"step": 153
},
{
"epoch": 3.135265700483092,
"grad_norm": 8.567197799682617,
"learning_rate": 4.480392156862745e-05,
"loss": 0.2148,
"step": 160
},
{
"epoch": 3.9082125603864735,
"grad_norm": 5.523195266723633,
"learning_rate": 4.3496732026143795e-05,
"loss": 0.206,
"step": 200
},
{
"epoch": 3.9855072463768115,
"eval_loss": 0.05355362221598625,
"eval_mae": 0.31797775626182556,
"eval_mse": 0.13227654993534088,
"eval_runtime": 56.8583,
"eval_samples_per_second": 7.281,
"eval_steps_per_second": 0.915,
"step": 204
},
{
"epoch": 4.695652173913043,
"grad_norm": 6.00140380859375,
"learning_rate": 4.218954248366013e-05,
"loss": 0.1902,
"step": 240
},
{
"epoch": 4.9855072463768115,
"eval_loss": 0.06186218187212944,
"eval_mae": 0.3270839750766754,
"eval_mse": 0.1410592794418335,
"eval_runtime": 56.1466,
"eval_samples_per_second": 7.374,
"eval_steps_per_second": 0.926,
"step": 255
},
{
"epoch": 5.483091787439614,
"grad_norm": 9.328702926635742,
"learning_rate": 4.0882352941176474e-05,
"loss": 0.187,
"step": 280
},
{
"epoch": 5.9855072463768115,
"eval_loss": 0.05080530419945717,
"eval_mae": 0.3168753385543823,
"eval_mse": 0.1319676637649536,
"eval_runtime": 57.4612,
"eval_samples_per_second": 7.205,
"eval_steps_per_second": 0.905,
"step": 306
},
{
"epoch": 6.270531400966184,
"grad_norm": 7.799366474151611,
"learning_rate": 3.957516339869281e-05,
"loss": 0.1757,
"step": 320
},
{
"epoch": 6.9855072463768115,
"eval_loss": 0.05371123179793358,
"eval_mae": 0.31825557351112366,
"eval_mse": 0.13387194275856018,
"eval_runtime": 57.3782,
"eval_samples_per_second": 7.215,
"eval_steps_per_second": 0.906,
"step": 357
},
{
"epoch": 7.057971014492754,
"grad_norm": 4.06664514541626,
"learning_rate": 3.8267973856209146e-05,
"loss": 0.1677,
"step": 360
},
{
"epoch": 7.830917874396135,
"grad_norm": 5.403101921081543,
"learning_rate": 3.6960784313725496e-05,
"loss": 0.1523,
"step": 400
},
{
"epoch": 7.9855072463768115,
"eval_loss": 0.055755238980054855,
"eval_mae": 0.31683334708213806,
"eval_mse": 0.13297995924949646,
"eval_runtime": 65.7904,
"eval_samples_per_second": 6.293,
"eval_steps_per_second": 0.79,
"step": 408
},
{
"epoch": 8.618357487922705,
"grad_norm": 6.7577948570251465,
"learning_rate": 3.565359477124183e-05,
"loss": 0.1528,
"step": 440
},
{
"epoch": 8.985507246376812,
"eval_loss": 0.05914789438247681,
"eval_mae": 0.3224806785583496,
"eval_mse": 0.1381232738494873,
"eval_runtime": 56.9831,
"eval_samples_per_second": 7.265,
"eval_steps_per_second": 0.913,
"step": 459
},
{
"epoch": 9.405797101449275,
"grad_norm": 4.654517650604248,
"learning_rate": 3.434640522875817e-05,
"loss": 0.1416,
"step": 480
},
{
"epoch": 9.985507246376812,
"eval_loss": 0.05355934053659439,
"eval_mae": 0.3197546601295471,
"eval_mse": 0.1352616846561432,
"eval_runtime": 57.4136,
"eval_samples_per_second": 7.211,
"eval_steps_per_second": 0.906,
"step": 510
},
{
"epoch": 10.193236714975846,
"grad_norm": 4.063232421875,
"learning_rate": 3.303921568627451e-05,
"loss": 0.1391,
"step": 520
},
{
"epoch": 10.966183574879228,
"grad_norm": 4.905858993530273,
"learning_rate": 3.173202614379085e-05,
"loss": 0.1298,
"step": 560
},
{
"epoch": 10.985507246376812,
"eval_loss": 0.05300646275281906,
"eval_mae": 0.3164079189300537,
"eval_mse": 0.132521390914917,
"eval_runtime": 58.995,
"eval_samples_per_second": 7.018,
"eval_steps_per_second": 0.881,
"step": 561
},
{
"epoch": 11.753623188405797,
"grad_norm": 4.643632411956787,
"learning_rate": 3.0424836601307187e-05,
"loss": 0.1161,
"step": 600
},
{
"epoch": 11.985507246376812,
"eval_loss": 0.0511205680668354,
"eval_mae": 0.315570205450058,
"eval_mse": 0.13146661221981049,
"eval_runtime": 57.0447,
"eval_samples_per_second": 7.257,
"eval_steps_per_second": 0.912,
"step": 612
},
{
"epoch": 12.541062801932368,
"grad_norm": 3.0849831104278564,
"learning_rate": 2.9117647058823534e-05,
"loss": 0.1085,
"step": 640
},
{
"epoch": 12.985507246376812,
"eval_loss": 0.05314180254936218,
"eval_mae": 0.32430657744407654,
"eval_mse": 0.13849547505378723,
"eval_runtime": 631.8234,
"eval_samples_per_second": 0.655,
"eval_steps_per_second": 0.082,
"step": 663
},
{
"epoch": 13.328502415458937,
"grad_norm": 5.586836338043213,
"learning_rate": 2.7810457516339873e-05,
"loss": 0.1028,
"step": 680
},
{
"epoch": 13.985507246376812,
"eval_loss": 0.05296429246664047,
"eval_mae": 0.31508708000183105,
"eval_mse": 0.1316087543964386,
"eval_runtime": 57.8458,
"eval_samples_per_second": 7.157,
"eval_steps_per_second": 0.899,
"step": 714
},
{
"epoch": 14.115942028985508,
"grad_norm": 3.5024545192718506,
"learning_rate": 2.650326797385621e-05,
"loss": 0.0974,
"step": 720
},
{
"epoch": 14.88888888888889,
"grad_norm": 3.7782580852508545,
"learning_rate": 2.519607843137255e-05,
"loss": 0.0891,
"step": 760
},
{
"epoch": 14.985507246376812,
"eval_loss": 0.0540492981672287,
"eval_mae": 0.31779569387435913,
"eval_mse": 0.1337898075580597,
"eval_runtime": 57.6717,
"eval_samples_per_second": 7.179,
"eval_steps_per_second": 0.902,
"step": 765
},
{
"epoch": 15.676328502415458,
"grad_norm": 3.615967035293579,
"learning_rate": 2.3888888888888892e-05,
"loss": 0.0878,
"step": 800
},
{
"epoch": 15.985507246376812,
"eval_loss": 0.05357988178730011,
"eval_mae": 0.3177140951156616,
"eval_mse": 0.13350851833820343,
"eval_runtime": 57.5097,
"eval_samples_per_second": 7.199,
"eval_steps_per_second": 0.904,
"step": 816
},
{
"epoch": 16.463768115942027,
"grad_norm": 9.533724784851074,
"learning_rate": 2.258169934640523e-05,
"loss": 0.077,
"step": 840
},
{
"epoch": 16.985507246376812,
"eval_loss": 0.05338989570736885,
"eval_mae": 0.31321439146995544,
"eval_mse": 0.12988565862178802,
"eval_runtime": 58.1505,
"eval_samples_per_second": 7.119,
"eval_steps_per_second": 0.894,
"step": 867
},
{
"epoch": 17.2512077294686,
"grad_norm": 3.7093381881713867,
"learning_rate": 2.1274509803921568e-05,
"loss": 0.0769,
"step": 880
},
{
"epoch": 17.985507246376812,
"eval_loss": 0.0548611618578434,
"eval_mae": 0.3149418532848358,
"eval_mse": 0.1313086301088333,
"eval_runtime": 56.4832,
"eval_samples_per_second": 7.33,
"eval_steps_per_second": 0.921,
"step": 918
},
{
"epoch": 18.03864734299517,
"grad_norm": 2.9852871894836426,
"learning_rate": 1.996732026143791e-05,
"loss": 0.0717,
"step": 920
},
{
"epoch": 18.81159420289855,
"grad_norm": 3.3752264976501465,
"learning_rate": 1.866013071895425e-05,
"loss": 0.0663,
"step": 960
},
{
"epoch": 18.985507246376812,
"eval_loss": 0.05310577526688576,
"eval_mae": 0.3118866980075836,
"eval_mse": 0.12907913327217102,
"eval_runtime": 58.2255,
"eval_samples_per_second": 7.11,
"eval_steps_per_second": 0.893,
"step": 969
},
{
"epoch": 19.59903381642512,
"grad_norm": 2.9139506816864014,
"learning_rate": 1.735294117647059e-05,
"loss": 0.064,
"step": 1000
},
{
"epoch": 19.985507246376812,
"eval_loss": 0.05400167778134346,
"eval_mae": 0.31967055797576904,
"eval_mse": 0.13520964980125427,
"eval_runtime": 58.0572,
"eval_samples_per_second": 7.131,
"eval_steps_per_second": 0.896,
"step": 1020
},
{
"epoch": 20.386473429951693,
"grad_norm": 3.1011509895324707,
"learning_rate": 1.604575163398693e-05,
"loss": 0.0608,
"step": 1040
},
{
"epoch": 20.985507246376812,
"eval_loss": 0.05348004400730133,
"eval_mae": 0.3179128170013428,
"eval_mse": 0.13336069881916046,
"eval_runtime": 56.8284,
"eval_samples_per_second": 7.285,
"eval_steps_per_second": 0.915,
"step": 1071
},
{
"epoch": 21.17391304347826,
"grad_norm": 2.4269816875457764,
"learning_rate": 1.473856209150327e-05,
"loss": 0.0558,
"step": 1080
},
{
"epoch": 21.946859903381643,
"grad_norm": 2.612093925476074,
"learning_rate": 1.3431372549019607e-05,
"loss": 0.0548,
"step": 1120
},
{
"epoch": 21.985507246376812,
"eval_loss": 0.052902594208717346,
"eval_mae": 0.3134055733680725,
"eval_mse": 0.129911869764328,
"eval_runtime": 57.5407,
"eval_samples_per_second": 7.195,
"eval_steps_per_second": 0.904,
"step": 1122
},
{
"epoch": 22.734299516908212,
"grad_norm": 1.7072349786758423,
"learning_rate": 1.2124183006535949e-05,
"loss": 0.0517,
"step": 1160
},
{
"epoch": 22.985507246376812,
"eval_loss": 0.05338846519589424,
"eval_mae": 0.31519371271133423,
"eval_mse": 0.13099054992198944,
"eval_runtime": 2988.6114,
"eval_samples_per_second": 0.139,
"eval_steps_per_second": 0.017,
"step": 1173
},
{
"epoch": 23.52173913043478,
"grad_norm": 2.942000389099121,
"learning_rate": 1.0816993464052288e-05,
"loss": 0.0498,
"step": 1200
},
{
"epoch": 23.985507246376812,
"eval_loss": 0.05435283109545708,
"eval_mae": 0.31506991386413574,
"eval_mse": 0.13137240707874298,
"eval_runtime": 158.629,
"eval_samples_per_second": 2.61,
"eval_steps_per_second": 0.328,
"step": 1224
},
{
"epoch": 24.309178743961354,
"grad_norm": 1.7872236967086792,
"learning_rate": 9.509803921568628e-06,
"loss": 0.047,
"step": 1240
},
{
"epoch": 24.985507246376812,
"eval_loss": 0.05310087278485298,
"eval_mae": 0.3145076036453247,
"eval_mse": 0.13092052936553955,
"eval_runtime": 59.2601,
"eval_samples_per_second": 6.986,
"eval_steps_per_second": 0.877,
"step": 1275
},
{
"epoch": 25.096618357487923,
"grad_norm": 1.8146392107009888,
"learning_rate": 8.202614379084967e-06,
"loss": 0.0467,
"step": 1280
},
{
"epoch": 25.869565217391305,
"grad_norm": 1.8770432472229004,
"learning_rate": 6.895424836601308e-06,
"loss": 0.0443,
"step": 1320
},
{
"epoch": 25.985507246376812,
"eval_loss": 0.053730811923742294,
"eval_mae": 0.31641700863838196,
"eval_mse": 0.1325235366821289,
"eval_runtime": 8331.3737,
"eval_samples_per_second": 0.05,
"eval_steps_per_second": 0.006,
"step": 1326
},
{
"epoch": 26.657004830917874,
"grad_norm": 2.1211466789245605,
"learning_rate": 5.588235294117647e-06,
"loss": 0.042,
"step": 1360
},
{
"epoch": 26.985507246376812,
"eval_loss": 0.05325399339199066,
"eval_mae": 0.31560125946998596,
"eval_mse": 0.13193772733211517,
"eval_runtime": 3946.2723,
"eval_samples_per_second": 0.105,
"eval_steps_per_second": 0.013,
"step": 1377
},
{
"epoch": 27.444444444444443,
"grad_norm": 1.9497586488723755,
"learning_rate": 4.281045751633987e-06,
"loss": 0.0397,
"step": 1400
},
{
"epoch": 27.985507246376812,
"eval_loss": 0.052952226251363754,
"eval_mae": 0.3155405521392822,
"eval_mse": 0.13170257210731506,
"eval_runtime": 58.8468,
"eval_samples_per_second": 7.035,
"eval_steps_per_second": 0.884,
"step": 1428
},
{
"epoch": 28.231884057971016,
"grad_norm": 5.6321330070495605,
"learning_rate": 2.9738562091503266e-06,
"loss": 0.0411,
"step": 1440
},
{
"epoch": 28.985507246376812,
"eval_loss": 0.05421222001314163,
"eval_mae": 0.31665799021720886,
"eval_mse": 0.13281531631946564,
"eval_runtime": 60.076,
"eval_samples_per_second": 6.891,
"eval_steps_per_second": 0.866,
"step": 1479
},
{
"epoch": 29.019323671497585,
"grad_norm": 1.5062155723571777,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0385,
"step": 1480
},
{
"epoch": 29.792270531400966,
"grad_norm": 3.7781600952148438,
"learning_rate": 3.5947712418300653e-07,
"loss": 0.0382,
"step": 1520
},
{
"epoch": 29.985507246376812,
"eval_loss": 0.05334796383976936,
"eval_mae": 0.31658393144607544,
"eval_mse": 0.13268809020519257,
"eval_runtime": 61.4065,
"eval_samples_per_second": 6.742,
"eval_steps_per_second": 0.847,
"step": 1530
},
{
"epoch": 29.985507246376812,
"step": 1530,
"total_flos": 0.0,
"train_loss": 0.11046674571006126,
"train_runtime": 98364.9673,
"train_samples_per_second": 0.504,
"train_steps_per_second": 0.016
}
],
"logging_steps": 40,
"max_steps": 1530,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}