vit_focus / trainer_state.json
JulesGo's picture
Fin de l'entraînement
ce955af verified
raw
history blame
15.1 kB
{
"best_global_step": 58,
"best_metric": 0.16954755783081055,
"best_model_checkpoint": "./vit_focus/checkpoint-58",
"epoch": 50.0,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 0.12364959716796875,
"eval_mae": 0.41142117977142334,
"eval_mse": 0.2097923755645752,
"eval_runtime": 1.3273,
"eval_samples_per_second": 7.534,
"eval_steps_per_second": 1.507,
"step": 2
},
{
"epoch": 2.0,
"eval_loss": 0.1196303591132164,
"eval_mae": 0.40995553135871887,
"eval_mse": 0.2083383947610855,
"eval_runtime": 1.2492,
"eval_samples_per_second": 8.005,
"eval_steps_per_second": 1.601,
"step": 4
},
{
"epoch": 3.0,
"eval_loss": 0.11328934133052826,
"eval_mae": 0.4075067639350891,
"eval_mse": 0.20581738650798798,
"eval_runtime": 1.1974,
"eval_samples_per_second": 8.352,
"eval_steps_per_second": 1.67,
"step": 6
},
{
"epoch": 4.0,
"eval_loss": 0.10604934394359589,
"eval_mae": 0.4042162597179413,
"eval_mse": 0.20277394354343414,
"eval_runtime": 1.202,
"eval_samples_per_second": 8.32,
"eval_steps_per_second": 1.664,
"step": 8
},
{
"epoch": 5.0,
"eval_loss": 0.10327418148517609,
"eval_mae": 0.4024171531200409,
"eval_mse": 0.20201840996742249,
"eval_runtime": 1.3089,
"eval_samples_per_second": 7.64,
"eval_steps_per_second": 1.528,
"step": 10
},
{
"epoch": 6.0,
"eval_loss": 0.09384196996688843,
"eval_mae": 0.3968161642551422,
"eval_mse": 0.19719576835632324,
"eval_runtime": 1.2505,
"eval_samples_per_second": 7.997,
"eval_steps_per_second": 1.599,
"step": 12
},
{
"epoch": 7.0,
"eval_loss": 0.10823704302310944,
"eval_mae": 0.40178966522216797,
"eval_mse": 0.2021051049232483,
"eval_runtime": 1.2484,
"eval_samples_per_second": 8.01,
"eval_steps_per_second": 1.602,
"step": 14
},
{
"epoch": 8.0,
"eval_loss": 0.0798194482922554,
"eval_mae": 0.3889862298965454,
"eval_mse": 0.18965376913547516,
"eval_runtime": 1.2049,
"eval_samples_per_second": 8.3,
"eval_steps_per_second": 1.66,
"step": 16
},
{
"epoch": 9.0,
"eval_loss": 0.10108314454555511,
"eval_mae": 0.39635446667671204,
"eval_mse": 0.19831416010856628,
"eval_runtime": 1.2689,
"eval_samples_per_second": 7.881,
"eval_steps_per_second": 1.576,
"step": 18
},
{
"epoch": 10.0,
"eval_loss": 0.08742986619472504,
"eval_mae": 0.3887980282306671,
"eval_mse": 0.19129884243011475,
"eval_runtime": 1.2793,
"eval_samples_per_second": 7.817,
"eval_steps_per_second": 1.563,
"step": 20
},
{
"epoch": 11.0,
"eval_loss": 0.06867878139019012,
"eval_mae": 0.3799101710319519,
"eval_mse": 0.18114082515239716,
"eval_runtime": 1.3093,
"eval_samples_per_second": 7.637,
"eval_steps_per_second": 1.527,
"step": 22
},
{
"epoch": 12.0,
"eval_loss": 0.08115904778242111,
"eval_mae": 0.38764962553977966,
"eval_mse": 0.18922746181488037,
"eval_runtime": 1.1917,
"eval_samples_per_second": 8.392,
"eval_steps_per_second": 1.678,
"step": 24
},
{
"epoch": 13.0,
"eval_loss": 0.07042384147644043,
"eval_mae": 0.3812001347541809,
"eval_mse": 0.1831439584493637,
"eval_runtime": 1.2731,
"eval_samples_per_second": 7.855,
"eval_steps_per_second": 1.571,
"step": 26
},
{
"epoch": 14.0,
"eval_loss": 0.057908277958631516,
"eval_mae": 0.37130409479141235,
"eval_mse": 0.17441079020500183,
"eval_runtime": 3.0777,
"eval_samples_per_second": 3.249,
"eval_steps_per_second": 0.65,
"step": 28
},
{
"epoch": 15.0,
"eval_loss": 0.06775574386119843,
"eval_mae": 0.37615475058555603,
"eval_mse": 0.1790776401758194,
"eval_runtime": 1.2719,
"eval_samples_per_second": 7.862,
"eval_steps_per_second": 1.572,
"step": 30
},
{
"epoch": 16.0,
"eval_loss": 0.08812335133552551,
"eval_mae": 0.38629305362701416,
"eval_mse": 0.1883399784564972,
"eval_runtime": 1.6525,
"eval_samples_per_second": 6.051,
"eval_steps_per_second": 1.21,
"step": 32
},
{
"epoch": 17.0,
"eval_loss": 0.09601505100727081,
"eval_mae": 0.38798943161964417,
"eval_mse": 0.18960143625736237,
"eval_runtime": 1.4237,
"eval_samples_per_second": 7.024,
"eval_steps_per_second": 1.405,
"step": 34
},
{
"epoch": 18.0,
"eval_loss": 0.0696040540933609,
"eval_mae": 0.3752928674221039,
"eval_mse": 0.17762857675552368,
"eval_runtime": 1.2854,
"eval_samples_per_second": 7.78,
"eval_steps_per_second": 1.556,
"step": 36
},
{
"epoch": 19.0,
"eval_loss": 0.057551633566617966,
"eval_mae": 0.3691748082637787,
"eval_mse": 0.1715679168701172,
"eval_runtime": 1.4944,
"eval_samples_per_second": 6.692,
"eval_steps_per_second": 1.338,
"step": 38
},
{
"epoch": 20.0,
"eval_loss": 0.05853904038667679,
"eval_mae": 0.3697223663330078,
"eval_mse": 0.17197829484939575,
"eval_runtime": 1.2655,
"eval_samples_per_second": 7.902,
"eval_steps_per_second": 1.58,
"step": 40
},
{
"epoch": 21.0,
"eval_loss": 0.07102437317371368,
"eval_mae": 0.37734654545783997,
"eval_mse": 0.1792123019695282,
"eval_runtime": 1.923,
"eval_samples_per_second": 5.2,
"eval_steps_per_second": 1.04,
"step": 42
},
{
"epoch": 22.0,
"eval_loss": 0.08145803213119507,
"eval_mae": 0.38286036252975464,
"eval_mse": 0.1842835247516632,
"eval_runtime": 1.1671,
"eval_samples_per_second": 8.568,
"eval_steps_per_second": 1.714,
"step": 44
},
{
"epoch": 23.0,
"eval_loss": 0.06858658790588379,
"eval_mae": 0.3709622919559479,
"eval_mse": 0.17373624444007874,
"eval_runtime": 1.3253,
"eval_samples_per_second": 7.545,
"eval_steps_per_second": 1.509,
"step": 46
},
{
"epoch": 24.0,
"eval_loss": 0.06739358603954315,
"eval_mae": 0.37041887640953064,
"eval_mse": 0.173396497964859,
"eval_runtime": 3.1433,
"eval_samples_per_second": 3.181,
"eval_steps_per_second": 0.636,
"step": 48
},
{
"epoch": 25.0,
"grad_norm": 0.9524662494659424,
"learning_rate": 1.53e-05,
"loss": 0.118,
"step": 50
},
{
"epoch": 25.0,
"eval_loss": 0.07070203125476837,
"eval_mae": 0.3756571412086487,
"eval_mse": 0.17759118974208832,
"eval_runtime": 1.2345,
"eval_samples_per_second": 8.1,
"eval_steps_per_second": 1.62,
"step": 50
},
{
"epoch": 26.0,
"eval_loss": 0.07530557364225388,
"eval_mae": 0.38040465116500854,
"eval_mse": 0.1816762387752533,
"eval_runtime": 1.7054,
"eval_samples_per_second": 5.864,
"eval_steps_per_second": 1.173,
"step": 52
},
{
"epoch": 27.0,
"eval_loss": 0.0708126351237297,
"eval_mae": 0.37711387872695923,
"eval_mse": 0.17874492704868317,
"eval_runtime": 1.1644,
"eval_samples_per_second": 8.588,
"eval_steps_per_second": 1.718,
"step": 54
},
{
"epoch": 28.0,
"eval_loss": 0.06371479481458664,
"eval_mae": 0.3699105381965637,
"eval_mse": 0.1721055954694748,
"eval_runtime": 1.5225,
"eval_samples_per_second": 6.568,
"eval_steps_per_second": 1.314,
"step": 56
},
{
"epoch": 29.0,
"eval_loss": 0.05992668867111206,
"eval_mae": 0.3671800494194031,
"eval_mse": 0.16954755783081055,
"eval_runtime": 1.3349,
"eval_samples_per_second": 7.491,
"eval_steps_per_second": 1.498,
"step": 58
},
{
"epoch": 30.0,
"eval_loss": 0.06269104778766632,
"eval_mae": 0.3710530698299408,
"eval_mse": 0.17309635877609253,
"eval_runtime": 1.2258,
"eval_samples_per_second": 8.158,
"eval_steps_per_second": 1.632,
"step": 60
},
{
"epoch": 31.0,
"eval_loss": 0.06929778307676315,
"eval_mae": 0.37701377272605896,
"eval_mse": 0.1785697340965271,
"eval_runtime": 1.4115,
"eval_samples_per_second": 7.085,
"eval_steps_per_second": 1.417,
"step": 62
},
{
"epoch": 32.0,
"eval_loss": 0.07379092276096344,
"eval_mae": 0.37811335921287537,
"eval_mse": 0.17990507185459137,
"eval_runtime": 1.2044,
"eval_samples_per_second": 8.303,
"eval_steps_per_second": 1.661,
"step": 64
},
{
"epoch": 33.0,
"eval_loss": 0.07304650545120239,
"eval_mae": 0.3751561939716339,
"eval_mse": 0.17731547355651855,
"eval_runtime": 3.3496,
"eval_samples_per_second": 2.985,
"eval_steps_per_second": 0.597,
"step": 66
},
{
"epoch": 34.0,
"eval_loss": 0.06837408244609833,
"eval_mae": 0.3710552453994751,
"eval_mse": 0.17353828251361847,
"eval_runtime": 1.4629,
"eval_samples_per_second": 6.836,
"eval_steps_per_second": 1.367,
"step": 68
},
{
"epoch": 35.0,
"eval_loss": 0.06416038423776627,
"eval_mae": 0.3673146069049835,
"eval_mse": 0.17018868029117584,
"eval_runtime": 1.31,
"eval_samples_per_second": 7.634,
"eval_steps_per_second": 1.527,
"step": 70
},
{
"epoch": 36.0,
"eval_loss": 0.06411489099264145,
"eval_mae": 0.36943870782852173,
"eval_mse": 0.1721121072769165,
"eval_runtime": 1.5048,
"eval_samples_per_second": 6.646,
"eval_steps_per_second": 1.329,
"step": 72
},
{
"epoch": 37.0,
"eval_loss": 0.06872588396072388,
"eval_mae": 0.373674213886261,
"eval_mse": 0.17579391598701477,
"eval_runtime": 1.4802,
"eval_samples_per_second": 6.756,
"eval_steps_per_second": 1.351,
"step": 74
},
{
"epoch": 38.0,
"eval_loss": 0.07391238957643509,
"eval_mae": 0.3772023916244507,
"eval_mse": 0.17880253493785858,
"eval_runtime": 1.3549,
"eval_samples_per_second": 7.381,
"eval_steps_per_second": 1.476,
"step": 76
},
{
"epoch": 39.0,
"eval_loss": 0.07059959322214127,
"eval_mae": 0.3748144209384918,
"eval_mse": 0.1765395551919937,
"eval_runtime": 1.3015,
"eval_samples_per_second": 7.683,
"eval_steps_per_second": 1.537,
"step": 78
},
{
"epoch": 40.0,
"eval_loss": 0.06648191064596176,
"eval_mae": 0.37035736441612244,
"eval_mse": 0.17257149517536163,
"eval_runtime": 1.4739,
"eval_samples_per_second": 6.785,
"eval_steps_per_second": 1.357,
"step": 80
},
{
"epoch": 41.0,
"eval_loss": 0.06419553607702255,
"eval_mae": 0.36771345138549805,
"eval_mse": 0.1702636033296585,
"eval_runtime": 1.3959,
"eval_samples_per_second": 7.164,
"eval_steps_per_second": 1.433,
"step": 82
},
{
"epoch": 42.0,
"eval_loss": 0.06590329110622406,
"eval_mae": 0.3694536089897156,
"eval_mse": 0.17185601592063904,
"eval_runtime": 1.3544,
"eval_samples_per_second": 7.383,
"eval_steps_per_second": 1.477,
"step": 84
},
{
"epoch": 43.0,
"eval_loss": 0.06824100762605667,
"eval_mae": 0.3721493184566498,
"eval_mse": 0.1743040680885315,
"eval_runtime": 1.3016,
"eval_samples_per_second": 7.683,
"eval_steps_per_second": 1.537,
"step": 86
},
{
"epoch": 44.0,
"eval_loss": 0.07233113795518875,
"eval_mae": 0.37587830424308777,
"eval_mse": 0.17762817442417145,
"eval_runtime": 1.2282,
"eval_samples_per_second": 8.142,
"eval_steps_per_second": 1.628,
"step": 88
},
{
"epoch": 45.0,
"eval_loss": 0.0730481892824173,
"eval_mae": 0.37610477209091187,
"eval_mse": 0.1778472512960434,
"eval_runtime": 1.2255,
"eval_samples_per_second": 8.16,
"eval_steps_per_second": 1.632,
"step": 90
},
{
"epoch": 46.0,
"eval_loss": 0.07376444339752197,
"eval_mae": 0.37569037079811096,
"eval_mse": 0.17753194272518158,
"eval_runtime": 1.2758,
"eval_samples_per_second": 7.838,
"eval_steps_per_second": 1.568,
"step": 92
},
{
"epoch": 47.0,
"eval_loss": 0.07429193705320358,
"eval_mae": 0.3751032054424286,
"eval_mse": 0.1769881248474121,
"eval_runtime": 1.1953,
"eval_samples_per_second": 8.366,
"eval_steps_per_second": 1.673,
"step": 94
},
{
"epoch": 48.0,
"eval_loss": 0.07430405914783478,
"eval_mae": 0.3747301399707794,
"eval_mse": 0.17660388350486755,
"eval_runtime": 1.1671,
"eval_samples_per_second": 8.568,
"eval_steps_per_second": 1.714,
"step": 96
},
{
"epoch": 49.0,
"eval_loss": 0.07368143647909164,
"eval_mae": 0.37456196546554565,
"eval_mse": 0.17642483115196228,
"eval_runtime": 1.545,
"eval_samples_per_second": 6.472,
"eval_steps_per_second": 1.294,
"step": 98
},
{
"epoch": 50.0,
"grad_norm": 0.7434751987457275,
"learning_rate": 3.0000000000000004e-07,
"loss": 0.0449,
"step": 100
},
{
"epoch": 50.0,
"eval_loss": 0.07292107492685318,
"eval_mae": 0.3741002678871155,
"eval_mse": 0.17599670588970184,
"eval_runtime": 1.2607,
"eval_samples_per_second": 7.932,
"eval_steps_per_second": 1.586,
"step": 100
},
{
"epoch": 50.0,
"step": 100,
"total_flos": 0.0,
"train_loss": 0.08142914772033691,
"train_runtime": 429.9896,
"train_samples_per_second": 9.07,
"train_steps_per_second": 0.233
}
],
"logging_steps": 50,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}