{
"best_global_step": 220,
"best_metric": 0.010845971293747425,
"best_model_checkpoint": "/teamspace/studios/this_studio/DATN/output/medgemma_finetuned/checkpoint-220",
"epoch": 1.0116731517509727,
"eval_steps": 10,
"global_step": 260,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019455252918287938,
"grad_norm": 3.7302613258361816,
"learning_rate": 1.777777777777778e-06,
"loss": 0.8239,
"step": 5
},
{
"epoch": 0.038910505836575876,
"grad_norm": 3.058382034301758,
"learning_rate": 4e-06,
"loss": 0.7964,
"step": 10
},
{
"epoch": 0.038910505836575876,
"eval_loss": 0.7571244239807129,
"eval_runtime": 152.7626,
"eval_samples_per_second": 3.339,
"eval_steps_per_second": 0.838,
"step": 10
},
{
"epoch": 0.058365758754863814,
"grad_norm": 1.8304738998413086,
"learning_rate": 6.222222222222222e-06,
"loss": 0.6943,
"step": 15
},
{
"epoch": 0.07782101167315175,
"grad_norm": 1.3162634372711182,
"learning_rate": 8.444444444444446e-06,
"loss": 0.5569,
"step": 20
},
{
"epoch": 0.07782101167315175,
"eval_loss": 0.5357815623283386,
"eval_runtime": 136.4123,
"eval_samples_per_second": 3.739,
"eval_steps_per_second": 0.938,
"step": 20
},
{
"epoch": 0.09727626459143969,
"grad_norm": 1.1865513324737549,
"learning_rate": 1.0666666666666667e-05,
"loss": 0.4222,
"step": 25
},
{
"epoch": 0.11673151750972763,
"grad_norm": 1.2043355703353882,
"learning_rate": 1.2888888888888889e-05,
"loss": 0.2831,
"step": 30
},
{
"epoch": 0.11673151750972763,
"eval_loss": 0.28264933824539185,
"eval_runtime": 136.5183,
"eval_samples_per_second": 3.736,
"eval_steps_per_second": 0.938,
"step": 30
},
{
"epoch": 0.13618677042801555,
"grad_norm": 0.9503483772277832,
"learning_rate": 1.511111111111111e-05,
"loss": 0.1405,
"step": 35
},
{
"epoch": 0.1556420233463035,
"grad_norm": 0.34179583191871643,
"learning_rate": 1.7333333333333332e-05,
"loss": 0.0515,
"step": 40
},
{
"epoch": 0.1556420233463035,
"eval_loss": 0.05298379808664322,
"eval_runtime": 136.391,
"eval_samples_per_second": 3.739,
"eval_steps_per_second": 0.938,
"step": 40
},
{
"epoch": 0.17509727626459143,
"grad_norm": 0.24818392097949982,
"learning_rate": 1.9555555555555557e-05,
"loss": 0.0302,
"step": 45
},
{
"epoch": 0.19455252918287938,
"grad_norm": 0.19131316244602203,
"learning_rate": 2.177777777777778e-05,
"loss": 0.0221,
"step": 50
},
{
"epoch": 0.19455252918287938,
"eval_loss": 0.024967821314930916,
"eval_runtime": 136.5245,
"eval_samples_per_second": 3.736,
"eval_steps_per_second": 0.938,
"step": 50
},
{
"epoch": 0.2140077821011673,
"grad_norm": 2.454702377319336,
"learning_rate": 2.4e-05,
"loss": 0.0169,
"step": 55
},
{
"epoch": 0.23346303501945526,
"grad_norm": 0.12364993244409561,
"learning_rate": 2.6222222222222226e-05,
"loss": 0.0164,
"step": 60
},
{
"epoch": 0.23346303501945526,
"eval_loss": 0.021661706268787384,
"eval_runtime": 136.8436,
"eval_samples_per_second": 3.727,
"eval_steps_per_second": 0.935,
"step": 60
},
{
"epoch": 0.2529182879377432,
"grad_norm": 0.14085163176059723,
"learning_rate": 2.8444444444444447e-05,
"loss": 0.0131,
"step": 65
},
{
"epoch": 0.2723735408560311,
"grad_norm": 0.15322668850421906,
"learning_rate": 3.066666666666666e-05,
"loss": 0.0089,
"step": 70
},
{
"epoch": 0.2723735408560311,
"eval_loss": 0.016315119341015816,
"eval_runtime": 136.2077,
"eval_samples_per_second": 3.744,
"eval_steps_per_second": 0.94,
"step": 70
},
{
"epoch": 0.2918287937743191,
"grad_norm": 0.08343034237623215,
"learning_rate": 3.288888888888889e-05,
"loss": 0.0076,
"step": 75
},
{
"epoch": 0.311284046692607,
"grad_norm": 0.11078440397977829,
"learning_rate": 3.511111111111111e-05,
"loss": 0.008,
"step": 80
},
{
"epoch": 0.311284046692607,
"eval_loss": 0.015718888491392136,
"eval_runtime": 136.7712,
"eval_samples_per_second": 3.729,
"eval_steps_per_second": 0.936,
"step": 80
},
{
"epoch": 0.33073929961089493,
"grad_norm": 0.08361168950796127,
"learning_rate": 3.733333333333334e-05,
"loss": 0.008,
"step": 85
},
{
"epoch": 0.35019455252918286,
"grad_norm": 0.06539439409971237,
"learning_rate": 3.9555555555555556e-05,
"loss": 0.0083,
"step": 90
},
{
"epoch": 0.35019455252918286,
"eval_loss": 0.015910081565380096,
"eval_runtime": 136.5187,
"eval_samples_per_second": 3.736,
"eval_steps_per_second": 0.938,
"step": 90
},
{
"epoch": 0.36964980544747084,
"grad_norm": 0.14973388612270355,
"learning_rate": 4.177777777777778e-05,
"loss": 0.0085,
"step": 95
},
{
"epoch": 0.38910505836575876,
"grad_norm": 0.08519362658262253,
"learning_rate": 4.4e-05,
"loss": 0.0077,
"step": 100
},
{
"epoch": 0.38910505836575876,
"eval_loss": 0.01615685597062111,
"eval_runtime": 136.4452,
"eval_samples_per_second": 3.738,
"eval_steps_per_second": 0.938,
"step": 100
},
{
"epoch": 0.4085603112840467,
"grad_norm": 0.05565109848976135,
"learning_rate": 4.6222222222222224e-05,
"loss": 0.0085,
"step": 105
},
{
"epoch": 0.4280155642023346,
"grad_norm": 0.07286959886550903,
"learning_rate": 4.844444444444445e-05,
"loss": 0.0082,
"step": 110
},
{
"epoch": 0.4280155642023346,
"eval_loss": 0.015698084607720375,
"eval_runtime": 136.3832,
"eval_samples_per_second": 3.739,
"eval_steps_per_second": 0.939,
"step": 110
},
{
"epoch": 0.4474708171206226,
"grad_norm": 0.13329896330833435,
"learning_rate": 5.066666666666667e-05,
"loss": 0.0085,
"step": 115
},
{
"epoch": 0.4669260700389105,
"grad_norm": 0.04628467932343483,
"learning_rate": 5.288888888888889e-05,
"loss": 0.0075,
"step": 120
},
{
"epoch": 0.4669260700389105,
"eval_loss": 0.015623296611011028,
"eval_runtime": 136.0422,
"eval_samples_per_second": 3.749,
"eval_steps_per_second": 0.941,
"step": 120
},
{
"epoch": 0.48638132295719844,
"grad_norm": 0.058520544320344925,
"learning_rate": 5.511111111111111e-05,
"loss": 0.0079,
"step": 125
},
{
"epoch": 0.5058365758754864,
"grad_norm": 0.06411632895469666,
"learning_rate": 5.7333333333333336e-05,
"loss": 0.0076,
"step": 130
},
{
"epoch": 0.5058365758754864,
"eval_loss": 0.014402530156075954,
"eval_runtime": 135.5623,
"eval_samples_per_second": 3.762,
"eval_steps_per_second": 0.944,
"step": 130
},
{
"epoch": 0.5252918287937743,
"grad_norm": 0.044081032276153564,
"learning_rate": 5.9555555555555554e-05,
"loss": 0.0072,
"step": 135
},
{
"epoch": 0.5447470817120622,
"grad_norm": 0.04867592826485634,
"learning_rate": 6.177777777777779e-05,
"loss": 0.0077,
"step": 140
},
{
"epoch": 0.5447470817120622,
"eval_loss": 0.012970623560249805,
"eval_runtime": 137.0137,
"eval_samples_per_second": 3.722,
"eval_steps_per_second": 0.934,
"step": 140
},
{
"epoch": 0.5642023346303502,
"grad_norm": 0.044633813202381134,
"learning_rate": 6.4e-05,
"loss": 0.0077,
"step": 145
},
{
"epoch": 0.5836575875486382,
"grad_norm": 0.052950419485569,
"learning_rate": 6.622222222222222e-05,
"loss": 0.008,
"step": 150
},
{
"epoch": 0.5836575875486382,
"eval_loss": 0.012441293336451054,
"eval_runtime": 136.133,
"eval_samples_per_second": 3.746,
"eval_steps_per_second": 0.94,
"step": 150
},
{
"epoch": 0.603112840466926,
"grad_norm": 0.039904553443193436,
"learning_rate": 6.844444444444445e-05,
"loss": 0.0078,
"step": 155
},
{
"epoch": 0.622568093385214,
"grad_norm": 0.05680263414978981,
"learning_rate": 7.066666666666667e-05,
"loss": 0.0074,
"step": 160
},
{
"epoch": 0.622568093385214,
"eval_loss": 0.01192025002092123,
"eval_runtime": 136.5582,
"eval_samples_per_second": 3.735,
"eval_steps_per_second": 0.937,
"step": 160
},
{
"epoch": 0.642023346303502,
"grad_norm": 0.05537933111190796,
"learning_rate": 7.288888888888888e-05,
"loss": 0.0076,
"step": 165
},
{
"epoch": 0.6614785992217899,
"grad_norm": 0.04935755953192711,
"learning_rate": 7.511111111111111e-05,
"loss": 0.0077,
"step": 170
},
{
"epoch": 0.6614785992217899,
"eval_loss": 0.012302271090447903,
"eval_runtime": 136.6757,
"eval_samples_per_second": 3.731,
"eval_steps_per_second": 0.937,
"step": 170
},
{
"epoch": 0.6809338521400778,
"grad_norm": 0.05575108528137207,
"learning_rate": 7.733333333333333e-05,
"loss": 0.0081,
"step": 175
},
{
"epoch": 0.7003891050583657,
"grad_norm": 0.0551481656730175,
"learning_rate": 7.955555555555556e-05,
"loss": 0.0081,
"step": 180
},
{
"epoch": 0.7003891050583657,
"eval_loss": 0.011542496271431446,
"eval_runtime": 136.4051,
"eval_samples_per_second": 3.739,
"eval_steps_per_second": 0.938,
"step": 180
},
{
"epoch": 0.7198443579766537,
"grad_norm": 0.04738597571849823,
"learning_rate": 8.177777777777778e-05,
"loss": 0.0076,
"step": 185
},
{
"epoch": 0.7392996108949417,
"grad_norm": 0.029748599976301193,
"learning_rate": 8.4e-05,
"loss": 0.0073,
"step": 190
},
{
"epoch": 0.7392996108949417,
"eval_loss": 0.012059729546308517,
"eval_runtime": 135.8659,
"eval_samples_per_second": 3.754,
"eval_steps_per_second": 0.942,
"step": 190
},
{
"epoch": 0.7587548638132295,
"grad_norm": 0.03995237499475479,
"learning_rate": 8.622222222222223e-05,
"loss": 0.0077,
"step": 195
},
{
"epoch": 0.7782101167315175,
"grad_norm": 0.02774854749441147,
"learning_rate": 8.844444444444445e-05,
"loss": 0.0075,
"step": 200
},
{
"epoch": 0.7782101167315175,
"eval_loss": 0.011773883365094662,
"eval_runtime": 136.231,
"eval_samples_per_second": 3.744,
"eval_steps_per_second": 0.94,
"step": 200
},
{
"epoch": 0.7976653696498055,
"grad_norm": 0.026570243760943413,
"learning_rate": 9.066666666666667e-05,
"loss": 0.0072,
"step": 205
},
{
"epoch": 0.8171206225680934,
"grad_norm": 0.047289494425058365,
"learning_rate": 9.288888888888888e-05,
"loss": 0.0074,
"step": 210
},
{
"epoch": 0.8171206225680934,
"eval_loss": 0.011569861322641373,
"eval_runtime": 136.4761,
"eval_samples_per_second": 3.737,
"eval_steps_per_second": 0.938,
"step": 210
},
{
"epoch": 0.8365758754863813,
"grad_norm": 0.036366503685712814,
"learning_rate": 9.511111111111112e-05,
"loss": 0.007,
"step": 215
},
{
"epoch": 0.8560311284046692,
"grad_norm": 0.07178617268800735,
"learning_rate": 9.733333333333333e-05,
"loss": 0.0073,
"step": 220
},
{
"epoch": 0.8560311284046692,
"eval_loss": 0.010845971293747425,
"eval_runtime": 136.7794,
"eval_samples_per_second": 3.729,
"eval_steps_per_second": 0.936,
"step": 220
},
{
"epoch": 0.8754863813229572,
"grad_norm": 0.042044900357723236,
"learning_rate": 9.955555555555556e-05,
"loss": 0.0069,
"step": 225
},
{
"epoch": 0.8949416342412452,
"grad_norm": 0.18266713619232178,
"learning_rate": 0.00010177777777777777,
"loss": 0.007,
"step": 230
},
{
"epoch": 0.8949416342412452,
"eval_loss": 0.012411113828420639,
"eval_runtime": 136.7046,
"eval_samples_per_second": 3.731,
"eval_steps_per_second": 0.936,
"step": 230
},
{
"epoch": 0.914396887159533,
"grad_norm": 0.04458538442850113,
"learning_rate": 0.00010400000000000001,
"loss": 0.0077,
"step": 235
},
{
"epoch": 0.933852140077821,
"grad_norm": 0.04118124023079872,
"learning_rate": 0.00010622222222222222,
"loss": 0.0071,
"step": 240
},
{
"epoch": 0.933852140077821,
"eval_loss": 0.016743971034884453,
"eval_runtime": 136.6604,
"eval_samples_per_second": 3.732,
"eval_steps_per_second": 0.937,
"step": 240
},
{
"epoch": 0.953307392996109,
"grad_norm": 0.042440321296453476,
"learning_rate": 0.00010844444444444444,
"loss": 0.0071,
"step": 245
},
{
"epoch": 0.9727626459143969,
"grad_norm": 0.03553004935383797,
"learning_rate": 0.00011066666666666668,
"loss": 0.0073,
"step": 250
},
{
"epoch": 0.9727626459143969,
"eval_loss": 0.022762756794691086,
"eval_runtime": 136.4551,
"eval_samples_per_second": 3.737,
"eval_steps_per_second": 0.938,
"step": 250
},
{
"epoch": 0.9922178988326849,
"grad_norm": 0.06393753737211227,
"learning_rate": 0.0001128888888888889,
"loss": 0.0076,
"step": 255
},
{
"epoch": 1.0116731517509727,
"grad_norm": 0.030655624344944954,
"learning_rate": 0.00011511111111111112,
"loss": 0.0073,
"step": 260
},
{
"epoch": 1.0116731517509727,
"eval_loss": 0.024547625333070755,
"eval_runtime": 136.5694,
"eval_samples_per_second": 3.734,
"eval_steps_per_second": 0.937,
"step": 260
}
],
"logging_steps": 5,
"max_steps": 1799,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 10,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 30,
"early_stopping_threshold": 0.001
},
"attributes": {
"early_stopping_patience_counter": 12
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.535511208042892e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}