Qwen2-VL-7B-ChartQAtesting / trainer_state.json
RantiRepo's picture
Upload trainer_state.json with huggingface_hub
fe6bff0 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 10,
"global_step": 178,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 2.375955265760422,
"epoch": 0.11299435028248588,
"grad_norm": 3.953125,
"learning_rate": 0.00019651162790697676,
"loss": 16.610369873046874,
"mean_token_accuracy": 0.10732572241686285,
"num_tokens": 168709.0,
"step": 10
},
{
"epoch": 0.11299435028248588,
"eval_entropy": 2.850843206048012,
"eval_loss": 16.137414932250977,
"eval_mean_token_accuracy": 0.0946836918592453,
"eval_num_tokens": 168709.0,
"eval_runtime": 43.886,
"eval_samples_per_second": 4.375,
"eval_steps_per_second": 1.094,
"step": 10
},
{
"entropy": 4.185183194279671,
"epoch": 0.22598870056497175,
"grad_norm": 14.125,
"learning_rate": 0.00018488372093023256,
"loss": 13.943736267089843,
"mean_token_accuracy": 0.115196983050555,
"num_tokens": 336118.0,
"step": 20
},
{
"epoch": 0.22598870056497175,
"eval_entropy": 6.767949452002843,
"eval_loss": 11.27951717376709,
"eval_mean_token_accuracy": 0.09991752542555332,
"eval_num_tokens": 336118.0,
"eval_runtime": 43.8324,
"eval_samples_per_second": 4.38,
"eval_steps_per_second": 1.095,
"step": 20
},
{
"entropy": 7.635345196723938,
"epoch": 0.3389830508474576,
"grad_norm": 8.9375,
"learning_rate": 0.00017325581395348838,
"loss": 9.085212707519531,
"mean_token_accuracy": 0.11288385493680834,
"num_tokens": 505921.0,
"step": 30
},
{
"epoch": 0.3389830508474576,
"eval_entropy": 7.880531340837479,
"eval_loss": 7.876997470855713,
"eval_mean_token_accuracy": 0.09879284957423806,
"eval_num_tokens": 505921.0,
"eval_runtime": 43.9757,
"eval_samples_per_second": 4.366,
"eval_steps_per_second": 1.092,
"step": 30
},
{
"entropy": 7.543534195423126,
"epoch": 0.4519774011299435,
"grad_norm": 1.40625,
"learning_rate": 0.00016162790697674419,
"loss": 7.40704116821289,
"mean_token_accuracy": 0.11707657705992461,
"num_tokens": 677163.0,
"step": 40
},
{
"epoch": 0.4519774011299435,
"eval_entropy": 7.573027561108272,
"eval_loss": 7.244246006011963,
"eval_mean_token_accuracy": 0.10892315845315655,
"eval_num_tokens": 677163.0,
"eval_runtime": 44.408,
"eval_samples_per_second": 4.324,
"eval_steps_per_second": 1.081,
"step": 40
},
{
"entropy": 7.289045333862305,
"epoch": 0.5649717514124294,
"grad_norm": 1.796875,
"learning_rate": 0.00015000000000000001,
"loss": 7.031863403320313,
"mean_token_accuracy": 0.1295573660172522,
"num_tokens": 848791.0,
"step": 50
},
{
"epoch": 0.5649717514124294,
"eval_entropy": 7.325021078189214,
"eval_loss": 6.95927095413208,
"eval_mean_token_accuracy": 0.13235394159952799,
"eval_num_tokens": 848791.0,
"eval_runtime": 43.8325,
"eval_samples_per_second": 4.38,
"eval_steps_per_second": 1.095,
"step": 50
},
{
"entropy": 6.9638889849185945,
"epoch": 0.6779661016949152,
"grad_norm": 1.2578125,
"learning_rate": 0.00013837209302325582,
"loss": 6.742725372314453,
"mean_token_accuracy": 0.1696016845293343,
"num_tokens": 1018925.0,
"step": 60
},
{
"epoch": 0.6779661016949152,
"eval_entropy": 7.055461843808492,
"eval_loss": 6.720689296722412,
"eval_mean_token_accuracy": 0.16983537826066217,
"eval_num_tokens": 1018925.0,
"eval_runtime": 43.7205,
"eval_samples_per_second": 4.392,
"eval_steps_per_second": 1.098,
"step": 60
},
{
"entropy": 6.680863696336746,
"epoch": 0.7909604519774012,
"grad_norm": 1.34375,
"learning_rate": 0.00012674418604651164,
"loss": 6.45898666381836,
"mean_token_accuracy": 0.21339080817997455,
"num_tokens": 1181787.0,
"step": 70
},
{
"epoch": 0.7909604519774012,
"eval_entropy": 6.870167553424835,
"eval_loss": 6.5776286125183105,
"eval_mean_token_accuracy": 0.19155203737318516,
"eval_num_tokens": 1181787.0,
"eval_runtime": 43.9254,
"eval_samples_per_second": 4.371,
"eval_steps_per_second": 1.093,
"step": 70
},
{
"entropy": 6.659492689371109,
"epoch": 0.903954802259887,
"grad_norm": 0.82421875,
"learning_rate": 0.00011511627906976746,
"loss": 6.499990081787109,
"mean_token_accuracy": 0.21302505303174257,
"num_tokens": 1360714.0,
"step": 80
},
{
"epoch": 0.903954802259887,
"eval_entropy": 6.760685175657272,
"eval_loss": 6.4918060302734375,
"eval_mean_token_accuracy": 0.20070527338733277,
"eval_num_tokens": 1360714.0,
"eval_runtime": 44.0105,
"eval_samples_per_second": 4.363,
"eval_steps_per_second": 1.091,
"step": 80
},
{
"entropy": 6.3696688413619995,
"epoch": 1.0112994350282485,
"grad_norm": 0.64453125,
"learning_rate": 0.00010348837209302327,
"loss": 6.255178451538086,
"mean_token_accuracy": 0.24344109077202647,
"num_tokens": 1511764.0,
"step": 90
},
{
"epoch": 1.0112994350282485,
"eval_entropy": 6.723124821980794,
"eval_loss": 6.457315921783447,
"eval_mean_token_accuracy": 0.20087979889164367,
"eval_num_tokens": 1511764.0,
"eval_runtime": 43.8306,
"eval_samples_per_second": 4.381,
"eval_steps_per_second": 1.095,
"step": 90
},
{
"entropy": 6.43809232711792,
"epoch": 1.1242937853107344,
"grad_norm": 0.55859375,
"learning_rate": 9.186046511627907e-05,
"loss": 6.314236068725586,
"mean_token_accuracy": 0.23132331417873503,
"num_tokens": 1682798.0,
"step": 100
},
{
"epoch": 1.1242937853107344,
"eval_entropy": 6.687405467033386,
"eval_loss": 6.43255090713501,
"eval_mean_token_accuracy": 0.20094856123129526,
"eval_num_tokens": 1682798.0,
"eval_runtime": 44.0723,
"eval_samples_per_second": 4.356,
"eval_steps_per_second": 1.089,
"step": 100
},
{
"entropy": 6.333824092149735,
"epoch": 1.2372881355932204,
"grad_norm": 0.79296875,
"learning_rate": 8.023255813953489e-05,
"loss": 6.235330963134766,
"mean_token_accuracy": 0.2396117802709341,
"num_tokens": 1846342.0,
"step": 110
},
{
"epoch": 1.2372881355932204,
"eval_entropy": 6.654318938652675,
"eval_loss": 6.4151153564453125,
"eval_mean_token_accuracy": 0.20105128269642591,
"eval_num_tokens": 1846342.0,
"eval_runtime": 44.0421,
"eval_samples_per_second": 4.359,
"eval_steps_per_second": 1.09,
"step": 110
},
{
"entropy": 6.359769129753113,
"epoch": 1.3502824858757063,
"grad_norm": 0.79296875,
"learning_rate": 6.86046511627907e-05,
"loss": 6.299611282348633,
"mean_token_accuracy": 0.23366298619657755,
"num_tokens": 2018599.0,
"step": 120
},
{
"epoch": 1.3502824858757063,
"eval_entropy": 6.63616219162941,
"eval_loss": 6.399180889129639,
"eval_mean_token_accuracy": 0.20099820289760828,
"eval_num_tokens": 2018599.0,
"eval_runtime": 44.0128,
"eval_samples_per_second": 4.362,
"eval_steps_per_second": 1.091,
"step": 120
},
{
"entropy": 6.290887945890427,
"epoch": 1.463276836158192,
"grad_norm": 0.7265625,
"learning_rate": 5.697674418604652e-05,
"loss": 6.218046951293945,
"mean_token_accuracy": 0.24080509012565016,
"num_tokens": 2183142.0,
"step": 130
},
{
"epoch": 1.463276836158192,
"eval_entropy": 6.626457552115123,
"eval_loss": 6.388693332672119,
"eval_mean_token_accuracy": 0.20111992427458367,
"eval_num_tokens": 2183142.0,
"eval_runtime": 43.8362,
"eval_samples_per_second": 4.38,
"eval_steps_per_second": 1.095,
"step": 130
},
{
"entropy": 6.49563906788826,
"epoch": 1.576271186440678,
"grad_norm": 0.6015625,
"learning_rate": 4.5348837209302326e-05,
"loss": 6.376762771606446,
"mean_token_accuracy": 0.21492539951577783,
"num_tokens": 2364034.0,
"step": 140
},
{
"epoch": 1.576271186440678,
"eval_entropy": 6.611844847599666,
"eval_loss": 6.380344390869141,
"eval_mean_token_accuracy": 0.20116183906793594,
"eval_num_tokens": 2364034.0,
"eval_runtime": 44.3142,
"eval_samples_per_second": 4.333,
"eval_steps_per_second": 1.083,
"step": 140
},
{
"entropy": 6.287807840108871,
"epoch": 1.689265536723164,
"grad_norm": 0.26171875,
"learning_rate": 3.372093023255814e-05,
"loss": 6.183982086181641,
"mean_token_accuracy": 0.2382544383406639,
"num_tokens": 2527248.0,
"step": 150
},
{
"epoch": 1.689265536723164,
"eval_entropy": 6.602698942025502,
"eval_loss": 6.374426364898682,
"eval_mean_token_accuracy": 0.20121282618492842,
"eval_num_tokens": 2527248.0,
"eval_runtime": 43.9903,
"eval_samples_per_second": 4.365,
"eval_steps_per_second": 1.091,
"step": 150
},
{
"entropy": 6.482830649614334,
"epoch": 1.8022598870056497,
"grad_norm": 0.37890625,
"learning_rate": 2.2093023255813955e-05,
"loss": 6.326276779174805,
"mean_token_accuracy": 0.2152696281671524,
"num_tokens": 2704860.0,
"step": 160
},
{
"epoch": 1.8022598870056497,
"eval_entropy": 6.604644636313121,
"eval_loss": 6.371755123138428,
"eval_mean_token_accuracy": 0.20118677647163472,
"eval_num_tokens": 2704860.0,
"eval_runtime": 44.2676,
"eval_samples_per_second": 4.337,
"eval_steps_per_second": 1.084,
"step": 160
},
{
"entropy": 6.240383183956146,
"epoch": 1.9152542372881356,
"grad_norm": 0.34765625,
"learning_rate": 1.0465116279069768e-05,
"loss": 6.1943107604980465,
"mean_token_accuracy": 0.2432584844529629,
"num_tokens": 2869517.0,
"step": 170
},
{
"epoch": 1.9152542372881356,
"eval_entropy": 6.599712918202083,
"eval_loss": 6.369909763336182,
"eval_mean_token_accuracy": 0.20116472554703554,
"eval_num_tokens": 2869517.0,
"eval_runtime": 43.9021,
"eval_samples_per_second": 4.373,
"eval_steps_per_second": 1.093,
"step": 170
}
],
"logging_steps": 10,
"max_steps": 178,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.1389497039169126e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}