test_gemma_geo / trainer_state.json
paidaixing's picture
上传Qwen2.5-VL-3B-Instruct模型
c051dbc verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9968454258675079,
"eval_steps": 500,
"global_step": 158,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06309148264984227,
"grad_norm": 3.9812393188476562,
"learning_rate": 7.99912273389938e-06,
"loss": 1.7721,
"step": 10
},
{
"epoch": 0.12618296529968454,
"grad_norm": 1.7699296474456787,
"learning_rate": 7.89431561149264e-06,
"loss": 1.5926,
"step": 20
},
{
"epoch": 0.1892744479495268,
"grad_norm": 1.4895917177200317,
"learning_rate": 7.619308209864078e-06,
"loss": 1.4542,
"step": 30
},
{
"epoch": 0.25236593059936907,
"grad_norm": 1.315661907196045,
"learning_rate": 7.1861196720967844e-06,
"loss": 1.3731,
"step": 40
},
{
"epoch": 0.31545741324921134,
"grad_norm": 1.2776089906692505,
"learning_rate": 6.613682415960422e-06,
"loss": 1.3256,
"step": 50
},
{
"epoch": 0.3785488958990536,
"grad_norm": 1.3066082000732422,
"learning_rate": 5.9270146964068605e-06,
"loss": 1.2842,
"step": 60
},
{
"epoch": 0.4416403785488959,
"grad_norm": 1.2110307216644287,
"learning_rate": 5.156127187777886e-06,
"loss": 1.2698,
"step": 70
},
{
"epoch": 0.5047318611987381,
"grad_norm": 1.25718355178833,
"learning_rate": 4.334711373329263e-06,
"loss": 1.2558,
"step": 80
},
{
"epoch": 0.5678233438485805,
"grad_norm": 1.1852293014526367,
"learning_rate": 3.498667065742782e-06,
"loss": 1.2268,
"step": 90
},
{
"epoch": 0.6309148264984227,
"grad_norm": 1.1762120723724365,
"learning_rate": 2.6845334130456668e-06,
"loss": 1.2238,
"step": 100
},
{
"epoch": 0.694006309148265,
"grad_norm": 1.1798163652420044,
"learning_rate": 1.9278919625074795e-06,
"loss": 1.2093,
"step": 110
},
{
"epoch": 0.7570977917981072,
"grad_norm": 1.1991688013076782,
"learning_rate": 1.2618115762852451e-06,
"loss": 1.227,
"step": 120
},
{
"epoch": 0.8201892744479495,
"grad_norm": 1.1350749731063843,
"learning_rate": 7.154031634651838e-07,
"loss": 1.2058,
"step": 130
},
{
"epoch": 0.8832807570977917,
"grad_norm": 1.1695517301559448,
"learning_rate": 3.1254739364599835e-07,
"loss": 1.208,
"step": 140
},
{
"epoch": 0.9463722397476341,
"grad_norm": 1.1390548944473267,
"learning_rate": 7.085099708524556e-08,
"loss": 1.2032,
"step": 150
}
],
"logging_steps": 10,
"max_steps": 158,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.6672282252869632e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}