internvl3_2b_clarification_sft / trainer_state.json
Helen-ZW's picture
Upload folder using huggingface_hub
d03655d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9263157894736844,
"eval_steps": 100.0,
"global_step": 33,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08421052631578947,
"grad_norm": 0.09988608956336975,
"learning_rate": 5e-05,
"loss": 2.3771400451660156,
"step": 1,
"token_acc": 0.4664310954063604
},
{
"epoch": 0.42105263157894735,
"grad_norm": 0.04726523533463478,
"learning_rate": 9.770696282000244e-05,
"loss": 2.1574275493621826,
"step": 5,
"token_acc": 0.519051651143099
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.03580109775066376,
"learning_rate": 8.444834595378434e-05,
"loss": 1.7195571899414062,
"step": 10,
"token_acc": 0.5904558404558404
},
{
"epoch": 1.3368421052631578,
"grad_norm": 0.03144058212637901,
"learning_rate": 6.253262661293604e-05,
"loss": 1.85762939453125,
"step": 15,
"token_acc": 0.6245161290322581
},
{
"epoch": 1.7578947368421054,
"grad_norm": 0.0300274845212698,
"learning_rate": 3.746737338706397e-05,
"loss": 1.3842062950134277,
"step": 20,
"token_acc": 0.653372008701958
},
{
"epoch": 2.2526315789473683,
"grad_norm": 0.02691994234919548,
"learning_rate": 1.555165404621567e-05,
"loss": 1.6822933197021483,
"step": 25,
"token_acc": 0.6409290646578782
},
{
"epoch": 2.6736842105263157,
"grad_norm": 0.030764643102884293,
"learning_rate": 2.2930371799975594e-06,
"loss": 1.3119495391845704,
"step": 30,
"token_acc": 0.6437125748502994
}
],
"logging_steps": 5,
"max_steps": 33,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.1367359130171867e+19,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}