Qwen2.5-7B-t13 / trainer_state.json
AiMijie's picture
Upload 15 files
3c4129b verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9920692141312184,
"eval_steps": 500,
"global_step": 43,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.023071377072819033,
"grad_norm": 11.529925678238175,
"learning_rate": 0.0,
"loss": 2.0516,
"step": 1
},
{
"epoch": 0.046142754145638065,
"grad_norm": 11.988806479251192,
"learning_rate": 2.0000000000000003e-06,
"loss": 2.1984,
"step": 2
},
{
"epoch": 0.0692141312184571,
"grad_norm": 10.48865159335969,
"learning_rate": 4.000000000000001e-06,
"loss": 2.0262,
"step": 3
},
{
"epoch": 0.09228550829127613,
"grad_norm": 7.872336296393583,
"learning_rate": 6e-06,
"loss": 1.9887,
"step": 4
},
{
"epoch": 0.11535688536409516,
"grad_norm": 5.600464723177604,
"learning_rate": 8.000000000000001e-06,
"loss": 1.8539,
"step": 5
},
{
"epoch": 0.1384282624369142,
"grad_norm": 5.379982813408392,
"learning_rate": 1e-05,
"loss": 1.6958,
"step": 6
},
{
"epoch": 0.16149963950973323,
"grad_norm": 6.150171510754621,
"learning_rate": 9.98292246503335e-06,
"loss": 1.5919,
"step": 7
},
{
"epoch": 0.18457101658255226,
"grad_norm": 7.730500186977511,
"learning_rate": 9.931806517013612e-06,
"loss": 1.4645,
"step": 8
},
{
"epoch": 0.2076423936553713,
"grad_norm": 5.657841206609976,
"learning_rate": 9.847001329696653e-06,
"loss": 1.4254,
"step": 9
},
{
"epoch": 0.23071377072819033,
"grad_norm": 4.330967629257847,
"learning_rate": 9.729086208503174e-06,
"loss": 1.3279,
"step": 10
},
{
"epoch": 0.25378514780100936,
"grad_norm": 3.4335170991549617,
"learning_rate": 9.578866633275289e-06,
"loss": 1.2615,
"step": 11
},
{
"epoch": 0.2768565248738284,
"grad_norm": 3.5158187786127737,
"learning_rate": 9.397368756032445e-06,
"loss": 1.198,
"step": 12
},
{
"epoch": 0.2999279019466474,
"grad_norm": 2.8923651000012804,
"learning_rate": 9.185832391312644e-06,
"loss": 1.2127,
"step": 13
},
{
"epoch": 0.32299927901946646,
"grad_norm": 2.7678712156217045,
"learning_rate": 8.94570254698197e-06,
"loss": 1.192,
"step": 14
},
{
"epoch": 0.3460706560922855,
"grad_norm": 2.660124872631909,
"learning_rate": 8.67861955336566e-06,
"loss": 1.1819,
"step": 15
},
{
"epoch": 0.3691420331651045,
"grad_norm": 2.599566946174234,
"learning_rate": 8.386407858128707e-06,
"loss": 1.1531,
"step": 16
},
{
"epoch": 0.39221341023792355,
"grad_norm": 2.4817341082971156,
"learning_rate": 8.071063563448341e-06,
"loss": 1.2092,
"step": 17
},
{
"epoch": 0.4152847873107426,
"grad_norm": 2.5122202204515136,
"learning_rate": 7.734740790612137e-06,
"loss": 1.1634,
"step": 18
},
{
"epoch": 0.4383561643835616,
"grad_norm": 2.3856245897153676,
"learning_rate": 7.379736965185369e-06,
"loss": 1.1431,
"step": 19
},
{
"epoch": 0.46142754145638065,
"grad_norm": 2.3564683980893326,
"learning_rate": 7.008477123264849e-06,
"loss": 1.1209,
"step": 20
},
{
"epoch": 0.4844989185291997,
"grad_norm": 2.330304260395665,
"learning_rate": 6.6234973460234184e-06,
"loss": 1.1877,
"step": 21
},
{
"epoch": 0.5075702956020187,
"grad_norm": 2.1638793690371845,
"learning_rate": 6.227427435703997e-06,
"loss": 1.0835,
"step": 22
},
{
"epoch": 0.5306416726748377,
"grad_norm": 2.121227470925116,
"learning_rate": 5.82297295140367e-06,
"loss": 1.0847,
"step": 23
},
{
"epoch": 0.5537130497476568,
"grad_norm": 2.18566259900884,
"learning_rate": 5.412896727361663e-06,
"loss": 1.1755,
"step": 24
},
{
"epoch": 0.5767844268204758,
"grad_norm": 2.2506433459611905,
"learning_rate": 5e-06,
"loss": 1.1454,
"step": 25
},
{
"epoch": 0.5998558038932948,
"grad_norm": 1.9935766338338359,
"learning_rate": 4.587103272638339e-06,
"loss": 1.0624,
"step": 26
},
{
"epoch": 0.6229271809661139,
"grad_norm": 2.024598244784711,
"learning_rate": 4.17702704859633e-06,
"loss": 1.1401,
"step": 27
},
{
"epoch": 0.6459985580389329,
"grad_norm": 1.9014864665100077,
"learning_rate": 3.7725725642960047e-06,
"loss": 1.0402,
"step": 28
},
{
"epoch": 0.669069935111752,
"grad_norm": 1.9082403800768388,
"learning_rate": 3.3765026539765832e-06,
"loss": 1.1484,
"step": 29
},
{
"epoch": 0.692141312184571,
"grad_norm": 1.9140993299277556,
"learning_rate": 2.991522876735154e-06,
"loss": 1.0909,
"step": 30
},
{
"epoch": 0.71521268925739,
"grad_norm": 2.0056513408575634,
"learning_rate": 2.6202630348146323e-06,
"loss": 1.118,
"step": 31
},
{
"epoch": 0.738284066330209,
"grad_norm": 2.0600105157689463,
"learning_rate": 2.265259209387867e-06,
"loss": 1.0666,
"step": 32
},
{
"epoch": 0.7613554434030281,
"grad_norm": 1.8062623897997554,
"learning_rate": 1.928936436551661e-06,
"loss": 1.086,
"step": 33
},
{
"epoch": 0.7844268204758471,
"grad_norm": 1.9039511452967517,
"learning_rate": 1.6135921418712959e-06,
"loss": 1.0922,
"step": 34
},
{
"epoch": 0.8074981975486661,
"grad_norm": 1.796397313791375,
"learning_rate": 1.321380446634342e-06,
"loss": 1.0186,
"step": 35
},
{
"epoch": 0.8305695746214852,
"grad_norm": 1.9645645345105642,
"learning_rate": 1.0542974530180327e-06,
"loss": 1.131,
"step": 36
},
{
"epoch": 0.8536409516943042,
"grad_norm": 1.8741132808199075,
"learning_rate": 8.141676086873574e-07,
"loss": 1.0346,
"step": 37
},
{
"epoch": 0.8767123287671232,
"grad_norm": 1.871976097374707,
"learning_rate": 6.026312439675553e-07,
"loss": 1.0757,
"step": 38
},
{
"epoch": 0.8997837058399423,
"grad_norm": 1.9322927512032353,
"learning_rate": 4.211333667247125e-07,
"loss": 1.1638,
"step": 39
},
{
"epoch": 0.9228550829127613,
"grad_norm": 1.811353999287456,
"learning_rate": 2.7091379149682683e-07,
"loss": 1.0145,
"step": 40
},
{
"epoch": 0.9459264599855803,
"grad_norm": 1.836091748570849,
"learning_rate": 1.5299867030334815e-07,
"loss": 1.0958,
"step": 41
},
{
"epoch": 0.9689978370583994,
"grad_norm": 1.8619728224137473,
"learning_rate": 6.819348298638839e-08,
"loss": 1.0899,
"step": 42
},
{
"epoch": 0.9920692141312184,
"grad_norm": 1.859646123275278,
"learning_rate": 1.7077534966650767e-08,
"loss": 1.0737,
"step": 43
},
{
"epoch": 0.9920692141312184,
"step": 43,
"total_flos": 5518750777344.0,
"train_loss": 1.2716188818909402,
"train_runtime": 679.939,
"train_samples_per_second": 2.04,
"train_steps_per_second": 0.063
}
],
"logging_steps": 1,
"max_steps": 43,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5518750777344.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}