job-parser-model-qwen / trainer_state.json
Rithankoushik's picture
Upload folder using huggingface_hub
2ff9105 verified
{
"best_global_step": 190,
"best_metric": 6.224213600158691,
"best_model_checkpoint": "/kaggle/working/qwen-model-finetuned/checkpoint-190",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5194805194805194,
"grad_norm": 504.0,
"learning_rate": 9.800000000000001e-06,
"loss": 10.9305,
"mean_token_accuracy": 0.022239863348659128,
"num_tokens": 81920.0,
"step": 5
},
{
"epoch": 1.0,
"grad_norm": 528.0,
"learning_rate": 9.55e-06,
"loss": 9.7037,
"mean_token_accuracy": 0.03139729846923335,
"num_tokens": 157696.0,
"step": 10
},
{
"epoch": 1.0,
"eval_loss": 9.371186256408691,
"eval_mean_token_accuracy": 0.025891548436548974,
"eval_num_tokens": 157696.0,
"eval_runtime": 10.0705,
"eval_samples_per_second": 0.894,
"eval_steps_per_second": 0.894,
"step": 10
},
{
"epoch": 1.5194805194805194,
"grad_norm": 374.0,
"learning_rate": 9.3e-06,
"loss": 9.0276,
"mean_token_accuracy": 0.04255007305182516,
"num_tokens": 239616.0,
"step": 15
},
{
"epoch": 2.0,
"grad_norm": 3504.0,
"learning_rate": 9.050000000000001e-06,
"loss": 8.5915,
"mean_token_accuracy": 0.06423375010490417,
"num_tokens": 315392.0,
"step": 20
},
{
"epoch": 2.0,
"eval_loss": 8.596755981445312,
"eval_mean_token_accuracy": 0.06323617200056712,
"eval_num_tokens": 315392.0,
"eval_runtime": 9.9888,
"eval_samples_per_second": 0.901,
"eval_steps_per_second": 0.901,
"step": 20
},
{
"epoch": 2.5194805194805197,
"grad_norm": 502.0,
"learning_rate": 8.8e-06,
"loss": 8.3693,
"mean_token_accuracy": 0.0784318515099585,
"num_tokens": 397312.0,
"step": 25
},
{
"epoch": 3.0,
"grad_norm": 288.0,
"learning_rate": 8.550000000000001e-06,
"loss": 8.2333,
"mean_token_accuracy": 0.07881012428048495,
"num_tokens": 473088.0,
"step": 30
},
{
"epoch": 3.0,
"eval_loss": 8.20586109161377,
"eval_mean_token_accuracy": 0.06926124874088499,
"eval_num_tokens": 473088.0,
"eval_runtime": 9.9908,
"eval_samples_per_second": 0.901,
"eval_steps_per_second": 0.901,
"step": 30
},
{
"epoch": 3.5194805194805197,
"grad_norm": 249.0,
"learning_rate": 8.3e-06,
"loss": 8.0225,
"mean_token_accuracy": 0.09137762561440468,
"num_tokens": 555008.0,
"step": 35
},
{
"epoch": 4.0,
"grad_norm": 218.0,
"learning_rate": 8.050000000000001e-06,
"loss": 7.8343,
"mean_token_accuracy": 0.09395423753036035,
"num_tokens": 630784.0,
"step": 40
},
{
"epoch": 4.0,
"eval_loss": 7.793511390686035,
"eval_mean_token_accuracy": 0.0885306414630678,
"eval_num_tokens": 630784.0,
"eval_runtime": 9.9692,
"eval_samples_per_second": 0.903,
"eval_steps_per_second": 0.903,
"step": 40
},
{
"epoch": 4.51948051948052,
"grad_norm": 124.5,
"learning_rate": 7.800000000000002e-06,
"loss": 7.667,
"mean_token_accuracy": 0.10079384371638297,
"num_tokens": 712704.0,
"step": 45
},
{
"epoch": 5.0,
"grad_norm": 199.0,
"learning_rate": 7.5500000000000006e-06,
"loss": 7.5062,
"mean_token_accuracy": 0.10965288692229502,
"num_tokens": 788480.0,
"step": 50
},
{
"epoch": 5.0,
"eval_loss": 7.532417297363281,
"eval_mean_token_accuracy": 0.11339086873663796,
"eval_num_tokens": 788480.0,
"eval_runtime": 9.9854,
"eval_samples_per_second": 0.901,
"eval_steps_per_second": 0.901,
"step": 50
},
{
"epoch": 5.51948051948052,
"grad_norm": 189.0,
"learning_rate": 7.3e-06,
"loss": 7.4228,
"mean_token_accuracy": 0.14055935498327016,
"num_tokens": 870400.0,
"step": 55
},
{
"epoch": 6.0,
"grad_norm": 179.0,
"learning_rate": 7.05e-06,
"loss": 7.1983,
"mean_token_accuracy": 0.1601288616657257,
"num_tokens": 946176.0,
"step": 60
},
{
"epoch": 6.0,
"eval_loss": 7.2402825355529785,
"eval_mean_token_accuracy": 0.15719481143686506,
"eval_num_tokens": 946176.0,
"eval_runtime": 9.9843,
"eval_samples_per_second": 0.901,
"eval_steps_per_second": 0.901,
"step": 60
},
{
"epoch": 6.51948051948052,
"grad_norm": 212.0,
"learning_rate": 6.800000000000001e-06,
"loss": 7.1554,
"mean_token_accuracy": 0.16111382581293582,
"num_tokens": 1028096.0,
"step": 65
},
{
"epoch": 7.0,
"grad_norm": 240.0,
"learning_rate": 6.550000000000001e-06,
"loss": 7.0146,
"mean_token_accuracy": 0.16839409096015467,
"num_tokens": 1103872.0,
"step": 70
},
{
"epoch": 7.0,
"eval_loss": 7.037937641143799,
"eval_mean_token_accuracy": 0.16300277080800799,
"eval_num_tokens": 1103872.0,
"eval_runtime": 9.9771,
"eval_samples_per_second": 0.902,
"eval_steps_per_second": 0.902,
"step": 70
},
{
"epoch": 7.51948051948052,
"grad_norm": 89.0,
"learning_rate": 6.300000000000001e-06,
"loss": 6.9988,
"mean_token_accuracy": 0.1658891063183546,
"num_tokens": 1185792.0,
"step": 75
},
{
"epoch": 8.0,
"grad_norm": 99.5,
"learning_rate": 6.0500000000000005e-06,
"loss": 6.8182,
"mean_token_accuracy": 0.1801449720923965,
"num_tokens": 1261568.0,
"step": 80
},
{
"epoch": 8.0,
"eval_loss": 6.879114627838135,
"eval_mean_token_accuracy": 0.17081908716095817,
"eval_num_tokens": 1261568.0,
"eval_runtime": 9.9794,
"eval_samples_per_second": 0.902,
"eval_steps_per_second": 0.902,
"step": 80
},
{
"epoch": 8.519480519480519,
"grad_norm": 79.0,
"learning_rate": 5.8e-06,
"loss": 6.7599,
"mean_token_accuracy": 0.18248656746000053,
"num_tokens": 1343488.0,
"step": 85
},
{
"epoch": 9.0,
"grad_norm": 244.0,
"learning_rate": 5.550000000000001e-06,
"loss": 6.7712,
"mean_token_accuracy": 0.18557150158527735,
"num_tokens": 1419264.0,
"step": 90
},
{
"epoch": 9.0,
"eval_loss": 6.76102352142334,
"eval_mean_token_accuracy": 0.18694023622406852,
"eval_num_tokens": 1419264.0,
"eval_runtime": 9.9905,
"eval_samples_per_second": 0.901,
"eval_steps_per_second": 0.901,
"step": 90
},
{
"epoch": 9.519480519480519,
"grad_norm": 122.0,
"learning_rate": 5.300000000000001e-06,
"loss": 6.7039,
"mean_token_accuracy": 0.18946018554270266,
"num_tokens": 1501184.0,
"step": 95
},
{
"epoch": 10.0,
"grad_norm": 182.0,
"learning_rate": 5.050000000000001e-06,
"loss": 6.618,
"mean_token_accuracy": 0.1998177944002925,
"num_tokens": 1576960.0,
"step": 100
},
{
"epoch": 10.0,
"eval_loss": 6.640429496765137,
"eval_mean_token_accuracy": 0.19361667500601876,
"eval_num_tokens": 1576960.0,
"eval_runtime": 9.9854,
"eval_samples_per_second": 0.901,
"eval_steps_per_second": 0.901,
"step": 100
},
{
"epoch": 10.519480519480519,
"grad_norm": 103.5,
"learning_rate": 4.800000000000001e-06,
"loss": 6.5581,
"mean_token_accuracy": 0.19838788434863092,
"num_tokens": 1658880.0,
"step": 105
},
{
"epoch": 11.0,
"grad_norm": 132.0,
"learning_rate": 4.5500000000000005e-06,
"loss": 6.5207,
"mean_token_accuracy": 0.19835223559592222,
"num_tokens": 1734656.0,
"step": 110
},
{
"epoch": 11.0,
"eval_loss": 6.540436744689941,
"eval_mean_token_accuracy": 0.19757911231782702,
"eval_num_tokens": 1734656.0,
"eval_runtime": 9.9902,
"eval_samples_per_second": 0.901,
"eval_steps_per_second": 0.901,
"step": 110
},
{
"epoch": 11.519480519480519,
"grad_norm": 82.0,
"learning_rate": 4.3e-06,
"loss": 6.487,
"mean_token_accuracy": 0.1987176351249218,
"num_tokens": 1816576.0,
"step": 115
},
{
"epoch": 12.0,
"grad_norm": 55.0,
"learning_rate": 4.05e-06,
"loss": 6.381,
"mean_token_accuracy": 0.20363352991439201,
"num_tokens": 1892352.0,
"step": 120
},
{
"epoch": 12.0,
"eval_loss": 6.44816255569458,
"eval_mean_token_accuracy": 0.19850187169180977,
"eval_num_tokens": 1892352.0,
"eval_runtime": 9.9827,
"eval_samples_per_second": 0.902,
"eval_steps_per_second": 0.902,
"step": 120
},
{
"epoch": 12.519480519480519,
"grad_norm": 109.5,
"learning_rate": 3.8000000000000005e-06,
"loss": 6.3201,
"mean_token_accuracy": 0.207901806011796,
"num_tokens": 1974272.0,
"step": 125
},
{
"epoch": 13.0,
"grad_norm": 81.0,
"learning_rate": 3.5500000000000003e-06,
"loss": 6.3889,
"mean_token_accuracy": 0.1981937969858582,
"num_tokens": 2050048.0,
"step": 130
},
{
"epoch": 13.0,
"eval_loss": 6.373791217803955,
"eval_mean_token_accuracy": 0.19828475183910793,
"eval_num_tokens": 2050048.0,
"eval_runtime": 9.9649,
"eval_samples_per_second": 0.903,
"eval_steps_per_second": 0.903,
"step": 130
},
{
"epoch": 13.519480519480519,
"grad_norm": 57.75,
"learning_rate": 3.3000000000000006e-06,
"loss": 6.3537,
"mean_token_accuracy": 0.20020762123167515,
"num_tokens": 2131968.0,
"step": 135
},
{
"epoch": 14.0,
"grad_norm": 131.0,
"learning_rate": 3.05e-06,
"loss": 6.2238,
"mean_token_accuracy": 0.20939014046578794,
"num_tokens": 2207744.0,
"step": 140
},
{
"epoch": 14.0,
"eval_loss": 6.317364692687988,
"eval_mean_token_accuracy": 0.20094447003470528,
"eval_num_tokens": 2207744.0,
"eval_runtime": 9.9793,
"eval_samples_per_second": 0.902,
"eval_steps_per_second": 0.902,
"step": 140
},
{
"epoch": 14.519480519480519,
"grad_norm": 111.0,
"learning_rate": 2.8000000000000003e-06,
"loss": 6.2564,
"mean_token_accuracy": 0.2047997061163187,
"num_tokens": 2289664.0,
"step": 145
},
{
"epoch": 15.0,
"grad_norm": 60.5,
"learning_rate": 2.55e-06,
"loss": 6.237,
"mean_token_accuracy": 0.2071191845713435,
"num_tokens": 2365440.0,
"step": 150
},
{
"epoch": 15.0,
"eval_loss": 6.276952743530273,
"eval_mean_token_accuracy": 0.20354990826712716,
"eval_num_tokens": 2365440.0,
"eval_runtime": 9.9884,
"eval_samples_per_second": 0.901,
"eval_steps_per_second": 0.901,
"step": 150
},
{
"epoch": 15.519480519480519,
"grad_norm": 57.0,
"learning_rate": 2.3000000000000004e-06,
"loss": 6.1936,
"mean_token_accuracy": 0.2097093306481838,
"num_tokens": 2447360.0,
"step": 155
},
{
"epoch": 16.0,
"grad_norm": 92.5,
"learning_rate": 2.05e-06,
"loss": 6.2567,
"mean_token_accuracy": 0.20516510428609075,
"num_tokens": 2523136.0,
"step": 160
},
{
"epoch": 16.0,
"eval_loss": 6.257329940795898,
"eval_mean_token_accuracy": 0.20631818804475996,
"eval_num_tokens": 2523136.0,
"eval_runtime": 10.0306,
"eval_samples_per_second": 0.897,
"eval_steps_per_second": 0.897,
"step": 160
},
{
"epoch": 16.51948051948052,
"grad_norm": 57.75,
"learning_rate": 1.8000000000000001e-06,
"loss": 6.1925,
"mean_token_accuracy": 0.2081704933196306,
"num_tokens": 2605056.0,
"step": 165
},
{
"epoch": 17.0,
"grad_norm": 67.5,
"learning_rate": 1.5500000000000002e-06,
"loss": 6.2109,
"mean_token_accuracy": 0.2084659144685075,
"num_tokens": 2680832.0,
"step": 170
},
{
"epoch": 17.0,
"eval_loss": 6.232978343963623,
"eval_mean_token_accuracy": 0.20924930771191916,
"eval_num_tokens": 2680832.0,
"eval_runtime": 9.9679,
"eval_samples_per_second": 0.903,
"eval_steps_per_second": 0.903,
"step": 170
},
{
"epoch": 17.51948051948052,
"grad_norm": 66.0,
"learning_rate": 1.3e-06,
"loss": 6.1513,
"mean_token_accuracy": 0.21311675421893597,
"num_tokens": 2762752.0,
"step": 175
},
{
"epoch": 18.0,
"grad_norm": 62.25,
"learning_rate": 1.0500000000000001e-06,
"loss": 6.2227,
"mean_token_accuracy": 0.20644582042822968,
"num_tokens": 2838528.0,
"step": 180
},
{
"epoch": 18.0,
"eval_loss": 6.226585388183594,
"eval_mean_token_accuracy": 0.19757911231782702,
"eval_num_tokens": 2838528.0,
"eval_runtime": 9.9755,
"eval_samples_per_second": 0.902,
"eval_steps_per_second": 0.902,
"step": 180
},
{
"epoch": 18.51948051948052,
"grad_norm": 47.75,
"learning_rate": 8.000000000000001e-07,
"loss": 6.1354,
"mean_token_accuracy": 0.20610649585723878,
"num_tokens": 2920448.0,
"step": 185
},
{
"epoch": 19.0,
"grad_norm": 58.25,
"learning_rate": 5.5e-07,
"loss": 6.2115,
"mean_token_accuracy": 0.20702676193134203,
"num_tokens": 2996224.0,
"step": 190
},
{
"epoch": 19.0,
"eval_loss": 6.224213600158691,
"eval_mean_token_accuracy": 0.20870650642448002,
"eval_num_tokens": 2996224.0,
"eval_runtime": 9.9648,
"eval_samples_per_second": 0.903,
"eval_steps_per_second": 0.903,
"step": 190
},
{
"epoch": 19.51948051948052,
"grad_norm": 58.75,
"learning_rate": 3.0000000000000004e-07,
"loss": 6.1279,
"mean_token_accuracy": 0.21613336391746998,
"num_tokens": 3078144.0,
"step": 195
},
{
"epoch": 20.0,
"grad_norm": 57.75,
"learning_rate": 5.0000000000000004e-08,
"loss": 6.2165,
"mean_token_accuracy": 0.20515190266274116,
"num_tokens": 3153920.0,
"step": 200
},
{
"epoch": 20.0,
"eval_loss": 6.226177215576172,
"eval_mean_token_accuracy": 0.20892362627718183,
"eval_num_tokens": 3153920.0,
"eval_runtime": 9.9552,
"eval_samples_per_second": 0.904,
"eval_steps_per_second": 0.904,
"step": 200
}
],
"logging_steps": 5,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8335194712965120.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}