{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.96969696969697,
"eval_steps": 500,
"global_step": 44,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12121212121212122,
"grad_norm": 1.8328726291656494,
"learning_rate": 0.0,
"loss": 2.5573,
"step": 1
},
{
"epoch": 0.24242424242424243,
"grad_norm": 2.0915732383728027,
"learning_rate": 3.3333333333333335e-05,
"loss": 2.1189,
"step": 2
},
{
"epoch": 0.36363636363636365,
"grad_norm": 1.0810253620147705,
"learning_rate": 6.666666666666667e-05,
"loss": 1.7979,
"step": 3
},
{
"epoch": 0.48484848484848486,
"grad_norm": 1.1273187398910522,
"learning_rate": 0.0001,
"loss": 2.0746,
"step": 4
},
{
"epoch": 0.6060606060606061,
"grad_norm": 1.1481099128723145,
"learning_rate": 0.00013333333333333334,
"loss": 1.9376,
"step": 5
},
{
"epoch": 0.7272727272727273,
"grad_norm": 1.120679259300232,
"learning_rate": 0.00016666666666666666,
"loss": 1.9616,
"step": 6
},
{
"epoch": 0.8484848484848485,
"grad_norm": 0.42956575751304626,
"learning_rate": 0.0002,
"loss": 1.5583,
"step": 7
},
{
"epoch": 0.9696969696969697,
"grad_norm": 0.5541684031486511,
"learning_rate": 0.00023333333333333333,
"loss": 1.6371,
"step": 8
},
{
"epoch": 1.0,
"grad_norm": 0.8354697823524475,
"learning_rate": 0.0002666666666666667,
"loss": 1.9758,
"step": 9
},
{
"epoch": 1.121212121212121,
"grad_norm": 0.8120695352554321,
"learning_rate": 0.0003,
"loss": 1.8198,
"step": 10
},
{
"epoch": 1.2424242424242424,
"grad_norm": 0.595156729221344,
"learning_rate": 0.0003333333333333333,
"loss": 1.649,
"step": 11
},
{
"epoch": 1.3636363636363638,
"grad_norm": 0.4005779027938843,
"learning_rate": 0.00036666666666666667,
"loss": 1.3083,
"step": 12
},
{
"epoch": 1.4848484848484849,
"grad_norm": 0.8254678845405579,
"learning_rate": 0.0004,
"loss": 1.2479,
"step": 13
},
{
"epoch": 1.606060606060606,
"grad_norm": 0.582369863986969,
"learning_rate": 0.00043333333333333337,
"loss": 1.4063,
"step": 14
},
{
"epoch": 1.7272727272727273,
"grad_norm": 0.7945718765258789,
"learning_rate": 0.00046666666666666666,
"loss": 1.4941,
"step": 15
},
{
"epoch": 1.8484848484848486,
"grad_norm": 0.7387524843215942,
"learning_rate": 0.0005,
"loss": 1.3542,
"step": 16
},
{
"epoch": 1.9696969696969697,
"grad_norm": 0.6516833305358887,
"learning_rate": 0.0004986304738420684,
"loss": 1.8561,
"step": 17
},
{
"epoch": 2.0,
"grad_norm": 2.1795907020568848,
"learning_rate": 0.0004945369001834514,
"loss": 1.1684,
"step": 18
},
{
"epoch": 2.121212121212121,
"grad_norm": 2.144760847091675,
"learning_rate": 0.0004877641290737884,
"loss": 1.3242,
"step": 19
},
{
"epoch": 2.242424242424242,
"grad_norm": 1.0639714002609253,
"learning_rate": 0.0004783863644106502,
"loss": 1.116,
"step": 20
},
{
"epoch": 2.3636363636363638,
"grad_norm": 2.3435370922088623,
"learning_rate": 0.00046650635094610973,
"loss": 1.4533,
"step": 21
},
{
"epoch": 2.484848484848485,
"grad_norm": 0.9590378999710083,
"learning_rate": 0.0004522542485937369,
"loss": 1.0565,
"step": 22
},
{
"epoch": 2.606060606060606,
"grad_norm": 0.8274413347244263,
"learning_rate": 0.00043578620636934855,
"loss": 1.0777,
"step": 23
},
{
"epoch": 2.7272727272727275,
"grad_norm": 0.7866584062576294,
"learning_rate": 0.0004172826515897146,
"loss": 0.9818,
"step": 24
},
{
"epoch": 2.8484848484848486,
"grad_norm": 1.3257405757904053,
"learning_rate": 0.0003969463130731183,
"loss": 0.9458,
"step": 25
},
{
"epoch": 2.9696969696969697,
"grad_norm": 0.7941730618476868,
"learning_rate": 0.000375,
"loss": 0.8782,
"step": 26
},
{
"epoch": 3.0,
"grad_norm": 1.2293105125427246,
"learning_rate": 0.0003516841607689501,
"loss": 0.9678,
"step": 27
},
{
"epoch": 3.121212121212121,
"grad_norm": 0.9322613477706909,
"learning_rate": 0.00032725424859373687,
"loss": 0.5318,
"step": 28
},
{
"epoch": 3.242424242424242,
"grad_norm": 0.8166446089744568,
"learning_rate": 0.0003019779227044398,
"loss": 0.9006,
"step": 29
},
{
"epoch": 3.3636363636363638,
"grad_norm": 1.2094213962554932,
"learning_rate": 0.0002761321158169134,
"loss": 0.6391,
"step": 30
},
{
"epoch": 3.484848484848485,
"grad_norm": 0.9135984778404236,
"learning_rate": 0.00025,
"loss": 0.6285,
"step": 31
},
{
"epoch": 3.606060606060606,
"grad_norm": 0.9478852152824402,
"learning_rate": 0.00022386788418308668,
"loss": 0.6269,
"step": 32
},
{
"epoch": 3.7272727272727275,
"grad_norm": 0.5533197522163391,
"learning_rate": 0.0001980220772955602,
"loss": 0.5646,
"step": 33
},
{
"epoch": 3.8484848484848486,
"grad_norm": 1.0226417779922485,
"learning_rate": 0.00017274575140626317,
"loss": 0.5521,
"step": 34
},
{
"epoch": 3.9696969696969697,
"grad_norm": 1.2138278484344482,
"learning_rate": 0.00014831583923105,
"loss": 0.8734,
"step": 35
},
{
"epoch": 4.0,
"grad_norm": 2.8926355838775635,
"learning_rate": 0.00012500000000000006,
"loss": 0.8668,
"step": 36
},
{
"epoch": 4.121212121212121,
"grad_norm": 0.9145299792289734,
"learning_rate": 0.00010305368692688174,
"loss": 0.2851,
"step": 37
},
{
"epoch": 4.242424242424242,
"grad_norm": 0.7148826718330383,
"learning_rate": 8.271734841028553e-05,
"loss": 0.6555,
"step": 38
},
{
"epoch": 4.363636363636363,
"grad_norm": 1.015910267829895,
"learning_rate": 6.421379363065141e-05,
"loss": 0.3664,
"step": 39
},
{
"epoch": 4.484848484848484,
"grad_norm": 0.9201410412788391,
"learning_rate": 4.7745751406263163e-05,
"loss": 0.575,
"step": 40
},
{
"epoch": 4.606060606060606,
"grad_norm": 0.8212230801582336,
"learning_rate": 3.3493649053890325e-05,
"loss": 0.4084,
"step": 41
},
{
"epoch": 4.7272727272727275,
"grad_norm": 0.8163782358169556,
"learning_rate": 2.1613635589349755e-05,
"loss": 0.3754,
"step": 42
},
{
"epoch": 4.848484848484849,
"grad_norm": 0.7215772867202759,
"learning_rate": 1.2235870926211617e-05,
"loss": 0.2209,
"step": 43
},
{
"epoch": 4.96969696969697,
"grad_norm": 1.07026207447052,
"learning_rate": 5.463099816548578e-06,
"loss": 0.3138,
"step": 44
}
],
"logging_steps": 1,
"max_steps": 45,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2267168340344832.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}