{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.927444794952681,
"eval_steps": 500,
"global_step": 840,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14195583596214512,
"grad_norm": 1.257095217704773,
"learning_rate": 6.511627906976745e-05,
"loss": 2.7447,
"step": 15
},
{
"epoch": 0.28391167192429023,
"grad_norm": 0.6703794598579407,
"learning_rate": 0.00013488372093023256,
"loss": 1.7492,
"step": 30
},
{
"epoch": 0.42586750788643535,
"grad_norm": 0.5519043207168579,
"learning_rate": 0.00019999923848679644,
"loss": 1.2311,
"step": 45
},
{
"epoch": 0.5678233438485805,
"grad_norm": 0.5175157785415649,
"learning_rate": 0.00019980511570519505,
"loss": 1.1063,
"step": 60
},
{
"epoch": 0.7097791798107256,
"grad_norm": 0.6619298458099365,
"learning_rate": 0.00019926907703615428,
"loss": 1.0206,
"step": 75
},
{
"epoch": 0.8517350157728707,
"grad_norm": 0.7122862935066223,
"learning_rate": 0.00019839295885986296,
"loss": 0.9689,
"step": 90
},
{
"epoch": 0.9936908517350158,
"grad_norm": 0.851209819316864,
"learning_rate": 0.0001971797626128871,
"loss": 0.9688,
"step": 105
},
{
"epoch": 1.1324921135646688,
"grad_norm": 0.6464285850524902,
"learning_rate": 0.00019563364450574252,
"loss": 0.8574,
"step": 120
},
{
"epoch": 1.274447949526814,
"grad_norm": 0.6553785800933838,
"learning_rate": 0.00019375990128440204,
"loss": 0.8527,
"step": 135
},
{
"epoch": 1.416403785488959,
"grad_norm": 0.6542381644248962,
"learning_rate": 0.00019156495208451658,
"loss": 0.8163,
"step": 150
},
{
"epoch": 1.5583596214511042,
"grad_norm": 0.7524316906929016,
"learning_rate": 0.0001890563164405134,
"loss": 0.823,
"step": 165
},
{
"epoch": 1.7003154574132493,
"grad_norm": 0.715350866317749,
"learning_rate": 0.0001862425885249098,
"loss": 0.8389,
"step": 180
},
{
"epoch": 1.8422712933753944,
"grad_norm": 0.7483017444610596,
"learning_rate": 0.00018313340770609367,
"loss": 0.7991,
"step": 195
},
{
"epoch": 1.9842271293375395,
"grad_norm": 0.8540375232696533,
"learning_rate": 0.00017973942552543503,
"loss": 0.7916,
"step": 210
},
{
"epoch": 2.1230283911671926,
"grad_norm": 0.9041481018066406,
"learning_rate": 0.00017607226920685976,
"loss": 0.6324,
"step": 225
},
{
"epoch": 2.2649842271293377,
"grad_norm": 0.9540446996688843,
"learning_rate": 0.00017214450182389559,
"loss": 0.6236,
"step": 240
},
{
"epoch": 2.406940063091483,
"grad_norm": 0.9503123760223389,
"learning_rate": 0.00016796957926065134,
"loss": 0.6384,
"step": 255
},
{
"epoch": 2.548895899053628,
"grad_norm": 1.0240495204925537,
"learning_rate": 0.00016356180411417447,
"loss": 0.5756,
"step": 270
},
{
"epoch": 2.690851735015773,
"grad_norm": 1.0138286352157593,
"learning_rate": 0.00015893627669610926,
"loss": 0.6549,
"step": 285
},
{
"epoch": 2.832807570977918,
"grad_norm": 1.0231430530548096,
"learning_rate": 0.00015410884330151626,
"loss": 0.6267,
"step": 300
},
{
"epoch": 2.9747634069400632,
"grad_norm": 0.9881237149238586,
"learning_rate": 0.00014909604192207568,
"loss": 0.6235,
"step": 315
},
{
"epoch": 3.1135646687697163,
"grad_norm": 1.4114665985107422,
"learning_rate": 0.00014391504558965157,
"loss": 0.4734,
"step": 330
},
{
"epoch": 3.2555205047318614,
"grad_norm": 1.3432203531265259,
"learning_rate": 0.00013858360354431355,
"loss": 0.4271,
"step": 345
},
{
"epoch": 3.3974763406940065,
"grad_norm": 1.1701949834823608,
"learning_rate": 0.00013311998042836356,
"loss": 0.4547,
"step": 360
},
{
"epoch": 3.5394321766561516,
"grad_norm": 1.1254725456237793,
"learning_rate": 0.00012754289371467986,
"loss": 0.421,
"step": 375
},
{
"epoch": 3.6813880126182967,
"grad_norm": 1.2798420190811157,
"learning_rate": 0.00012187144958373793,
"loss": 0.4518,
"step": 390
},
{
"epoch": 3.823343848580442,
"grad_norm": 1.2269246578216553,
"learning_rate": 0.00011612507746898307,
"loss": 0.4841,
"step": 405
},
{
"epoch": 3.965299684542587,
"grad_norm": 1.3260618448257446,
"learning_rate": 0.00011032346349479161,
"loss": 0.4677,
"step": 420
},
{
"epoch": 4.10410094637224,
"grad_norm": 1.6245781183242798,
"learning_rate": 0.00010448648303505151,
"loss": 0.348,
"step": 435
},
{
"epoch": 4.246056782334385,
"grad_norm": 1.2491908073425293,
"learning_rate": 9.863413262340491e-05,
"loss": 0.3101,
"step": 450
},
{
"epoch": 4.38801261829653,
"grad_norm": 1.3882880210876465,
"learning_rate": 9.278646144841758e-05,
"loss": 0.3044,
"step": 465
},
{
"epoch": 4.529968454258675,
"grad_norm": 1.356520175933838,
"learning_rate": 8.696350266836128e-05,
"loss": 0.3251,
"step": 480
},
{
"epoch": 4.6719242902208205,
"grad_norm": 1.248788595199585,
"learning_rate": 8.118520478091311e-05,
"loss": 0.3087,
"step": 495
},
{
"epoch": 4.813880126182966,
"grad_norm": 1.6940261125564575,
"learning_rate": 7.547136328288814e-05,
"loss": 0.3158,
"step": 510
},
{
"epoch": 4.955835962145111,
"grad_norm": 1.34111487865448,
"learning_rate": 6.9841552854128e-05,
"loss": 0.3222,
"step": 525
},
{
"epoch": 5.094637223974764,
"grad_norm": 1.0496258735656738,
"learning_rate": 6.431506029787189e-05,
"loss": 0.2501,
"step": 540
},
{
"epoch": 5.236593059936909,
"grad_norm": 1.2102131843566895,
"learning_rate": 5.8910818467345185e-05,
"loss": 0.2205,
"step": 555
},
{
"epoch": 5.378548895899054,
"grad_norm": 1.1957101821899414,
"learning_rate": 5.3647341404923134e-05,
"loss": 0.2214,
"step": 570
},
{
"epoch": 5.520504731861199,
"grad_norm": 1.1445940732955933,
"learning_rate": 4.8542660916070736e-05,
"loss": 0.2252,
"step": 585
},
{
"epoch": 5.662460567823344,
"grad_norm": 1.2054235935211182,
"learning_rate": 4.361426479534753e-05,
"loss": 0.2275,
"step": 600
},
{
"epoch": 5.804416403785489,
"grad_norm": 2.122396469116211,
"learning_rate": 3.8879036916103704e-05,
"loss": 0.2306,
"step": 615
},
{
"epoch": 5.946372239747634,
"grad_norm": 1.7328449487686157,
"learning_rate": 3.4353199389111065e-05,
"loss": 0.2344,
"step": 630
},
{
"epoch": 6.085173501577287,
"grad_norm": 0.821266770362854,
"learning_rate": 3.005225698828338e-05,
"loss": 0.1922,
"step": 645
},
{
"epoch": 6.2271293375394325,
"grad_norm": 0.9995691180229187,
"learning_rate": 2.599094403387481e-05,
"loss": 0.1655,
"step": 660
},
{
"epoch": 6.369085173501578,
"grad_norm": 1.0223170518875122,
"learning_rate": 2.2183173915125656e-05,
"loss": 0.1731,
"step": 675
},
{
"epoch": 6.511041009463723,
"grad_norm": 1.3553900718688965,
"learning_rate": 1.8641991425282345e-05,
"loss": 0.1696,
"step": 690
},
{
"epoch": 6.652996845425868,
"grad_norm": 1.330947995185852,
"learning_rate": 1.53795280722846e-05,
"loss": 0.1706,
"step": 705
},
{
"epoch": 6.794952681388013,
"grad_norm": 1.1109322309494019,
"learning_rate": 1.2406960518217326e-05,
"loss": 0.1749,
"step": 720
},
{
"epoch": 6.936908517350158,
"grad_norm": 1.1352533102035522,
"learning_rate": 9.734472289907182e-06,
"loss": 0.1683,
"step": 735
},
{
"epoch": 7.075709779179811,
"grad_norm": 1.0221686363220215,
"learning_rate": 7.3712188918370285e-06,
"loss": 0.1664,
"step": 750
},
{
"epoch": 7.217665615141956,
"grad_norm": 0.9193073511123657,
"learning_rate": 5.325296440895622e-06,
"loss": 0.1444,
"step": 765
},
{
"epoch": 7.3596214511041005,
"grad_norm": 1.8977042436599731,
"learning_rate": 3.6037139304146762e-06,
"loss": 0.1528,
"step": 780
},
{
"epoch": 7.501577287066246,
"grad_norm": 0.9380797147750854,
"learning_rate": 2.212369218512078e-06,
"loss": 0.1438,
"step": 795
},
{
"epoch": 7.643533123028391,
"grad_norm": 1.0754660367965698,
"learning_rate": 1.1560288230015203e-06,
"loss": 0.1502,
"step": 810
},
{
"epoch": 7.785488958990536,
"grad_norm": 0.9819433093070984,
"learning_rate": 4.383115920874814e-07,
"loss": 0.1452,
"step": 825
},
{
"epoch": 7.927444794952681,
"grad_norm": 1.4535789489746094,
"learning_rate": 6.16763067873949e-08,
"loss": 0.1484,
"step": 840
}
],
"logging_steps": 15,
"max_steps": 848,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 15,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.111385862982861e+16,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}