{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 7.927444794952681,
  "eval_steps": 500,
  "global_step": 840,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14195583596214512,
      "grad_norm": 1.257095217704773,
      "learning_rate": 6.511627906976745e-05,
      "loss": 2.7447,
      "step": 15
    },
    {
      "epoch": 0.28391167192429023,
      "grad_norm": 0.6703794598579407,
      "learning_rate": 0.00013488372093023256,
      "loss": 1.7492,
      "step": 30
    },
    {
      "epoch": 0.42586750788643535,
      "grad_norm": 0.5519043207168579,
      "learning_rate": 0.00019999923848679644,
      "loss": 1.2311,
      "step": 45
    },
    {
      "epoch": 0.5678233438485805,
      "grad_norm": 0.5175157785415649,
      "learning_rate": 0.00019980511570519505,
      "loss": 1.1063,
      "step": 60
    },
    {
      "epoch": 0.7097791798107256,
      "grad_norm": 0.6619298458099365,
      "learning_rate": 0.00019926907703615428,
      "loss": 1.0206,
      "step": 75
    },
    {
      "epoch": 0.8517350157728707,
      "grad_norm": 0.7122862935066223,
      "learning_rate": 0.00019839295885986296,
      "loss": 0.9689,
      "step": 90
    },
    {
      "epoch": 0.9936908517350158,
      "grad_norm": 0.851209819316864,
      "learning_rate": 0.0001971797626128871,
      "loss": 0.9688,
      "step": 105
    },
    {
      "epoch": 1.1324921135646688,
      "grad_norm": 0.6464285850524902,
      "learning_rate": 0.00019563364450574252,
      "loss": 0.8574,
      "step": 120
    },
    {
      "epoch": 1.274447949526814,
      "grad_norm": 0.6553785800933838,
      "learning_rate": 0.00019375990128440204,
      "loss": 0.8527,
      "step": 135
    },
    {
      "epoch": 1.416403785488959,
      "grad_norm": 0.6542381644248962,
      "learning_rate": 0.00019156495208451658,
      "loss": 0.8163,
      "step": 150
    },
    {
      "epoch": 1.5583596214511042,
      "grad_norm": 0.7524316906929016,
      "learning_rate": 0.0001890563164405134,
      "loss": 0.823,
      "step": 165
    },
    {
      "epoch": 1.7003154574132493,
      "grad_norm": 0.715350866317749,
      "learning_rate": 0.0001862425885249098,
      "loss": 0.8389,
      "step": 180
    },
    {
      "epoch": 1.8422712933753944,
      "grad_norm": 0.7483017444610596,
      "learning_rate": 0.00018313340770609367,
      "loss": 0.7991,
      "step": 195
    },
    {
      "epoch": 1.9842271293375395,
      "grad_norm": 0.8540375232696533,
      "learning_rate": 0.00017973942552543503,
      "loss": 0.7916,
      "step": 210
    },
    {
      "epoch": 2.1230283911671926,
      "grad_norm": 0.9041481018066406,
      "learning_rate": 0.00017607226920685976,
      "loss": 0.6324,
      "step": 225
    },
    {
      "epoch": 2.2649842271293377,
      "grad_norm": 0.9540446996688843,
      "learning_rate": 0.00017214450182389559,
      "loss": 0.6236,
      "step": 240
    },
    {
      "epoch": 2.406940063091483,
      "grad_norm": 0.9503123760223389,
      "learning_rate": 0.00016796957926065134,
      "loss": 0.6384,
      "step": 255
    },
    {
      "epoch": 2.548895899053628,
      "grad_norm": 1.0240495204925537,
      "learning_rate": 0.00016356180411417447,
      "loss": 0.5756,
      "step": 270
    },
    {
      "epoch": 2.690851735015773,
      "grad_norm": 1.0138286352157593,
      "learning_rate": 0.00015893627669610926,
      "loss": 0.6549,
      "step": 285
    },
    {
      "epoch": 2.832807570977918,
      "grad_norm": 1.0231430530548096,
      "learning_rate": 0.00015410884330151626,
      "loss": 0.6267,
      "step": 300
    },
    {
      "epoch": 2.9747634069400632,
      "grad_norm": 0.9881237149238586,
      "learning_rate": 0.00014909604192207568,
      "loss": 0.6235,
      "step": 315
    },
    {
      "epoch": 3.1135646687697163,
      "grad_norm": 1.4114665985107422,
      "learning_rate": 0.00014391504558965157,
      "loss": 0.4734,
      "step": 330
    },
    {
      "epoch": 3.2555205047318614,
      "grad_norm": 1.3432203531265259,
      "learning_rate": 0.00013858360354431355,
      "loss": 0.4271,
      "step": 345
    },
    {
      "epoch": 3.3974763406940065,
      "grad_norm": 1.1701949834823608,
      "learning_rate": 0.00013311998042836356,
      "loss": 0.4547,
      "step": 360
    },
    {
      "epoch": 3.5394321766561516,
      "grad_norm": 1.1254725456237793,
      "learning_rate": 0.00012754289371467986,
      "loss": 0.421,
      "step": 375
    },
    {
      "epoch": 3.6813880126182967,
      "grad_norm": 1.2798420190811157,
      "learning_rate": 0.00012187144958373793,
      "loss": 0.4518,
      "step": 390
    },
    {
      "epoch": 3.823343848580442,
      "grad_norm": 1.2269246578216553,
      "learning_rate": 0.00011612507746898307,
      "loss": 0.4841,
      "step": 405
    },
    {
      "epoch": 3.965299684542587,
      "grad_norm": 1.3260618448257446,
      "learning_rate": 0.00011032346349479161,
      "loss": 0.4677,
      "step": 420
    },
    {
      "epoch": 4.10410094637224,
      "grad_norm": 1.6245781183242798,
      "learning_rate": 0.00010448648303505151,
      "loss": 0.348,
      "step": 435
    },
    {
      "epoch": 4.246056782334385,
      "grad_norm": 1.2491908073425293,
      "learning_rate": 9.863413262340491e-05,
      "loss": 0.3101,
      "step": 450
    },
    {
      "epoch": 4.38801261829653,
      "grad_norm": 1.3882880210876465,
      "learning_rate": 9.278646144841758e-05,
      "loss": 0.3044,
      "step": 465
    },
    {
      "epoch": 4.529968454258675,
      "grad_norm": 1.356520175933838,
      "learning_rate": 8.696350266836128e-05,
      "loss": 0.3251,
      "step": 480
    },
    {
      "epoch": 4.6719242902208205,
      "grad_norm": 1.248788595199585,
      "learning_rate": 8.118520478091311e-05,
      "loss": 0.3087,
      "step": 495
    },
    {
      "epoch": 4.813880126182966,
      "grad_norm": 1.6940261125564575,
      "learning_rate": 7.547136328288814e-05,
      "loss": 0.3158,
      "step": 510
    },
    {
      "epoch": 4.955835962145111,
      "grad_norm": 1.34111487865448,
      "learning_rate": 6.9841552854128e-05,
      "loss": 0.3222,
      "step": 525
    },
    {
      "epoch": 5.094637223974764,
      "grad_norm": 1.0496258735656738,
      "learning_rate": 6.431506029787189e-05,
      "loss": 0.2501,
      "step": 540
    },
    {
      "epoch": 5.236593059936909,
      "grad_norm": 1.2102131843566895,
      "learning_rate": 5.8910818467345185e-05,
      "loss": 0.2205,
      "step": 555
    },
    {
      "epoch": 5.378548895899054,
      "grad_norm": 1.1957101821899414,
      "learning_rate": 5.3647341404923134e-05,
      "loss": 0.2214,
      "step": 570
    },
    {
      "epoch": 5.520504731861199,
      "grad_norm": 1.1445940732955933,
      "learning_rate": 4.8542660916070736e-05,
      "loss": 0.2252,
      "step": 585
    },
    {
      "epoch": 5.662460567823344,
      "grad_norm": 1.2054235935211182,
      "learning_rate": 4.361426479534753e-05,
      "loss": 0.2275,
      "step": 600
    },
    {
      "epoch": 5.804416403785489,
      "grad_norm": 2.122396469116211,
      "learning_rate": 3.8879036916103704e-05,
      "loss": 0.2306,
      "step": 615
    },
    {
      "epoch": 5.946372239747634,
      "grad_norm": 1.7328449487686157,
      "learning_rate": 3.4353199389111065e-05,
      "loss": 0.2344,
      "step": 630
    },
    {
      "epoch": 6.085173501577287,
      "grad_norm": 0.821266770362854,
      "learning_rate": 3.005225698828338e-05,
      "loss": 0.1922,
      "step": 645
    },
    {
      "epoch": 6.2271293375394325,
      "grad_norm": 0.9995691180229187,
      "learning_rate": 2.599094403387481e-05,
      "loss": 0.1655,
      "step": 660
    },
    {
      "epoch": 6.369085173501578,
      "grad_norm": 1.0223170518875122,
      "learning_rate": 2.2183173915125656e-05,
      "loss": 0.1731,
      "step": 675
    },
    {
      "epoch": 6.511041009463723,
      "grad_norm": 1.3553900718688965,
      "learning_rate": 1.8641991425282345e-05,
      "loss": 0.1696,
      "step": 690
    },
    {
      "epoch": 6.652996845425868,
      "grad_norm": 1.330947995185852,
      "learning_rate": 1.53795280722846e-05,
      "loss": 0.1706,
      "step": 705
    },
    {
      "epoch": 6.794952681388013,
      "grad_norm": 1.1109322309494019,
      "learning_rate": 1.2406960518217326e-05,
      "loss": 0.1749,
      "step": 720
    },
    {
      "epoch": 6.936908517350158,
      "grad_norm": 1.1352533102035522,
      "learning_rate": 9.734472289907182e-06,
      "loss": 0.1683,
      "step": 735
    },
    {
      "epoch": 7.075709779179811,
      "grad_norm": 1.0221686363220215,
      "learning_rate": 7.3712188918370285e-06,
      "loss": 0.1664,
      "step": 750
    },
    {
      "epoch": 7.217665615141956,
      "grad_norm": 0.9193073511123657,
      "learning_rate": 5.325296440895622e-06,
      "loss": 0.1444,
      "step": 765
    },
    {
      "epoch": 7.3596214511041005,
      "grad_norm": 1.8977042436599731,
      "learning_rate": 3.6037139304146762e-06,
      "loss": 0.1528,
      "step": 780
    },
    {
      "epoch": 7.501577287066246,
      "grad_norm": 0.9380797147750854,
      "learning_rate": 2.212369218512078e-06,
      "loss": 0.1438,
      "step": 795
    },
    {
      "epoch": 7.643533123028391,
      "grad_norm": 1.0754660367965698,
      "learning_rate": 1.1560288230015203e-06,
      "loss": 0.1502,
      "step": 810
    },
    {
      "epoch": 7.785488958990536,
      "grad_norm": 0.9819433093070984,
      "learning_rate": 4.383115920874814e-07,
      "loss": 0.1452,
      "step": 825
    },
    {
      "epoch": 7.927444794952681,
      "grad_norm": 1.4535789489746094,
      "learning_rate": 6.16763067873949e-08,
      "loss": 0.1484,
      "step": 840
    }
  ],
  "logging_steps": 15,
  "max_steps": 848,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 15,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.111385862982861e+16,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}
|
|