{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 200,
"global_step": 698,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0014336917562724014,
"grad_norm": 5.102053642272949,
"learning_rate": 0.0,
"loss": 1.1358120441436768,
"step": 1
},
{
"epoch": 0.014336917562724014,
"grad_norm": 0.22924242913722992,
"learning_rate": 5.142857142857142e-06,
"loss": 0.5612610710991753,
"step": 10
},
{
"epoch": 0.02867383512544803,
"grad_norm": 0.12333694845438004,
"learning_rate": 1.0857142857142858e-05,
"loss": 0.18545818328857422,
"step": 20
},
{
"epoch": 0.043010752688172046,
"grad_norm": 0.14276181161403656,
"learning_rate": 1.6571428571428574e-05,
"loss": 0.16717807054519654,
"step": 30
},
{
"epoch": 0.05734767025089606,
"grad_norm": 0.10264057666063309,
"learning_rate": 1.9998203820754213e-05,
"loss": 0.17055926322937012,
"step": 40
},
{
"epoch": 0.07168458781362007,
"grad_norm": 0.104465052485466,
"learning_rate": 1.9978004213822736e-05,
"loss": 0.1625239610671997,
"step": 50
},
{
"epoch": 0.08602150537634409,
"grad_norm": 0.10240382701158524,
"learning_rate": 1.993540527265239e-05,
"loss": 0.1715969681739807,
"step": 60
},
{
"epoch": 0.1003584229390681,
"grad_norm": 0.08893006294965744,
"learning_rate": 1.9870502626379127e-05,
"loss": 0.16313509941101073,
"step": 70
},
{
"epoch": 0.11469534050179211,
"grad_norm": 0.12808184325695038,
"learning_rate": 1.9783441973084023e-05,
"loss": 0.16091221570968628,
"step": 80
},
{
"epoch": 0.12903225806451613,
"grad_norm": 0.08048602193593979,
"learning_rate": 1.9674418752719835e-05,
"loss": 0.15647590160369873,
"step": 90
},
{
"epoch": 0.14336917562724014,
"grad_norm": 0.08210249245166779,
"learning_rate": 1.9543677708373496e-05,
"loss": 0.16005860567092894,
"step": 100
},
{
"epoch": 0.15770609318996415,
"grad_norm": 0.0777139738202095,
"learning_rate": 1.9391512336849406e-05,
"loss": 0.15733616352081298,
"step": 110
},
{
"epoch": 0.17204301075268819,
"grad_norm": 0.07640817761421204,
"learning_rate": 1.9218264229806917e-05,
"loss": 0.15088759660720824,
"step": 120
},
{
"epoch": 0.1863799283154122,
"grad_norm": 0.08728871494531631,
"learning_rate": 1.9024322306931035e-05,
"loss": 0.1508031129837036,
"step": 130
},
{
"epoch": 0.2007168458781362,
"grad_norm": 0.0811760425567627,
"learning_rate": 1.8810121942857848e-05,
"loss": 0.15035929679870605,
"step": 140
},
{
"epoch": 0.21505376344086022,
"grad_norm": 0.08090047538280487,
"learning_rate": 1.8576143989814524e-05,
"loss": 0.15261363983154297,
"step": 150
},
{
"epoch": 0.22939068100358423,
"grad_norm": 0.09918594360351562,
"learning_rate": 1.8322913698168014e-05,
"loss": 0.15076379776000975,
"step": 160
},
{
"epoch": 0.24372759856630824,
"grad_norm": 0.08928713947534561,
"learning_rate": 1.8050999537305634e-05,
"loss": 0.15194735527038575,
"step": 170
},
{
"epoch": 0.25806451612903225,
"grad_norm": 0.07266855984926224,
"learning_rate": 1.776101191949449e-05,
"loss": 0.15317165851593018,
"step": 180
},
{
"epoch": 0.2724014336917563,
"grad_norm": 0.07347071915864944,
"learning_rate": 1.745360182958459e-05,
"loss": 0.15253105163574218,
"step": 190
},
{
"epoch": 0.2867383512544803,
"grad_norm": 0.0758705884218216,
"learning_rate": 1.7129459363631692e-05,
"loss": 0.14779815673828126,
"step": 200
},
{
"epoch": 0.2867383512544803,
"eval_loss": 0.2187168002128601,
"eval_runtime": 47.0917,
"eval_samples_per_second": 124.247,
"eval_steps_per_second": 0.255,
"step": 200
},
{
"epoch": 0.3010752688172043,
"grad_norm": 0.07489030808210373,
"learning_rate": 1.678931217972055e-05,
"loss": 0.14687340259552,
"step": 210
},
{
"epoch": 0.3154121863799283,
"grad_norm": 0.0788242295384407,
"learning_rate": 1.6433923864466235e-05,
"loss": 0.14286456108093262,
"step": 220
},
{
"epoch": 0.32974910394265233,
"grad_norm": 0.8215664029121399,
"learning_rate": 1.6064092218860553e-05,
"loss": 0.15229986906051635,
"step": 230
},
{
"epoch": 0.34408602150537637,
"grad_norm": 0.06975199282169342,
"learning_rate": 1.568064746731156e-05,
"loss": 0.14773318767547608,
"step": 240
},
{
"epoch": 0.35842293906810035,
"grad_norm": 0.07249259203672409,
"learning_rate": 1.5284450393896713e-05,
"loss": 0.15218548774719237,
"step": 250
},
{
"epoch": 0.3727598566308244,
"grad_norm": 0.07201112061738968,
"learning_rate": 1.4876390410013498e-05,
"loss": 0.14689412117004394,
"step": 260
},
{
"epoch": 0.3870967741935484,
"grad_norm": 0.06810774654150009,
"learning_rate": 1.4457383557765385e-05,
"loss": 0.14651920795440673,
"step": 270
},
{
"epoch": 0.4014336917562724,
"grad_norm": 0.06508651375770569,
"learning_rate": 1.402837045356531e-05,
"loss": 0.1467513084411621,
"step": 280
},
{
"epoch": 0.4157706093189964,
"grad_norm": 0.06673789769411087,
"learning_rate": 1.3590314176572989e-05,
"loss": 0.13457820415496827,
"step": 290
},
{
"epoch": 0.43010752688172044,
"grad_norm": 0.07056573778390884,
"learning_rate": 1.314419810670624e-05,
"loss": 0.14077857732772828,
"step": 300
},
{
"epoch": 0.4444444444444444,
"grad_norm": 0.06851952522993088,
"learning_rate": 1.2691023717079735e-05,
"loss": 0.138556170463562,
"step": 310
},
{
"epoch": 0.45878136200716846,
"grad_norm": 0.06733077019453049,
"learning_rate": 1.2231808325826862e-05,
"loss": 0.13921281099319457,
"step": 320
},
{
"epoch": 0.4731182795698925,
"grad_norm": 0.07425093650817871,
"learning_rate": 1.176758281235155e-05,
"loss": 0.13010122776031494,
"step": 330
},
{
"epoch": 0.4874551971326165,
"grad_norm": 0.06684776395559311,
"learning_rate": 1.129938930313678e-05,
"loss": 0.1314706802368164,
"step": 340
},
{
"epoch": 0.5017921146953405,
"grad_norm": 0.06619785726070404,
"learning_rate": 1.082827883230487e-05,
"loss": 0.13385097980499266,
"step": 350
},
{
"epoch": 0.5161290322580645,
"grad_norm": 0.07259789854288101,
"learning_rate": 1.0355308982181254e-05,
"loss": 0.13582653999328614,
"step": 360
},
{
"epoch": 0.5304659498207885,
"grad_norm": 0.06673564016819,
"learning_rate": 9.881541509158366e-06,
"loss": 0.1326061010360718,
"step": 370
},
{
"epoch": 0.5448028673835126,
"grad_norm": 0.0609956830739975,
"learning_rate": 9.408039960189317e-06,
"loss": 0.12904767990112304,
"step": 380
},
{
"epoch": 0.5591397849462365,
"grad_norm": 0.07004307955503464,
"learning_rate": 8.935867285261977e-06,
"loss": 0.14127167463302612,
"step": 390
},
{
"epoch": 0.5734767025089605,
"grad_norm": 0.06114516407251358,
"learning_rate": 8.466083451213145e-06,
"loss": 0.13098690509796143,
"step": 400
},
{
"epoch": 0.5734767025089605,
"eval_loss": 0.1937306523323059,
"eval_runtime": 47.434,
"eval_samples_per_second": 123.35,
"eval_steps_per_second": 0.253,
"step": 400
},
{
"epoch": 0.5878136200716846,
"grad_norm": 0.07561196386814117,
"learning_rate": 7.999743062239557e-06,
"loss": 0.13492929935455322,
"step": 410
},
{
"epoch": 0.6021505376344086,
"grad_norm": 0.06452950835227966,
"learning_rate": 7.5378929924472735e-06,
"loss": 0.13134829998016356,
"step": 420
},
{
"epoch": 0.6164874551971327,
"grad_norm": 0.05738452449440956,
"learning_rate": 7.081570035754189e-06,
"loss": 0.1305772066116333,
"step": 430
},
{
"epoch": 0.6308243727598566,
"grad_norm": 0.06140970438718796,
"learning_rate": 6.631798578421195e-06,
"loss": 0.12945042848587035,
"step": 440
},
{
"epoch": 0.6451612903225806,
"grad_norm": 0.0673835426568985,
"learning_rate": 6.189588299436997e-06,
"loss": 0.133364737033844,
"step": 450
},
{
"epoch": 0.6594982078853047,
"grad_norm": 0.06186283379793167,
"learning_rate": 5.755931903918835e-06,
"loss": 0.12029262781143188,
"step": 460
},
{
"epoch": 0.6738351254480287,
"grad_norm": 0.061157312244176865,
"learning_rate": 5.331802894617333e-06,
"loss": 0.12551844120025635,
"step": 470
},
{
"epoch": 0.6881720430107527,
"grad_norm": 0.06083202734589577,
"learning_rate": 4.918153386528271e-06,
"loss": 0.12006042003631592,
"step": 480
},
{
"epoch": 0.7025089605734767,
"grad_norm": 0.05786828324198723,
"learning_rate": 4.515911969516985e-06,
"loss": 0.12329277992248536,
"step": 490
},
{
"epoch": 0.7168458781362007,
"grad_norm": 0.07877270132303238,
"learning_rate": 4.125981623753801e-06,
"loss": 0.1233370542526245,
"step": 500
},
{
"epoch": 0.7311827956989247,
"grad_norm": 0.06246776878833771,
"learning_rate": 3.7492376926397966e-06,
"loss": 0.12439545392990112,
"step": 510
},
{
"epoch": 0.7455197132616488,
"grad_norm": 0.06481961905956268,
"learning_rate": 3.3865259177736663e-06,
"loss": 0.12364455461502075,
"step": 520
},
{
"epoch": 0.7598566308243727,
"grad_norm": 0.0777650848031044,
"learning_rate": 3.0386605403707347e-06,
"loss": 0.1195528745651245,
"step": 530
},
{
"epoch": 0.7741935483870968,
"grad_norm": 0.05968371778726578,
"learning_rate": 2.7064224733963197e-06,
"loss": 0.1176903247833252,
"step": 540
},
{
"epoch": 0.7885304659498208,
"grad_norm": 0.059006717056035995,
"learning_rate": 2.3905575485167098e-06,
"loss": 0.12136919498443603,
"step": 550
},
{
"epoch": 0.8028673835125448,
"grad_norm": 0.062085703015327454,
"learning_rate": 2.0917748418031415e-06,
"loss": 0.1185749888420105,
"step": 560
},
{
"epoch": 0.8172043010752689,
"grad_norm": 0.05684107542037964,
"learning_rate": 1.8107450819473505e-06,
"loss": 0.12052475214004517,
"step": 570
},
{
"epoch": 0.8315412186379928,
"grad_norm": 0.056005772203207016,
"learning_rate": 1.5480991445620541e-06,
"loss": 0.11635445356369019,
"step": 580
},
{
"epoch": 0.8458781362007168,
"grad_norm": 0.08036984503269196,
"learning_rate": 1.3044266359464542e-06,
"loss": 0.12693744897842407,
"step": 590
},
{
"epoch": 0.8602150537634409,
"grad_norm": 0.05893237143754959,
"learning_rate": 1.080274569496057e-06,
"loss": 0.11600825786590577,
"step": 600
},
{
"epoch": 0.8602150537634409,
"eval_loss": 0.17753292620182037,
"eval_runtime": 48.0542,
"eval_samples_per_second": 121.758,
"eval_steps_per_second": 0.25,
"step": 600
},
{
"epoch": 0.8745519713261649,
"grad_norm": 0.06269249320030212,
"learning_rate": 8.761461377280311e-07,
"loss": 0.11844503879547119,
"step": 610
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.06056367605924606,
"learning_rate": 6.924995826788516e-07,
"loss": 0.1225665807723999,
"step": 620
},
{
"epoch": 0.9032258064516129,
"grad_norm": 0.05936103314161301,
"learning_rate": 5.29747167209923e-07,
"loss": 0.11874287128448487,
"step": 630
},
{
"epoch": 0.9175627240143369,
"grad_norm": 0.06041805073618889,
"learning_rate": 3.88254249530583e-07,
"loss": 0.11610586643218994,
"step": 640
},
{
"epoch": 0.931899641577061,
"grad_norm": 0.0608358308672905,
"learning_rate": 2.6833846301596246e-07,
"loss": 0.11917197704315186,
"step": 650
},
{
"epoch": 0.946236559139785,
"grad_norm": 0.0607263408601284,
"learning_rate": 1.7026900316098217e-07,
"loss": 0.11865537166595459,
"step": 660
},
{
"epoch": 0.9605734767025089,
"grad_norm": 0.05322834476828575,
"learning_rate": 9.426602327113788e-08,
"loss": 0.11138832569122314,
"step": 670
},
{
"epoch": 0.974910394265233,
"grad_norm": 0.06341966986656189,
"learning_rate": 4.050014024668425e-08,
"loss": 0.11363672018051148,
"step": 680
},
{
"epoch": 0.989247311827957,
"grad_norm": 0.06364062428474426,
"learning_rate": 9.092051569674632e-09,
"loss": 0.12197227478027343,
"step": 690
},
{
"epoch": 1.0,
"eval_loss": 0.17584605515003204,
"eval_runtime": 49.4161,
"eval_samples_per_second": 118.403,
"eval_steps_per_second": 0.243,
"step": 698
}
],
"logging_steps": 10,
"max_steps": 698,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.2148830443916493e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}