{
"best_global_step": 2400,
"best_metric": 0.4560001492500305,
"best_model_checkpoint": "./phi4-mini-ifc-FULL-2xRTX3090-20250623-182201/checkpoint-2400",
"epoch": 2.9993898718730936,
"eval_steps": 100,
"global_step": 2457,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03050640634533252,
"grad_norm": 2.6699628829956055,
"learning_rate": 1.9512195121951222e-05,
"loss": 3.4284,
"step": 25
},
{
"epoch": 0.06101281269066504,
"grad_norm": 3.3811914920806885,
"learning_rate": 3.983739837398374e-05,
"loss": 2.0523,
"step": 50
},
{
"epoch": 0.09151921903599756,
"grad_norm": 1.4363490343093872,
"learning_rate": 6.016260162601627e-05,
"loss": 1.1186,
"step": 75
},
{
"epoch": 0.12202562538133008,
"grad_norm": 0.9445595741271973,
"learning_rate": 8.048780487804879e-05,
"loss": 0.7767,
"step": 100
},
{
"epoch": 0.12202562538133008,
"eval_loss": 0.8299265503883362,
"eval_runtime": 871.0818,
"eval_samples_per_second": 4.192,
"eval_steps_per_second": 0.699,
"step": 100
},
{
"epoch": 0.1525320317266626,
"grad_norm": 0.9062737822532654,
"learning_rate": 0.0001008130081300813,
"loss": 0.6824,
"step": 125
},
{
"epoch": 0.18303843807199513,
"grad_norm": 0.6746254563331604,
"learning_rate": 0.00012113821138211383,
"loss": 0.5972,
"step": 150
},
{
"epoch": 0.21354484441732763,
"grad_norm": 0.6552643179893494,
"learning_rate": 0.00014146341463414634,
"loss": 0.5609,
"step": 175
},
{
"epoch": 0.24405125076266015,
"grad_norm": 0.654053270816803,
"learning_rate": 0.00016178861788617888,
"loss": 0.5291,
"step": 200
},
{
"epoch": 0.24405125076266015,
"eval_loss": 0.6649972200393677,
"eval_runtime": 877.5464,
"eval_samples_per_second": 4.162,
"eval_steps_per_second": 0.694,
"step": 200
},
{
"epoch": 0.2745576571079927,
"grad_norm": 0.7661640048027039,
"learning_rate": 0.00018211382113821138,
"loss": 0.5005,
"step": 225
},
{
"epoch": 0.3050640634533252,
"grad_norm": 0.5404186248779297,
"learning_rate": 0.00019999909148078624,
"loss": 0.5128,
"step": 250
},
{
"epoch": 0.33557046979865773,
"grad_norm": 0.6695619225502014,
"learning_rate": 0.00019992086820059076,
"loss": 0.4854,
"step": 275
},
{
"epoch": 0.36607687614399026,
"grad_norm": 0.6187945008277893,
"learning_rate": 0.00019971657461388795,
"loss": 0.4872,
"step": 300
},
{
"epoch": 0.36607687614399026,
"eval_loss": 0.6011127233505249,
"eval_runtime": 894.271,
"eval_samples_per_second": 4.084,
"eval_steps_per_second": 0.681,
"step": 300
},
{
"epoch": 0.39658328248932273,
"grad_norm": 0.5339706540107727,
"learning_rate": 0.00019938646847819693,
"loss": 0.458,
"step": 325
},
{
"epoch": 0.42708968883465526,
"grad_norm": 0.5768976211547852,
"learning_rate": 0.00019893096628891503,
"loss": 0.4696,
"step": 350
},
{
"epoch": 0.4575960951799878,
"grad_norm": 0.4378525912761688,
"learning_rate": 0.00019835064275382507,
"loss": 0.4474,
"step": 375
},
{
"epoch": 0.4881025015253203,
"grad_norm": 0.637508749961853,
"learning_rate": 0.00019764623006798555,
"loss": 0.4469,
"step": 400
},
{
"epoch": 0.4881025015253203,
"eval_loss": 0.572228193283081,
"eval_runtime": 1153.4356,
"eval_samples_per_second": 3.166,
"eval_steps_per_second": 0.528,
"step": 400
},
{
"epoch": 0.5186089078706528,
"grad_norm": 0.4896605908870697,
"learning_rate": 0.00019681861698991922,
"loss": 0.4232,
"step": 425
},
{
"epoch": 0.5491153142159854,
"grad_norm": 0.5206599235534668,
"learning_rate": 0.0001958688477202651,
"loss": 0.4537,
"step": 450
},
{
"epoch": 0.5796217205613179,
"grad_norm": 0.3808891177177429,
"learning_rate": 0.00019479812058430883,
"loss": 0.4206,
"step": 475
},
{
"epoch": 0.6101281269066504,
"grad_norm": 0.4902428090572357,
"learning_rate": 0.00019360778652005416,
"loss": 0.4821,
"step": 500
},
{
"epoch": 0.6101281269066504,
"eval_loss": 0.547258734703064,
"eval_runtime": 1090.047,
"eval_samples_per_second": 3.35,
"eval_steps_per_second": 0.559,
"step": 500
},
{
"epoch": 0.6406345332519829,
"grad_norm": 0.48209357261657715,
"learning_rate": 0.00019229934737374232,
"loss": 0.4004,
"step": 525
},
{
"epoch": 0.6711409395973155,
"grad_norm": 0.3675863742828369,
"learning_rate": 0.00019087445400497042,
"loss": 0.405,
"step": 550
},
{
"epoch": 0.701647345942648,
"grad_norm": 0.5796023011207581,
"learning_rate": 0.00018933490420379947,
"loss": 0.4028,
"step": 575
},
{
"epoch": 0.7321537522879805,
"grad_norm": 0.44113054871559143,
"learning_rate": 0.00018768264042248013,
"loss": 0.3989,
"step": 600
},
{
"epoch": 0.7321537522879805,
"eval_loss": 0.5261130332946777,
"eval_runtime": 1094.835,
"eval_samples_per_second": 3.336,
"eval_steps_per_second": 0.556,
"step": 600
},
{
"epoch": 0.762660158633313,
"grad_norm": 0.4297366440296173,
"learning_rate": 0.0001859197473246576,
"loss": 0.3941,
"step": 625
},
{
"epoch": 0.7931665649786455,
"grad_norm": 0.48033469915390015,
"learning_rate": 0.00018404844915514867,
"loss": 0.406,
"step": 650
},
{
"epoch": 0.823672971323978,
"grad_norm": 0.4652460217475891,
"learning_rate": 0.00018207110693360868,
"loss": 0.3799,
"step": 675
},
{
"epoch": 0.8541793776693105,
"grad_norm": 0.3977755606174469,
"learning_rate": 0.00017999021547562943,
"loss": 0.3809,
"step": 700
},
{
"epoch": 0.8541793776693105,
"eval_loss": 0.5122238993644714,
"eval_runtime": 1006.0944,
"eval_samples_per_second": 3.63,
"eval_steps_per_second": 0.605,
"step": 700
},
{
"epoch": 0.884685784014643,
"grad_norm": 0.4193669855594635,
"learning_rate": 0.00017780840024502693,
"loss": 0.3872,
"step": 725
},
{
"epoch": 0.9151921903599756,
"grad_norm": 0.5903205275535583,
"learning_rate": 0.00017552841404128947,
"loss": 0.3786,
"step": 750
},
{
"epoch": 0.9456985967053081,
"grad_norm": 0.4734324514865875,
"learning_rate": 0.0001731531335263669,
"loss": 0.3463,
"step": 775
},
{
"epoch": 0.9762050030506406,
"grad_norm": 0.590374231338501,
"learning_rate": 0.00017068555559518163,
"loss": 0.373,
"step": 800
},
{
"epoch": 0.9762050030506406,
"eval_loss": 0.5041590332984924,
"eval_runtime": 1029.9504,
"eval_samples_per_second": 3.546,
"eval_steps_per_second": 0.591,
"step": 800
},
{
"epoch": 1.00732153752288,
"grad_norm": 0.35001957416534424,
"learning_rate": 0.0001681287935944421,
"loss": 0.4096,
"step": 825
},
{
"epoch": 1.0378279438682123,
"grad_norm": 0.715282142162323,
"learning_rate": 0.00016548607339452853,
"loss": 0.362,
"step": 850
},
{
"epoch": 1.068334350213545,
"grad_norm": 0.39849480986595154,
"learning_rate": 0.0001627607293194077,
"loss": 0.3521,
"step": 875
},
{
"epoch": 1.0988407565588774,
"grad_norm": 0.46415719389915466,
"learning_rate": 0.00015995619993971122,
"loss": 0.3523,
"step": 900
},
{
"epoch": 1.0988407565588774,
"eval_loss": 0.4963458776473999,
"eval_runtime": 1026.062,
"eval_samples_per_second": 3.559,
"eval_steps_per_second": 0.594,
"step": 900
},
{
"epoch": 1.1293471629042098,
"grad_norm": 0.4831911623477936,
"learning_rate": 0.00015707602373428626,
"loss": 0.3414,
"step": 925
},
{
"epoch": 1.1598535692495424,
"grad_norm": 0.4268845021724701,
"learning_rate": 0.0001541238346256912,
"loss": 0.3456,
"step": 950
},
{
"epoch": 1.1903599755948748,
"grad_norm": 0.4288281202316284,
"learning_rate": 0.00015110335739527045,
"loss": 0.3139,
"step": 975
},
{
"epoch": 1.2208663819402075,
"grad_norm": 0.4492523968219757,
"learning_rate": 0.00014801840298359217,
"loss": 0.3559,
"step": 1000
},
{
"epoch": 1.2208663819402075,
"eval_loss": 0.4898269474506378,
"eval_runtime": 1025.9893,
"eval_samples_per_second": 3.559,
"eval_steps_per_second": 0.594,
"step": 1000
},
{
"epoch": 1.2513727882855399,
"grad_norm": 0.4456675052642822,
"learning_rate": 0.00014487286368217915,
"loss": 0.3524,
"step": 1025
},
{
"epoch": 1.2818791946308725,
"grad_norm": 0.4612857401371002,
"learning_rate": 0.00014167070822259867,
"loss": 0.3376,
"step": 1050
},
{
"epoch": 1.312385600976205,
"grad_norm": 0.3742743134498596,
"learning_rate": 0.00013841597676910816,
"loss": 0.3461,
"step": 1075
},
{
"epoch": 1.3428920073215376,
"grad_norm": 0.4589081406593323,
"learning_rate": 0.0001351127758211739,
"loss": 0.3294,
"step": 1100
},
{
"epoch": 1.3428920073215376,
"eval_loss": 0.4819416105747223,
"eval_runtime": 1027.6928,
"eval_samples_per_second": 3.554,
"eval_steps_per_second": 0.593,
"step": 1100
},
{
"epoch": 1.37339841366687,
"grad_norm": 0.4929927587509155,
"learning_rate": 0.0001317652730322948,
"loss": 0.3518,
"step": 1125
},
{
"epoch": 1.4039048200122026,
"grad_norm": 0.4028312861919403,
"learning_rate": 0.00012837769195166756,
"loss": 0.3156,
"step": 1150
},
{
"epoch": 1.434411226357535,
"grad_norm": 0.47802016139030457,
"learning_rate": 0.00012495430669532862,
"loss": 0.3314,
"step": 1175
},
{
"epoch": 1.4649176327028677,
"grad_norm": 0.4614291191101074,
"learning_rate": 0.00012149943655349567,
"loss": 0.3499,
"step": 1200
},
{
"epoch": 1.4649176327028677,
"eval_loss": 0.4747631251811981,
"eval_runtime": 1024.8622,
"eval_samples_per_second": 3.563,
"eval_steps_per_second": 0.594,
"step": 1200
},
{
"epoch": 1.4954240390482,
"grad_norm": 0.3750015199184418,
"learning_rate": 0.00011801744054091276,
"loss": 0.3173,
"step": 1225
},
{
"epoch": 1.5259304453935325,
"grad_norm": 0.4086696207523346,
"learning_rate": 0.00011451271189707497,
"loss": 0.3086,
"step": 1250
},
{
"epoch": 1.5564368517388651,
"grad_norm": 0.5094380974769592,
"learning_rate": 0.00011098967254327173,
"loss": 0.3135,
"step": 1275
},
{
"epoch": 1.5869432580841978,
"grad_norm": 0.6511872410774231,
"learning_rate": 0.00010745276750344218,
"loss": 0.3239,
"step": 1300
},
{
"epoch": 1.5869432580841978,
"eval_loss": 0.4694528877735138,
"eval_runtime": 1025.1911,
"eval_samples_per_second": 3.562,
"eval_steps_per_second": 0.594,
"step": 1300
},
{
"epoch": 1.6174496644295302,
"grad_norm": 0.40622764825820923,
"learning_rate": 0.00010390645929588196,
"loss": 0.3124,
"step": 1325
},
{
"epoch": 1.6479560707748626,
"grad_norm": 0.42357689142227173,
"learning_rate": 0.0001003552223028772,
"loss": 0.2789,
"step": 1350
},
{
"epoch": 1.6784624771201953,
"grad_norm": 0.4693623483181,
"learning_rate": 9.680353712536995e-05,
"loss": 0.2955,
"step": 1375
},
{
"epoch": 1.7089688834655279,
"grad_norm": 0.39055606722831726,
"learning_rate": 9.325588492977734e-05,
"loss": 0.2731,
"step": 1400
},
{
"epoch": 1.7089688834655279,
"eval_loss": 0.47373446822166443,
"eval_runtime": 964.0616,
"eval_samples_per_second": 3.788,
"eval_steps_per_second": 0.632,
"step": 1400
},
{
"epoch": 1.7394752898108603,
"grad_norm": 0.35268428921699524,
"learning_rate": 8.971674179409714e-05,
"loss": 0.2899,
"step": 1425
},
{
"epoch": 1.7699816961561927,
"grad_norm": 0.6586225628852844,
"learning_rate": 8.619057306043388e-05,
"loss": 0.2802,
"step": 1450
},
{
"epoch": 1.8004881025015254,
"grad_norm": 0.43787944316864014,
"learning_rate": 8.268182770106981e-05,
"loss": 0.2951,
"step": 1475
},
{
"epoch": 1.830994508846858,
"grad_norm": 0.4165858328342438,
"learning_rate": 7.91949327051903e-05,
"loss": 0.269,
"step": 1500
},
{
"epoch": 1.830994508846858,
"eval_loss": 0.47291475534439087,
"eval_runtime": 993.1212,
"eval_samples_per_second": 3.677,
"eval_steps_per_second": 0.613,
"step": 1500
},
{
"epoch": 1.8615009151921904,
"grad_norm": 0.38626885414123535,
"learning_rate": 7.573428749334481e-05,
"loss": 0.2769,
"step": 1525
},
{
"epoch": 1.8920073215375228,
"grad_norm": 0.38758519291877747,
"learning_rate": 7.230425836669183e-05,
"loss": 0.2807,
"step": 1550
},
{
"epoch": 1.9225137278828552,
"grad_norm": 0.387899786233902,
"learning_rate": 6.890917299802986e-05,
"loss": 0.265,
"step": 1575
},
{
"epoch": 1.9530201342281879,
"grad_norm": 0.37289875745773315,
"learning_rate": 6.555331497156672e-05,
"loss": 0.2492,
"step": 1600
},
{
"epoch": 1.9530201342281879,
"eval_loss": 0.4726342558860779,
"eval_runtime": 1054.2196,
"eval_samples_per_second": 3.464,
"eval_steps_per_second": 0.578,
"step": 1600
},
{
"epoch": 1.9835265405735205,
"grad_norm": 0.6593974232673645,
"learning_rate": 6.224091837831467e-05,
"loss": 0.2924,
"step": 1625
},
{
"epoch": 2.01464307504576,
"grad_norm": 0.4300393760204315,
"learning_rate": 5.897616247393181e-05,
"loss": 0.2949,
"step": 1650
},
{
"epoch": 2.045149481391092,
"grad_norm": 0.5719879865646362,
"learning_rate": 5.5763166405748855e-05,
"loss": 0.2876,
"step": 1675
},
{
"epoch": 2.0756558877364246,
"grad_norm": 0.37001481652259827,
"learning_rate": 5.260598401563508e-05,
"loss": 0.286,
"step": 1700
},
{
"epoch": 2.0756558877364246,
"eval_loss": 0.4663603603839874,
"eval_runtime": 1040.9239,
"eval_samples_per_second": 3.508,
"eval_steps_per_second": 0.585,
"step": 1700
},
{
"epoch": 2.106162294081757,
"grad_norm": 0.291629433631897,
"learning_rate": 4.950859872525999e-05,
"loss": 0.2641,
"step": 1725
},
{
"epoch": 2.13666870042709,
"grad_norm": 0.5670744776725769,
"learning_rate": 4.647491851020414e-05,
"loss": 0.287,
"step": 1750
},
{
"epoch": 2.1671751067724223,
"grad_norm": 0.43546485900878906,
"learning_rate": 4.350877096926107e-05,
"loss": 0.3091,
"step": 1775
},
{
"epoch": 2.1976815131177547,
"grad_norm": 0.35784369707107544,
"learning_rate": 4.061389849514965e-05,
"loss": 0.2824,
"step": 1800
},
{
"epoch": 2.1976815131177547,
"eval_loss": 0.4643763601779938,
"eval_runtime": 1066.3493,
"eval_samples_per_second": 3.425,
"eval_steps_per_second": 0.571,
"step": 1800
},
{
"epoch": 2.228187919463087,
"grad_norm": 0.46165725588798523,
"learning_rate": 3.7793953552732294e-05,
"loss": 0.2716,
"step": 1825
},
{
"epoch": 2.2586943258084196,
"grad_norm": 0.4466606378555298,
"learning_rate": 3.505249407069414e-05,
"loss": 0.2543,
"step": 1850
},
{
"epoch": 2.2892007321537524,
"grad_norm": 0.4068796932697296,
"learning_rate": 3.239297895249955e-05,
"loss": 0.2624,
"step": 1875
},
{
"epoch": 2.319707138499085,
"grad_norm": 0.4468878209590912,
"learning_rate": 2.9818763712288354e-05,
"loss": 0.2812,
"step": 1900
},
{
"epoch": 2.319707138499085,
"eval_loss": 0.46269848942756653,
"eval_runtime": 1022.7507,
"eval_samples_per_second": 3.571,
"eval_steps_per_second": 0.595,
"step": 1900
},
{
"epoch": 2.3502135448444172,
"grad_norm": 0.5847700834274292,
"learning_rate": 2.733309624121877e-05,
"loss": 0.27,
"step": 1925
},
{
"epoch": 2.3807199511897497,
"grad_norm": 0.45481202006340027,
"learning_rate": 2.4939112709598324e-05,
"loss": 0.2931,
"step": 1950
},
{
"epoch": 2.4112263575350825,
"grad_norm": 0.5133854150772095,
"learning_rate": 2.2639833609973182e-05,
"loss": 0.2639,
"step": 1975
},
{
"epoch": 2.441732763880415,
"grad_norm": 0.4334775507450104,
"learning_rate": 2.0438159946168167e-05,
"loss": 0.2716,
"step": 2000
},
{
"epoch": 2.441732763880415,
"eval_loss": 0.45944076776504517,
"eval_runtime": 967.2644,
"eval_samples_per_second": 3.776,
"eval_steps_per_second": 0.63,
"step": 2000
},
{
"epoch": 2.4722391702257474,
"grad_norm": 0.6098644733428955,
"learning_rate": 1.833686957308619e-05,
"loss": 0.2661,
"step": 2025
},
{
"epoch": 2.5027455765710798,
"grad_norm": 0.6568627953529358,
"learning_rate": 1.633861369188431e-05,
"loss": 0.277,
"step": 2050
},
{
"epoch": 2.533251982916412,
"grad_norm": 0.9738965034484863,
"learning_rate": 1.4445913504949603e-05,
"loss": 0.2594,
"step": 2075
},
{
"epoch": 2.563758389261745,
"grad_norm": 0.5284731388092041,
"learning_rate": 1.2661157034894267e-05,
"loss": 0.2898,
"step": 2100
},
{
"epoch": 2.563758389261745,
"eval_loss": 0.45677462220191956,
"eval_runtime": 943.8598,
"eval_samples_per_second": 3.869,
"eval_steps_per_second": 0.645,
"step": 2100
},
{
"epoch": 2.5942647956070775,
"grad_norm": 0.4135531187057495,
"learning_rate": 1.098659611158399e-05,
"loss": 0.2904,
"step": 2125
},
{
"epoch": 2.62477120195241,
"grad_norm": 0.5006974935531616,
"learning_rate": 9.424343531000968e-06,
"loss": 0.2595,
"step": 2150
},
{
"epoch": 2.6552776082977427,
"grad_norm": 0.36891356110572815,
"learning_rate": 7.9763703895259e-06,
"loss": 0.2404,
"step": 2175
},
{
"epoch": 2.685784014643075,
"grad_norm": 0.5526717305183411,
"learning_rate": 6.644503597003126e-06,
"loss": 0.2766,
"step": 2200
},
{
"epoch": 2.685784014643075,
"eval_loss": 0.4571227431297302,
"eval_runtime": 964.8941,
"eval_samples_per_second": 3.785,
"eval_steps_per_second": 0.631,
"step": 2200
},
{
"epoch": 2.7162904209884076,
"grad_norm": 0.37439122796058655,
"learning_rate": 5.430423571725396e-06,
"loss": 0.2767,
"step": 2225
},
{
"epoch": 2.74679682733374,
"grad_norm": 0.5347726941108704,
"learning_rate": 4.3356621202479855e-06,
"loss": 0.2782,
"step": 2250
},
{
"epoch": 2.7773032336790724,
"grad_norm": 0.5072506070137024,
"learning_rate": 3.3616005047058197e-06,
"loss": 0.2525,
"step": 2275
},
{
"epoch": 2.8078096400244053,
"grad_norm": 0.4093267023563385,
"learning_rate": 2.5094677000732205e-06,
"loss": 0.2584,
"step": 2300
},
{
"epoch": 2.8078096400244053,
"eval_loss": 0.4563479721546173,
"eval_runtime": 988.2982,
"eval_samples_per_second": 3.695,
"eval_steps_per_second": 0.616,
"step": 2300
},
{
"epoch": 2.8383160463697377,
"grad_norm": 0.3554539084434509,
"learning_rate": 1.7803388435642666e-06,
"loss": 0.2956,
"step": 2325
},
{
"epoch": 2.86882245271507,
"grad_norm": 0.3955806791782379,
"learning_rate": 1.1751338781305854e-06,
"loss": 0.2595,
"step": 2350
},
{
"epoch": 2.899328859060403,
"grad_norm": 0.47958260774612427,
"learning_rate": 6.946163917680327e-07,
"loss": 0.2693,
"step": 2375
},
{
"epoch": 2.9298352654057354,
"grad_norm": 0.395571768283844,
"learning_rate": 3.393926540965264e-07,
"loss": 0.2578,
"step": 2400
},
{
"epoch": 2.9298352654057354,
"eval_loss": 0.4560001492500305,
"eval_runtime": 984.0097,
"eval_samples_per_second": 3.711,
"eval_steps_per_second": 0.619,
"step": 2400
},
{
"epoch": 2.9603416717510678,
"grad_norm": 0.4586999714374542,
"learning_rate": 1.0991085142886271e-07,
"loss": 0.2722,
"step": 2425
},
{
"epoch": 2.9908480780964,
"grad_norm": 0.45692864060401917,
"learning_rate": 6.4605212932611344e-09,
"loss": 0.2744,
"step": 2450
}
],
"logging_steps": 25,
"max_steps": 2457,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.869883433513124e+17,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}