EmoViT-Qwen2.5-Reproduce / trainer_state.json
wudq's picture
Add files using upload-large-folder tool
665a12f verified
{
"best_global_step": 345,
"best_metric": 0.12013684,
"best_model_checkpoint": "/mnt/bn/wdq-base1/data/VLMs/vsa_rl/checkpoint/emovit/v2-20250701-195511/checkpoint-345",
"epoch": 1.0,
"eval_steps": 100,
"global_step": 345,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002902757619738752,
"grad_norm": 255.519775390625,
"learning_rate": 5.555555555555555e-08,
"loss": 4.085740089416504,
"memory(GiB)": 43.95,
"step": 1,
"token_acc": 0.5151515151515151,
"train_speed(iter/s)": 0.011687
},
{
"epoch": 0.01451378809869376,
"grad_norm": 244.64340209960938,
"learning_rate": 2.7777777777777776e-07,
"loss": 3.8799986839294434,
"memory(GiB)": 49.87,
"step": 5,
"token_acc": 0.5189393939393939,
"train_speed(iter/s)": 0.029099
},
{
"epoch": 0.02902757619738752,
"grad_norm": 229.68177795410156,
"learning_rate": 5.555555555555555e-07,
"loss": 3.666259765625,
"memory(GiB)": 49.87,
"step": 10,
"token_acc": 0.5258358662613982,
"train_speed(iter/s)": 0.036058
},
{
"epoch": 0.04354136429608128,
"grad_norm": 148.16238403320312,
"learning_rate": 8.333333333333333e-07,
"loss": 2.4450706481933593,
"memory(GiB)": 49.87,
"step": 15,
"token_acc": 0.5792682926829268,
"train_speed(iter/s)": 0.039309
},
{
"epoch": 0.05805515239477504,
"grad_norm": 34.427486419677734,
"learning_rate": 9.99907702300141e-07,
"loss": 0.839083480834961,
"memory(GiB)": 49.87,
"step": 20,
"token_acc": 0.7400611620795107,
"train_speed(iter/s)": 0.041029
},
{
"epoch": 0.07256894049346879,
"grad_norm": 19.932300567626953,
"learning_rate": 9.988697444723761e-07,
"loss": 0.378426194190979,
"memory(GiB)": 49.87,
"step": 25,
"token_acc": 0.9141104294478528,
"train_speed(iter/s)": 0.041967
},
{
"epoch": 0.08708272859216255,
"grad_norm": 24.542755126953125,
"learning_rate": 9.966808593197956e-07,
"loss": 0.24850196838378907,
"memory(GiB)": 49.87,
"step": 30,
"token_acc": 0.9134328358208955,
"train_speed(iter/s)": 0.042819
},
{
"epoch": 0.10159651669085631,
"grad_norm": 70.33385467529297,
"learning_rate": 9.933460967532452e-07,
"loss": 0.22510967254638672,
"memory(GiB)": 49.87,
"step": 35,
"token_acc": 0.9174311926605505,
"train_speed(iter/s)": 0.043434
},
{
"epoch": 0.11611030478955008,
"grad_norm": 20.665966033935547,
"learning_rate": 9.888731503027535e-07,
"loss": 0.21692299842834473,
"memory(GiB)": 49.87,
"step": 40,
"token_acc": 0.9357798165137615,
"train_speed(iter/s)": 0.044038
},
{
"epoch": 0.13062409288824384,
"grad_norm": 17.56533432006836,
"learning_rate": 9.83272339368022e-07,
"loss": 0.2063352346420288,
"memory(GiB)": 49.87,
"step": 45,
"token_acc": 0.9425981873111783,
"train_speed(iter/s)": 0.044317
},
{
"epoch": 0.14513788098693758,
"grad_norm": 11.449491500854492,
"learning_rate": 9.765565854108502e-07,
"loss": 0.18542113304138183,
"memory(GiB)": 49.87,
"step": 50,
"token_acc": 0.9415384615384615,
"train_speed(iter/s)": 0.044609
},
{
"epoch": 0.15965166908563136,
"grad_norm": 13.59149169921875,
"learning_rate": 9.687413821444199e-07,
"loss": 0.16923766136169432,
"memory(GiB)": 49.87,
"step": 55,
"token_acc": 0.9447852760736196,
"train_speed(iter/s)": 0.044808
},
{
"epoch": 0.1741654571843251,
"grad_norm": 12.386320114135742,
"learning_rate": 9.598447597882179e-07,
"loss": 0.1599480152130127,
"memory(GiB)": 49.87,
"step": 60,
"token_acc": 0.9341692789968652,
"train_speed(iter/s)": 0.045084
},
{
"epoch": 0.18867924528301888,
"grad_norm": 12.34672737121582,
"learning_rate": 9.498872434710622e-07,
"loss": 0.16525213718414306,
"memory(GiB)": 49.87,
"step": 65,
"token_acc": 0.9476923076923077,
"train_speed(iter/s)": 0.045312
},
{
"epoch": 0.20319303338171263,
"grad_norm": 9.779081344604492,
"learning_rate": 9.388918058781945e-07,
"loss": 0.14802794456481932,
"memory(GiB)": 49.87,
"step": 70,
"token_acc": 0.9444444444444444,
"train_speed(iter/s)": 0.045487
},
{
"epoch": 0.21770682148040638,
"grad_norm": 8.754064559936523,
"learning_rate": 9.268838142516943e-07,
"loss": 0.15607104301452637,
"memory(GiB)": 49.87,
"step": 75,
"token_acc": 0.9335347432024169,
"train_speed(iter/s)": 0.045649
},
{
"epoch": 0.23222060957910015,
"grad_norm": 11.27889347076416,
"learning_rate": 9.138909718664787e-07,
"loss": 0.13595396280288696,
"memory(GiB)": 49.87,
"step": 80,
"token_acc": 0.9320987654320988,
"train_speed(iter/s)": 0.045813
},
{
"epoch": 0.2467343976777939,
"grad_norm": 9.61460018157959,
"learning_rate": 8.999432541169144e-07,
"loss": 0.1399265170097351,
"memory(GiB)": 49.87,
"step": 85,
"token_acc": 0.9604863221884499,
"train_speed(iter/s)": 0.045971
},
{
"epoch": 0.2612481857764877,
"grad_norm": 10.189661979675293,
"learning_rate": 8.850728393614901e-07,
"loss": 0.14257581233978273,
"memory(GiB)": 49.87,
"step": 90,
"token_acc": 0.9613095238095238,
"train_speed(iter/s)": 0.0461
},
{
"epoch": 0.2757619738751814,
"grad_norm": 15.340847969055176,
"learning_rate": 8.693140346850975e-07,
"loss": 0.13374924659729004,
"memory(GiB)": 49.87,
"step": 95,
"token_acc": 0.9664634146341463,
"train_speed(iter/s)": 0.04616
},
{
"epoch": 0.29027576197387517,
"grad_norm": 9.699533462524414,
"learning_rate": 8.527031967501906e-07,
"loss": 0.12369405031204224,
"memory(GiB)": 49.87,
"step": 100,
"token_acc": 0.9497041420118343,
"train_speed(iter/s)": 0.046232
},
{
"epoch": 0.29027576197387517,
"eval_loss": 0.14079514145851135,
"eval_runtime": 29.9586,
"eval_samples_per_second": 14.854,
"eval_steps_per_second": 1.869,
"eval_token_acc": 0.9537385691231846,
"step": 100
},
{
"epoch": 0.3047895500725689,
"grad_norm": 9.751545906066895,
"learning_rate": 8.352786479194287e-07,
"loss": 0.12529947757720947,
"memory(GiB)": 49.87,
"step": 105,
"token_acc": 0.9373134328358209,
"train_speed(iter/s)": 0.041661
},
{
"epoch": 0.3193033381712627,
"grad_norm": 17.604825973510742,
"learning_rate": 8.170805878433099e-07,
"loss": 0.12675491571426392,
"memory(GiB)": 49.87,
"step": 110,
"token_acc": 0.9649122807017544,
"train_speed(iter/s)": 0.041885
},
{
"epoch": 0.33381712626995647,
"grad_norm": 9.811626434326172,
"learning_rate": 7.981510007167717e-07,
"loss": 0.13588259220123292,
"memory(GiB)": 49.87,
"step": 115,
"token_acc": 0.9520958083832335,
"train_speed(iter/s)": 0.042137
},
{
"epoch": 0.3483309143686502,
"grad_norm": 12.495682716369629,
"learning_rate": 7.785335584187219e-07,
"loss": 0.11566004753112794,
"memory(GiB)": 49.87,
"step": 120,
"token_acc": 0.9491017964071856,
"train_speed(iter/s)": 0.042342
},
{
"epoch": 0.36284470246734396,
"grad_norm": 11.247044563293457,
"learning_rate": 7.582735197579656e-07,
"loss": 0.13109700679779052,
"memory(GiB)": 49.87,
"step": 125,
"token_acc": 0.9548192771084337,
"train_speed(iter/s)": 0.042526
},
{
"epoch": 0.37735849056603776,
"grad_norm": 10.986481666564941,
"learning_rate": 7.374176260579745e-07,
"loss": 0.13217699527740479,
"memory(GiB)": 49.87,
"step": 130,
"token_acc": 0.9473684210526315,
"train_speed(iter/s)": 0.042698
},
{
"epoch": 0.3918722786647315,
"grad_norm": 8.799654006958008,
"learning_rate": 7.160139933213898e-07,
"loss": 0.1257235050201416,
"memory(GiB)": 49.87,
"step": 135,
"token_acc": 0.941358024691358,
"train_speed(iter/s)": 0.042868
},
{
"epoch": 0.40638606676342526,
"grad_norm": 11.893990516662598,
"learning_rate": 6.941120012230463e-07,
"loss": 0.12591309547424318,
"memory(GiB)": 49.87,
"step": 140,
"token_acc": 0.9570552147239264,
"train_speed(iter/s)": 0.043009
},
{
"epoch": 0.420899854862119,
"grad_norm": 30.306238174438477,
"learning_rate": 6.717621791876146e-07,
"loss": 0.1210709571838379,
"memory(GiB)": 49.87,
"step": 145,
"token_acc": 0.9305135951661632,
"train_speed(iter/s)": 0.043167
},
{
"epoch": 0.43541364296081275,
"grad_norm": 11.80178165435791,
"learning_rate": 6.490160898146918e-07,
"loss": 0.12539260387420653,
"memory(GiB)": 49.87,
"step": 150,
"token_acc": 0.9698795180722891,
"train_speed(iter/s)": 0.043309
},
{
"epoch": 0.44992743105950656,
"grad_norm": 12.080060005187988,
"learning_rate": 6.259262099202849e-07,
"loss": 0.1291128873825073,
"memory(GiB)": 49.87,
"step": 155,
"token_acc": 0.950920245398773,
"train_speed(iter/s)": 0.043468
},
{
"epoch": 0.4644412191582003,
"grad_norm": 6.417619705200195,
"learning_rate": 6.025458094691323e-07,
"loss": 0.11431529521942138,
"memory(GiB)": 49.87,
"step": 160,
"token_acc": 0.9542682926829268,
"train_speed(iter/s)": 0.043601
},
{
"epoch": 0.47895500725689405,
"grad_norm": 7.19518518447876,
"learning_rate": 5.78928828677177e-07,
"loss": 0.13335959911346434,
"memory(GiB)": 49.87,
"step": 165,
"token_acc": 0.9634146341463414,
"train_speed(iter/s)": 0.043742
},
{
"epoch": 0.4934687953555878,
"grad_norm": 6.8175740242004395,
"learning_rate": 5.551297535677235e-07,
"loss": 0.1238396167755127,
"memory(GiB)": 49.87,
"step": 170,
"token_acc": 0.9597523219814241,
"train_speed(iter/s)": 0.043871
},
{
"epoch": 0.5079825834542816,
"grad_norm": 11.478657722473145,
"learning_rate": 5.312034902683779e-07,
"loss": 0.13186312913894654,
"memory(GiB)": 49.87,
"step": 175,
"token_acc": 0.9457831325301205,
"train_speed(iter/s)": 0.043978
},
{
"epoch": 0.5224963715529753,
"grad_norm": 22.4121150970459,
"learning_rate": 5.072052383387786e-07,
"loss": 0.1387540578842163,
"memory(GiB)": 49.87,
"step": 180,
"token_acc": 0.9520958083832335,
"train_speed(iter/s)": 0.044081
},
{
"epoch": 0.5370101596516691,
"grad_norm": 9.121978759765625,
"learning_rate": 4.831903634213598e-07,
"loss": 0.11786762475967408,
"memory(GiB)": 49.87,
"step": 185,
"token_acc": 0.9663608562691132,
"train_speed(iter/s)": 0.044167
},
{
"epoch": 0.5515239477503628,
"grad_norm": 8.12821102142334,
"learning_rate": 4.592142695089488e-07,
"loss": 0.12599266767501832,
"memory(GiB)": 49.87,
"step": 190,
"token_acc": 0.9706744868035191,
"train_speed(iter/s)": 0.044264
},
{
"epoch": 0.5660377358490566,
"grad_norm": 7.080636978149414,
"learning_rate": 4.353322711238869e-07,
"loss": 0.11752383708953858,
"memory(GiB)": 49.87,
"step": 195,
"token_acc": 0.9357798165137615,
"train_speed(iter/s)": 0.044354
},
{
"epoch": 0.5805515239477503,
"grad_norm": 10.683778762817383,
"learning_rate": 4.115994657035658e-07,
"loss": 0.12174248695373535,
"memory(GiB)": 49.87,
"step": 200,
"token_acc": 0.9601226993865031,
"train_speed(iter/s)": 0.044436
},
{
"epoch": 0.5805515239477503,
"eval_loss": 0.12456289678812027,
"eval_runtime": 30.3669,
"eval_samples_per_second": 14.654,
"eval_steps_per_second": 1.844,
"eval_token_acc": 0.9569661108122647,
"step": 200
},
{
"epoch": 0.5950653120464441,
"grad_norm": 7.245314598083496,
"learning_rate": 3.8807060648679257e-07,
"loss": 0.12915533781051636,
"memory(GiB)": 49.87,
"step": 205,
"token_acc": 0.9492537313432836,
"train_speed(iter/s)": 0.042189
},
{
"epoch": 0.6095791001451378,
"grad_norm": 10.728217124938965,
"learning_rate": 3.64799976194246e-07,
"loss": 0.10868642330169678,
"memory(GiB)": 49.87,
"step": 210,
"token_acc": 0.9563953488372093,
"train_speed(iter/s)": 0.042301
},
{
"epoch": 0.6240928882438317,
"grad_norm": 8.868558883666992,
"learning_rate": 3.4184126179445096e-07,
"loss": 0.120005202293396,
"memory(GiB)": 49.87,
"step": 215,
"token_acc": 0.9554896142433235,
"train_speed(iter/s)": 0.042401
},
{
"epoch": 0.6386066763425254,
"grad_norm": 7.093891620635986,
"learning_rate": 3.192474306441936e-07,
"loss": 0.12071295976638793,
"memory(GiB)": 49.87,
"step": 220,
"token_acc": 0.9702380952380952,
"train_speed(iter/s)": 0.042514
},
{
"epoch": 0.6531204644412192,
"grad_norm": 5.627134323120117,
"learning_rate": 2.9707060828913224e-07,
"loss": 0.11332046985626221,
"memory(GiB)": 49.87,
"step": 225,
"token_acc": 0.9386503067484663,
"train_speed(iter/s)": 0.042613
},
{
"epoch": 0.6676342525399129,
"grad_norm": 9.86678695678711,
"learning_rate": 2.75361958206525e-07,
"loss": 0.12341060638427734,
"memory(GiB)": 49.87,
"step": 230,
"token_acc": 0.9528023598820059,
"train_speed(iter/s)": 0.042729
},
{
"epoch": 0.6821480406386067,
"grad_norm": 7.587142467498779,
"learning_rate": 2.5417156376751557e-07,
"loss": 0.11968934535980225,
"memory(GiB)": 49.87,
"step": 235,
"token_acc": 0.9701492537313433,
"train_speed(iter/s)": 0.042825
},
{
"epoch": 0.6966618287373004,
"grad_norm": 6.26280403137207,
"learning_rate": 2.3354831269130132e-07,
"loss": 0.1178591012954712,
"memory(GiB)": 49.87,
"step": 240,
"token_acc": 0.9603658536585366,
"train_speed(iter/s)": 0.04292
},
{
"epoch": 0.7111756168359942,
"grad_norm": 9.032163619995117,
"learning_rate": 2.1353978425775004e-07,
"loss": 0.12919411659240723,
"memory(GiB)": 49.87,
"step": 245,
"token_acc": 0.9518072289156626,
"train_speed(iter/s)": 0.04302
},
{
"epoch": 0.7256894049346879,
"grad_norm": 12.530137062072754,
"learning_rate": 1.9419213953868235e-07,
"loss": 0.10113420486450195,
"memory(GiB)": 49.87,
"step": 250,
"token_acc": 0.9615384615384616,
"train_speed(iter/s)": 0.043104
},
{
"epoch": 0.7402031930333817,
"grad_norm": 6.279452800750732,
"learning_rate": 1.7555001490105486e-07,
"loss": 0.11017693281173706,
"memory(GiB)": 49.87,
"step": 255,
"token_acc": 0.9631901840490797,
"train_speed(iter/s)": 0.043201
},
{
"epoch": 0.7547169811320755,
"grad_norm": 43.64086151123047,
"learning_rate": 1.57656419027747e-07,
"loss": 0.13607945442199706,
"memory(GiB)": 49.87,
"step": 260,
"token_acc": 0.9575757575757575,
"train_speed(iter/s)": 0.043278
},
{
"epoch": 0.7692307692307693,
"grad_norm": 12.17973804473877,
"learning_rate": 1.4055263369352672e-07,
"loss": 0.1254185438156128,
"memory(GiB)": 49.87,
"step": 265,
"token_acc": 0.9637462235649547,
"train_speed(iter/s)": 0.043365
},
{
"epoch": 0.783744557329463,
"grad_norm": 10.712772369384766,
"learning_rate": 1.2427811852511395e-07,
"loss": 0.11515959501266479,
"memory(GiB)": 49.87,
"step": 270,
"token_acc": 0.9667673716012085,
"train_speed(iter/s)": 0.043444
},
{
"epoch": 0.7982583454281568,
"grad_norm": 6.96333122253418,
"learning_rate": 1.0887041996506857e-07,
"loss": 0.1222030520439148,
"memory(GiB)": 49.87,
"step": 275,
"token_acc": 0.9635258358662614,
"train_speed(iter/s)": 0.043528
},
{
"epoch": 0.8127721335268505,
"grad_norm": 6.654318809509277,
"learning_rate": 9.43650846495247e-08,
"loss": 0.1167829990386963,
"memory(GiB)": 49.87,
"step": 280,
"token_acc": 0.9697885196374623,
"train_speed(iter/s)": 0.043596
},
{
"epoch": 0.8272859216255443,
"grad_norm": 9.63259220123291,
"learning_rate": 8.079557739962128e-08,
"loss": 0.11740148067474365,
"memory(GiB)": 49.87,
"step": 285,
"token_acc": 0.9634146341463414,
"train_speed(iter/s)": 0.043666
},
{
"epoch": 0.841799709724238,
"grad_norm": 13.925198554992676,
"learning_rate": 6.819320401582257e-08,
"loss": 0.11932878494262696,
"memory(GiB)": 49.87,
"step": 290,
"token_acc": 0.946875,
"train_speed(iter/s)": 0.043721
},
{
"epoch": 0.8563134978229318,
"grad_norm": 11.304753303527832,
"learning_rate": 5.658703905325185e-08,
"loss": 0.13282599449157714,
"memory(GiB)": 49.87,
"step": 295,
"token_acc": 0.9246987951807228,
"train_speed(iter/s)": 0.043794
},
{
"epoch": 0.8708272859216255,
"grad_norm": 6.113060474395752,
"learning_rate": 4.600385874466256e-08,
"loss": 0.10344053506851196,
"memory(GiB)": 49.87,
"step": 300,
"token_acc": 0.9604863221884499,
"train_speed(iter/s)": 0.04385
},
{
"epoch": 0.8708272859216255,
"eval_loss": 0.12102954089641571,
"eval_runtime": 30.2478,
"eval_samples_per_second": 14.712,
"eval_steps_per_second": 1.851,
"eval_token_acc": 0.956428187197418,
"step": 300
},
{
"epoch": 0.8853410740203193,
"grad_norm": 9.591761589050293,
"learning_rate": 3.646807922580097e-08,
"loss": 0.12371342182159424,
"memory(GiB)": 49.87,
"step": 305,
"token_acc": 0.9606060606060606,
"train_speed(iter/s)": 0.042375
},
{
"epoch": 0.8998548621190131,
"grad_norm": 6.957780361175537,
"learning_rate": 2.800170020567566e-08,
"loss": 0.11073534488677979,
"memory(GiB)": 49.87,
"step": 310,
"token_acc": 0.9457831325301205,
"train_speed(iter/s)": 0.042451
},
{
"epoch": 0.9143686502177069,
"grad_norm": 8.52478313446045,
"learning_rate": 2.0624254211693894e-08,
"loss": 0.1204371452331543,
"memory(GiB)": 49.87,
"step": 315,
"token_acc": 0.9355828220858896,
"train_speed(iter/s)": 0.042532
},
{
"epoch": 0.9288824383164006,
"grad_norm": 10.085247993469238,
"learning_rate": 1.4352761526756907e-08,
"loss": 0.12228823900222778,
"memory(GiB)": 49.87,
"step": 320,
"token_acc": 0.937888198757764,
"train_speed(iter/s)": 0.042597
},
{
"epoch": 0.9433962264150944,
"grad_norm": 13.328726768493652,
"learning_rate": 9.201690922279404e-09,
"loss": 0.1078119158744812,
"memory(GiB)": 49.87,
"step": 325,
"token_acc": 0.9603658536585366,
"train_speed(iter/s)": 0.042678
},
{
"epoch": 0.9579100145137881,
"grad_norm": 16.931018829345703,
"learning_rate": 5.182926277723821e-09,
"loss": 0.1230659008026123,
"memory(GiB)": 49.87,
"step": 330,
"token_acc": 0.939209726443769,
"train_speed(iter/s)": 0.042761
},
{
"epoch": 0.9724238026124818,
"grad_norm": 8.692817687988281,
"learning_rate": 2.3057391636606695e-09,
"loss": 0.11264588832855224,
"memory(GiB)": 49.87,
"step": 335,
"token_acc": 0.93993993993994,
"train_speed(iter/s)": 0.042831
},
{
"epoch": 0.9869375907111756,
"grad_norm": 7.866856098175049,
"learning_rate": 5.767674516083954e-10,
"loss": 0.1168862223625183,
"memory(GiB)": 49.87,
"step": 340,
"token_acc": 0.9660493827160493,
"train_speed(iter/s)": 0.042902
},
{
"epoch": 1.0,
"grad_norm": 11.080939292907715,
"learning_rate": 0.0,
"loss": 0.12124216556549072,
"memory(GiB)": 49.87,
"step": 345,
"token_acc": 0.935374149659864,
"train_speed(iter/s)": 0.04301
},
{
"epoch": 1.0,
"eval_loss": 0.12013684213161469,
"eval_runtime": 30.5384,
"eval_samples_per_second": 14.572,
"eval_steps_per_second": 1.834,
"eval_token_acc": 0.956428187197418,
"step": 345
}
],
"logging_steps": 5,
"max_steps": 345,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.752508131096658e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}