goblinV1 / lora /trainer_state.json
ASHu2's picture
LoRA adapter + training metadata
406a5a9 verified
Raw
History Blame Contribute Delete
21.2 kB
{
"best_global_step": 846,
"best_metric": 0.11049881,
"best_model_checkpoint": "/root/outputs/rune-goblin-vision-lora/v2-20260607-171815/checkpoint-846",
"epoch": 3.0,
"eval_steps": 200,
"global_step": 846,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0035460992907801418,
"grad_norm": 26.47808074951172,
"learning_rate": 3.846153846153847e-06,
"loss": 4.957160472869873,
"step": 1,
"token_acc": 0.32794774836713647
},
{
"epoch": 0.03546099290780142,
"grad_norm": 6.980465412139893,
"learning_rate": 3.846153846153846e-05,
"loss": 4.304361979166667,
"step": 10,
"token_acc": 0.371801140994295
},
{
"epoch": 0.07092198581560284,
"grad_norm": 2.8949131965637207,
"learning_rate": 7.692307692307693e-05,
"loss": 2.537805938720703,
"step": 20,
"token_acc": 0.5499852876268942
},
{
"epoch": 0.10638297872340426,
"grad_norm": 2.3080947399139404,
"learning_rate": 9.999412884518409e-05,
"loss": 1.4758130073547364,
"step": 30,
"token_acc": 0.719080560740822
},
{
"epoch": 0.14184397163120568,
"grad_norm": 1.9129527807235718,
"learning_rate": 9.992809418734932e-05,
"loss": 0.8724543571472168,
"step": 40,
"token_acc": 0.8118754734677681
},
{
"epoch": 0.1773049645390071,
"grad_norm": 1.6698404550552368,
"learning_rate": 9.978878316629133e-05,
"loss": 0.6030772686004638,
"step": 50,
"token_acc": 0.857075642417662
},
{
"epoch": 0.2127659574468085,
"grad_norm": 1.4783146381378174,
"learning_rate": 9.957640024014426e-05,
"loss": 0.4381204605102539,
"step": 60,
"token_acc": 0.8833357600465929
},
{
"epoch": 0.24822695035460993,
"grad_norm": 1.3394635915756226,
"learning_rate": 9.929125711013952e-05,
"loss": 0.36449878215789794,
"step": 70,
"token_acc": 0.9001910219675263
},
{
"epoch": 0.28368794326241137,
"grad_norm": 1.8082902431488037,
"learning_rate": 9.893377226314113e-05,
"loss": 0.3078165054321289,
"step": 80,
"token_acc": 0.9116833988069256
},
{
"epoch": 0.3191489361702128,
"grad_norm": 1.3050870895385742,
"learning_rate": 9.850447035745866e-05,
"loss": 0.2708670854568481,
"step": 90,
"token_acc": 0.9215828380924772
},
{
"epoch": 0.3546099290780142,
"grad_norm": 1.157996654510498,
"learning_rate": 9.800398145283874e-05,
"loss": 0.2403315305709839,
"step": 100,
"token_acc": 0.9257353477848332
},
{
"epoch": 0.3900709219858156,
"grad_norm": 1.1475191116333008,
"learning_rate": 9.74330400857655e-05,
"loss": 0.2249774932861328,
"step": 110,
"token_acc": 0.9299674267100977
},
{
"epoch": 0.425531914893617,
"grad_norm": 1.1834532022476196,
"learning_rate": 9.679248419142703e-05,
"loss": 0.21108412742614746,
"step": 120,
"token_acc": 0.9321722859918679
},
{
"epoch": 0.46099290780141844,
"grad_norm": 1.025472640991211,
"learning_rate": 9.608325387392986e-05,
"loss": 0.1984718918800354,
"step": 130,
"token_acc": 0.9349572712000584
},
{
"epoch": 0.49645390070921985,
"grad_norm": 1.3528136014938354,
"learning_rate": 9.530639002656665e-05,
"loss": 0.18717693090438842,
"step": 140,
"token_acc": 0.9383361356824329
},
{
"epoch": 0.5319148936170213,
"grad_norm": 0.9476079344749451,
"learning_rate": 9.446303280416168e-05,
"loss": 0.18122910261154174,
"step": 150,
"token_acc": 0.939964093357271
},
{
"epoch": 0.5673758865248227,
"grad_norm": 1.1301629543304443,
"learning_rate": 9.35544199497364e-05,
"loss": 0.17390866279602052,
"step": 160,
"token_acc": 0.9416140097490522
},
{
"epoch": 0.6028368794326241,
"grad_norm": 0.9774699211120605,
"learning_rate": 9.258188497795093e-05,
"loss": 0.1724635124206543,
"step": 170,
"token_acc": 0.9417198946266826
},
{
"epoch": 0.6382978723404256,
"grad_norm": 0.9606502652168274,
"learning_rate": 9.154685521798736e-05,
"loss": 0.16570632457733153,
"step": 180,
"token_acc": 0.9430933137398971
},
{
"epoch": 0.6737588652482269,
"grad_norm": 0.7653209567070007,
"learning_rate": 9.045084971874738e-05,
"loss": 0.16216459274291992,
"step": 190,
"token_acc": 0.945176232177752
},
{
"epoch": 0.7092198581560284,
"grad_norm": 0.8915665149688721,
"learning_rate": 8.929547701943848e-05,
"loss": 0.16271411180496215,
"step": 200,
"token_acc": 0.942606371451275
},
{
"epoch": 0.7092198581560284,
"eval_loss": 0.15719343721866608,
"eval_runtime": 201.5815,
"eval_samples_per_second": 2.48,
"eval_steps_per_second": 2.48,
"eval_token_acc": 0.945529659545907,
"step": 200
},
{
"epoch": 0.7446808510638298,
"grad_norm": 0.8328582644462585,
"learning_rate": 8.808243278882094e-05,
"loss": 0.1601296544075012,
"step": 210,
"token_acc": 0.9429678447276941
},
{
"epoch": 0.7801418439716312,
"grad_norm": 0.6964325308799744,
"learning_rate": 8.681349733658002e-05,
"loss": 0.156463623046875,
"step": 220,
"token_acc": 0.944890562819784
},
{
"epoch": 0.8156028368794326,
"grad_norm": 0.7970917820930481,
"learning_rate": 8.549053300047603e-05,
"loss": 0.1498551845550537,
"step": 230,
"token_acc": 0.9467647380111422
},
{
"epoch": 0.851063829787234,
"grad_norm": 0.8542683720588684,
"learning_rate": 8.411548141310682e-05,
"loss": 0.150161075592041,
"step": 240,
"token_acc": 0.9477524382195498
},
{
"epoch": 0.8865248226950354,
"grad_norm": 1.1433733701705933,
"learning_rate": 8.269036065229427e-05,
"loss": 0.14733513593673705,
"step": 250,
"token_acc": 0.9477655677655678
},
{
"epoch": 0.9219858156028369,
"grad_norm": 0.8125953674316406,
"learning_rate": 8.121726227927671e-05,
"loss": 0.14813485145568847,
"step": 260,
"token_acc": 0.9468073573519314
},
{
"epoch": 0.9574468085106383,
"grad_norm": 0.6343769431114197,
"learning_rate": 7.96983482690544e-05,
"loss": 0.14432573318481445,
"step": 270,
"token_acc": 0.9457151129125652
},
{
"epoch": 0.9929078014184397,
"grad_norm": 0.6489441990852356,
"learning_rate": 7.813584783739314e-05,
"loss": 0.14110009670257567,
"step": 280,
"token_acc": 0.9487638198146624
},
{
"epoch": 1.0283687943262412,
"grad_norm": 0.6926689743995667,
"learning_rate": 7.653205416914267e-05,
"loss": 0.13882286548614503,
"step": 290,
"token_acc": 0.9491290028173485
},
{
"epoch": 1.0638297872340425,
"grad_norm": 0.6307562589645386,
"learning_rate": 7.48893210526717e-05,
"loss": 0.13428436517715453,
"step": 300,
"token_acc": 0.9507825962090752
},
{
"epoch": 1.099290780141844,
"grad_norm": 0.6745529770851135,
"learning_rate": 7.32100594253589e-05,
"loss": 0.13728001117706298,
"step": 310,
"token_acc": 0.9503636828831463
},
{
"epoch": 1.1347517730496455,
"grad_norm": 0.6265084743499756,
"learning_rate": 7.149673383520977e-05,
"loss": 0.1338452696800232,
"step": 320,
"token_acc": 0.9483374653638618
},
{
"epoch": 1.1702127659574468,
"grad_norm": 0.7385435104370117,
"learning_rate": 6.975185882379271e-05,
"loss": 0.1393455147743225,
"step": 330,
"token_acc": 0.9486471763210459
},
{
"epoch": 1.2056737588652482,
"grad_norm": 0.5905711650848389,
"learning_rate": 6.79779952358024e-05,
"loss": 0.13502193689346315,
"step": 340,
"token_acc": 0.9485331772624186
},
{
"epoch": 1.2411347517730495,
"grad_norm": 0.7370989322662354,
"learning_rate": 6.617774646066712e-05,
"loss": 0.13421342372894288,
"step": 350,
"token_acc": 0.9494764589587757
},
{
"epoch": 1.2765957446808511,
"grad_norm": 0.5736089944839478,
"learning_rate": 6.43537546117158e-05,
"loss": 0.13304685354232787,
"step": 360,
"token_acc": 0.9498352251922373
},
{
"epoch": 1.3120567375886525,
"grad_norm": 0.5789754986763,
"learning_rate": 6.250869664851227e-05,
"loss": 0.13168127536773683,
"step": 370,
"token_acc": 0.9499291450165328
},
{
"epoch": 1.3475177304964538,
"grad_norm": 0.5693713426589966,
"learning_rate": 6.0645280448048044e-05,
"loss": 0.12925996780395507,
"step": 380,
"token_acc": 0.9515637471881576
},
{
"epoch": 1.3829787234042552,
"grad_norm": 0.7503437399864197,
"learning_rate": 5.876624083055939e-05,
"loss": 0.1295076847076416,
"step": 390,
"token_acc": 0.9505007983742197
},
{
"epoch": 1.4184397163120568,
"grad_norm": 0.6699873805046082,
"learning_rate": 5.687433554580147e-05,
"loss": 0.12774388790130614,
"step": 400,
"token_acc": 0.9517989761338236
},
{
"epoch": 1.4184397163120568,
"eval_loss": 0.1278059333562851,
"eval_runtime": 202.2442,
"eval_samples_per_second": 2.472,
"eval_steps_per_second": 2.472,
"eval_token_acc": 0.9520795241511792,
"step": 400
},
{
"epoch": 1.4539007092198581,
"grad_norm": 0.5635619163513184,
"learning_rate": 5.4972341225670354e-05,
"loss": 0.1255749225616455,
"step": 410,
"token_acc": 0.9524740290295726
},
{
"epoch": 1.4893617021276595,
"grad_norm": 0.7221083641052246,
"learning_rate": 5.306304930911278e-05,
"loss": 0.12711741924285888,
"step": 420,
"token_acc": 0.9516117426967909
},
{
"epoch": 1.524822695035461,
"grad_norm": 0.5900425314903259,
"learning_rate": 5.1149261945304526e-05,
"loss": 0.1292075514793396,
"step": 430,
"token_acc": 0.9518457901436624
},
{
"epoch": 1.5602836879432624,
"grad_norm": 0.6121543645858765,
"learning_rate": 4.923378788111019e-05,
"loss": 0.12657049894332886,
"step": 440,
"token_acc": 0.9530577088716624
},
{
"epoch": 1.5957446808510638,
"grad_norm": 0.5986051559448242,
"learning_rate": 4.731943833885973e-05,
"loss": 0.12117983102798462,
"step": 450,
"token_acc": 0.9550748752079867
},
{
"epoch": 1.6312056737588652,
"grad_norm": 0.5218138694763184,
"learning_rate": 4.54090228904921e-05,
"loss": 0.12389755249023438,
"step": 460,
"token_acc": 0.9530423518405239
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.6366024017333984,
"learning_rate": 4.350534533412097e-05,
"loss": 0.12183566093444824,
"step": 470,
"token_acc": 0.9533030027297543
},
{
"epoch": 1.702127659574468,
"grad_norm": 0.631432056427002,
"learning_rate": 4.16111995790744e-05,
"loss": 0.1201132893562317,
"step": 480,
"token_acc": 0.9545209419378033
},
{
"epoch": 1.7375886524822695,
"grad_norm": 0.6317788362503052,
"learning_rate": 3.9729365545447514e-05,
"loss": 0.12237818241119384,
"step": 490,
"token_acc": 0.9546474590994644
},
{
"epoch": 1.773049645390071,
"grad_norm": 0.5718140006065369,
"learning_rate": 3.786260508418655e-05,
"loss": 0.124367356300354,
"step": 500,
"token_acc": 0.9517145369284877
},
{
"epoch": 1.8085106382978724,
"grad_norm": 0.6416216492652893,
"learning_rate": 3.601365792369161e-05,
"loss": 0.12365785837173462,
"step": 510,
"token_acc": 0.9522010751125962
},
{
"epoch": 1.8439716312056738,
"grad_norm": 0.5563585758209229,
"learning_rate": 3.418523764888758e-05,
"loss": 0.11884108781814576,
"step": 520,
"token_acc": 0.9552927391893377
},
{
"epoch": 1.8794326241134751,
"grad_norm": 0.6853976249694824,
"learning_rate": 3.238002771866391e-05,
"loss": 0.11873785257339478,
"step": 530,
"token_acc": 0.9553233651594307
},
{
"epoch": 1.9148936170212765,
"grad_norm": 0.64034104347229,
"learning_rate": 3.060067752752874e-05,
"loss": 0.1139642357826233,
"step": 540,
"token_acc": 0.9571830173718734
},
{
"epoch": 1.950354609929078,
"grad_norm": 0.5898419618606567,
"learning_rate": 2.8849798517257065e-05,
"loss": 0.11692265272140503,
"step": 550,
"token_acc": 0.9547944708486014
},
{
"epoch": 1.9858156028368794,
"grad_norm": 0.5608242750167847,
"learning_rate": 2.7129960344239824e-05,
"loss": 0.11538001298904418,
"step": 560,
"token_acc": 0.9559320788003528
},
{
"epoch": 2.021276595744681,
"grad_norm": 0.6120197772979736,
"learning_rate": 2.5443687108158836e-05,
"loss": 0.11315921545028687,
"step": 570,
"token_acc": 0.9558374442095372
},
{
"epoch": 2.0567375886524824,
"grad_norm": 0.5597257018089294,
"learning_rate": 2.379345364752239e-05,
"loss": 0.11295346021652222,
"step": 580,
"token_acc": 0.9562364842575963
},
{
"epoch": 2.0921985815602837,
"grad_norm": 0.4704679846763611,
"learning_rate": 2.2181681907498502e-05,
"loss": 0.10958367586135864,
"step": 590,
"token_acc": 0.9576675281185164
},
{
"epoch": 2.127659574468085,
"grad_norm": 0.534522294998169,
"learning_rate": 2.061073738537635e-05,
"loss": 0.11023097038269043,
"step": 600,
"token_acc": 0.9576889723948547
},
{
"epoch": 2.127659574468085,
"eval_loss": 0.11636195331811905,
"eval_runtime": 201.0697,
"eval_samples_per_second": 2.487,
"eval_steps_per_second": 2.487,
"eval_token_acc": 0.9559214942023283,
"step": 600
},
{
"epoch": 2.1631205673758864,
"grad_norm": 0.4464743733406067,
"learning_rate": 1.9082925658872853e-05,
"loss": 0.10994529724121094,
"step": 610,
"token_acc": 0.9586940836940837
},
{
"epoch": 2.198581560283688,
"grad_norm": 0.5795279741287231,
"learning_rate": 1.7600489002379443e-05,
"loss": 0.11219470500946045,
"step": 620,
"token_acc": 0.9579232995658467
},
{
"epoch": 2.2340425531914896,
"grad_norm": 0.43980446457862854,
"learning_rate": 1.6165603096115107e-05,
"loss": 0.10888147354125977,
"step": 630,
"token_acc": 0.9575937589464644
},
{
"epoch": 2.269503546099291,
"grad_norm": 0.5029824376106262,
"learning_rate": 1.4780373833015737e-05,
"loss": 0.10863748788833619,
"step": 640,
"token_acc": 0.9594982078853047
},
{
"epoch": 2.3049645390070923,
"grad_norm": 0.5658282041549683,
"learning_rate": 1.3446834228045812e-05,
"loss": 0.10928837060928345,
"step": 650,
"token_acc": 0.9579486439699942
},
{
"epoch": 2.3404255319148937,
"grad_norm": 0.5395434498786926,
"learning_rate": 1.216694143446857e-05,
"loss": 0.10922106504440307,
"step": 660,
"token_acc": 0.9575124269801532
},
{
"epoch": 2.375886524822695,
"grad_norm": 0.5449179410934448,
"learning_rate": 1.0942573871453733e-05,
"loss": 0.1074299693107605,
"step": 670,
"token_acc": 0.9578538497840056
},
{
"epoch": 2.4113475177304964,
"grad_norm": 0.5345823168754578,
"learning_rate": 9.775528467238327e-06,
"loss": 0.10802021026611328,
"step": 680,
"token_acc": 0.9587758112094396
},
{
"epoch": 2.4468085106382977,
"grad_norm": 0.5855720639228821,
"learning_rate": 8.6675180218867e-06,
"loss": 0.10763558149337768,
"step": 690,
"token_acc": 0.9585245065909257
},
{
"epoch": 2.482269503546099,
"grad_norm": 0.5211033821105957,
"learning_rate": 7.62016869352028e-06,
"loss": 0.1050539493560791,
"step": 700,
"token_acc": 0.9588449213264681
},
{
"epoch": 2.5177304964539005,
"grad_norm": 0.5373179912567139,
"learning_rate": 6.6350176117061845e-06,
"loss": 0.10771543979644775,
"step": 710,
"token_acc": 0.9592036553524804
},
{
"epoch": 2.5531914893617023,
"grad_norm": 0.5263897180557251,
"learning_rate": 5.7135106215077335e-06,
"loss": 0.10759412050247193,
"step": 720,
"token_acc": 0.9581606835099556
},
{
"epoch": 2.5886524822695036,
"grad_norm": 0.46306130290031433,
"learning_rate": 4.857000161507353e-06,
"loss": 0.10637538433074951,
"step": 730,
"token_acc": 0.9585609205447527
},
{
"epoch": 2.624113475177305,
"grad_norm": 0.5812863707542419,
"learning_rate": 4.0667432789165075e-06,
"loss": 0.10533280372619629,
"step": 740,
"token_acc": 0.9585522263686098
},
{
"epoch": 2.6595744680851063,
"grad_norm": 0.6535531878471375,
"learning_rate": 3.3438997846855393e-06,
"loss": 0.10743522644042969,
"step": 750,
"token_acc": 0.9587139335943163
},
{
"epoch": 2.6950354609929077,
"grad_norm": 0.5804023742675781,
"learning_rate": 2.689530551321179e-06,
"loss": 0.10793532133102417,
"step": 760,
"token_acc": 0.9582205746061168
},
{
"epoch": 2.7304964539007095,
"grad_norm": 0.6999026536941528,
"learning_rate": 2.104595955909844e-06,
"loss": 0.10848350524902343,
"step": 770,
"token_acc": 0.957983193277311
},
{
"epoch": 2.7659574468085104,
"grad_norm": 0.5102624297142029,
"learning_rate": 1.5899544706318381e-06,
"loss": 0.1073201060295105,
"step": 780,
"token_acc": 0.958284841431481
},
{
"epoch": 2.801418439716312,
"grad_norm": 0.6007110476493835,
"learning_rate": 1.1463614028350389e-06,
"loss": 0.1058308243751526,
"step": 790,
"token_acc": 0.9592070831989102
},
{
"epoch": 2.8368794326241136,
"grad_norm": 0.5462249517440796,
"learning_rate": 7.744677865171967e-07,
"loss": 0.1059834361076355,
"step": 800,
"token_acc": 0.9579558070407908
},
{
"epoch": 2.8368794326241136,
"eval_loss": 0.11057131737470627,
"eval_runtime": 216.6008,
"eval_samples_per_second": 2.308,
"eval_steps_per_second": 2.308,
"eval_token_acc": 0.957229152683593,
"step": 800
},
{
"epoch": 2.872340425531915,
"grad_norm": 0.4730088412761688,
"learning_rate": 4.7481942684378113e-07,
"loss": 0.10582698583602905,
"step": 810,
"token_acc": 0.9595293603380446
},
{
"epoch": 2.9078014184397163,
"grad_norm": 0.47516319155693054,
"learning_rate": 2.478560991036383e-07,
"loss": 0.10610353946685791,
"step": 820,
"token_acc": 0.9585180379860886
},
{
"epoch": 2.9432624113475176,
"grad_norm": 0.459794819355011,
"learning_rate": 9.391090327811048e-08,
"loss": 0.10496950149536133,
"step": 830,
"token_acc": 0.9590602804589328
},
{
"epoch": 2.978723404255319,
"grad_norm": 0.5275819897651672,
"learning_rate": 1.3209775170852645e-08,
"loss": 0.10786118507385253,
"step": 840,
"token_acc": 0.9574242977015688
},
{
"epoch": 3.0,
"eval_loss": 0.110498808324337,
"eval_runtime": 200.6458,
"eval_samples_per_second": 2.492,
"eval_steps_per_second": 2.492,
"eval_token_acc": 0.9572985858772884,
"step": 846
}
],
"logging_steps": 10,
"max_steps": 846,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.56432289067704e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}