{ "best_global_step": 846, "best_metric": 0.11049881, "best_model_checkpoint": "/root/outputs/rune-goblin-vision-lora/v2-20260607-171815/checkpoint-846", "epoch": 3.0, "eval_steps": 200, "global_step": 846, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0035460992907801418, "grad_norm": 26.47808074951172, "learning_rate": 3.846153846153847e-06, "loss": 4.957160472869873, "step": 1, "token_acc": 0.32794774836713647 }, { "epoch": 0.03546099290780142, "grad_norm": 6.980465412139893, "learning_rate": 3.846153846153846e-05, "loss": 4.304361979166667, "step": 10, "token_acc": 0.371801140994295 }, { "epoch": 0.07092198581560284, "grad_norm": 2.8949131965637207, "learning_rate": 7.692307692307693e-05, "loss": 2.537805938720703, "step": 20, "token_acc": 0.5499852876268942 }, { "epoch": 0.10638297872340426, "grad_norm": 2.3080947399139404, "learning_rate": 9.999412884518409e-05, "loss": 1.4758130073547364, "step": 30, "token_acc": 0.719080560740822 }, { "epoch": 0.14184397163120568, "grad_norm": 1.9129527807235718, "learning_rate": 9.992809418734932e-05, "loss": 0.8724543571472168, "step": 40, "token_acc": 0.8118754734677681 }, { "epoch": 0.1773049645390071, "grad_norm": 1.6698404550552368, "learning_rate": 9.978878316629133e-05, "loss": 0.6030772686004638, "step": 50, "token_acc": 0.857075642417662 }, { "epoch": 0.2127659574468085, "grad_norm": 1.4783146381378174, "learning_rate": 9.957640024014426e-05, "loss": 0.4381204605102539, "step": 60, "token_acc": 0.8833357600465929 }, { "epoch": 0.24822695035460993, "grad_norm": 1.3394635915756226, "learning_rate": 9.929125711013952e-05, "loss": 0.36449878215789794, "step": 70, "token_acc": 0.9001910219675263 }, { "epoch": 0.28368794326241137, "grad_norm": 1.8082902431488037, "learning_rate": 9.893377226314113e-05, "loss": 0.3078165054321289, "step": 80, "token_acc": 0.9116833988069256 }, { "epoch": 0.3191489361702128, "grad_norm": 1.3050870895385742, "learning_rate": 9.850447035745866e-05, "loss": 0.2708670854568481, "step": 90, "token_acc": 0.9215828380924772 }, { "epoch": 0.3546099290780142, "grad_norm": 1.157996654510498, "learning_rate": 9.800398145283874e-05, "loss": 0.2403315305709839, "step": 100, "token_acc": 0.9257353477848332 }, { "epoch": 0.3900709219858156, "grad_norm": 1.1475191116333008, "learning_rate": 9.74330400857655e-05, "loss": 0.2249774932861328, "step": 110, "token_acc": 0.9299674267100977 }, { "epoch": 0.425531914893617, "grad_norm": 1.1834532022476196, "learning_rate": 9.679248419142703e-05, "loss": 0.21108412742614746, "step": 120, "token_acc": 0.9321722859918679 }, { "epoch": 0.46099290780141844, "grad_norm": 1.025472640991211, "learning_rate": 9.608325387392986e-05, "loss": 0.1984718918800354, "step": 130, "token_acc": 0.9349572712000584 }, { "epoch": 0.49645390070921985, "grad_norm": 1.3528136014938354, "learning_rate": 9.530639002656665e-05, "loss": 0.18717693090438842, "step": 140, "token_acc": 0.9383361356824329 }, { "epoch": 0.5319148936170213, "grad_norm": 0.9476079344749451, "learning_rate": 9.446303280416168e-05, "loss": 0.18122910261154174, "step": 150, "token_acc": 0.939964093357271 }, { "epoch": 0.5673758865248227, "grad_norm": 1.1301629543304443, "learning_rate": 9.35544199497364e-05, "loss": 0.17390866279602052, "step": 160, "token_acc": 0.9416140097490522 }, { "epoch": 0.6028368794326241, "grad_norm": 0.9774699211120605, "learning_rate": 9.258188497795093e-05, "loss": 0.1724635124206543, "step": 170, "token_acc": 0.9417198946266826 }, { "epoch": 0.6382978723404256, "grad_norm": 0.9606502652168274, "learning_rate": 9.154685521798736e-05, "loss": 0.16570632457733153, "step": 180, "token_acc": 0.9430933137398971 }, { "epoch": 0.6737588652482269, "grad_norm": 0.7653209567070007, "learning_rate": 9.045084971874738e-05, "loss": 0.16216459274291992, "step": 190, "token_acc": 0.945176232177752 }, { "epoch": 0.7092198581560284, "grad_norm": 0.8915665149688721, "learning_rate": 8.929547701943848e-05, "loss": 0.16271411180496215, "step": 200, "token_acc": 0.942606371451275 }, { "epoch": 0.7092198581560284, "eval_loss": 0.15719343721866608, "eval_runtime": 201.5815, "eval_samples_per_second": 2.48, "eval_steps_per_second": 2.48, "eval_token_acc": 0.945529659545907, "step": 200 }, { "epoch": 0.7446808510638298, "grad_norm": 0.8328582644462585, "learning_rate": 8.808243278882094e-05, "loss": 0.1601296544075012, "step": 210, "token_acc": 0.9429678447276941 }, { "epoch": 0.7801418439716312, "grad_norm": 0.6964325308799744, "learning_rate": 8.681349733658002e-05, "loss": 0.156463623046875, "step": 220, "token_acc": 0.944890562819784 }, { "epoch": 0.8156028368794326, "grad_norm": 0.7970917820930481, "learning_rate": 8.549053300047603e-05, "loss": 0.1498551845550537, "step": 230, "token_acc": 0.9467647380111422 }, { "epoch": 0.851063829787234, "grad_norm": 0.8542683720588684, "learning_rate": 8.411548141310682e-05, "loss": 0.150161075592041, "step": 240, "token_acc": 0.9477524382195498 }, { "epoch": 0.8865248226950354, "grad_norm": 1.1433733701705933, "learning_rate": 8.269036065229427e-05, "loss": 0.14733513593673705, "step": 250, "token_acc": 0.9477655677655678 }, { "epoch": 0.9219858156028369, "grad_norm": 0.8125953674316406, "learning_rate": 8.121726227927671e-05, "loss": 0.14813485145568847, "step": 260, "token_acc": 0.9468073573519314 }, { "epoch": 0.9574468085106383, "grad_norm": 0.6343769431114197, "learning_rate": 7.96983482690544e-05, "loss": 0.14432573318481445, "step": 270, "token_acc": 0.9457151129125652 }, { "epoch": 0.9929078014184397, "grad_norm": 0.6489441990852356, "learning_rate": 7.813584783739314e-05, "loss": 0.14110009670257567, "step": 280, "token_acc": 0.9487638198146624 }, { "epoch": 1.0283687943262412, "grad_norm": 0.6926689743995667, "learning_rate": 7.653205416914267e-05, "loss": 0.13882286548614503, "step": 290, "token_acc": 0.9491290028173485 }, { "epoch": 1.0638297872340425, "grad_norm": 0.6307562589645386, "learning_rate": 7.48893210526717e-05, "loss": 0.13428436517715453, "step": 300, "token_acc": 0.9507825962090752 }, { "epoch": 1.099290780141844, "grad_norm": 0.6745529770851135, "learning_rate": 7.32100594253589e-05, "loss": 0.13728001117706298, "step": 310, "token_acc": 0.9503636828831463 }, { "epoch": 1.1347517730496455, "grad_norm": 0.6265084743499756, "learning_rate": 7.149673383520977e-05, "loss": 0.1338452696800232, "step": 320, "token_acc": 0.9483374653638618 }, { "epoch": 1.1702127659574468, "grad_norm": 0.7385435104370117, "learning_rate": 6.975185882379271e-05, "loss": 0.1393455147743225, "step": 330, "token_acc": 0.9486471763210459 }, { "epoch": 1.2056737588652482, "grad_norm": 0.5905711650848389, "learning_rate": 6.79779952358024e-05, "loss": 0.13502193689346315, "step": 340, "token_acc": 0.9485331772624186 }, { "epoch": 1.2411347517730495, "grad_norm": 0.7370989322662354, "learning_rate": 6.617774646066712e-05, "loss": 0.13421342372894288, "step": 350, "token_acc": 0.9494764589587757 }, { "epoch": 1.2765957446808511, "grad_norm": 0.5736089944839478, "learning_rate": 6.43537546117158e-05, "loss": 0.13304685354232787, "step": 360, "token_acc": 0.9498352251922373 }, { "epoch": 1.3120567375886525, "grad_norm": 0.5789754986763, "learning_rate": 6.250869664851227e-05, "loss": 0.13168127536773683, "step": 370, "token_acc": 0.9499291450165328 }, { "epoch": 1.3475177304964538, "grad_norm": 0.5693713426589966, "learning_rate": 6.0645280448048044e-05, "loss": 0.12925996780395507, "step": 380, "token_acc": 0.9515637471881576 }, { "epoch": 1.3829787234042552, "grad_norm": 0.7503437399864197, "learning_rate": 5.876624083055939e-05, "loss": 0.1295076847076416, "step": 390, "token_acc": 0.9505007983742197 }, { "epoch": 1.4184397163120568, "grad_norm": 0.6699873805046082, "learning_rate": 5.687433554580147e-05, "loss": 0.12774388790130614, "step": 400, "token_acc": 0.9517989761338236 }, { "epoch": 1.4184397163120568, "eval_loss": 0.1278059333562851, "eval_runtime": 202.2442, "eval_samples_per_second": 2.472, "eval_steps_per_second": 2.472, "eval_token_acc": 0.9520795241511792, "step": 400 }, { "epoch": 1.4539007092198581, "grad_norm": 0.5635619163513184, "learning_rate": 5.4972341225670354e-05, "loss": 0.1255749225616455, "step": 410, "token_acc": 0.9524740290295726 }, { "epoch": 1.4893617021276595, "grad_norm": 0.7221083641052246, "learning_rate": 5.306304930911278e-05, "loss": 0.12711741924285888, "step": 420, "token_acc": 0.9516117426967909 }, { "epoch": 1.524822695035461, "grad_norm": 0.5900425314903259, "learning_rate": 5.1149261945304526e-05, "loss": 0.1292075514793396, "step": 430, "token_acc": 0.9518457901436624 }, { "epoch": 1.5602836879432624, "grad_norm": 0.6121543645858765, "learning_rate": 4.923378788111019e-05, "loss": 0.12657049894332886, "step": 440, "token_acc": 0.9530577088716624 }, { "epoch": 1.5957446808510638, "grad_norm": 0.5986051559448242, "learning_rate": 4.731943833885973e-05, "loss": 0.12117983102798462, "step": 450, "token_acc": 0.9550748752079867 }, { "epoch": 1.6312056737588652, "grad_norm": 0.5218138694763184, "learning_rate": 4.54090228904921e-05, "loss": 0.12389755249023438, "step": 460, "token_acc": 0.9530423518405239 }, { "epoch": 1.6666666666666665, "grad_norm": 0.6366024017333984, "learning_rate": 4.350534533412097e-05, "loss": 0.12183566093444824, "step": 470, "token_acc": 0.9533030027297543 }, { "epoch": 1.702127659574468, "grad_norm": 0.631432056427002, "learning_rate": 4.16111995790744e-05, "loss": 0.1201132893562317, "step": 480, "token_acc": 0.9545209419378033 }, { "epoch": 1.7375886524822695, "grad_norm": 0.6317788362503052, "learning_rate": 3.9729365545447514e-05, "loss": 0.12237818241119384, "step": 490, "token_acc": 0.9546474590994644 }, { "epoch": 1.773049645390071, "grad_norm": 0.5718140006065369, "learning_rate": 3.786260508418655e-05, "loss": 0.124367356300354, "step": 500, "token_acc": 0.9517145369284877 }, { "epoch": 1.8085106382978724, "grad_norm": 0.6416216492652893, "learning_rate": 3.601365792369161e-05, "loss": 0.12365785837173462, "step": 510, "token_acc": 0.9522010751125962 }, { "epoch": 1.8439716312056738, "grad_norm": 0.5563585758209229, "learning_rate": 3.418523764888758e-05, "loss": 0.11884108781814576, "step": 520, "token_acc": 0.9552927391893377 }, { "epoch": 1.8794326241134751, "grad_norm": 0.6853976249694824, "learning_rate": 3.238002771866391e-05, "loss": 0.11873785257339478, "step": 530, "token_acc": 0.9553233651594307 }, { "epoch": 1.9148936170212765, "grad_norm": 0.64034104347229, "learning_rate": 3.060067752752874e-05, "loss": 0.1139642357826233, "step": 540, "token_acc": 0.9571830173718734 }, { "epoch": 1.950354609929078, "grad_norm": 0.5898419618606567, "learning_rate": 2.8849798517257065e-05, "loss": 0.11692265272140503, "step": 550, "token_acc": 0.9547944708486014 }, { "epoch": 1.9858156028368794, "grad_norm": 0.5608242750167847, "learning_rate": 2.7129960344239824e-05, "loss": 0.11538001298904418, "step": 560, "token_acc": 0.9559320788003528 }, { "epoch": 2.021276595744681, "grad_norm": 0.6120197772979736, "learning_rate": 2.5443687108158836e-05, "loss": 0.11315921545028687, "step": 570, "token_acc": 0.9558374442095372 }, { "epoch": 2.0567375886524824, "grad_norm": 0.5597257018089294, "learning_rate": 2.379345364752239e-05, "loss": 0.11295346021652222, "step": 580, "token_acc": 0.9562364842575963 }, { "epoch": 2.0921985815602837, "grad_norm": 0.4704679846763611, "learning_rate": 2.2181681907498502e-05, "loss": 0.10958367586135864, "step": 590, "token_acc": 0.9576675281185164 }, { "epoch": 2.127659574468085, "grad_norm": 0.534522294998169, "learning_rate": 2.061073738537635e-05, "loss": 0.11023097038269043, "step": 600, "token_acc": 0.9576889723948547 }, { "epoch": 2.127659574468085, "eval_loss": 0.11636195331811905, "eval_runtime": 201.0697, "eval_samples_per_second": 2.487, "eval_steps_per_second": 2.487, "eval_token_acc": 0.9559214942023283, "step": 600 }, { "epoch": 2.1631205673758864, "grad_norm": 0.4464743733406067, "learning_rate": 1.9082925658872853e-05, "loss": 0.10994529724121094, "step": 610, "token_acc": 0.9586940836940837 }, { "epoch": 2.198581560283688, "grad_norm": 0.5795279741287231, "learning_rate": 1.7600489002379443e-05, "loss": 0.11219470500946045, "step": 620, "token_acc": 0.9579232995658467 }, { "epoch": 2.2340425531914896, "grad_norm": 0.43980446457862854, "learning_rate": 1.6165603096115107e-05, "loss": 0.10888147354125977, "step": 630, "token_acc": 0.9575937589464644 }, { "epoch": 2.269503546099291, "grad_norm": 0.5029824376106262, "learning_rate": 1.4780373833015737e-05, "loss": 0.10863748788833619, "step": 640, "token_acc": 0.9594982078853047 }, { "epoch": 2.3049645390070923, "grad_norm": 0.5658282041549683, "learning_rate": 1.3446834228045812e-05, "loss": 0.10928837060928345, "step": 650, "token_acc": 0.9579486439699942 }, { "epoch": 2.3404255319148937, "grad_norm": 0.5395434498786926, "learning_rate": 1.216694143446857e-05, "loss": 0.10922106504440307, "step": 660, "token_acc": 0.9575124269801532 }, { "epoch": 2.375886524822695, "grad_norm": 0.5449179410934448, "learning_rate": 1.0942573871453733e-05, "loss": 0.1074299693107605, "step": 670, "token_acc": 0.9578538497840056 }, { "epoch": 2.4113475177304964, "grad_norm": 0.5345823168754578, "learning_rate": 9.775528467238327e-06, "loss": 0.10802021026611328, "step": 680, "token_acc": 0.9587758112094396 }, { "epoch": 2.4468085106382977, "grad_norm": 0.5855720639228821, "learning_rate": 8.6675180218867e-06, "loss": 0.10763558149337768, "step": 690, "token_acc": 0.9585245065909257 }, { "epoch": 2.482269503546099, "grad_norm": 0.5211033821105957, "learning_rate": 7.62016869352028e-06, "loss": 0.1050539493560791, "step": 700, "token_acc": 0.9588449213264681 }, { "epoch": 2.5177304964539005, "grad_norm": 0.5373179912567139, "learning_rate": 6.6350176117061845e-06, "loss": 0.10771543979644775, "step": 710, "token_acc": 0.9592036553524804 }, { "epoch": 2.5531914893617023, "grad_norm": 0.5263897180557251, "learning_rate": 5.7135106215077335e-06, "loss": 0.10759412050247193, "step": 720, "token_acc": 0.9581606835099556 }, { "epoch": 2.5886524822695036, "grad_norm": 0.46306130290031433, "learning_rate": 4.857000161507353e-06, "loss": 0.10637538433074951, "step": 730, "token_acc": 0.9585609205447527 }, { "epoch": 2.624113475177305, "grad_norm": 0.5812863707542419, "learning_rate": 4.0667432789165075e-06, "loss": 0.10533280372619629, "step": 740, "token_acc": 0.9585522263686098 }, { "epoch": 2.6595744680851063, "grad_norm": 0.6535531878471375, "learning_rate": 3.3438997846855393e-06, "loss": 0.10743522644042969, "step": 750, "token_acc": 0.9587139335943163 }, { "epoch": 2.6950354609929077, "grad_norm": 0.5804023742675781, "learning_rate": 2.689530551321179e-06, "loss": 0.10793532133102417, "step": 760, "token_acc": 0.9582205746061168 }, { "epoch": 2.7304964539007095, "grad_norm": 0.6999026536941528, "learning_rate": 2.104595955909844e-06, "loss": 0.10848350524902343, "step": 770, "token_acc": 0.957983193277311 }, { "epoch": 2.7659574468085104, "grad_norm": 0.5102624297142029, "learning_rate": 1.5899544706318381e-06, "loss": 0.1073201060295105, "step": 780, "token_acc": 0.958284841431481 }, { "epoch": 2.801418439716312, "grad_norm": 0.6007110476493835, "learning_rate": 1.1463614028350389e-06, "loss": 0.1058308243751526, "step": 790, "token_acc": 0.9592070831989102 }, { "epoch": 2.8368794326241136, "grad_norm": 0.5462249517440796, "learning_rate": 7.744677865171967e-07, "loss": 0.1059834361076355, "step": 800, "token_acc": 0.9579558070407908 }, { "epoch": 2.8368794326241136, "eval_loss": 0.11057131737470627, "eval_runtime": 216.6008, "eval_samples_per_second": 2.308, "eval_steps_per_second": 2.308, "eval_token_acc": 0.957229152683593, "step": 800 }, { "epoch": 2.872340425531915, "grad_norm": 0.4730088412761688, "learning_rate": 4.7481942684378113e-07, "loss": 0.10582698583602905, "step": 810, "token_acc": 0.9595293603380446 }, { "epoch": 2.9078014184397163, "grad_norm": 0.47516319155693054, "learning_rate": 2.478560991036383e-07, "loss": 0.10610353946685791, "step": 820, "token_acc": 0.9585180379860886 }, { "epoch": 2.9432624113475176, "grad_norm": 0.459794819355011, "learning_rate": 9.391090327811048e-08, "loss": 0.10496950149536133, "step": 830, "token_acc": 0.9590602804589328 }, { "epoch": 2.978723404255319, "grad_norm": 0.5275819897651672, "learning_rate": 1.3209775170852645e-08, "loss": 0.10786118507385253, "step": 840, "token_acc": 0.9574242977015688 }, { "epoch": 3.0, "eval_loss": 0.110498808324337, "eval_runtime": 200.6458, "eval_samples_per_second": 2.492, "eval_steps_per_second": 2.492, "eval_token_acc": 0.9572985858772884, "step": 846 } ], "logging_steps": 10, "max_steps": 846, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.56432289067704e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }