{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.20149103364900262, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020149103364900263, "grad_norm": 18.375, "learning_rate": 1.998791053798106e-05, "loss": 1.9277, "mean_token_accuracy": 0.679860633611679, "num_tokens": 9373.0, "step": 10 }, { "epoch": 0.004029820672980053, "grad_norm": 13.5625, "learning_rate": 1.9974477802404462e-05, "loss": 1.2796, "mean_token_accuracy": 0.7233692526817321, "num_tokens": 20789.0, "step": 20 }, { "epoch": 0.006044731009470079, "grad_norm": 13.0, "learning_rate": 1.996104506682786e-05, "loss": 1.2607, "mean_token_accuracy": 0.7299719333648682, "num_tokens": 32661.0, "step": 30 }, { "epoch": 0.008059641345960105, "grad_norm": 12.8125, "learning_rate": 1.994761233125126e-05, "loss": 1.2356, "mean_token_accuracy": 0.7324558198451996, "num_tokens": 43049.0, "step": 40 }, { "epoch": 0.01007455168245013, "grad_norm": 12.125, "learning_rate": 1.9934179595674662e-05, "loss": 1.1324, "mean_token_accuracy": 0.7531639993190765, "num_tokens": 52956.0, "step": 50 }, { "epoch": 0.012089462018940157, "grad_norm": 16.125, "learning_rate": 1.992074686009806e-05, "loss": 1.1775, "mean_token_accuracy": 0.7408373892307282, "num_tokens": 63513.0, "step": 60 }, { "epoch": 0.014104372355430184, "grad_norm": 14.0625, "learning_rate": 1.990731412452146e-05, "loss": 1.2446, "mean_token_accuracy": 0.7307547807693482, "num_tokens": 74794.0, "step": 70 }, { "epoch": 0.01611928269192021, "grad_norm": 11.875, "learning_rate": 1.989388138894486e-05, "loss": 1.2428, "mean_token_accuracy": 0.7255984365940094, "num_tokens": 86903.0, "step": 80 }, { "epoch": 0.018134193028410236, "grad_norm": 14.4375, "learning_rate": 1.988044865336826e-05, "loss": 1.2766, "mean_token_accuracy": 0.7225647568702698, "num_tokens": 97159.0, "step": 90 }, { "epoch": 0.02014910336490026, "grad_norm": 12.5625, "learning_rate": 1.986701591779166e-05, "loss": 1.1458, "mean_token_accuracy": 0.7415299773216247, "num_tokens": 107437.0, "step": 100 }, { "epoch": 0.02216401370139029, "grad_norm": 16.75, "learning_rate": 1.985358318221506e-05, "loss": 1.2748, "mean_token_accuracy": 0.7202155470848084, "num_tokens": 117867.0, "step": 110 }, { "epoch": 0.024178924037880314, "grad_norm": 18.125, "learning_rate": 1.984015044663846e-05, "loss": 1.1689, "mean_token_accuracy": 0.7355200052261353, "num_tokens": 128288.0, "step": 120 }, { "epoch": 0.02619383437437034, "grad_norm": 12.6875, "learning_rate": 1.982671771106186e-05, "loss": 1.2324, "mean_token_accuracy": 0.7224856972694397, "num_tokens": 139627.0, "step": 130 }, { "epoch": 0.028208744710860368, "grad_norm": 11.5625, "learning_rate": 1.981328497548526e-05, "loss": 1.1365, "mean_token_accuracy": 0.7402825653553009, "num_tokens": 150498.0, "step": 140 }, { "epoch": 0.030223655047350393, "grad_norm": 14.75, "learning_rate": 1.979985223990866e-05, "loss": 1.1178, "mean_token_accuracy": 0.7426175236701965, "num_tokens": 161754.0, "step": 150 }, { "epoch": 0.03223856538384042, "grad_norm": 11.4375, "learning_rate": 1.978641950433206e-05, "loss": 1.2596, "mean_token_accuracy": 0.7134447395801544, "num_tokens": 173087.0, "step": 160 }, { "epoch": 0.03425347572033045, "grad_norm": 12.75, "learning_rate": 1.9772986768755458e-05, "loss": 1.0652, "mean_token_accuracy": 0.7474986433982849, "num_tokens": 184747.0, "step": 170 }, { "epoch": 0.03626838605682047, "grad_norm": 11.8125, "learning_rate": 1.9759554033178857e-05, "loss": 1.1436, "mean_token_accuracy": 0.7323237180709838, "num_tokens": 195331.0, "step": 180 }, { "epoch": 0.0382832963933105, "grad_norm": 9.875, "learning_rate": 1.974612129760226e-05, "loss": 1.0312, "mean_token_accuracy": 0.7625056743621826, "num_tokens": 208260.0, "step": 190 }, { "epoch": 0.04029820672980052, "grad_norm": 14.9375, "learning_rate": 1.9732688562025658e-05, "loss": 1.0084, "mean_token_accuracy": 0.7631498157978058, "num_tokens": 218822.0, "step": 200 }, { "epoch": 0.04231311706629055, "grad_norm": 11.625, "learning_rate": 1.9719255826449057e-05, "loss": 0.9813, "mean_token_accuracy": 0.7651655077934265, "num_tokens": 228580.0, "step": 210 }, { "epoch": 0.04432802740278058, "grad_norm": 17.875, "learning_rate": 1.970582309087246e-05, "loss": 1.07, "mean_token_accuracy": 0.7532146275043488, "num_tokens": 239159.0, "step": 220 }, { "epoch": 0.046342937739270604, "grad_norm": 11.25, "learning_rate": 1.9692390355295858e-05, "loss": 1.113, "mean_token_accuracy": 0.7436384916305542, "num_tokens": 251695.0, "step": 230 }, { "epoch": 0.04835784807576063, "grad_norm": 13.375, "learning_rate": 1.9678957619719257e-05, "loss": 0.929, "mean_token_accuracy": 0.7755303025245667, "num_tokens": 261128.0, "step": 240 }, { "epoch": 0.050372758412250654, "grad_norm": 12.8125, "learning_rate": 1.9665524884142656e-05, "loss": 1.0999, "mean_token_accuracy": 0.7514171898365021, "num_tokens": 271560.0, "step": 250 }, { "epoch": 0.05238766874874068, "grad_norm": 13.0, "learning_rate": 1.9652092148566058e-05, "loss": 1.0339, "mean_token_accuracy": 0.7604846298694611, "num_tokens": 282223.0, "step": 260 }, { "epoch": 0.054402579085230704, "grad_norm": 12.9375, "learning_rate": 1.9638659412989457e-05, "loss": 1.0473, "mean_token_accuracy": 0.7622893512248993, "num_tokens": 292726.0, "step": 270 }, { "epoch": 0.056417489421720736, "grad_norm": 15.0, "learning_rate": 1.9625226677412856e-05, "loss": 0.9894, "mean_token_accuracy": 0.764206200838089, "num_tokens": 303785.0, "step": 280 }, { "epoch": 0.05843239975821076, "grad_norm": 10.3125, "learning_rate": 1.9611793941836258e-05, "loss": 1.109, "mean_token_accuracy": 0.7469749927520752, "num_tokens": 314725.0, "step": 290 }, { "epoch": 0.060447310094700786, "grad_norm": 12.625, "learning_rate": 1.9598361206259657e-05, "loss": 1.2098, "mean_token_accuracy": 0.718773603439331, "num_tokens": 326635.0, "step": 300 }, { "epoch": 0.06246222043119081, "grad_norm": 11.125, "learning_rate": 1.9584928470683055e-05, "loss": 1.1025, "mean_token_accuracy": 0.7460452795028687, "num_tokens": 337866.0, "step": 310 }, { "epoch": 0.06447713076768084, "grad_norm": 10.6875, "learning_rate": 1.9571495735106458e-05, "loss": 1.0772, "mean_token_accuracy": 0.7526730418205261, "num_tokens": 348512.0, "step": 320 }, { "epoch": 0.06649204110417087, "grad_norm": 13.375, "learning_rate": 1.9558062999529857e-05, "loss": 1.157, "mean_token_accuracy": 0.7320702195167541, "num_tokens": 360281.0, "step": 330 }, { "epoch": 0.0685069514406609, "grad_norm": 12.0, "learning_rate": 1.9544630263953255e-05, "loss": 1.0157, "mean_token_accuracy": 0.760700649023056, "num_tokens": 371068.0, "step": 340 }, { "epoch": 0.07052186177715092, "grad_norm": 17.375, "learning_rate": 1.9531197528376654e-05, "loss": 0.8851, "mean_token_accuracy": 0.7925353944301605, "num_tokens": 380947.0, "step": 350 }, { "epoch": 0.07253677211364094, "grad_norm": 11.3125, "learning_rate": 1.9517764792800056e-05, "loss": 1.0325, "mean_token_accuracy": 0.7617525398731232, "num_tokens": 391552.0, "step": 360 }, { "epoch": 0.07455168245013097, "grad_norm": 10.9375, "learning_rate": 1.9504332057223455e-05, "loss": 0.9852, "mean_token_accuracy": 0.7655075788497925, "num_tokens": 403321.0, "step": 370 }, { "epoch": 0.076566592786621, "grad_norm": 11.1875, "learning_rate": 1.9490899321646854e-05, "loss": 1.0527, "mean_token_accuracy": 0.7569321393966675, "num_tokens": 414435.0, "step": 380 }, { "epoch": 0.07858150312311102, "grad_norm": 14.6875, "learning_rate": 1.9477466586070256e-05, "loss": 0.9602, "mean_token_accuracy": 0.7720924854278565, "num_tokens": 423506.0, "step": 390 }, { "epoch": 0.08059641345960104, "grad_norm": 11.0, "learning_rate": 1.9464033850493655e-05, "loss": 1.0475, "mean_token_accuracy": 0.7505548059940338, "num_tokens": 436556.0, "step": 400 }, { "epoch": 0.08261132379609107, "grad_norm": 11.9375, "learning_rate": 1.9450601114917054e-05, "loss": 1.0775, "mean_token_accuracy": 0.7474610984325409, "num_tokens": 448248.0, "step": 410 }, { "epoch": 0.0846262341325811, "grad_norm": 10.25, "learning_rate": 1.9437168379340453e-05, "loss": 1.0487, "mean_token_accuracy": 0.7566307663917542, "num_tokens": 460102.0, "step": 420 }, { "epoch": 0.08664114446907113, "grad_norm": 12.0625, "learning_rate": 1.9423735643763855e-05, "loss": 0.9919, "mean_token_accuracy": 0.7676237523555756, "num_tokens": 471590.0, "step": 430 }, { "epoch": 0.08865605480556116, "grad_norm": 13.125, "learning_rate": 1.9410302908187254e-05, "loss": 1.0473, "mean_token_accuracy": 0.7525161623954773, "num_tokens": 482096.0, "step": 440 }, { "epoch": 0.09067096514205118, "grad_norm": 13.4375, "learning_rate": 1.9396870172610653e-05, "loss": 1.0347, "mean_token_accuracy": 0.7515169024467468, "num_tokens": 493585.0, "step": 450 }, { "epoch": 0.09268587547854121, "grad_norm": 10.9375, "learning_rate": 1.9383437437034055e-05, "loss": 1.0487, "mean_token_accuracy": 0.7547510921955108, "num_tokens": 505989.0, "step": 460 }, { "epoch": 0.09470078581503123, "grad_norm": 12.25, "learning_rate": 1.9370004701457454e-05, "loss": 1.018, "mean_token_accuracy": 0.7596003413200378, "num_tokens": 516900.0, "step": 470 }, { "epoch": 0.09671569615152126, "grad_norm": 11.1875, "learning_rate": 1.9356571965880853e-05, "loss": 0.9797, "mean_token_accuracy": 0.7699940800666809, "num_tokens": 526427.0, "step": 480 }, { "epoch": 0.09873060648801128, "grad_norm": 10.3125, "learning_rate": 1.9343139230304255e-05, "loss": 1.0817, "mean_token_accuracy": 0.7470319092273712, "num_tokens": 537981.0, "step": 490 }, { "epoch": 0.10074551682450131, "grad_norm": 13.25, "learning_rate": 1.9329706494727654e-05, "loss": 1.0089, "mean_token_accuracy": 0.7595715343952179, "num_tokens": 549174.0, "step": 500 }, { "epoch": 0.10276042716099133, "grad_norm": 12.625, "learning_rate": 1.9316273759151052e-05, "loss": 1.0164, "mean_token_accuracy": 0.7571583390235901, "num_tokens": 559988.0, "step": 510 }, { "epoch": 0.10477533749748136, "grad_norm": 14.3125, "learning_rate": 1.930284102357445e-05, "loss": 1.1148, "mean_token_accuracy": 0.7423564851284027, "num_tokens": 571510.0, "step": 520 }, { "epoch": 0.10679024783397138, "grad_norm": 14.6875, "learning_rate": 1.9289408287997854e-05, "loss": 1.053, "mean_token_accuracy": 0.7485374748706818, "num_tokens": 583020.0, "step": 530 }, { "epoch": 0.10880515817046141, "grad_norm": 11.4375, "learning_rate": 1.9275975552421252e-05, "loss": 0.9756, "mean_token_accuracy": 0.7606720209121705, "num_tokens": 594042.0, "step": 540 }, { "epoch": 0.11082006850695145, "grad_norm": 13.3125, "learning_rate": 1.926254281684465e-05, "loss": 0.9514, "mean_token_accuracy": 0.7702824532985687, "num_tokens": 605932.0, "step": 550 }, { "epoch": 0.11283497884344147, "grad_norm": 10.625, "learning_rate": 1.9249110081268053e-05, "loss": 1.0008, "mean_token_accuracy": 0.7583375632762909, "num_tokens": 617431.0, "step": 560 }, { "epoch": 0.1148498891799315, "grad_norm": 10.875, "learning_rate": 1.9235677345691452e-05, "loss": 0.998, "mean_token_accuracy": 0.7597042858600617, "num_tokens": 629827.0, "step": 570 }, { "epoch": 0.11686479951642152, "grad_norm": 12.5625, "learning_rate": 1.922224461011485e-05, "loss": 0.9512, "mean_token_accuracy": 0.7806954503059387, "num_tokens": 640144.0, "step": 580 }, { "epoch": 0.11887970985291155, "grad_norm": 10.5625, "learning_rate": 1.920881187453825e-05, "loss": 0.9292, "mean_token_accuracy": 0.7761410176753998, "num_tokens": 652386.0, "step": 590 }, { "epoch": 0.12089462018940157, "grad_norm": 11.0, "learning_rate": 1.9195379138961652e-05, "loss": 1.0768, "mean_token_accuracy": 0.7544383645057678, "num_tokens": 663460.0, "step": 600 }, { "epoch": 0.1229095305258916, "grad_norm": 14.3125, "learning_rate": 1.918194640338505e-05, "loss": 0.8975, "mean_token_accuracy": 0.7799494147300721, "num_tokens": 673425.0, "step": 610 }, { "epoch": 0.12492444086238162, "grad_norm": 10.375, "learning_rate": 1.916851366780845e-05, "loss": 0.899, "mean_token_accuracy": 0.7885317802429199, "num_tokens": 683817.0, "step": 620 }, { "epoch": 0.12693935119887165, "grad_norm": 13.375, "learning_rate": 1.9155080932231852e-05, "loss": 0.998, "mean_token_accuracy": 0.7671383440494537, "num_tokens": 694196.0, "step": 630 }, { "epoch": 0.12895426153536169, "grad_norm": 11.625, "learning_rate": 1.914164819665525e-05, "loss": 0.9808, "mean_token_accuracy": 0.7700311303138733, "num_tokens": 704564.0, "step": 640 }, { "epoch": 0.1309691718718517, "grad_norm": 13.25, "learning_rate": 1.912821546107865e-05, "loss": 1.0077, "mean_token_accuracy": 0.7643253684043885, "num_tokens": 715775.0, "step": 650 }, { "epoch": 0.13298408220834174, "grad_norm": 13.625, "learning_rate": 1.911478272550205e-05, "loss": 0.9457, "mean_token_accuracy": 0.7678769171237946, "num_tokens": 726005.0, "step": 660 }, { "epoch": 0.13499899254483175, "grad_norm": 13.5, "learning_rate": 1.910134998992545e-05, "loss": 1.0155, "mean_token_accuracy": 0.7607427120208741, "num_tokens": 738053.0, "step": 670 }, { "epoch": 0.1370139028813218, "grad_norm": 11.6875, "learning_rate": 1.908791725434885e-05, "loss": 0.9395, "mean_token_accuracy": 0.7723658442497253, "num_tokens": 748480.0, "step": 680 }, { "epoch": 0.1390288132178118, "grad_norm": 15.6875, "learning_rate": 1.907448451877225e-05, "loss": 0.9639, "mean_token_accuracy": 0.7676171123981476, "num_tokens": 759972.0, "step": 690 }, { "epoch": 0.14104372355430184, "grad_norm": 12.875, "learning_rate": 1.906105178319565e-05, "loss": 0.9557, "mean_token_accuracy": 0.7719902992248535, "num_tokens": 771123.0, "step": 700 }, { "epoch": 0.14305863389079185, "grad_norm": 12.8125, "learning_rate": 1.904761904761905e-05, "loss": 1.0022, "mean_token_accuracy": 0.7667870819568634, "num_tokens": 782532.0, "step": 710 }, { "epoch": 0.1450735442272819, "grad_norm": 11.0625, "learning_rate": 1.903418631204245e-05, "loss": 0.9519, "mean_token_accuracy": 0.7708106875419617, "num_tokens": 794067.0, "step": 720 }, { "epoch": 0.14708845456377193, "grad_norm": 14.125, "learning_rate": 1.902075357646585e-05, "loss": 0.9718, "mean_token_accuracy": 0.766555666923523, "num_tokens": 804871.0, "step": 730 }, { "epoch": 0.14910336490026194, "grad_norm": 12.8125, "learning_rate": 1.900732084088925e-05, "loss": 0.9852, "mean_token_accuracy": 0.7678309619426728, "num_tokens": 815050.0, "step": 740 }, { "epoch": 0.15111827523675198, "grad_norm": 11.3125, "learning_rate": 1.8993888105312648e-05, "loss": 0.9951, "mean_token_accuracy": 0.7627758264541626, "num_tokens": 826248.0, "step": 750 }, { "epoch": 0.153133185573242, "grad_norm": 17.25, "learning_rate": 1.8980455369736047e-05, "loss": 1.0433, "mean_token_accuracy": 0.7571396887302398, "num_tokens": 835706.0, "step": 760 }, { "epoch": 0.15514809590973203, "grad_norm": 10.9375, "learning_rate": 1.896702263415945e-05, "loss": 1.0518, "mean_token_accuracy": 0.7517435431480408, "num_tokens": 847261.0, "step": 770 }, { "epoch": 0.15716300624622204, "grad_norm": 10.4375, "learning_rate": 1.8953589898582848e-05, "loss": 0.9629, "mean_token_accuracy": 0.7732720315456391, "num_tokens": 858655.0, "step": 780 }, { "epoch": 0.15917791658271208, "grad_norm": 12.5, "learning_rate": 1.8940157163006247e-05, "loss": 1.0231, "mean_token_accuracy": 0.7555422127246857, "num_tokens": 870002.0, "step": 790 }, { "epoch": 0.1611928269192021, "grad_norm": 11.0, "learning_rate": 1.892672442742965e-05, "loss": 1.1283, "mean_token_accuracy": 0.7441882312297821, "num_tokens": 881131.0, "step": 800 }, { "epoch": 0.16320773725569213, "grad_norm": 12.4375, "learning_rate": 1.8913291691853048e-05, "loss": 1.0252, "mean_token_accuracy": 0.7630669414997101, "num_tokens": 893437.0, "step": 810 }, { "epoch": 0.16522264759218214, "grad_norm": 11.0, "learning_rate": 1.8899858956276447e-05, "loss": 1.0528, "mean_token_accuracy": 0.7483877301216125, "num_tokens": 904976.0, "step": 820 }, { "epoch": 0.16723755792867218, "grad_norm": 12.375, "learning_rate": 1.8886426220699846e-05, "loss": 0.8715, "mean_token_accuracy": 0.7899761021137237, "num_tokens": 915631.0, "step": 830 }, { "epoch": 0.1692524682651622, "grad_norm": 13.375, "learning_rate": 1.8872993485123248e-05, "loss": 1.0548, "mean_token_accuracy": 0.7494987368583679, "num_tokens": 927141.0, "step": 840 }, { "epoch": 0.17126737860165223, "grad_norm": 11.0, "learning_rate": 1.8859560749546647e-05, "loss": 0.9579, "mean_token_accuracy": 0.7668360054492951, "num_tokens": 938792.0, "step": 850 }, { "epoch": 0.17328228893814226, "grad_norm": 13.125, "learning_rate": 1.8846128013970046e-05, "loss": 0.8595, "mean_token_accuracy": 0.7870603501796722, "num_tokens": 949894.0, "step": 860 }, { "epoch": 0.17529719927463228, "grad_norm": 12.625, "learning_rate": 1.8832695278393448e-05, "loss": 0.9216, "mean_token_accuracy": 0.7846542239189148, "num_tokens": 961003.0, "step": 870 }, { "epoch": 0.17731210961112231, "grad_norm": 12.125, "learning_rate": 1.8819262542816847e-05, "loss": 1.0052, "mean_token_accuracy": 0.7603223979473114, "num_tokens": 971577.0, "step": 880 }, { "epoch": 0.17932701994761233, "grad_norm": 12.4375, "learning_rate": 1.8805829807240245e-05, "loss": 0.9299, "mean_token_accuracy": 0.7757908642292023, "num_tokens": 982234.0, "step": 890 }, { "epoch": 0.18134193028410237, "grad_norm": 11.0, "learning_rate": 1.8792397071663648e-05, "loss": 1.0312, "mean_token_accuracy": 0.7591780245304107, "num_tokens": 992997.0, "step": 900 }, { "epoch": 0.18335684062059238, "grad_norm": 10.5625, "learning_rate": 1.8778964336087047e-05, "loss": 0.8999, "mean_token_accuracy": 0.779550439119339, "num_tokens": 1004102.0, "step": 910 }, { "epoch": 0.18537175095708242, "grad_norm": 12.625, "learning_rate": 1.8765531600510445e-05, "loss": 0.8892, "mean_token_accuracy": 0.7890210688114166, "num_tokens": 1015447.0, "step": 920 }, { "epoch": 0.18738666129357243, "grad_norm": 12.125, "learning_rate": 1.8752098864933844e-05, "loss": 1.0344, "mean_token_accuracy": 0.7584980130195618, "num_tokens": 1026939.0, "step": 930 }, { "epoch": 0.18940157163006247, "grad_norm": 10.9375, "learning_rate": 1.8738666129357246e-05, "loss": 0.9686, "mean_token_accuracy": 0.7649740993976593, "num_tokens": 1037937.0, "step": 940 }, { "epoch": 0.19141648196655248, "grad_norm": 8.75, "learning_rate": 1.8725233393780645e-05, "loss": 1.0364, "mean_token_accuracy": 0.7554452955722809, "num_tokens": 1049173.0, "step": 950 }, { "epoch": 0.19343139230304252, "grad_norm": 13.625, "learning_rate": 1.8711800658204044e-05, "loss": 1.0173, "mean_token_accuracy": 0.7559767007827759, "num_tokens": 1060166.0, "step": 960 }, { "epoch": 0.19544630263953255, "grad_norm": 11.1875, "learning_rate": 1.8698367922627446e-05, "loss": 0.9464, "mean_token_accuracy": 0.7735530078411103, "num_tokens": 1070458.0, "step": 970 }, { "epoch": 0.19746121297602257, "grad_norm": 11.1875, "learning_rate": 1.8684935187050845e-05, "loss": 0.9397, "mean_token_accuracy": 0.7724673867225647, "num_tokens": 1081477.0, "step": 980 }, { "epoch": 0.1994761233125126, "grad_norm": 15.1875, "learning_rate": 1.8671502451474244e-05, "loss": 1.0769, "mean_token_accuracy": 0.7459556341171265, "num_tokens": 1094205.0, "step": 990 }, { "epoch": 0.20149103364900262, "grad_norm": 16.5, "learning_rate": 1.8658069715897643e-05, "loss": 0.9763, "mean_token_accuracy": 0.7707934081554413, "num_tokens": 1104929.0, "step": 1000 } ], "logging_steps": 10, "max_steps": 14889, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1337180456005632.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }