{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 666,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015015015015015015,
"grad_norm": 1.168221116065979,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.4071,
"step": 5
},
{
"epoch": 0.03003003003003003,
"grad_norm": 0.9412075281143188,
"learning_rate": 3.2142857142857143e-06,
"loss": 1.4071,
"step": 10
},
{
"epoch": 0.04504504504504504,
"grad_norm": 0.5820316672325134,
"learning_rate": 4.9999999999999996e-06,
"loss": 1.4308,
"step": 15
},
{
"epoch": 0.06006006006006006,
"grad_norm": 0.7234917283058167,
"learning_rate": 6.785714285714286e-06,
"loss": 1.403,
"step": 20
},
{
"epoch": 0.07507507507507508,
"grad_norm": 0.7134122252464294,
"learning_rate": 8.571428571428571e-06,
"loss": 1.3703,
"step": 25
},
{
"epoch": 0.09009009009009009,
"grad_norm": 0.5349926352500916,
"learning_rate": 1.0357142857142857e-05,
"loss": 1.3204,
"step": 30
},
{
"epoch": 0.10510510510510511,
"grad_norm": 0.5786008238792419,
"learning_rate": 1.2142857142857144e-05,
"loss": 1.2867,
"step": 35
},
{
"epoch": 0.12012012012012012,
"grad_norm": 0.506987452507019,
"learning_rate": 1.3928571428571429e-05,
"loss": 1.3269,
"step": 40
},
{
"epoch": 0.13513513513513514,
"grad_norm": 0.5051132440567017,
"learning_rate": 1.5714285714285715e-05,
"loss": 1.3177,
"step": 45
},
{
"epoch": 0.15015015015015015,
"grad_norm": 0.4060744643211365,
"learning_rate": 1.7500000000000002e-05,
"loss": 1.1999,
"step": 50
},
{
"epoch": 0.16516516516516516,
"grad_norm": 0.5537149906158447,
"learning_rate": 1.928571428571429e-05,
"loss": 1.304,
"step": 55
},
{
"epoch": 0.18018018018018017,
"grad_norm": 0.5138696432113647,
"learning_rate": 2.107142857142857e-05,
"loss": 1.2876,
"step": 60
},
{
"epoch": 0.19519519519519518,
"grad_norm": 0.501071035861969,
"learning_rate": 2.2857142857142858e-05,
"loss": 1.2592,
"step": 65
},
{
"epoch": 0.21021021021021022,
"grad_norm": 0.505533754825592,
"learning_rate": 2.464285714285714e-05,
"loss": 1.2247,
"step": 70
},
{
"epoch": 0.22522522522522523,
"grad_norm": 0.5126124620437622,
"learning_rate": 2.6428571428571428e-05,
"loss": 1.2843,
"step": 75
},
{
"epoch": 0.24024024024024024,
"grad_norm": 0.5336039066314697,
"learning_rate": 2.8214285714285714e-05,
"loss": 1.2725,
"step": 80
},
{
"epoch": 0.2552552552552553,
"grad_norm": 0.5905837416648865,
"learning_rate": 3e-05,
"loss": 1.2371,
"step": 85
},
{
"epoch": 0.2702702702702703,
"grad_norm": 1.3594383001327515,
"learning_rate": 2.9999259655754585e-05,
"loss": 1.1873,
"step": 90
},
{
"epoch": 0.2852852852852853,
"grad_norm": 0.5035964846611023,
"learning_rate": 2.9997038696099626e-05,
"loss": 1.1599,
"step": 95
},
{
"epoch": 0.3003003003003003,
"grad_norm": 0.5989760756492615,
"learning_rate": 2.9993337340271743e-05,
"loss": 1.2299,
"step": 100
},
{
"epoch": 0.3153153153153153,
"grad_norm": 0.6236782073974609,
"learning_rate": 2.9988155953641272e-05,
"loss": 1.125,
"step": 105
},
{
"epoch": 0.3303303303303303,
"grad_norm": 0.635779082775116,
"learning_rate": 2.998149504767618e-05,
"loss": 1.1218,
"step": 110
},
{
"epoch": 0.34534534534534533,
"grad_norm": 0.6790189146995544,
"learning_rate": 2.9973355279891595e-05,
"loss": 1.1742,
"step": 115
},
{
"epoch": 0.36036036036036034,
"grad_norm": 0.5752054452896118,
"learning_rate": 2.996373745378487e-05,
"loss": 1.1375,
"step": 120
},
{
"epoch": 0.37537537537537535,
"grad_norm": 0.6132190227508545,
"learning_rate": 2.995264251875631e-05,
"loss": 1.1516,
"step": 125
},
{
"epoch": 0.39039039039039036,
"grad_norm": 0.6133520603179932,
"learning_rate": 2.9940071570015415e-05,
"loss": 1.0994,
"step": 130
},
{
"epoch": 0.40540540540540543,
"grad_norm": 0.634513795375824,
"learning_rate": 2.9926025848472798e-05,
"loss": 1.101,
"step": 135
},
{
"epoch": 0.42042042042042044,
"grad_norm": 0.6447832584381104,
"learning_rate": 2.991050674061767e-05,
"loss": 1.059,
"step": 140
},
{
"epoch": 0.43543543543543545,
"grad_norm": 0.671330451965332,
"learning_rate": 2.9893515778380997e-05,
"loss": 1.1333,
"step": 145
},
{
"epoch": 0.45045045045045046,
"grad_norm": 0.7105020880699158,
"learning_rate": 2.9875054638984253e-05,
"loss": 1.0852,
"step": 150
},
{
"epoch": 0.46546546546546547,
"grad_norm": 0.6580174565315247,
"learning_rate": 2.9855125144773885e-05,
"loss": 1.0793,
"step": 155
},
{
"epoch": 0.4804804804804805,
"grad_norm": 0.6566060185432434,
"learning_rate": 2.9833729263041407e-05,
"loss": 1.0913,
"step": 160
},
{
"epoch": 0.4954954954954955,
"grad_norm": 0.7263543605804443,
"learning_rate": 2.9810869105829202e-05,
"loss": 1.0181,
"step": 165
},
{
"epoch": 0.5105105105105106,
"grad_norm": 0.7267442345619202,
"learning_rate": 2.9786546929722055e-05,
"loss": 1.032,
"step": 170
},
{
"epoch": 0.5255255255255256,
"grad_norm": 0.8429233431816101,
"learning_rate": 2.9760765135624387e-05,
"loss": 0.9719,
"step": 175
},
{
"epoch": 0.5405405405405406,
"grad_norm": 0.7601214051246643,
"learning_rate": 2.9733526268523238e-05,
"loss": 1.0216,
"step": 180
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.7685151100158691,
"learning_rate": 2.9704833017237077e-05,
"loss": 0.9199,
"step": 185
},
{
"epoch": 0.5705705705705706,
"grad_norm": 0.9268845915794373,
"learning_rate": 2.967468821415038e-05,
"loss": 1.0086,
"step": 190
},
{
"epoch": 0.5855855855855856,
"grad_norm": 0.9145250916481018,
"learning_rate": 2.9643094834933997e-05,
"loss": 0.946,
"step": 195
},
{
"epoch": 0.6006006006006006,
"grad_norm": 0.9080163240432739,
"learning_rate": 2.9610055998251473e-05,
"loss": 0.9132,
"step": 200
},
{
"epoch": 0.6156156156156156,
"grad_norm": 0.9134447574615479,
"learning_rate": 2.9575574965451156e-05,
"loss": 0.8778,
"step": 205
},
{
"epoch": 0.6306306306306306,
"grad_norm": 0.8887650370597839,
"learning_rate": 2.9539655140244263e-05,
"loss": 0.9295,
"step": 210
},
{
"epoch": 0.6456456456456456,
"grad_norm": 1.0790126323699951,
"learning_rate": 2.9502300068368922e-05,
"loss": 0.8752,
"step": 215
},
{
"epoch": 0.6606606606606606,
"grad_norm": 0.9577323794364929,
"learning_rate": 2.946351343724013e-05,
"loss": 0.8857,
"step": 220
},
{
"epoch": 0.6756756756756757,
"grad_norm": 0.9615657329559326,
"learning_rate": 2.9423299075585775e-05,
"loss": 0.9026,
"step": 225
},
{
"epoch": 0.6906906906906907,
"grad_norm": 0.8258851766586304,
"learning_rate": 2.9381660953068686e-05,
"loss": 0.9374,
"step": 230
},
{
"epoch": 0.7057057057057057,
"grad_norm": 0.8674429655075073,
"learning_rate": 2.9338603179894784e-05,
"loss": 0.9496,
"step": 235
},
{
"epoch": 0.7207207207207207,
"grad_norm": 0.9960718750953674,
"learning_rate": 2.929413000640735e-05,
"loss": 0.8618,
"step": 240
},
{
"epoch": 0.7357357357357357,
"grad_norm": 0.9150928258895874,
"learning_rate": 2.9248245822667457e-05,
"loss": 0.9003,
"step": 245
},
{
"epoch": 0.7507507507507507,
"grad_norm": 0.9323129057884216,
"learning_rate": 2.920095515802062e-05,
"loss": 0.8615,
"step": 250
},
{
"epoch": 0.7657657657657657,
"grad_norm": 1.002323031425476,
"learning_rate": 2.9152262680649704e-05,
"loss": 0.876,
"step": 255
},
{
"epoch": 0.7807807807807807,
"grad_norm": 1.0666371583938599,
"learning_rate": 2.9102173197114094e-05,
"loss": 0.8508,
"step": 260
},
{
"epoch": 0.7957957957957958,
"grad_norm": 0.9196386933326721,
"learning_rate": 2.9050691651875243e-05,
"loss": 0.8039,
"step": 265
},
{
"epoch": 0.8108108108108109,
"grad_norm": 0.9974318742752075,
"learning_rate": 2.8997823126808583e-05,
"loss": 0.832,
"step": 270
},
{
"epoch": 0.8258258258258259,
"grad_norm": 0.93741375207901,
"learning_rate": 2.894357284070189e-05,
"loss": 0.8235,
"step": 275
},
{
"epoch": 0.8408408408408409,
"grad_norm": 1.0261346101760864,
"learning_rate": 2.888794614874011e-05,
"loss": 0.803,
"step": 280
},
{
"epoch": 0.8558558558558559,
"grad_norm": 0.9743596911430359,
"learning_rate": 2.883094854197676e-05,
"loss": 0.7929,
"step": 285
},
{
"epoch": 0.8708708708708709,
"grad_norm": 1.0855077505111694,
"learning_rate": 2.877258564679185e-05,
"loss": 0.7591,
"step": 290
},
{
"epoch": 0.8858858858858859,
"grad_norm": 0.9973369836807251,
"learning_rate": 2.8712863224336533e-05,
"loss": 0.7941,
"step": 295
},
{
"epoch": 0.9009009009009009,
"grad_norm": 1.0347039699554443,
"learning_rate": 2.8651787169964374e-05,
"loss": 0.7256,
"step": 300
},
{
"epoch": 0.9159159159159159,
"grad_norm": 1.189362645149231,
"learning_rate": 2.8589363512649432e-05,
"loss": 0.7328,
"step": 305
},
{
"epoch": 0.9309309309309309,
"grad_norm": 1.0265212059020996,
"learning_rate": 2.8525598414391104e-05,
"loss": 0.7552,
"step": 310
},
{
"epoch": 0.9459459459459459,
"grad_norm": 1.0244401693344116,
"learning_rate": 2.846049816960585e-05,
"loss": 0.7563,
"step": 315
},
{
"epoch": 0.960960960960961,
"grad_norm": 1.2380430698394775,
"learning_rate": 2.83940692045059e-05,
"loss": 0.779,
"step": 320
},
{
"epoch": 0.975975975975976,
"grad_norm": 1.1021960973739624,
"learning_rate": 2.8326318076464852e-05,
"loss": 0.7572,
"step": 325
},
{
"epoch": 0.990990990990991,
"grad_norm": 1.007034182548523,
"learning_rate": 2.8257251473370408e-05,
"loss": 0.7074,
"step": 330
},
{
"epoch": 1.006006006006006,
"grad_norm": 1.062544345855713,
"learning_rate": 2.8186876212964185e-05,
"loss": 0.6919,
"step": 335
},
{
"epoch": 1.021021021021021,
"grad_norm": 1.149069905281067,
"learning_rate": 2.811519924216873e-05,
"loss": 0.6393,
"step": 340
},
{
"epoch": 1.0360360360360361,
"grad_norm": 1.0859824419021606,
"learning_rate": 2.8042227636401757e-05,
"loss": 0.6682,
"step": 345
},
{
"epoch": 1.0510510510510511,
"grad_norm": 1.0685064792633057,
"learning_rate": 2.796796859887772e-05,
"loss": 0.6079,
"step": 350
},
{
"epoch": 1.0660660660660661,
"grad_norm": 1.0562701225280762,
"learning_rate": 2.7892429459896766e-05,
"loss": 0.6154,
"step": 355
},
{
"epoch": 1.0810810810810811,
"grad_norm": 1.1275193691253662,
"learning_rate": 2.7815617676121138e-05,
"loss": 0.6308,
"step": 360
},
{
"epoch": 1.0960960960960962,
"grad_norm": 1.0415915250778198,
"learning_rate": 2.773754082983912e-05,
"loss": 0.6441,
"step": 365
},
{
"epoch": 1.1111111111111112,
"grad_norm": 1.1084564924240112,
"learning_rate": 2.7658206628216556e-05,
"loss": 0.5814,
"step": 370
},
{
"epoch": 1.1261261261261262,
"grad_norm": 1.0700006484985352,
"learning_rate": 2.7577622902536064e-05,
"loss": 0.5895,
"step": 375
},
{
"epoch": 1.1411411411411412,
"grad_norm": 1.1039676666259766,
"learning_rate": 2.7495797607423986e-05,
"loss": 0.6004,
"step": 380
},
{
"epoch": 1.1561561561561562,
"grad_norm": 1.235087513923645,
"learning_rate": 2.7412738820065173e-05,
"loss": 0.6174,
"step": 385
},
{
"epoch": 1.1711711711711712,
"grad_norm": 1.1676405668258667,
"learning_rate": 2.732845473940566e-05,
"loss": 0.5685,
"step": 390
},
{
"epoch": 1.1861861861861862,
"grad_norm": 1.2204903364181519,
"learning_rate": 2.7242953685343327e-05,
"loss": 0.6005,
"step": 395
},
{
"epoch": 1.2012012012012012,
"grad_norm": 1.1344414949417114,
"learning_rate": 2.7156244097906614e-05,
"loss": 0.6182,
"step": 400
},
{
"epoch": 1.2162162162162162,
"grad_norm": 1.2518144845962524,
"learning_rate": 2.7068334536421408e-05,
"loss": 0.5759,
"step": 405
},
{
"epoch": 1.2312312312312312,
"grad_norm": 1.248169183731079,
"learning_rate": 2.6979233678666102e-05,
"loss": 0.5642,
"step": 410
},
{
"epoch": 1.2462462462462462,
"grad_norm": 1.1738685369491577,
"learning_rate": 2.6888950320014993e-05,
"loss": 0.5953,
"step": 415
},
{
"epoch": 1.2612612612612613,
"grad_norm": 1.3666422367095947,
"learning_rate": 2.6797493372570098e-05,
"loss": 0.5547,
"step": 420
},
{
"epoch": 1.2762762762762763,
"grad_norm": 1.2075399160385132,
"learning_rate": 2.6704871864281377e-05,
"loss": 0.5817,
"step": 425
},
{
"epoch": 1.2912912912912913,
"grad_norm": 1.1854655742645264,
"learning_rate": 2.6611094938055586e-05,
"loss": 0.5291,
"step": 430
},
{
"epoch": 1.3063063063063063,
"grad_norm": 1.2468748092651367,
"learning_rate": 2.651617185085375e-05,
"loss": 0.5862,
"step": 435
},
{
"epoch": 1.3213213213213213,
"grad_norm": 1.2312390804290771,
"learning_rate": 2.642011197277738e-05,
"loss": 0.5579,
"step": 440
},
{
"epoch": 1.3363363363363363,
"grad_norm": 1.2079895734786987,
"learning_rate": 2.6322924786143544e-05,
"loss": 0.5381,
"step": 445
},
{
"epoch": 1.3513513513513513,
"grad_norm": 1.2430604696273804,
"learning_rate": 2.6224619884548814e-05,
"loss": 0.5199,
"step": 450
},
{
"epoch": 1.3663663663663663,
"grad_norm": 1.3518890142440796,
"learning_rate": 2.612520697192229e-05,
"loss": 0.5885,
"step": 455
},
{
"epoch": 1.3813813813813813,
"grad_norm": 1.2342760562896729,
"learning_rate": 2.6024695861567675e-05,
"loss": 0.5597,
"step": 460
},
{
"epoch": 1.3963963963963963,
"grad_norm": 1.205000638961792,
"learning_rate": 2.592309647519458e-05,
"loss": 0.5427,
"step": 465
},
{
"epoch": 1.4114114114114114,
"grad_norm": 1.304811716079712,
"learning_rate": 2.5820418841939152e-05,
"loss": 0.5439,
"step": 470
},
{
"epoch": 1.4264264264264264,
"grad_norm": 1.3263685703277588,
"learning_rate": 2.5716673097374047e-05,
"loss": 0.5013,
"step": 475
},
{
"epoch": 1.4414414414414414,
"grad_norm": 1.227352261543274,
"learning_rate": 2.5611869482507924e-05,
"loss": 0.5695,
"step": 480
},
{
"epoch": 1.4564564564564564,
"grad_norm": 1.2734203338623047,
"learning_rate": 2.550601834277454e-05,
"loss": 0.5228,
"step": 485
},
{
"epoch": 1.4714714714714714,
"grad_norm": 1.147456169128418,
"learning_rate": 2.539913012701152e-05,
"loss": 0.5174,
"step": 490
},
{
"epoch": 1.4864864864864864,
"grad_norm": 1.2142225503921509,
"learning_rate": 2.529121538642892e-05,
"loss": 0.4837,
"step": 495
},
{
"epoch": 1.5015015015015014,
"grad_norm": 1.177823543548584,
"learning_rate": 2.51822847735677e-05,
"loss": 0.5203,
"step": 500
},
{
"epoch": 1.5165165165165164,
"grad_norm": 1.3306628465652466,
"learning_rate": 2.5072349041248175e-05,
"loss": 0.5125,
"step": 505
},
{
"epoch": 1.5315315315315314,
"grad_norm": 1.3492076396942139,
"learning_rate": 2.496141904150859e-05,
"loss": 0.5235,
"step": 510
},
{
"epoch": 1.5465465465465464,
"grad_norm": 1.0597307682037354,
"learning_rate": 2.484950572453386e-05,
"loss": 0.5086,
"step": 515
},
{
"epoch": 1.5615615615615615,
"grad_norm": 1.1566152572631836,
"learning_rate": 2.4736620137574686e-05,
"loss": 0.4862,
"step": 520
},
{
"epoch": 1.5765765765765765,
"grad_norm": 1.1374027729034424,
"learning_rate": 2.4622773423857032e-05,
"loss": 0.5468,
"step": 525
},
{
"epoch": 1.5915915915915915,
"grad_norm": 1.0723894834518433,
"learning_rate": 2.4507976821482138e-05,
"loss": 0.523,
"step": 530
},
{
"epoch": 1.6066066066066065,
"grad_norm": 1.4630274772644043,
"learning_rate": 2.4392241662317205e-05,
"loss": 0.4643,
"step": 535
},
{
"epoch": 1.6216216216216215,
"grad_norm": 1.3217425346374512,
"learning_rate": 2.4275579370876772e-05,
"loss": 0.489,
"step": 540
},
{
"epoch": 1.6366366366366365,
"grad_norm": 1.301561951637268,
"learning_rate": 2.4158001463194998e-05,
"loss": 0.4548,
"step": 545
},
{
"epoch": 1.6516516516516515,
"grad_norm": 1.132656455039978,
"learning_rate": 2.4039519545688848e-05,
"loss": 0.4805,
"step": 550
},
{
"epoch": 1.6666666666666665,
"grad_norm": 1.1595160961151123,
"learning_rate": 2.392014531401244e-05,
"loss": 0.5021,
"step": 555
},
{
"epoch": 1.6816816816816815,
"grad_norm": 1.075697660446167,
"learning_rate": 2.37998905519025e-05,
"loss": 0.4383,
"step": 560
},
{
"epoch": 1.6966966966966965,
"grad_norm": 1.2260234355926514,
"learning_rate": 2.3678767130015174e-05,
"loss": 0.4359,
"step": 565
},
{
"epoch": 1.7117117117117115,
"grad_norm": 1.1868109703063965,
"learning_rate": 2.3556787004754253e-05,
"loss": 0.4411,
"step": 570
},
{
"epoch": 1.7267267267267268,
"grad_norm": 1.1722155809402466,
"learning_rate": 2.3433962217090904e-05,
"loss": 0.4586,
"step": 575
},
{
"epoch": 1.7417417417417418,
"grad_norm": 1.1019175052642822,
"learning_rate": 2.3310304891375092e-05,
"loss": 0.4072,
"step": 580
},
{
"epoch": 1.7567567567567568,
"grad_norm": 1.2503912448883057,
"learning_rate": 2.3185827234138756e-05,
"loss": 0.4602,
"step": 585
},
{
"epoch": 1.7717717717717718,
"grad_norm": 1.1760993003845215,
"learning_rate": 2.306054153289085e-05,
"loss": 0.4525,
"step": 590
},
{
"epoch": 1.7867867867867868,
"grad_norm": 1.2905851602554321,
"learning_rate": 2.2934460154904436e-05,
"loss": 0.4767,
"step": 595
},
{
"epoch": 1.8018018018018018,
"grad_norm": 1.2083145380020142,
"learning_rate": 2.280759554599587e-05,
"loss": 0.4617,
"step": 600
},
{
"epoch": 1.8168168168168168,
"grad_norm": 1.2210261821746826,
"learning_rate": 2.2679960229296244e-05,
"loss": 0.4468,
"step": 605
},
{
"epoch": 1.8318318318318318,
"grad_norm": 1.238731026649475,
"learning_rate": 2.255156680401518e-05,
"loss": 0.4182,
"step": 610
},
{
"epoch": 1.8468468468468469,
"grad_norm": 1.33894944190979,
"learning_rate": 2.242242794419715e-05,
"loss": 0.4642,
"step": 615
},
{
"epoch": 1.8618618618618619,
"grad_norm": 1.2281477451324463,
"learning_rate": 2.2292556397470394e-05,
"loss": 0.445,
"step": 620
},
{
"epoch": 1.8768768768768769,
"grad_norm": 1.1752314567565918,
"learning_rate": 2.2161964983788535e-05,
"loss": 0.3754,
"step": 625
},
{
"epoch": 1.8918918918918919,
"grad_norm": 1.0684999227523804,
"learning_rate": 2.2030666594165135e-05,
"loss": 0.4132,
"step": 630
},
{
"epoch": 1.906906906906907,
"grad_norm": 1.2152754068374634,
"learning_rate": 2.1898674189401148e-05,
"loss": 0.3813,
"step": 635
},
{
"epoch": 1.921921921921922,
"grad_norm": 1.2044199705123901,
"learning_rate": 2.1766000798805542e-05,
"loss": 0.4362,
"step": 640
},
{
"epoch": 1.936936936936937,
"grad_norm": 1.1659032106399536,
"learning_rate": 2.1632659518909156e-05,
"loss": 0.3985,
"step": 645
},
{
"epoch": 1.951951951951952,
"grad_norm": 1.2562652826309204,
"learning_rate": 2.1498663512171885e-05,
"loss": 0.3964,
"step": 650
},
{
"epoch": 1.966966966966967,
"grad_norm": 1.1590440273284912,
"learning_rate": 2.13640260056834e-05,
"loss": 0.3917,
"step": 655
},
{
"epoch": 1.981981981981982,
"grad_norm": 1.2917598485946655,
"learning_rate": 2.1228760289857456e-05,
"loss": 0.3642,
"step": 660
},
{
"epoch": 1.996996996996997,
"grad_norm": 1.40131413936615,
"learning_rate": 2.1092879717119955e-05,
"loss": 0.4154,
"step": 665
}
],
"logging_steps": 5,
"max_steps": 1665,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 9.581609998216069e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}