gemma2b_it_toolcalling_finetuned / trainer_state.json
Abdulvajid's picture
Upload folder using huggingface_hub
a06b81b verified
Invalid JSON: Unexpected token 'I', ..."ad_norm": Infinity, "... is not valid JSON
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.49797696856520385,
"eval_steps": 10,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0012449424214130097,
"grad_norm": null,
"learning_rate": 0.0,
"loss": 2.7527,
"mean_token_accuracy": 0.6098426431417465,
"num_tokens": 4577.0,
"step": 1
},
{
"epoch": 0.012449424214130096,
"grad_norm": 16.535795211791992,
"learning_rate": 9.876543209876543e-06,
"loss": 2.4622,
"mean_token_accuracy": 0.6381922413905462,
"num_tokens": 40313.0,
"step": 10
},
{
"epoch": 0.012449424214130096,
"eval_loss": 2.009472608566284,
"eval_mean_token_accuracy": 0.652686527967453,
"eval_num_tokens": 40313.0,
"eval_runtime": 22.1397,
"eval_samples_per_second": 2.258,
"eval_steps_per_second": 2.258,
"step": 10
},
{
"epoch": 0.024898848428260192,
"grad_norm": 6.280624866485596,
"learning_rate": 2.2222222222222223e-05,
"loss": 1.6438,
"mean_token_accuracy": 0.6907053753733635,
"num_tokens": 82255.0,
"step": 20
},
{
"epoch": 0.024898848428260192,
"eval_loss": 1.292786717414856,
"eval_mean_token_accuracy": 0.7414309787750244,
"eval_num_tokens": 82255.0,
"eval_runtime": 22.3447,
"eval_samples_per_second": 2.238,
"eval_steps_per_second": 2.238,
"step": 20
},
{
"epoch": 0.03734827264239029,
"grad_norm": 3.875969648361206,
"learning_rate": 3.45679012345679e-05,
"loss": 1.0252,
"mean_token_accuracy": 0.7984871238470077,
"num_tokens": 120722.0,
"step": 30
},
{
"epoch": 0.03734827264239029,
"eval_loss": 0.8175942897796631,
"eval_mean_token_accuracy": 0.8347543370723725,
"eval_num_tokens": 120722.0,
"eval_runtime": 22.4004,
"eval_samples_per_second": 2.232,
"eval_steps_per_second": 2.232,
"step": 30
},
{
"epoch": 0.049797696856520385,
"grad_norm": 2.5878844261169434,
"learning_rate": 4.691358024691358e-05,
"loss": 0.7542,
"mean_token_accuracy": 0.8444898426532745,
"num_tokens": 161994.0,
"step": 40
},
{
"epoch": 0.049797696856520385,
"eval_loss": 0.6629099249839783,
"eval_mean_token_accuracy": 0.8559932851791382,
"eval_num_tokens": 161994.0,
"eval_runtime": 22.2776,
"eval_samples_per_second": 2.244,
"eval_steps_per_second": 2.244,
"step": 40
},
{
"epoch": 0.06224712107065048,
"grad_norm": 1.784697413444519,
"learning_rate": 5.925925925925926e-05,
"loss": 0.6373,
"mean_token_accuracy": 0.8567869439721107,
"num_tokens": 202956.0,
"step": 50
},
{
"epoch": 0.06224712107065048,
"eval_loss": 0.5834522843360901,
"eval_mean_token_accuracy": 0.8706429171562194,
"eval_num_tokens": 202956.0,
"eval_runtime": 22.3133,
"eval_samples_per_second": 2.241,
"eval_steps_per_second": 2.241,
"step": 50
},
{
"epoch": 0.07469654528478058,
"grad_norm": 2.509762763977051,
"learning_rate": 7.160493827160494e-05,
"loss": 0.5859,
"mean_token_accuracy": 0.8670632347464562,
"num_tokens": 245518.0,
"step": 60
},
{
"epoch": 0.07469654528478058,
"eval_loss": 0.5327284932136536,
"eval_mean_token_accuracy": 0.8751986050605773,
"eval_num_tokens": 245518.0,
"eval_runtime": 22.29,
"eval_samples_per_second": 2.243,
"eval_steps_per_second": 2.243,
"step": 60
},
{
"epoch": 0.08714596949891068,
"grad_norm": 1.8014663457870483,
"learning_rate": 8.395061728395062e-05,
"loss": 0.5217,
"mean_token_accuracy": 0.8772685110569001,
"num_tokens": 286841.0,
"step": 70
},
{
"epoch": 0.08714596949891068,
"eval_loss": 0.4707345962524414,
"eval_mean_token_accuracy": 0.8786909699440002,
"eval_num_tokens": 286841.0,
"eval_runtime": 22.2481,
"eval_samples_per_second": 2.247,
"eval_steps_per_second": 2.247,
"step": 70
},
{
"epoch": 0.09959539371304077,
"grad_norm": 1.7159992456436157,
"learning_rate": 9.62962962962963e-05,
"loss": 0.4529,
"mean_token_accuracy": 0.8761002823710442,
"num_tokens": 330576.0,
"step": 80
},
{
"epoch": 0.09959539371304077,
"eval_loss": 0.38634419441223145,
"eval_mean_token_accuracy": 0.8834530138969421,
"eval_num_tokens": 330576.0,
"eval_runtime": 22.3312,
"eval_samples_per_second": 2.239,
"eval_steps_per_second": 2.239,
"step": 80
},
{
"epoch": 0.11204481792717087,
"grad_norm": 2.0236318111419678,
"learning_rate": 9.997687265620273e-05,
"loss": 0.3827,
"mean_token_accuracy": 0.8846412658691406,
"num_tokens": 371225.0,
"step": 90
},
{
"epoch": 0.11204481792717087,
"eval_loss": 0.37090864777565,
"eval_mean_token_accuracy": 0.8859660923480988,
"eval_num_tokens": 371225.0,
"eval_runtime": 22.3399,
"eval_samples_per_second": 2.238,
"eval_steps_per_second": 2.238,
"step": 90
},
{
"epoch": 0.12449424214130096,
"grad_norm": 2.2110610008239746,
"learning_rate": 9.98636473719804e-05,
"loss": 0.3966,
"mean_token_accuracy": 0.8800512284040451,
"num_tokens": 415356.0,
"step": 100
},
{
"epoch": 0.12449424214130096,
"eval_loss": 0.3614565134048462,
"eval_mean_token_accuracy": 0.8883592307567596,
"eval_num_tokens": 415356.0,
"eval_runtime": 22.2481,
"eval_samples_per_second": 2.247,
"eval_steps_per_second": 2.247,
"step": 100
},
{
"epoch": 0.13694366635543107,
"grad_norm": 2.259178876876831,
"learning_rate": 9.965628974662144e-05,
"loss": 0.3822,
"mean_token_accuracy": 0.8851513683795929,
"num_tokens": 453785.0,
"step": 110
},
{
"epoch": 0.13694366635543107,
"eval_loss": 0.35502663254737854,
"eval_mean_token_accuracy": 0.8901030969619751,
"eval_num_tokens": 453785.0,
"eval_runtime": 22.0185,
"eval_samples_per_second": 2.271,
"eval_steps_per_second": 2.271,
"step": 110
},
{
"epoch": 0.14939309056956115,
"grad_norm": 1.4706709384918213,
"learning_rate": 9.935519122880152e-05,
"loss": 0.3597,
"mean_token_accuracy": 0.8902897864580155,
"num_tokens": 493793.0,
"step": 120
},
{
"epoch": 0.14939309056956115,
"eval_loss": 0.34856370091438293,
"eval_mean_token_accuracy": 0.8903208661079407,
"eval_num_tokens": 493793.0,
"eval_runtime": 22.339,
"eval_samples_per_second": 2.238,
"eval_steps_per_second": 2.238,
"step": 120
},
{
"epoch": 0.16184251478369124,
"grad_norm": 1.2550042867660522,
"learning_rate": 9.896092023077865e-05,
"loss": 0.3342,
"mean_token_accuracy": 0.8952602833509445,
"num_tokens": 533894.0,
"step": 130
},
{
"epoch": 0.16184251478369124,
"eval_loss": 0.34542137384414673,
"eval_mean_token_accuracy": 0.891883670091629,
"eval_num_tokens": 533894.0,
"eval_runtime": 22.258,
"eval_samples_per_second": 2.246,
"eval_steps_per_second": 2.246,
"step": 130
},
{
"epoch": 0.17429193899782136,
"grad_norm": 1.4467822313308716,
"learning_rate": 9.847422105534739e-05,
"loss": 0.364,
"mean_token_accuracy": 0.8903828382492065,
"num_tokens": 574730.0,
"step": 140
},
{
"epoch": 0.17429193899782136,
"eval_loss": 0.3417295515537262,
"eval_mean_token_accuracy": 0.8922731137275696,
"eval_num_tokens": 574730.0,
"eval_runtime": 22.3005,
"eval_samples_per_second": 2.242,
"eval_steps_per_second": 2.242,
"step": 140
},
{
"epoch": 0.18674136321195145,
"grad_norm": 1.324472188949585,
"learning_rate": 9.78960124907478e-05,
"loss": 0.3447,
"mean_token_accuracy": 0.893255865573883,
"num_tokens": 616097.0,
"step": 150
},
{
"epoch": 0.18674136321195145,
"eval_loss": 0.3354889154434204,
"eval_mean_token_accuracy": 0.8933182156085968,
"eval_num_tokens": 616097.0,
"eval_runtime": 22.248,
"eval_samples_per_second": 2.247,
"eval_steps_per_second": 2.247,
"step": 150
},
{
"epoch": 0.19919078742608154,
"grad_norm": 1.350631833076477,
"learning_rate": 9.722738607618171e-05,
"loss": 0.3799,
"mean_token_accuracy": 0.8845137342810631,
"num_tokens": 658786.0,
"step": 160
},
{
"epoch": 0.19919078742608154,
"eval_loss": 0.3358314633369446,
"eval_mean_token_accuracy": 0.8949730741977692,
"eval_num_tokens": 658786.0,
"eval_runtime": 22.2937,
"eval_samples_per_second": 2.243,
"eval_steps_per_second": 2.243,
"step": 160
},
{
"epoch": 0.21164021164021163,
"grad_norm": 1.375606656074524,
"learning_rate": 9.646960404121042e-05,
"loss": 0.3545,
"mean_token_accuracy": 0.894567859172821,
"num_tokens": 699144.0,
"step": 170
},
{
"epoch": 0.21164021164021163,
"eval_loss": 0.33176079392433167,
"eval_mean_token_accuracy": 0.8978442597389221,
"eval_num_tokens": 699144.0,
"eval_runtime": 22.321,
"eval_samples_per_second": 2.24,
"eval_steps_per_second": 2.24,
"step": 170
},
{
"epoch": 0.22408963585434175,
"grad_norm": 1.2101880311965942,
"learning_rate": 9.562409692292424e-05,
"loss": 0.3595,
"mean_token_accuracy": 0.8943482771515846,
"num_tokens": 739037.0,
"step": 180
},
{
"epoch": 0.22408963585434175,
"eval_loss": 0.3300569951534271,
"eval_mean_token_accuracy": 0.8991416382789612,
"eval_num_tokens": 739037.0,
"eval_runtime": 22.3075,
"eval_samples_per_second": 2.241,
"eval_steps_per_second": 2.241,
"step": 180
},
{
"epoch": 0.23653906006847183,
"grad_norm": 1.403489589691162,
"learning_rate": 9.469246086538175e-05,
"loss": 0.3525,
"mean_token_accuracy": 0.892733770608902,
"num_tokens": 780872.0,
"step": 190
},
{
"epoch": 0.23653906006847183,
"eval_loss": 0.32495585083961487,
"eval_mean_token_accuracy": 0.9010837972164154,
"eval_num_tokens": 780872.0,
"eval_runtime": 22.2334,
"eval_samples_per_second": 2.249,
"eval_steps_per_second": 2.249,
"step": 190
},
{
"epoch": 0.24898848428260192,
"grad_norm": 2.186126470565796,
"learning_rate": 9.367645460641716e-05,
"loss": 0.333,
"mean_token_accuracy": 0.8986451297998428,
"num_tokens": 821731.0,
"step": 200
},
{
"epoch": 0.24898848428260192,
"eval_loss": 0.32020682096481323,
"eval_mean_token_accuracy": 0.901833120584488,
"eval_num_tokens": 821731.0,
"eval_runtime": 22.2962,
"eval_samples_per_second": 2.243,
"eval_steps_per_second": 2.243,
"step": 200
},
{
"epoch": 0.26143790849673204,
"grad_norm": 1.3674637079238892,
"learning_rate": 9.257799615750385e-05,
"loss": 0.3311,
"mean_token_accuracy": 0.8988270297646522,
"num_tokens": 863318.0,
"step": 210
},
{
"epoch": 0.26143790849673204,
"eval_loss": 0.312335729598999,
"eval_mean_token_accuracy": 0.9045632266998291,
"eval_num_tokens": 863318.0,
"eval_runtime": 21.8783,
"eval_samples_per_second": 2.285,
"eval_steps_per_second": 2.285,
"step": 210
},
{
"epoch": 0.27388733271086213,
"grad_norm": 1.2784392833709717,
"learning_rate": 9.139915918294213e-05,
"loss": 0.3258,
"mean_token_accuracy": 0.9025416001677513,
"num_tokens": 906386.0,
"step": 220
},
{
"epoch": 0.27388733271086213,
"eval_loss": 0.30771327018737793,
"eval_mean_token_accuracy": 0.9066664147377014,
"eval_num_tokens": 906386.0,
"eval_runtime": 22.2842,
"eval_samples_per_second": 2.244,
"eval_steps_per_second": 2.244,
"step": 220
},
{
"epoch": 0.2863367569249922,
"grad_norm": 1.1845546960830688,
"learning_rate": 9.014216908520618e-05,
"loss": 0.3005,
"mean_token_accuracy": 0.9102896004915237,
"num_tokens": 946683.0,
"step": 230
},
{
"epoch": 0.2863367569249922,
"eval_loss": 0.306180477142334,
"eval_mean_token_accuracy": 0.9058228933811188,
"eval_num_tokens": 946683.0,
"eval_runtime": 22.2708,
"eval_samples_per_second": 2.245,
"eval_steps_per_second": 2.245,
"step": 230
},
{
"epoch": 0.2987861811391223,
"grad_norm": 1.3700170516967773,
"learning_rate": 8.88093988038406e-05,
"loss": 0.3457,
"mean_token_accuracy": 0.8991452261805535,
"num_tokens": 988276.0,
"step": 240
},
{
"epoch": 0.2987861811391223,
"eval_loss": 0.30229029059410095,
"eval_mean_token_accuracy": 0.9061234760284423,
"eval_num_tokens": 988276.0,
"eval_runtime": 22.3208,
"eval_samples_per_second": 2.24,
"eval_steps_per_second": 2.24,
"step": 240
},
{
"epoch": 0.3112356053532524,
"grad_norm": 1.7757388353347778,
"learning_rate": 8.740336433583704e-05,
"loss": 0.2971,
"mean_token_accuracy": 0.9136147618293762,
"num_tokens": 1027142.0,
"step": 250
},
{
"epoch": 0.3112356053532524,
"eval_loss": 0.3006957471370697,
"eval_mean_token_accuracy": 0.9066220009326935,
"eval_num_tokens": 1027142.0,
"eval_runtime": 22.3249,
"eval_samples_per_second": 2.24,
"eval_steps_per_second": 2.24,
"step": 250
},
{
"epoch": 0.3236850295673825,
"grad_norm": 1.1962240934371948,
"learning_rate": 8.592671998594794e-05,
"loss": 0.3075,
"mean_token_accuracy": 0.9059017911553383,
"num_tokens": 1066879.0,
"step": 260
},
{
"epoch": 0.3236850295673825,
"eval_loss": 0.2966723144054413,
"eval_mean_token_accuracy": 0.908552759885788,
"eval_num_tokens": 1066879.0,
"eval_runtime": 22.2952,
"eval_samples_per_second": 2.243,
"eval_steps_per_second": 2.243,
"step": 260
},
{
"epoch": 0.33613445378151263,
"grad_norm": 1.075523018836975,
"learning_rate": 8.438225335590333e-05,
"loss": 0.3203,
"mean_token_accuracy": 0.9030415132641793,
"num_tokens": 1107335.0,
"step": 270
},
{
"epoch": 0.33613445378151263,
"eval_loss": 0.2949363589286804,
"eval_mean_token_accuracy": 0.9083844482898712,
"eval_num_tokens": 1107335.0,
"eval_runtime": 22.2589,
"eval_samples_per_second": 2.246,
"eval_steps_per_second": 2.246,
"step": 270
},
{
"epoch": 0.3485838779956427,
"grad_norm": 1.1951340436935425,
"learning_rate": 8.27728800819905e-05,
"loss": 0.3404,
"mean_token_accuracy": 0.8968317583203316,
"num_tokens": 1148632.0,
"step": 280
},
{
"epoch": 0.3485838779956427,
"eval_loss": 0.2915548086166382,
"eval_mean_token_accuracy": 0.9077938544750214,
"eval_num_tokens": 1148632.0,
"eval_runtime": 22.2818,
"eval_samples_per_second": 2.244,
"eval_steps_per_second": 2.244,
"step": 280
},
{
"epoch": 0.3610333022097728,
"grad_norm": 0.9940551519393921,
"learning_rate": 8.11016383309305e-05,
"loss": 0.2957,
"mean_token_accuracy": 0.9087760657072067,
"num_tokens": 1188823.0,
"step": 290
},
{
"epoch": 0.3610333022097728,
"eval_loss": 0.2891790270805359,
"eval_mean_token_accuracy": 0.9090994548797607,
"eval_num_tokens": 1188823.0,
"eval_runtime": 22.27,
"eval_samples_per_second": 2.245,
"eval_steps_per_second": 2.245,
"step": 290
},
{
"epoch": 0.3734827264239029,
"grad_norm": 1.0126793384552002,
"learning_rate": 7.937168306444242e-05,
"loss": 0.3636,
"mean_token_accuracy": 0.89151521474123,
"num_tokens": 1230550.0,
"step": 300
},
{
"epoch": 0.3734827264239029,
"eval_loss": 0.2865450978279114,
"eval_mean_token_accuracy": 0.9095333778858185,
"eval_num_tokens": 1230550.0,
"eval_runtime": 22.2643,
"eval_samples_per_second": 2.246,
"eval_steps_per_second": 2.246,
"step": 300
},
{
"epoch": 0.385932150638033,
"grad_norm": 1.1640918254852295,
"learning_rate": 7.758628008332261e-05,
"loss": 0.298,
"mean_token_accuracy": 0.9120915725827217,
"num_tokens": 1269322.0,
"step": 310
},
{
"epoch": 0.385932150638033,
"eval_loss": 0.28298419713974,
"eval_mean_token_accuracy": 0.9108501935005188,
"eval_num_tokens": 1269322.0,
"eval_runtime": 22.2371,
"eval_samples_per_second": 2.248,
"eval_steps_per_second": 2.248,
"step": 310
},
{
"epoch": 0.3983815748521631,
"grad_norm": 1.2163608074188232,
"learning_rate": 7.574879986228245e-05,
"loss": 0.304,
"mean_token_accuracy": 0.9081651225686074,
"num_tokens": 1311242.0,
"step": 320
},
{
"epoch": 0.3983815748521631,
"eval_loss": 0.2835235297679901,
"eval_mean_token_accuracy": 0.9103049302101135,
"eval_num_tokens": 1311242.0,
"eval_runtime": 22.3204,
"eval_samples_per_second": 2.24,
"eval_steps_per_second": 2.24,
"step": 320
},
{
"epoch": 0.41083099906629317,
"grad_norm": 2.2140297889709473,
"learning_rate": 7.38627111871833e-05,
"loss": 0.3334,
"mean_token_accuracy": 0.9038107171654701,
"num_tokens": 1351414.0,
"step": 330
},
{
"epoch": 0.41083099906629317,
"eval_loss": 0.2817688286304474,
"eval_mean_token_accuracy": 0.9108499121665955,
"eval_num_tokens": 1351414.0,
"eval_runtime": 22.3432,
"eval_samples_per_second": 2.238,
"eval_steps_per_second": 2.238,
"step": 330
},
{
"epoch": 0.42328042328042326,
"grad_norm": 1.1960259675979614,
"learning_rate": 7.193157460668005e-05,
"loss": 0.2933,
"mean_token_accuracy": 0.9120548516511917,
"num_tokens": 1392404.0,
"step": 340
},
{
"epoch": 0.42328042328042326,
"eval_loss": 0.2820216417312622,
"eval_mean_token_accuracy": 0.9112770104408264,
"eval_num_tokens": 1392404.0,
"eval_runtime": 22.2719,
"eval_samples_per_second": 2.245,
"eval_steps_per_second": 2.245,
"step": 340
},
{
"epoch": 0.4357298474945534,
"grad_norm": 0.950002908706665,
"learning_rate": 6.99590357106354e-05,
"loss": 0.3248,
"mean_token_accuracy": 0.9039641574025155,
"num_tokens": 1433105.0,
"step": 350
},
{
"epoch": 0.4357298474945534,
"eval_loss": 0.2805749773979187,
"eval_mean_token_accuracy": 0.9103002834320069,
"eval_num_tokens": 1433105.0,
"eval_runtime": 22.3037,
"eval_samples_per_second": 2.242,
"eval_steps_per_second": 2.242,
"step": 350
},
{
"epoch": 0.4481792717086835,
"grad_norm": 0.9667631983757019,
"learning_rate": 6.79488182479938e-05,
"loss": 0.2963,
"mean_token_accuracy": 0.9092229396104813,
"num_tokens": 1474722.0,
"step": 360
},
{
"epoch": 0.4481792717086835,
"eval_loss": 0.2767864167690277,
"eval_mean_token_accuracy": 0.911848417520523,
"eval_num_tokens": 1474722.0,
"eval_runtime": 22.3405,
"eval_samples_per_second": 2.238,
"eval_steps_per_second": 2.238,
"step": 360
},
{
"epoch": 0.4606286959228136,
"grad_norm": 1.0764143466949463,
"learning_rate": 6.590471709710703e-05,
"loss": 0.3335,
"mean_token_accuracy": 0.8984953165054321,
"num_tokens": 1516420.0,
"step": 370
},
{
"epoch": 0.4606286959228136,
"eval_loss": 0.2774994671344757,
"eval_mean_token_accuracy": 0.911427743434906,
"eval_num_tokens": 1516420.0,
"eval_runtime": 22.3317,
"eval_samples_per_second": 2.239,
"eval_steps_per_second": 2.239,
"step": 370
},
{
"epoch": 0.47307812013694367,
"grad_norm": 1.0027621984481812,
"learning_rate": 6.383059110178204e-05,
"loss": 0.3434,
"mean_token_accuracy": 0.9006192669272423,
"num_tokens": 1557572.0,
"step": 380
},
{
"epoch": 0.47307812013694367,
"eval_loss": 0.2765714228153229,
"eval_mean_token_accuracy": 0.9118164765834809,
"eval_num_tokens": 1557572.0,
"eval_runtime": 22.3501,
"eval_samples_per_second": 2.237,
"eval_steps_per_second": 2.237,
"step": 380
},
{
"epoch": 0.48552754435107376,
"grad_norm": 1.2028086185455322,
"learning_rate": 6.173035578657512e-05,
"loss": 0.3116,
"mean_token_accuracy": 0.9067506313323974,
"num_tokens": 1600657.0,
"step": 390
},
{
"epoch": 0.48552754435107376,
"eval_loss": 0.2748585045337677,
"eval_mean_token_accuracy": 0.9116013216972351,
"eval_num_tokens": 1600657.0,
"eval_runtime": 22.3017,
"eval_samples_per_second": 2.242,
"eval_steps_per_second": 2.242,
"step": 390
},
{
"epoch": 0.49797696856520385,
"grad_norm": 0.9993801116943359,
"learning_rate": 5.9607975965084526e-05,
"loss": 0.2804,
"mean_token_accuracy": 0.9129077598452568,
"num_tokens": 1643316.0,
"step": 400
},
{
"epoch": 0.49797696856520385,
"eval_loss": 0.27263733744621277,
"eval_mean_token_accuracy": 0.9128299736976624,
"eval_num_tokens": 1643316.0,
"eval_runtime": 22.3273,
"eval_samples_per_second": 2.239,
"eval_steps_per_second": 2.239,
"step": 400
}
],
"logging_steps": 10,
"max_steps": 804,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.064033082156032e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}