environment_test_affine-7B / trainer_state.json
Gege24's picture
Upload task output 1
70729c9 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.012,
"eval_steps": 500,
"global_step": 75,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.033851010352373125,
"clip_ratio/high_mean": 0.011871843505650759,
"clip_ratio/low_mean": 0.024242424033582212,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.03611426735296845,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.6,
"completions/max_terminated_length": 374.6,
"completions/mean_length": 297.675,
"completions/mean_terminated_length": 297.675,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.33769991919398307,
"epoch": 0.0008,
"frac_reward_zero_std": 0.5,
"grad_norm": 0.030647173523902893,
"kl": 0.022074293252080678,
"learning_rate": 8.529119999999999e-07,
"loss": -0.0006066907197237014,
"num_tokens": 136458.0,
"reward": 0.9300000309944153,
"reward_std": 0.23334523439407348,
"rewards/env_goofspiel_reward/mean": 0.9300000309944153,
"rewards/env_goofspiel_reward/std": 0.3451612591743469,
"sampling/importance_sampling_ratio/max": 1.5456702947616576,
"sampling/importance_sampling_ratio/mean": 0.32863556742668154,
"sampling/importance_sampling_ratio/min": 0.00010910680049249776,
"sampling/sampling_logp_difference/max": 7.469822406768799,
"sampling/sampling_logp_difference/mean": 0.680775272846222,
"step": 5,
"step_time": 4.723534681799992
},
{
"clip_ratio/high_max": 0.052361111342906955,
"clip_ratio/high_mean": 0.014340277761220932,
"clip_ratio/low_mean": 0.015763888787478208,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.030104166455566884,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.2,
"completions/max_terminated_length": 374.2,
"completions/mean_length": 291.9125,
"completions/mean_terminated_length": 291.9125,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.3392209455370903,
"epoch": 0.0016,
"frac_reward_zero_std": 0.4375,
"grad_norm": 0.02777782641351223,
"kl": 0.03302585552446544,
"learning_rate": 1.919052e-06,
"loss": -0.0005224664695560932,
"num_tokens": 270583.0,
"reward": 0.8775000214576721,
"reward_std": 0.26516505479812624,
"rewards/env_goofspiel_reward/mean": 0.8775000214576721,
"rewards/env_goofspiel_reward/std": 0.3663728296756744,
"sampling/importance_sampling_ratio/max": 1.54481360912323,
"sampling/importance_sampling_ratio/mean": 0.3381913095712662,
"sampling/importance_sampling_ratio/min": 2.4473399389535188e-05,
"sampling/sampling_logp_difference/max": 8.968151187896728,
"sampling/sampling_logp_difference/mean": 0.7433344721794128,
"step": 10,
"step_time": 4.138045286000079
},
{
"clip_ratio/high_max": 0.05089646503329277,
"clip_ratio/high_mean": 0.018314393889158963,
"clip_ratio/low_mean": 0.026897096075117588,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.04521148977801204,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.4,
"completions/max_terminated_length": 374.4,
"completions/mean_length": 284.84375,
"completions/mean_terminated_length": 284.84375,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.34051789045333863,
"epoch": 0.0024,
"frac_reward_zero_std": 0.5875,
"grad_norm": 0.013990325853228569,
"kl": 0.040778578049503265,
"learning_rate": 2.985192e-06,
"loss": -0.0005226288456469774,
"num_tokens": 403304.0,
"reward": 0.9674375176429748,
"reward_std": 0.19100722074508666,
"rewards/env_goofspiel_reward/mean": 0.9674375176429748,
"rewards/env_goofspiel_reward/std": 0.3267829120159149,
"sampling/importance_sampling_ratio/max": 1.7882837533950806,
"sampling/importance_sampling_ratio/mean": 0.3831939160823822,
"sampling/importance_sampling_ratio/min": 0.00034590021532494576,
"sampling/sampling_logp_difference/max": 6.792400264739991,
"sampling/sampling_logp_difference/mean": 0.6026189684867859,
"step": 15,
"step_time": 4.541797615800033
},
{
"clip_ratio/high_max": 0.08027777820825577,
"clip_ratio/high_mean": 0.02395833358168602,
"clip_ratio/low_mean": 0.019027777854353188,
"clip_ratio/low_min": 0.00625,
"clip_ratio/region_mean": 0.04298611143603921,
"completions/clipped_ratio": 0.0,
"completions/max_length": 373.6,
"completions/max_terminated_length": 373.6,
"completions/mean_length": 281.60625,
"completions/mean_terminated_length": 281.60625,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.28771451860666275,
"epoch": 0.0032,
"frac_reward_zero_std": 0.5375,
"grad_norm": 0.029177065938711166,
"kl": 0.0942368695512414,
"learning_rate": 4.051332e-06,
"loss": -0.00038087132852524517,
"num_tokens": 536119.0,
"reward": 0.9637500643730164,
"reward_std": 0.2068287342786789,
"rewards/env_goofspiel_reward/mean": 0.9637500643730164,
"rewards/env_goofspiel_reward/std": 0.3382142722606659,
"sampling/importance_sampling_ratio/max": 1.751455307006836,
"sampling/importance_sampling_ratio/mean": 0.45835237503051757,
"sampling/importance_sampling_ratio/min": 0.00027077984723291595,
"sampling/sampling_logp_difference/max": 8.25740842819214,
"sampling/sampling_logp_difference/mean": 0.5326088547706604,
"step": 20,
"step_time": 4.071906338800045
},
{
"clip_ratio/high_max": 0.05111111141741276,
"clip_ratio/high_mean": 0.013914141431450843,
"clip_ratio/low_mean": 0.02760506859049201,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.04151920983567834,
"completions/clipped_ratio": 0.0,
"completions/max_length": 378.6,
"completions/max_terminated_length": 378.6,
"completions/mean_length": 301.01875,
"completions/mean_terminated_length": 301.01875,
"completions/min_length": 218.8,
"completions/min_terminated_length": 218.8,
"entropy": 0.2572413206100464,
"epoch": 0.004,
"frac_reward_zero_std": 0.5375,
"grad_norm": 0.04749641567468643,
"kl": 0.23950345497578382,
"learning_rate": 5.117472e-06,
"loss": -0.00030293280724436045,
"num_tokens": 674875.0,
"reward": 0.9487500309944152,
"reward_std": 0.20682873725891113,
"rewards/env_goofspiel_reward/mean": 0.9487500309944152,
"rewards/env_goofspiel_reward/std": 0.34452574253082274,
"sampling/importance_sampling_ratio/max": 2.11174156665802,
"sampling/importance_sampling_ratio/mean": 0.43660367727279664,
"sampling/importance_sampling_ratio/min": 0.00013589896843768656,
"sampling/sampling_logp_difference/max": 8.112329578399658,
"sampling/sampling_logp_difference/mean": 0.5582964062690735,
"step": 25,
"step_time": 4.199152118200027
},
{
"clip_ratio/high_max": 0.032361111417412755,
"clip_ratio/high_mean": 0.009340277779847384,
"clip_ratio/low_mean": 0.018115530349314214,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.027455807756632568,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.2,
"completions/max_terminated_length": 374.2,
"completions/mean_length": 283.40625,
"completions/mean_terminated_length": 283.40625,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.273418403416872,
"epoch": 0.0048,
"frac_reward_zero_std": 0.5875,
"grad_norm": 0.005710980389267206,
"kl": 1.6874765895307065,
"learning_rate": 6.183612e-06,
"loss": -0.0006473449524492025,
"num_tokens": 806865.0,
"reward": 0.9825000166893005,
"reward_std": 0.19091882407665253,
"rewards/env_goofspiel_reward/mean": 0.9825000166893005,
"rewards/env_goofspiel_reward/std": 0.3281997382640839,
"sampling/importance_sampling_ratio/max": 2.13806095123291,
"sampling/importance_sampling_ratio/mean": 0.48566290736198425,
"sampling/importance_sampling_ratio/min": 1.583069079060806e-05,
"sampling/sampling_logp_difference/max": 9.758009147644042,
"sampling/sampling_logp_difference/mean": 0.5621577501296997,
"step": 30,
"step_time": 4.3287319488001685
},
{
"clip_ratio/high_max": 0.02222222238779068,
"clip_ratio/high_mean": 0.00555555559694767,
"clip_ratio/low_mean": 0.01631944449618459,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.021875000093132257,
"completions/clipped_ratio": 0.0,
"completions/max_length": 366.4,
"completions/max_terminated_length": 366.4,
"completions/mean_length": 290.65625,
"completions/mean_terminated_length": 290.65625,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.2778049871325493,
"epoch": 0.0056,
"frac_reward_zero_std": 0.55,
"grad_norm": 0.025574276223778725,
"kl": 0.22191586308181285,
"learning_rate": 7.249752e-06,
"loss": -0.0005257311277091503,
"num_tokens": 941915.0,
"reward": 0.99000004529953,
"reward_std": 0.20152543485164642,
"rewards/env_goofspiel_reward/mean": 0.99000004529953,
"rewards/env_goofspiel_reward/std": 0.32238503098487853,
"sampling/importance_sampling_ratio/max": 1.8528586864471435,
"sampling/importance_sampling_ratio/mean": 0.5378320515155792,
"sampling/importance_sampling_ratio/min": 0.0016222307924181223,
"sampling/sampling_logp_difference/max": 6.151937532424927,
"sampling/sampling_logp_difference/mean": 0.41074748039245607,
"step": 35,
"step_time": 4.195248219399855
},
{
"clip_ratio/high_max": 0.03625000007450581,
"clip_ratio/high_mean": 0.009062500018626452,
"clip_ratio/low_mean": 0.010277777817100287,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.019340277835726737,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.4,
"completions/max_terminated_length": 374.4,
"completions/mean_length": 290.64375,
"completions/mean_terminated_length": 290.64375,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.3076802439987659,
"epoch": 0.0064,
"frac_reward_zero_std": 0.4875,
"grad_norm": 0.03911300376057625,
"kl": 0.30147731937468053,
"learning_rate": 7.4629793691100655e-06,
"loss": -0.0009569000452756881,
"num_tokens": 1076583.0,
"reward": 0.9787500500679016,
"reward_std": 0.23864853978157044,
"rewards/env_goofspiel_reward/mean": 0.9787500500679016,
"rewards/env_goofspiel_reward/std": 0.3355243980884552,
"sampling/importance_sampling_ratio/max": 1.7326099634170533,
"sampling/importance_sampling_ratio/mean": 0.5706644296646118,
"sampling/importance_sampling_ratio/min": 0.008987322356551886,
"sampling/sampling_logp_difference/max": 4.789818382263183,
"sampling/sampling_logp_difference/mean": 0.32595881819725037,
"step": 40,
"step_time": 4.120446558600088
},
{
"clip_ratio/high_max": 0.02430555559694767,
"clip_ratio/high_mean": 0.006076388899236918,
"clip_ratio/low_mean": 0.018705808185040952,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.02478219708427787,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.6,
"completions/max_terminated_length": 374.6,
"completions/mean_length": 298.1,
"completions/mean_terminated_length": 298.1,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.30067678913474083,
"epoch": 0.0072,
"frac_reward_zero_std": 0.5625,
"grad_norm": 0.019556289538741112,
"kl": 0.4184106796979904,
"learning_rate": 7.462976806120193e-06,
"loss": -0.00040965699590742586,
"num_tokens": 1213169.0,
"reward": 0.9524999856948853,
"reward_std": 0.22273863554000856,
"rewards/env_goofspiel_reward/mean": 0.9524999856948853,
"rewards/env_goofspiel_reward/std": 0.36751508712768555,
"sampling/importance_sampling_ratio/max": 1.7973033905029296,
"sampling/importance_sampling_ratio/mean": 0.6051283955574036,
"sampling/importance_sampling_ratio/min": 0.0006137289259640965,
"sampling/sampling_logp_difference/max": 5.231348609924316,
"sampling/sampling_logp_difference/mean": 0.3217563569545746,
"step": 45,
"step_time": 4.313124376999985
},
{
"clip_ratio/high_max": 0.02430555559694767,
"clip_ratio/high_mean": 0.006076388899236918,
"clip_ratio/low_mean": 0.018645833339542152,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.024722222238779068,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.0,
"completions/max_terminated_length": 374.0,
"completions/mean_length": 299.70625,
"completions/mean_terminated_length": 299.70625,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.26526937559247016,
"epoch": 0.008,
"frac_reward_zero_std": 0.6,
"grad_norm": 0.09333564341068268,
"kl": 0.6234624680131674,
"learning_rate": 7.4629722716015665e-06,
"loss": -0.0008450452238321305,
"num_tokens": 1351131.0,
"reward": 1.0012500405311584,
"reward_std": 0.19622212946414946,
"rewards/env_goofspiel_reward/mean": 1.0012500405311584,
"rewards/env_goofspiel_reward/std": 0.33813255429267886,
"sampling/importance_sampling_ratio/max": 1.8347083568572997,
"sampling/importance_sampling_ratio/mean": 0.6785839080810547,
"sampling/importance_sampling_ratio/min": 0.002685157069936395,
"sampling/sampling_logp_difference/max": 5.393667411804199,
"sampling/sampling_logp_difference/mean": 0.3046145349740982,
"step": 50,
"step_time": 4.5290676355998585
},
{
"clip_ratio/high_max": 0.03125,
"clip_ratio/high_mean": 0.0078125,
"clip_ratio/low_mean": 0.014444444514811038,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.022256944421678783,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.6,
"completions/max_terminated_length": 374.6,
"completions/mean_length": 283.7625,
"completions/mean_terminated_length": 283.7625,
"completions/min_length": 207.0,
"completions/min_terminated_length": 207.0,
"entropy": 0.25032062605023386,
"epoch": 0.0088,
"frac_reward_zero_std": 0.5,
"grad_norm": 0.0669218897819519,
"kl": 0.47833866626024246,
"learning_rate": 7.4629657655573805e-06,
"loss": -0.0006249105092138052,
"num_tokens": 1483536.0,
"reward": 0.9561875462532043,
"reward_std": 0.2599501311779022,
"rewards/env_goofspiel_reward/mean": 0.9561875462532043,
"rewards/env_goofspiel_reward/std": 0.3719723880290985,
"sampling/importance_sampling_ratio/max": 1.9648807287216186,
"sampling/importance_sampling_ratio/mean": 0.7620458364486694,
"sampling/importance_sampling_ratio/min": 0.0028414088767021893,
"sampling/sampling_logp_difference/max": 4.346728658676147,
"sampling/sampling_logp_difference/mean": 0.2154034972190857,
"step": 55,
"step_time": 4.209427154400236
},
{
"clip_ratio/high_max": 0.005000000074505806,
"clip_ratio/high_mean": 0.0012500000186264515,
"clip_ratio/low_mean": 0.015659722313284875,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.016909722238779068,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.8,
"completions/max_terminated_length": 374.8,
"completions/mean_length": 294.2,
"completions/mean_terminated_length": 294.2,
"completions/min_length": 218.8,
"completions/min_terminated_length": 218.8,
"entropy": 0.25066495686769485,
"epoch": 0.0096,
"frac_reward_zero_std": 0.6,
"grad_norm": 0.03984799236059189,
"kl": 0.5187893055379391,
"learning_rate": 7.462957287992218e-06,
"loss": -0.001143309846520424,
"num_tokens": 1618874.0,
"reward": 0.993750023841858,
"reward_std": 0.19622212946414946,
"rewards/env_goofspiel_reward/mean": 0.993750023841858,
"rewards/env_goofspiel_reward/std": 0.35103108882904055,
"sampling/importance_sampling_ratio/max": 1.7096198081970215,
"sampling/importance_sampling_ratio/mean": 0.6896162152290344,
"sampling/importance_sampling_ratio/min": 0.0015241437591612338,
"sampling/sampling_logp_difference/max": 4.673358488082886,
"sampling/sampling_logp_difference/mean": 0.26624326705932616,
"step": 60,
"step_time": 4.191420737200042
},
{
"clip_ratio/high_max": 0.025,
"clip_ratio/high_mean": 0.00625,
"clip_ratio/low_mean": 0.0078125,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0140625,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.0,
"completions/max_terminated_length": 374.0,
"completions/mean_length": 273.53125,
"completions/mean_terminated_length": 273.53125,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.2336573876440525,
"epoch": 0.0104,
"frac_reward_zero_std": 0.7125,
"grad_norm": 0.009051427245140076,
"kl": 0.6452277667820454,
"learning_rate": 7.462946838912051e-06,
"loss": -0.0009178260341286659,
"num_tokens": 1748056.0,
"reward": 1.0800000190734864,
"reward_std": 0.1484924226999283,
"rewards/env_goofspiel_reward/mean": 1.0800000190734864,
"rewards/env_goofspiel_reward/std": 0.28696190714836123,
"sampling/importance_sampling_ratio/max": 1.254857563972473,
"sampling/importance_sampling_ratio/mean": 0.7130017876625061,
"sampling/importance_sampling_ratio/min": 0.003967047110199929,
"sampling/sampling_logp_difference/max": 3.8618431091308594,
"sampling/sampling_logp_difference/mean": 0.20099806785583496,
"step": 65,
"step_time": 4.494912574199771
},
{
"clip_ratio/high_max": 0.00555555559694767,
"clip_ratio/high_mean": 0.0013888888992369176,
"clip_ratio/low_mean": 0.010104166716337204,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.011493055615574121,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.6,
"completions/max_terminated_length": 374.6,
"completions/mean_length": 281.1125,
"completions/mean_terminated_length": 281.1125,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.19834314305335282,
"epoch": 0.0112,
"frac_reward_zero_std": 0.7125,
"grad_norm": 0.018667038530111313,
"kl": 0.4677882671356201,
"learning_rate": 7.462934418324241e-06,
"loss": -0.0008302273228764534,
"num_tokens": 1879948.0,
"reward": 1.0912500381469727,
"reward_std": 0.14318912029266356,
"rewards/env_goofspiel_reward/mean": 1.0912500381469727,
"rewards/env_goofspiel_reward/std": 0.2670675128698349,
"sampling/importance_sampling_ratio/max": 1.8250314712524414,
"sampling/importance_sampling_ratio/mean": 0.8290145397186279,
"sampling/importance_sampling_ratio/min": 0.05519633814692497,
"sampling/sampling_logp_difference/max": 2.754664158821106,
"sampling/sampling_logp_difference/mean": 0.12412183284759522,
"step": 70,
"step_time": 4.16713200400036
},
{
"clip_ratio/high_max": 0.00625,
"clip_ratio/high_mean": 0.0015625,
"clip_ratio/low_mean": 0.01158775258809328,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.01315025258809328,
"completions/clipped_ratio": 0.0,
"completions/max_length": 374.4,
"completions/max_terminated_length": 374.4,
"completions/mean_length": 292.4125,
"completions/mean_terminated_length": 292.4125,
"completions/min_length": 212.0,
"completions/min_terminated_length": 212.0,
"entropy": 0.20496653467416764,
"epoch": 0.012,
"frac_reward_zero_std": 0.6625,
"grad_norm": 0.03426237776875496,
"kl": 0.4435719080269337,
"learning_rate": 7.4629200262375374e-06,
"loss": -0.000939619354903698,
"num_tokens": 2015567.0,
"reward": 1.0687500715255738,
"reward_std": 0.1644023284316063,
"rewards/env_goofspiel_reward/mean": 1.0687500715255738,
"rewards/env_goofspiel_reward/std": 0.29003112614154813,
"sampling/importance_sampling_ratio/max": 1.699583315849304,
"sampling/importance_sampling_ratio/mean": 0.7789812088012695,
"sampling/importance_sampling_ratio/min": 0.0047567693516612055,
"sampling/sampling_logp_difference/max": 4.406649398803711,
"sampling/sampling_logp_difference/mean": 0.18859796077013016,
"step": 75,
"step_time": 4.104378796799847
},
{
"epoch": 0.012,
"eval_clip_ratio/high_max": 0.0,
"eval_clip_ratio/high_mean": 0.0,
"eval_clip_ratio/low_mean": 0.0,
"eval_clip_ratio/low_min": 0.0,
"eval_clip_ratio/region_mean": 0.0,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 373.0,
"eval_completions/max_terminated_length": 373.0,
"eval_completions/mean_length": 286.4583333333333,
"eval_completions/mean_terminated_length": 286.4583333333333,
"eval_completions/min_length": 235.33333333333334,
"eval_completions/min_terminated_length": 235.33333333333334,
"eval_entropy": 0.1776730790734291,
"eval_frac_reward_zero_std": 0.9166666666666666,
"eval_kl": 0.3452555288871129,
"eval_loss": 6.334867521218257e-06,
"eval_num_tokens": 2015567.0,
"eval_reward": 1.1750000317891438,
"eval_reward_std": 0.035355339447657265,
"eval_rewards/env_goofspiel_reward/mean": 1.1750000317891438,
"eval_rewards/env_goofspiel_reward/std": 0.07071068386236827,
"eval_runtime": 2.2216,
"eval_samples_per_second": 4.501,
"eval_sampling/importance_sampling_ratio/max": 1.2363848288853962,
"eval_sampling/importance_sampling_ratio/mean": 0.8729836543401083,
"eval_sampling/importance_sampling_ratio/min": 0.3416567128151655,
"eval_sampling/sampling_logp_difference/max": 1.6411640246709187,
"eval_sampling/sampling_logp_difference/mean": 0.12927521020174026,
"eval_steps_per_second": 0.9,
"step": 75
}
],
"logging_steps": 5,
"max_steps": 18750,
"num_input_tokens_seen": 2015567,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}