{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.120313143798929,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04120313143798929,
"grad_norm": 2.6929988861083984,
"learning_rate": 5.9401e-05,
"loss": 1.2381,
"step": 100
},
{
"epoch": 0.08240626287597858,
"grad_norm": 0.9781515002250671,
"learning_rate": 5.8802000000000004e-05,
"loss": 0.5408,
"step": 200
},
{
"epoch": 0.12360939431396786,
"grad_norm": 1.4702354669570923,
"learning_rate": 5.8203e-05,
"loss": 0.3679,
"step": 300
},
{
"epoch": 0.16481252575195715,
"grad_norm": 0.458916038274765,
"learning_rate": 5.7604e-05,
"loss": 0.4243,
"step": 400
},
{
"epoch": 0.20601565718994644,
"grad_norm": 2.584094524383545,
"learning_rate": 5.7005e-05,
"loss": 0.341,
"step": 500
},
{
"epoch": 0.24721878862793573,
"grad_norm": 4.845738410949707,
"learning_rate": 5.6406e-05,
"loss": 0.3247,
"step": 600
},
{
"epoch": 0.288421920065925,
"grad_norm": 0.6360086798667908,
"learning_rate": 5.5806999999999996e-05,
"loss": 0.3736,
"step": 700
},
{
"epoch": 0.3296250515039143,
"grad_norm": 0.5916720628738403,
"learning_rate": 5.5208000000000004e-05,
"loss": 0.3775,
"step": 800
},
{
"epoch": 0.37082818294190356,
"grad_norm": 1.5302131175994873,
"learning_rate": 5.4609000000000005e-05,
"loss": 0.3549,
"step": 900
},
{
"epoch": 0.4120313143798929,
"grad_norm": 1.928609848022461,
"learning_rate": 5.401e-05,
"loss": 0.311,
"step": 1000
},
{
"epoch": 0.45323444581788214,
"grad_norm": 4.012447834014893,
"learning_rate": 5.3411e-05,
"loss": 0.3266,
"step": 1100
},
{
"epoch": 0.49443757725587145,
"grad_norm": 1.2283966541290283,
"learning_rate": 5.2812e-05,
"loss": 0.3581,
"step": 1200
},
{
"epoch": 0.5356407086938607,
"grad_norm": 2.1378538608551025,
"learning_rate": 5.2213e-05,
"loss": 0.3654,
"step": 1300
},
{
"epoch": 0.57684384013185,
"grad_norm": 1.014821171760559,
"learning_rate": 5.1614000000000004e-05,
"loss": 0.3019,
"step": 1400
},
{
"epoch": 0.6180469715698393,
"grad_norm": 0.6981366872787476,
"learning_rate": 5.1015e-05,
"loss": 0.297,
"step": 1500
},
{
"epoch": 0.6592501030078286,
"grad_norm": 0.8797981142997742,
"learning_rate": 5.0416e-05,
"loss": 0.3543,
"step": 1600
},
{
"epoch": 0.7004532344458179,
"grad_norm": 0.34348738193511963,
"learning_rate": 4.9817e-05,
"loss": 0.3102,
"step": 1700
},
{
"epoch": 0.7416563658838071,
"grad_norm": 0.9645235538482666,
"learning_rate": 4.9218e-05,
"loss": 0.2859,
"step": 1800
},
{
"epoch": 0.7828594973217965,
"grad_norm": 1.9135812520980835,
"learning_rate": 4.8619e-05,
"loss": 0.3493,
"step": 1900
},
{
"epoch": 0.8240626287597858,
"grad_norm": 1.7853527069091797,
"learning_rate": 4.8020000000000004e-05,
"loss": 0.3256,
"step": 2000
},
{
"epoch": 0.865265760197775,
"grad_norm": 1.7780035734176636,
"learning_rate": 4.7421000000000006e-05,
"loss": 0.2621,
"step": 2100
},
{
"epoch": 0.9064688916357643,
"grad_norm": 0.8148425221443176,
"learning_rate": 4.6822e-05,
"loss": 0.3273,
"step": 2200
},
{
"epoch": 0.9476720230737536,
"grad_norm": 2.2365009784698486,
"learning_rate": 4.6223e-05,
"loss": 0.2879,
"step": 2300
},
{
"epoch": 0.9888751545117429,
"grad_norm": 1.7118935585021973,
"learning_rate": 4.5624e-05,
"loss": 0.2715,
"step": 2400
},
{
"epoch": 1.0,
"eval_accuracy_Autre r\u00e9paration": 0.2533258414054248,
"eval_accuracy_D\u00e9coupe": 0.08133862998794246,
"eval_accuracy_Emergence": 0.0,
"eval_accuracy_Gla\u00e7age ou Ressuage": 0.0,
"eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0,
"eval_accuracy_Unlabeled": null,
"eval_iou_Autre r\u00e9paration": 0.23621639872040598,
"eval_iou_D\u00e9coupe": 0.07661059644544693,
"eval_iou_Emergence": 0.0,
"eval_iou_Gla\u00e7age ou Ressuage": 0.0,
"eval_iou_Reflet m\u00e9t\u00e9o": 0.0,
"eval_iou_Unlabeled": 0.0,
"eval_loss": 0.2682347893714905,
"eval_mean_accuracy": 0.06693289427867345,
"eval_mean_iou": 0.05213783252764215,
"eval_overall_accuracy": 0.18279080675579093,
"eval_runtime": 150.5227,
"eval_samples_per_second": 12.045,
"eval_steps_per_second": 3.016,
"step": 2427
},
{
"epoch": 1.0300782859497322,
"grad_norm": 3.537090539932251,
"learning_rate": 4.5025000000000003e-05,
"loss": 0.2763,
"step": 2500
},
{
"epoch": 1.0712814173877214,
"grad_norm": 2.1730239391326904,
"learning_rate": 4.4426000000000005e-05,
"loss": 0.2981,
"step": 2600
},
{
"epoch": 1.1124845488257107,
"grad_norm": 1.0320223569869995,
"learning_rate": 4.3827e-05,
"loss": 0.3227,
"step": 2700
},
{
"epoch": 1.1536876802637002,
"grad_norm": 4.7768635749816895,
"learning_rate": 4.3228e-05,
"loss": 0.3398,
"step": 2800
},
{
"epoch": 1.1948908117016894,
"grad_norm": 1.5758723020553589,
"learning_rate": 4.2629e-05,
"loss": 0.334,
"step": 2900
},
{
"epoch": 1.2360939431396787,
"grad_norm": 4.915160655975342,
"learning_rate": 4.203e-05,
"loss": 0.2577,
"step": 3000
},
{
"epoch": 1.277297074577668,
"grad_norm": 0.7495476603507996,
"learning_rate": 4.1431e-05,
"loss": 0.2807,
"step": 3100
},
{
"epoch": 1.3185002060156572,
"grad_norm": 1.0287623405456543,
"learning_rate": 4.0832e-05,
"loss": 0.3277,
"step": 3200
},
{
"epoch": 1.3597033374536465,
"grad_norm": 3.6160237789154053,
"learning_rate": 4.0233e-05,
"loss": 0.3073,
"step": 3300
},
{
"epoch": 1.4009064688916357,
"grad_norm": 6.738962173461914,
"learning_rate": 3.9634e-05,
"loss": 0.2744,
"step": 3400
},
{
"epoch": 1.442109600329625,
"grad_norm": 0.7060651779174805,
"learning_rate": 3.9035e-05,
"loss": 0.2976,
"step": 3500
},
{
"epoch": 1.4833127317676142,
"grad_norm": 4.404435634613037,
"learning_rate": 3.8436e-05,
"loss": 0.2646,
"step": 3600
},
{
"epoch": 1.5245158632056035,
"grad_norm": 1.1246055364608765,
"learning_rate": 3.7837000000000004e-05,
"loss": 0.3497,
"step": 3700
},
{
"epoch": 1.5657189946435928,
"grad_norm": 3.132385015487671,
"learning_rate": 3.7238000000000005e-05,
"loss": 0.2437,
"step": 3800
},
{
"epoch": 1.6069221260815822,
"grad_norm": 0.3945494592189789,
"learning_rate": 3.6639e-05,
"loss": 0.2616,
"step": 3900
},
{
"epoch": 1.6481252575195715,
"grad_norm": 0.8652153015136719,
"learning_rate": 3.604e-05,
"loss": 0.2466,
"step": 4000
},
{
"epoch": 1.6893283889575608,
"grad_norm": 0.44899633526802063,
"learning_rate": 3.544100000000001e-05,
"loss": 0.2562,
"step": 4100
},
{
"epoch": 1.73053152039555,
"grad_norm": 3.39601993560791,
"learning_rate": 3.4842e-05,
"loss": 0.2795,
"step": 4200
},
{
"epoch": 1.7717346518335395,
"grad_norm": 2.5917625427246094,
"learning_rate": 3.4243000000000004e-05,
"loss": 0.2933,
"step": 4300
},
{
"epoch": 1.8129377832715288,
"grad_norm": 1.0517610311508179,
"learning_rate": 3.3644000000000005e-05,
"loss": 0.2632,
"step": 4400
},
{
"epoch": 1.854140914709518,
"grad_norm": 1.573089361190796,
"learning_rate": 3.3045000000000006e-05,
"loss": 0.2554,
"step": 4500
},
{
"epoch": 1.8953440461475073,
"grad_norm": 1.3932527303695679,
"learning_rate": 3.2446e-05,
"loss": 0.2676,
"step": 4600
},
{
"epoch": 1.9365471775854965,
"grad_norm": 7.98951530456543,
"learning_rate": 3.1847e-05,
"loss": 0.2906,
"step": 4700
},
{
"epoch": 1.9777503090234858,
"grad_norm": 0.578360378742218,
"learning_rate": 3.1248e-05,
"loss": 0.2815,
"step": 4800
},
{
"epoch": 2.0,
"eval_accuracy_Autre r\u00e9paration": 0.19815143518295517,
"eval_accuracy_D\u00e9coupe": 0.11079467411500263,
"eval_accuracy_Emergence": 0.4089615931721195,
"eval_accuracy_Gla\u00e7age ou Ressuage": 0.0,
"eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0,
"eval_accuracy_Unlabeled": null,
"eval_iou_Autre r\u00e9paration": 0.19162433877536195,
"eval_iou_D\u00e9coupe": 0.10140688937641373,
"eval_iou_Emergence": 0.40571014840298464,
"eval_iou_Gla\u00e7age ou Ressuage": 0.0,
"eval_iou_Reflet m\u00e9t\u00e9o": 0.0,
"eval_iou_Unlabeled": 0.0,
"eval_loss": 0.26819199323654175,
"eval_mean_accuracy": 0.14358154049401545,
"eval_mean_iou": 0.11645689609246006,
"eval_overall_accuracy": 0.15928522569775833,
"eval_runtime": 140.1786,
"eval_samples_per_second": 12.934,
"eval_steps_per_second": 3.239,
"step": 4854
},
{
"epoch": 2.018953440461475,
"grad_norm": 0.5752081871032715,
"learning_rate": 3.0649000000000004e-05,
"loss": 0.2768,
"step": 4900
},
{
"epoch": 2.0601565718994643,
"grad_norm": 0.6111757755279541,
"learning_rate": 3.0050000000000002e-05,
"loss": 0.2226,
"step": 5000
},
{
"epoch": 2.1013597033374536,
"grad_norm": 0.48088550567626953,
"learning_rate": 2.9451e-05,
"loss": 0.334,
"step": 5100
},
{
"epoch": 2.142562834775443,
"grad_norm": 1.2190054655075073,
"learning_rate": 2.8851999999999998e-05,
"loss": 0.2868,
"step": 5200
},
{
"epoch": 2.183765966213432,
"grad_norm": 2.414565324783325,
"learning_rate": 2.8253e-05,
"loss": 0.3291,
"step": 5300
},
{
"epoch": 2.2249690976514214,
"grad_norm": 0.2674981653690338,
"learning_rate": 2.7653999999999996e-05,
"loss": 0.2687,
"step": 5400
},
{
"epoch": 2.2661722290894106,
"grad_norm": 2.053374767303467,
"learning_rate": 2.7054999999999998e-05,
"loss": 0.2559,
"step": 5500
},
{
"epoch": 2.3073753605274003,
"grad_norm": 3.9835445880889893,
"learning_rate": 2.6455999999999995e-05,
"loss": 0.282,
"step": 5600
},
{
"epoch": 2.348578491965389,
"grad_norm": 3.391972303390503,
"learning_rate": 2.5857e-05,
"loss": 0.3191,
"step": 5700
},
{
"epoch": 2.389781623403379,
"grad_norm": 0.4526354968547821,
"learning_rate": 2.5258e-05,
"loss": 0.2732,
"step": 5800
},
{
"epoch": 2.430984754841368,
"grad_norm": 1.3189719915390015,
"learning_rate": 2.4659e-05,
"loss": 0.242,
"step": 5900
},
{
"epoch": 2.4721878862793574,
"grad_norm": 1.6163711547851562,
"learning_rate": 2.406e-05,
"loss": 0.278,
"step": 6000
},
{
"epoch": 2.5133910177173466,
"grad_norm": 1.5330442190170288,
"learning_rate": 2.3460999999999998e-05,
"loss": 0.29,
"step": 6100
},
{
"epoch": 2.554594149155336,
"grad_norm": 4.686217784881592,
"learning_rate": 2.2862e-05,
"loss": 0.2586,
"step": 6200
},
{
"epoch": 2.595797280593325,
"grad_norm": 3.333735942840576,
"learning_rate": 2.2263e-05,
"loss": 0.2794,
"step": 6300
},
{
"epoch": 2.6370004120313144,
"grad_norm": 1.2093195915222168,
"learning_rate": 2.1663999999999998e-05,
"loss": 0.2466,
"step": 6400
},
{
"epoch": 2.6782035434693037,
"grad_norm": 1.6071631908416748,
"learning_rate": 2.1065e-05,
"loss": 0.21,
"step": 6500
},
{
"epoch": 2.719406674907293,
"grad_norm": 1.4164949655532837,
"learning_rate": 2.0465999999999997e-05,
"loss": 0.2822,
"step": 6600
},
{
"epoch": 2.760609806345282,
"grad_norm": 8.471506118774414,
"learning_rate": 1.9866999999999998e-05,
"loss": 0.2475,
"step": 6700
},
{
"epoch": 2.8018129377832715,
"grad_norm": 8.533307075500488,
"learning_rate": 1.9267999999999996e-05,
"loss": 0.2806,
"step": 6800
},
{
"epoch": 2.8430160692212607,
"grad_norm": 0.49498608708381653,
"learning_rate": 1.8669e-05,
"loss": 0.2682,
"step": 6900
},
{
"epoch": 2.88421920065925,
"grad_norm": 1.339969515800476,
"learning_rate": 1.807e-05,
"loss": 0.2435,
"step": 7000
},
{
"epoch": 2.9254223320972392,
"grad_norm": 1.8642264604568481,
"learning_rate": 1.7471e-05,
"loss": 0.2518,
"step": 7100
},
{
"epoch": 2.9666254635352285,
"grad_norm": 2.9471471309661865,
"learning_rate": 1.6872e-05,
"loss": 0.2638,
"step": 7200
},
{
"epoch": 3.0,
"eval_accuracy_Autre r\u00e9paration": 0.30393303904730357,
"eval_accuracy_D\u00e9coupe": 0.23455367948789083,
"eval_accuracy_Emergence": 0.5085131571199683,
"eval_accuracy_Gla\u00e7age ou Ressuage": 0.003045137463105984,
"eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0,
"eval_accuracy_Unlabeled": null,
"eval_iou_Autre r\u00e9paration": 0.2853778307692313,
"eval_iou_D\u00e9coupe": 0.21276477560584842,
"eval_iou_Emergence": 0.49725063677040354,
"eval_iou_Gla\u00e7age ou Ressuage": 0.002998539305038369,
"eval_iou_Reflet m\u00e9t\u00e9o": 0.0,
"eval_iou_Unlabeled": 0.0,
"eval_loss": 0.2419871985912323,
"eval_mean_accuracy": 0.2100090026236537,
"eval_mean_iou": 0.16639863040842026,
"eval_overall_accuracy": 0.2563620151228916,
"eval_runtime": 137.8421,
"eval_samples_per_second": 13.153,
"eval_steps_per_second": 3.294,
"step": 7281
},
{
"epoch": 3.0078285949732178,
"grad_norm": 0.4593660533428192,
"learning_rate": 1.6272999999999998e-05,
"loss": 0.2486,
"step": 7300
},
{
"epoch": 3.0490317264112075,
"grad_norm": 0.8246074318885803,
"learning_rate": 1.5674e-05,
"loss": 0.2251,
"step": 7400
},
{
"epoch": 3.0902348578491967,
"grad_norm": 0.9824215769767761,
"learning_rate": 1.5075000000000002e-05,
"loss": 0.2386,
"step": 7500
},
{
"epoch": 3.131437989287186,
"grad_norm": 6.623724937438965,
"learning_rate": 1.4476e-05,
"loss": 0.2635,
"step": 7600
},
{
"epoch": 3.1726411207251752,
"grad_norm": 0.816888689994812,
"learning_rate": 1.3877e-05,
"loss": 0.2821,
"step": 7700
},
{
"epoch": 3.2138442521631645,
"grad_norm": 0.45224809646606445,
"learning_rate": 1.3277999999999999e-05,
"loss": 0.2238,
"step": 7800
},
{
"epoch": 3.2550473836011538,
"grad_norm": 0.9230859279632568,
"learning_rate": 1.2678999999999998e-05,
"loss": 0.2238,
"step": 7900
},
{
"epoch": 3.296250515039143,
"grad_norm": 2.5414812564849854,
"learning_rate": 1.2079999999999998e-05,
"loss": 0.2046,
"step": 8000
},
{
"epoch": 3.3374536464771323,
"grad_norm": 1.6467418670654297,
"learning_rate": 1.1480999999999997e-05,
"loss": 0.2343,
"step": 8100
},
{
"epoch": 3.3786567779151215,
"grad_norm": 0.6073494553565979,
"learning_rate": 1.0882000000000004e-05,
"loss": 0.2162,
"step": 8200
},
{
"epoch": 3.419859909353111,
"grad_norm": 2.7378017902374268,
"learning_rate": 1.0283000000000003e-05,
"loss": 0.2868,
"step": 8300
},
{
"epoch": 3.4610630407911,
"grad_norm": 1.4614454507827759,
"learning_rate": 9.684000000000002e-06,
"loss": 0.2145,
"step": 8400
},
{
"epoch": 3.5022661722290893,
"grad_norm": 2.336061954498291,
"learning_rate": 9.085000000000002e-06,
"loss": 0.2918,
"step": 8500
},
{
"epoch": 3.5434693036670786,
"grad_norm": 1.7232545614242554,
"learning_rate": 8.486000000000001e-06,
"loss": 0.2854,
"step": 8600
},
{
"epoch": 3.584672435105068,
"grad_norm": 0.514677882194519,
"learning_rate": 7.887000000000001e-06,
"loss": 0.2514,
"step": 8700
},
{
"epoch": 3.6258755665430575,
"grad_norm": 0.9662112593650818,
"learning_rate": 7.2879999999999995e-06,
"loss": 0.2714,
"step": 8800
},
{
"epoch": 3.6670786979810464,
"grad_norm": 10.60983657836914,
"learning_rate": 6.688999999999999e-06,
"loss": 0.2548,
"step": 8900
},
{
"epoch": 3.708281829419036,
"grad_norm": 2.669593572616577,
"learning_rate": 6.0899999999999984e-06,
"loss": 0.2591,
"step": 9000
},
{
"epoch": 3.749484960857025,
"grad_norm": 1.071542501449585,
"learning_rate": 5.490999999999998e-06,
"loss": 0.2763,
"step": 9100
},
{
"epoch": 3.7906880922950146,
"grad_norm": 2.664677381515503,
"learning_rate": 4.891999999999997e-06,
"loss": 0.2178,
"step": 9200
},
{
"epoch": 3.831891223733004,
"grad_norm": 9.70131778717041,
"learning_rate": 4.292999999999997e-06,
"loss": 0.2674,
"step": 9300
},
{
"epoch": 3.873094355170993,
"grad_norm": 4.843862056732178,
"learning_rate": 3.694000000000003e-06,
"loss": 0.2581,
"step": 9400
},
{
"epoch": 3.9142974866089824,
"grad_norm": 0.8629316091537476,
"learning_rate": 3.0950000000000026e-06,
"loss": 0.2642,
"step": 9500
},
{
"epoch": 3.9555006180469716,
"grad_norm": 4.216986179351807,
"learning_rate": 2.496000000000002e-06,
"loss": 0.1965,
"step": 9600
},
{
"epoch": 3.996703749484961,
"grad_norm": 2.241065502166748,
"learning_rate": 1.8970000000000013e-06,
"loss": 0.2703,
"step": 9700
},
{
"epoch": 4.0,
"eval_accuracy_Autre r\u00e9paration": 0.36122042935066995,
"eval_accuracy_D\u00e9coupe": 0.28433059478878114,
"eval_accuracy_Emergence": 0.5473337114203988,
"eval_accuracy_Gla\u00e7age ou Ressuage": 0.0446288018012878,
"eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0,
"eval_accuracy_Unlabeled": null,
"eval_iou_Autre r\u00e9paration": 0.33834292206885924,
"eval_iou_D\u00e9coupe": 0.251159370886517,
"eval_iou_Emergence": 0.5319718670461905,
"eval_iou_Gla\u00e7age ou Ressuage": 0.042908421138837,
"eval_iou_Reflet m\u00e9t\u00e9o": 0.0,
"eval_iou_Unlabeled": 0.0,
"eval_loss": 0.2333020269870758,
"eval_mean_accuracy": 0.24750270747222752,
"eval_mean_iou": 0.1940637635234006,
"eval_overall_accuracy": 0.30742270034207847,
"eval_runtime": 138.4117,
"eval_samples_per_second": 13.099,
"eval_steps_per_second": 3.28,
"step": 9708
},
{
"epoch": 4.03790688092295,
"grad_norm": 1.052063226699829,
"learning_rate": 1.298000000000001e-06,
"loss": 0.242,
"step": 9800
},
{
"epoch": 4.07911001236094,
"grad_norm": 6.82352876663208,
"learning_rate": 6.990000000000005e-07,
"loss": 0.2482,
"step": 9900
},
{
"epoch": 4.120313143798929,
"grad_norm": 2.648499011993408,
"learning_rate": 1e-07,
"loss": 0.2197,
"step": 10000
},
{
"epoch": 4.120313143798929,
"eval_accuracy_Autre r\u00e9paration": 0.34369405810457515,
"eval_accuracy_D\u00e9coupe": 0.2864541960267422,
"eval_accuracy_Emergence": 0.5548598133737452,
"eval_accuracy_Gla\u00e7age ou Ressuage": 0.03860482159488221,
"eval_accuracy_Reflet m\u00e9t\u00e9o": 0.0,
"eval_accuracy_Unlabeled": null,
"eval_iou_Autre r\u00e9paration": 0.32304877421180617,
"eval_iou_D\u00e9coupe": 0.2515107459482324,
"eval_iou_Emergence": 0.5379450939388203,
"eval_iou_Gla\u00e7age ou Ressuage": 0.03692047935180606,
"eval_iou_Reflet m\u00e9t\u00e9o": 0.0,
"eval_iou_Unlabeled": 0.0,
"eval_loss": 0.23602528870105743,
"eval_mean_accuracy": 0.244722577819989,
"eval_mean_iou": 0.19157084890844414,
"eval_overall_accuracy": 0.29617685609695316,
"eval_runtime": 141.1695,
"eval_samples_per_second": 12.843,
"eval_steps_per_second": 3.216,
"step": 10000
},
{
"epoch": 4.120313143798929,
"step": 10000,
"total_flos": 8.912029734867567e+18,
"train_loss": 0.29374205589294433,
"train_runtime": 4666.1544,
"train_samples_per_second": 8.572,
"train_steps_per_second": 2.143
}
],
"logging_steps": 100,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.912029734867567e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}