{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9850187265917603,
"eval_steps": 500,
"global_step": 132,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0149812734082397,
"grad_norm": 3.4725940183346564,
"learning_rate": 9.998583973465647e-06,
"loss": 0.4096,
"step": 1
},
{
"epoch": 0.0299625468164794,
"grad_norm": 1.7080772069425743,
"learning_rate": 9.994336695915041e-06,
"loss": 0.2677,
"step": 2
},
{
"epoch": 0.0449438202247191,
"grad_norm": 2.328266957494605,
"learning_rate": 9.987260573051268e-06,
"loss": 0.3443,
"step": 3
},
{
"epoch": 0.0599250936329588,
"grad_norm": 1.576822654462278,
"learning_rate": 9.977359612865424e-06,
"loss": 0.2629,
"step": 4
},
{
"epoch": 0.0749063670411985,
"grad_norm": 1.3075505712241573,
"learning_rate": 9.964639423366442e-06,
"loss": 0.3325,
"step": 5
},
{
"epoch": 0.0898876404494382,
"grad_norm": 0.959551968208088,
"learning_rate": 9.949107209404664e-06,
"loss": 0.231,
"step": 6
},
{
"epoch": 0.10486891385767791,
"grad_norm": 0.8685181811411783,
"learning_rate": 9.930771768590934e-06,
"loss": 0.2765,
"step": 7
},
{
"epoch": 0.1198501872659176,
"grad_norm": 0.7937704205042632,
"learning_rate": 9.909643486313533e-06,
"loss": 0.2832,
"step": 8
},
{
"epoch": 0.1348314606741573,
"grad_norm": 0.8053349247726148,
"learning_rate": 9.885734329855798e-06,
"loss": 0.1951,
"step": 9
},
{
"epoch": 0.149812734082397,
"grad_norm": 0.7381432641352577,
"learning_rate": 9.859057841617709e-06,
"loss": 0.14,
"step": 10
},
{
"epoch": 0.1647940074906367,
"grad_norm": 0.7337517919878004,
"learning_rate": 9.829629131445342e-06,
"loss": 0.2365,
"step": 11
},
{
"epoch": 0.1797752808988764,
"grad_norm": 0.7483031371321254,
"learning_rate": 9.797464868072489e-06,
"loss": 0.2211,
"step": 12
},
{
"epoch": 0.1947565543071161,
"grad_norm": 0.7519387894526419,
"learning_rate": 9.762583269679304e-06,
"loss": 0.2328,
"step": 13
},
{
"epoch": 0.20973782771535582,
"grad_norm": 0.7249812207597669,
"learning_rate": 9.725004093573343e-06,
"loss": 0.2247,
"step": 14
},
{
"epoch": 0.2247191011235955,
"grad_norm": 0.7401985739866241,
"learning_rate": 9.68474862499881e-06,
"loss": 0.2395,
"step": 15
},
{
"epoch": 0.2397003745318352,
"grad_norm": 0.8047282659961688,
"learning_rate": 9.641839665080363e-06,
"loss": 0.2842,
"step": 16
},
{
"epoch": 0.2546816479400749,
"grad_norm": 0.6387679955496787,
"learning_rate": 9.596301517908329e-06,
"loss": 0.2155,
"step": 17
},
{
"epoch": 0.2696629213483146,
"grad_norm": 0.7318861277372435,
"learning_rate": 9.548159976772593e-06,
"loss": 0.211,
"step": 18
},
{
"epoch": 0.2846441947565543,
"grad_norm": 0.6576543270263697,
"learning_rate": 9.497442309553017e-06,
"loss": 0.2319,
"step": 19
},
{
"epoch": 0.299625468164794,
"grad_norm": 0.65904809453707,
"learning_rate": 9.444177243274619e-06,
"loss": 0.2196,
"step": 20
},
{
"epoch": 0.3146067415730337,
"grad_norm": 0.6184819776155891,
"learning_rate": 9.388394947836278e-06,
"loss": 0.2153,
"step": 21
},
{
"epoch": 0.3295880149812734,
"grad_norm": 0.6489091422231905,
"learning_rate": 9.330127018922195e-06,
"loss": 0.2156,
"step": 22
},
{
"epoch": 0.3445692883895131,
"grad_norm": 0.7185451315026277,
"learning_rate": 9.269406460105742e-06,
"loss": 0.2075,
"step": 23
},
{
"epoch": 0.3595505617977528,
"grad_norm": 0.6658645393018615,
"learning_rate": 9.206267664155906e-06,
"loss": 0.1809,
"step": 24
},
{
"epoch": 0.37453183520599254,
"grad_norm": 0.6952332409240808,
"learning_rate": 9.140746393556853e-06,
"loss": 0.19,
"step": 25
},
{
"epoch": 0.3895131086142322,
"grad_norm": 0.6496513424460431,
"learning_rate": 9.07287976025168e-06,
"loss": 0.2935,
"step": 26
},
{
"epoch": 0.4044943820224719,
"grad_norm": 0.5683394892830653,
"learning_rate": 9.002706204621802e-06,
"loss": 0.2274,
"step": 27
},
{
"epoch": 0.41947565543071164,
"grad_norm": 0.5550931663628429,
"learning_rate": 8.930265473713939e-06,
"loss": 0.2192,
"step": 28
},
{
"epoch": 0.4344569288389513,
"grad_norm": 0.6303634775949936,
"learning_rate": 8.85559859872694e-06,
"loss": 0.2223,
"step": 29
},
{
"epoch": 0.449438202247191,
"grad_norm": 0.6147551420751435,
"learning_rate": 8.778747871771293e-06,
"loss": 0.1647,
"step": 30
},
{
"epoch": 0.46441947565543074,
"grad_norm": 0.6145539321542911,
"learning_rate": 8.69975682191442e-06,
"loss": 0.2336,
"step": 31
},
{
"epoch": 0.4794007490636704,
"grad_norm": 0.654327925138206,
"learning_rate": 8.61867019052535e-06,
"loss": 0.2604,
"step": 32
},
{
"epoch": 0.4943820224719101,
"grad_norm": 0.5918563237454284,
"learning_rate": 8.535533905932739e-06,
"loss": 0.2375,
"step": 33
},
{
"epoch": 0.5093632958801498,
"grad_norm": 0.6873861511815068,
"learning_rate": 8.450395057410561e-06,
"loss": 0.2094,
"step": 34
},
{
"epoch": 0.5243445692883895,
"grad_norm": 0.6428306911470563,
"learning_rate": 8.363301868506264e-06,
"loss": 0.2366,
"step": 35
},
{
"epoch": 0.5393258426966292,
"grad_norm": 0.6058033041672071,
"learning_rate": 8.274303669726427e-06,
"loss": 0.189,
"step": 36
},
{
"epoch": 0.5543071161048689,
"grad_norm": 0.5746659583826536,
"learning_rate": 8.183450870595443e-06,
"loss": 0.1894,
"step": 37
},
{
"epoch": 0.5692883895131086,
"grad_norm": 0.6315068428290347,
"learning_rate": 8.090794931103026e-06,
"loss": 0.1858,
"step": 38
},
{
"epoch": 0.5842696629213483,
"grad_norm": 0.6001403356204732,
"learning_rate": 7.996388332556735e-06,
"loss": 0.2061,
"step": 39
},
{
"epoch": 0.599250936329588,
"grad_norm": 0.6234697289373689,
"learning_rate": 7.900284547855992e-06,
"loss": 0.1895,
"step": 40
},
{
"epoch": 0.6142322097378277,
"grad_norm": 0.6078340995557628,
"learning_rate": 7.80253801120447e-06,
"loss": 0.2629,
"step": 41
},
{
"epoch": 0.6292134831460674,
"grad_norm": 0.6062294869638835,
"learning_rate": 7.703204087277989e-06,
"loss": 0.2343,
"step": 42
},
{
"epoch": 0.6441947565543071,
"grad_norm": 0.6266244424067957,
"learning_rate": 7.602339039865362e-06,
"loss": 0.1662,
"step": 43
},
{
"epoch": 0.6591760299625468,
"grad_norm": 0.6870719075450415,
"learning_rate": 7.500000000000001e-06,
"loss": 0.2217,
"step": 44
},
{
"epoch": 0.6741573033707865,
"grad_norm": 0.6510380261455756,
"learning_rate": 7.396244933600285e-06,
"loss": 0.2368,
"step": 45
},
{
"epoch": 0.6891385767790262,
"grad_norm": 0.6158948462237728,
"learning_rate": 7.291132608637053e-06,
"loss": 0.2193,
"step": 46
},
{
"epoch": 0.704119850187266,
"grad_norm": 0.5663618497000916,
"learning_rate": 7.1847225618467975e-06,
"loss": 0.1668,
"step": 47
},
{
"epoch": 0.7191011235955056,
"grad_norm": 0.6469446107112001,
"learning_rate": 7.0770750650094335e-06,
"loss": 0.1944,
"step": 48
},
{
"epoch": 0.7340823970037453,
"grad_norm": 0.61757816629356,
"learning_rate": 6.968251090809708e-06,
"loss": 0.188,
"step": 49
},
{
"epoch": 0.7490636704119851,
"grad_norm": 0.5802883823079219,
"learning_rate": 6.858312278301638e-06,
"loss": 0.1928,
"step": 50
},
{
"epoch": 0.7640449438202247,
"grad_norm": 0.6537313290106945,
"learning_rate": 6.747320897995493e-06,
"loss": 0.219,
"step": 51
},
{
"epoch": 0.7790262172284644,
"grad_norm": 0.6055845124158611,
"learning_rate": 6.635339816587109e-06,
"loss": 0.1919,
"step": 52
},
{
"epoch": 0.7940074906367042,
"grad_norm": 0.6203253397296223,
"learning_rate": 6.522432461349536e-06,
"loss": 0.2227,
"step": 53
},
{
"epoch": 0.8089887640449438,
"grad_norm": 0.6149950649489568,
"learning_rate": 6.408662784207149e-06,
"loss": 0.2017,
"step": 54
},
{
"epoch": 0.8239700374531835,
"grad_norm": 0.585925698951599,
"learning_rate": 6.294095225512604e-06,
"loss": 0.2072,
"step": 55
},
{
"epoch": 0.8389513108614233,
"grad_norm": 0.6239810997246404,
"learning_rate": 6.178794677547138e-06,
"loss": 0.2616,
"step": 56
},
{
"epoch": 0.8539325842696629,
"grad_norm": 0.6492483647132805,
"learning_rate": 6.062826447764883e-06,
"loss": 0.2329,
"step": 57
},
{
"epoch": 0.8689138576779026,
"grad_norm": 0.5861991200300652,
"learning_rate": 5.946256221802052e-06,
"loss": 0.2286,
"step": 58
},
{
"epoch": 0.8838951310861424,
"grad_norm": 0.6547615662837896,
"learning_rate": 5.829150026271871e-06,
"loss": 0.2493,
"step": 59
},
{
"epoch": 0.898876404494382,
"grad_norm": 0.5655049662438126,
"learning_rate": 5.711574191366427e-06,
"loss": 0.2469,
"step": 60
},
{
"epoch": 0.9138576779026217,
"grad_norm": 0.6952516362877342,
"learning_rate": 5.593595313286526e-06,
"loss": 0.1732,
"step": 61
},
{
"epoch": 0.9288389513108615,
"grad_norm": 0.6222742837403629,
"learning_rate": 5.475280216520913e-06,
"loss": 0.2222,
"step": 62
},
{
"epoch": 0.9438202247191011,
"grad_norm": 0.6182712469593838,
"learning_rate": 5.356695915996162e-06,
"loss": 0.2172,
"step": 63
},
{
"epoch": 0.9588014981273408,
"grad_norm": 0.5750452008511898,
"learning_rate": 5.237909579118713e-06,
"loss": 0.2089,
"step": 64
},
{
"epoch": 0.9737827715355806,
"grad_norm": 0.6075834895934711,
"learning_rate": 5.118988487730537e-06,
"loss": 0.1963,
"step": 65
},
{
"epoch": 0.9887640449438202,
"grad_norm": 0.6265428920405006,
"learning_rate": 5e-06,
"loss": 0.1665,
"step": 66
},
{
"epoch": 1.0112359550561798,
"grad_norm": 1.2627975080853935,
"learning_rate": 4.881011512269464e-06,
"loss": 0.3101,
"step": 67
},
{
"epoch": 1.0262172284644195,
"grad_norm": 0.6628220147961363,
"learning_rate": 4.762090420881289e-06,
"loss": 0.1551,
"step": 68
},
{
"epoch": 1.0411985018726593,
"grad_norm": 0.641361131687303,
"learning_rate": 4.643304084003839e-06,
"loss": 0.1755,
"step": 69
},
{
"epoch": 1.0561797752808988,
"grad_norm": 0.6112884664295263,
"learning_rate": 4.524719783479088e-06,
"loss": 0.174,
"step": 70
},
{
"epoch": 1.0711610486891385,
"grad_norm": 0.6202594191629818,
"learning_rate": 4.4064046867134755e-06,
"loss": 0.1612,
"step": 71
},
{
"epoch": 1.0861423220973783,
"grad_norm": 0.6223525152895909,
"learning_rate": 4.2884258086335755e-06,
"loss": 0.1715,
"step": 72
},
{
"epoch": 1.101123595505618,
"grad_norm": 0.6465221436305756,
"learning_rate": 4.1708499737281305e-06,
"loss": 0.0986,
"step": 73
},
{
"epoch": 1.1161048689138577,
"grad_norm": 0.7381206938936858,
"learning_rate": 4.053743778197951e-06,
"loss": 0.2076,
"step": 74
},
{
"epoch": 1.1310861423220975,
"grad_norm": 0.5039291746245833,
"learning_rate": 3.937173552235117e-06,
"loss": 0.1186,
"step": 75
},
{
"epoch": 1.146067415730337,
"grad_norm": 0.6528881282246385,
"learning_rate": 3.821205322452863e-06,
"loss": 0.1992,
"step": 76
},
{
"epoch": 1.1610486891385767,
"grad_norm": 0.6343479202643421,
"learning_rate": 3.705904774487396e-06,
"loss": 0.1775,
"step": 77
},
{
"epoch": 1.1760299625468165,
"grad_norm": 0.6352198407586305,
"learning_rate": 3.5913372157928515e-06,
"loss": 0.1386,
"step": 78
},
{
"epoch": 1.1910112359550562,
"grad_norm": 0.6749892840132967,
"learning_rate": 3.477567538650466e-06,
"loss": 0.1729,
"step": 79
},
{
"epoch": 1.205992509363296,
"grad_norm": 0.7298668817817314,
"learning_rate": 3.3646601834128924e-06,
"loss": 0.1661,
"step": 80
},
{
"epoch": 1.2209737827715357,
"grad_norm": 0.6565615391167543,
"learning_rate": 3.252679102004509e-06,
"loss": 0.139,
"step": 81
},
{
"epoch": 1.2359550561797752,
"grad_norm": 0.6581807260569481,
"learning_rate": 3.141687721698363e-06,
"loss": 0.1559,
"step": 82
},
{
"epoch": 1.250936329588015,
"grad_norm": 0.6834974107318784,
"learning_rate": 3.0317489091902936e-06,
"loss": 0.1621,
"step": 83
},
{
"epoch": 1.2659176029962547,
"grad_norm": 0.6041607110127414,
"learning_rate": 2.9229249349905686e-06,
"loss": 0.1651,
"step": 84
},
{
"epoch": 1.2808988764044944,
"grad_norm": 0.6112071242824599,
"learning_rate": 2.8152774381532033e-06,
"loss": 0.1485,
"step": 85
},
{
"epoch": 1.2958801498127341,
"grad_norm": 0.6494692990598454,
"learning_rate": 2.708867391362948e-06,
"loss": 0.1619,
"step": 86
},
{
"epoch": 1.3108614232209739,
"grad_norm": 0.6200711494427865,
"learning_rate": 2.603755066399718e-06,
"loss": 0.1222,
"step": 87
},
{
"epoch": 1.3258426966292136,
"grad_norm": 0.5552452072437992,
"learning_rate": 2.5000000000000015e-06,
"loss": 0.1797,
"step": 88
},
{
"epoch": 1.3408239700374531,
"grad_norm": 0.6847709659230087,
"learning_rate": 2.3976609601346395e-06,
"loss": 0.1578,
"step": 89
},
{
"epoch": 1.3558052434456929,
"grad_norm": 0.6123305532745199,
"learning_rate": 2.296795912722014e-06,
"loss": 0.1397,
"step": 90
},
{
"epoch": 1.3707865168539326,
"grad_norm": 0.6295783834094151,
"learning_rate": 2.1974619887955294e-06,
"loss": 0.1665,
"step": 91
},
{
"epoch": 1.3857677902621723,
"grad_norm": 0.6027900505774648,
"learning_rate": 2.09971545214401e-06,
"loss": 0.1232,
"step": 92
},
{
"epoch": 1.4007490636704119,
"grad_norm": 0.5658908653969518,
"learning_rate": 2.0036116674432653e-06,
"loss": 0.1598,
"step": 93
},
{
"epoch": 1.4157303370786516,
"grad_norm": 0.5478299694183959,
"learning_rate": 1.9092050688969736e-06,
"loss": 0.1468,
"step": 94
},
{
"epoch": 1.4307116104868913,
"grad_norm": 0.6508112635569822,
"learning_rate": 1.8165491294045596e-06,
"loss": 0.147,
"step": 95
},
{
"epoch": 1.445692883895131,
"grad_norm": 0.6133506596449134,
"learning_rate": 1.7256963302735752e-06,
"loss": 0.1643,
"step": 96
},
{
"epoch": 1.4606741573033708,
"grad_norm": 0.6525319954671859,
"learning_rate": 1.6366981314937374e-06,
"loss": 0.1366,
"step": 97
},
{
"epoch": 1.4756554307116105,
"grad_norm": 0.6512204891139485,
"learning_rate": 1.549604942589441e-06,
"loss": 0.1138,
"step": 98
},
{
"epoch": 1.4906367041198503,
"grad_norm": 0.5892094139698941,
"learning_rate": 1.4644660940672628e-06,
"loss": 0.1215,
"step": 99
},
{
"epoch": 1.50561797752809,
"grad_norm": 0.5573205660230812,
"learning_rate": 1.3813298094746491e-06,
"loss": 0.1517,
"step": 100
},
{
"epoch": 1.5205992509363297,
"grad_norm": 0.5856445664800555,
"learning_rate": 1.3002431780855817e-06,
"loss": 0.146,
"step": 101
},
{
"epoch": 1.5355805243445693,
"grad_norm": 0.6044356941260213,
"learning_rate": 1.2212521282287093e-06,
"loss": 0.1317,
"step": 102
},
{
"epoch": 1.550561797752809,
"grad_norm": 0.638376701047395,
"learning_rate": 1.144401401273062e-06,
"loss": 0.1386,
"step": 103
},
{
"epoch": 1.5655430711610487,
"grad_norm": 0.6280815326169782,
"learning_rate": 1.0697345262860638e-06,
"loss": 0.1493,
"step": 104
},
{
"epoch": 1.5805243445692883,
"grad_norm": 0.5806540586906052,
"learning_rate": 9.972937953781985e-07,
"loss": 0.1406,
"step": 105
},
{
"epoch": 1.595505617977528,
"grad_norm": 0.6201864366516323,
"learning_rate": 9.271202397483214e-07,
"loss": 0.1754,
"step": 106
},
{
"epoch": 1.6104868913857677,
"grad_norm": 0.5327484519118663,
"learning_rate": 8.592536064431467e-07,
"loss": 0.1114,
"step": 107
},
{
"epoch": 1.6254681647940075,
"grad_norm": 0.6261105466064324,
"learning_rate": 7.937323358440935e-07,
"loss": 0.1786,
"step": 108
},
{
"epoch": 1.6404494382022472,
"grad_norm": 0.5992553182332926,
"learning_rate": 7.305935398942598e-07,
"loss": 0.1576,
"step": 109
},
{
"epoch": 1.655430711610487,
"grad_norm": 0.6338801175160249,
"learning_rate": 6.698729810778065e-07,
"loss": 0.1709,
"step": 110
},
{
"epoch": 1.6704119850187267,
"grad_norm": 0.6359543329537571,
"learning_rate": 6.116050521637218e-07,
"loss": 0.1579,
"step": 111
},
{
"epoch": 1.6853932584269664,
"grad_norm": 0.6118207534358003,
"learning_rate": 5.558227567253832e-07,
"loss": 0.1372,
"step": 112
},
{
"epoch": 1.7003745318352061,
"grad_norm": 0.5986622163541178,
"learning_rate": 5.025576904469842e-07,
"loss": 0.1558,
"step": 113
},
{
"epoch": 1.7153558052434457,
"grad_norm": 0.553030663176511,
"learning_rate": 4.5184002322740784e-07,
"loss": 0.1387,
"step": 114
},
{
"epoch": 1.7303370786516854,
"grad_norm": 0.582078865450689,
"learning_rate": 4.036984820916723e-07,
"loss": 0.1554,
"step": 115
},
{
"epoch": 1.7453183520599251,
"grad_norm": 0.539769781766892,
"learning_rate": 3.581603349196372e-07,
"loss": 0.101,
"step": 116
},
{
"epoch": 1.7602996254681647,
"grad_norm": 0.5787331747341751,
"learning_rate": 3.1525137500119207e-07,
"loss": 0.1048,
"step": 117
},
{
"epoch": 1.7752808988764044,
"grad_norm": 0.5620283880223796,
"learning_rate": 2.7499590642665773e-07,
"loss": 0.1816,
"step": 118
},
{
"epoch": 1.7902621722846441,
"grad_norm": 0.5903859357503745,
"learning_rate": 2.3741673032069757e-07,
"loss": 0.183,
"step": 119
},
{
"epoch": 1.8052434456928839,
"grad_norm": 0.5745085735666299,
"learning_rate": 2.0253513192751374e-07,
"loss": 0.1327,
"step": 120
},
{
"epoch": 1.8202247191011236,
"grad_norm": 0.6345914868587128,
"learning_rate": 1.7037086855465902e-07,
"loss": 0.1991,
"step": 121
},
{
"epoch": 1.8352059925093633,
"grad_norm": 0.5985145886139585,
"learning_rate": 1.4094215838229176e-07,
"loss": 0.119,
"step": 122
},
{
"epoch": 1.850187265917603,
"grad_norm": 0.6287342014234429,
"learning_rate": 1.1426567014420297e-07,
"loss": 0.1549,
"step": 123
},
{
"epoch": 1.8651685393258428,
"grad_norm": 0.5506379977166188,
"learning_rate": 9.035651368646647e-08,
"loss": 0.1472,
"step": 124
},
{
"epoch": 1.8801498127340825,
"grad_norm": 0.5420865786138673,
"learning_rate": 6.922823140906754e-08,
"loss": 0.1317,
"step": 125
},
{
"epoch": 1.895131086142322,
"grad_norm": 0.6404577926395193,
"learning_rate": 5.089279059533658e-08,
"loss": 0.176,
"step": 126
},
{
"epoch": 1.9101123595505618,
"grad_norm": 0.5787744435042076,
"learning_rate": 3.536057663355852e-08,
"loss": 0.1427,
"step": 127
},
{
"epoch": 1.9250936329588015,
"grad_norm": 0.5581207126806131,
"learning_rate": 2.264038713457706e-08,
"loss": 0.1467,
"step": 128
},
{
"epoch": 1.940074906367041,
"grad_norm": 0.5309384337432321,
"learning_rate": 1.2739426948732426e-08,
"loss": 0.1113,
"step": 129
},
{
"epoch": 1.9550561797752808,
"grad_norm": 0.63068841802962,
"learning_rate": 5.6633040849601865e-09,
"loss": 0.16,
"step": 130
},
{
"epoch": 1.9700374531835205,
"grad_norm": 0.5757589293513341,
"learning_rate": 1.4160265343549084e-09,
"loss": 0.1473,
"step": 131
},
{
"epoch": 1.9850187265917603,
"grad_norm": 0.5413659197944799,
"learning_rate": 0.0,
"loss": 0.1221,
"step": 132
},
{
"epoch": 1.9850187265917603,
"step": 132,
"total_flos": 21372068167680.0,
"train_loss": 0.18907840533012693,
"train_runtime": 1094.0515,
"train_samples_per_second": 9.742,
"train_steps_per_second": 0.121
}
],
"logging_steps": 1,
"max_steps": 132,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 21372068167680.0,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}