jvlv4 / trainer_state.json
lakomey's picture
📦 upload model files
c9a3133 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 400,
"global_step": 23658,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00507227998985544,
"grad_norm": 9.971283912658691,
"learning_rate": 5.6338028169014084e-06,
"loss": 9.5562,
"step": 40
},
{
"epoch": 0.01014455997971088,
"grad_norm": 7.090546607971191,
"learning_rate": 1.1267605633802817e-05,
"loss": 5.3074,
"step": 80
},
{
"epoch": 0.01521683996956632,
"grad_norm": 3.03216290473938,
"learning_rate": 1.6901408450704224e-05,
"loss": 4.6246,
"step": 120
},
{
"epoch": 0.02028911995942176,
"grad_norm": 2.908621311187744,
"learning_rate": 2.2535211267605634e-05,
"loss": 4.4839,
"step": 160
},
{
"epoch": 0.0253613999492772,
"grad_norm": 4.422246932983398,
"learning_rate": 2.8169014084507046e-05,
"loss": 4.4611,
"step": 200
},
{
"epoch": 0.03043367993913264,
"grad_norm": 5.368454933166504,
"learning_rate": 3.380281690140845e-05,
"loss": 4.4124,
"step": 240
},
{
"epoch": 0.03550595992898808,
"grad_norm": 6.5281853675842285,
"learning_rate": 3.943661971830986e-05,
"loss": 4.369,
"step": 280
},
{
"epoch": 0.04057823991884352,
"grad_norm": 3.8368284702301025,
"learning_rate": 4.507042253521127e-05,
"loss": 4.3359,
"step": 320
},
{
"epoch": 0.04565051990869896,
"grad_norm": 3.0560672283172607,
"learning_rate": 5.070422535211268e-05,
"loss": 4.3045,
"step": 360
},
{
"epoch": 0.0507227998985544,
"grad_norm": 3.674811363220215,
"learning_rate": 5.633802816901409e-05,
"loss": 4.2641,
"step": 400
},
{
"epoch": 0.0507227998985544,
"eval_action_accuracy": 0.12628173828125,
"eval_loss": 4.223208427429199,
"eval_runtime": 35.638,
"eval_samples_per_second": 57.467,
"eval_steps_per_second": 3.592,
"step": 400
},
{
"epoch": 0.05579507988840984,
"grad_norm": 3.5248770713806152,
"learning_rate": 6.197183098591549e-05,
"loss": 4.1921,
"step": 440
},
{
"epoch": 0.06086735987826528,
"grad_norm": 3.289992570877075,
"learning_rate": 6.76056338028169e-05,
"loss": 4.114,
"step": 480
},
{
"epoch": 0.06593963986812072,
"grad_norm": 2.6550910472869873,
"learning_rate": 7.323943661971832e-05,
"loss": 4.072,
"step": 520
},
{
"epoch": 0.07101191985797616,
"grad_norm": 2.488555669784546,
"learning_rate": 7.887323943661972e-05,
"loss": 4.0293,
"step": 560
},
{
"epoch": 0.0760841998478316,
"grad_norm": 2.4804906845092773,
"learning_rate": 8.450704225352113e-05,
"loss": 3.9972,
"step": 600
},
{
"epoch": 0.08115647983768703,
"grad_norm": 2.8795552253723145,
"learning_rate": 9.014084507042254e-05,
"loss": 3.9798,
"step": 640
},
{
"epoch": 0.08622875982754248,
"grad_norm": 2.2428362369537354,
"learning_rate": 9.577464788732394e-05,
"loss": 3.9605,
"step": 680
},
{
"epoch": 0.09130103981739791,
"grad_norm": 1.8302778005599976,
"learning_rate": 9.999995314564068e-05,
"loss": 3.9435,
"step": 720
},
{
"epoch": 0.09637331980725336,
"grad_norm": 2.1241495609283447,
"learning_rate": 9.999882864540755e-05,
"loss": 3.931,
"step": 760
},
{
"epoch": 0.1014455997971088,
"grad_norm": 1.8298007249832153,
"learning_rate": 9.999620484431368e-05,
"loss": 3.9123,
"step": 800
},
{
"epoch": 0.1014455997971088,
"eval_action_accuracy": 0.13214111328125,
"eval_loss": 3.9098541736602783,
"eval_runtime": 30.3817,
"eval_samples_per_second": 67.409,
"eval_steps_per_second": 4.213,
"step": 800
},
{
"epoch": 0.10651787978696424,
"grad_norm": 1.7416859865188599,
"learning_rate": 9.999208182103823e-05,
"loss": 3.9008,
"step": 840
},
{
"epoch": 0.11159015977681969,
"grad_norm": 1.6751515865325928,
"learning_rate": 9.998645969921714e-05,
"loss": 3.8869,
"step": 880
},
{
"epoch": 0.11666243976667512,
"grad_norm": 1.5095834732055664,
"learning_rate": 9.997933864743942e-05,
"loss": 3.8741,
"step": 920
},
{
"epoch": 0.12173471975653057,
"grad_norm": 1.510201334953308,
"learning_rate": 9.997071887924204e-05,
"loss": 3.8671,
"step": 960
},
{
"epoch": 0.126806999746386,
"grad_norm": 1.5421382188796997,
"learning_rate": 9.996060065310357e-05,
"loss": 3.8587,
"step": 1000
},
{
"epoch": 0.13187927973624144,
"grad_norm": 1.5371688604354858,
"learning_rate": 9.994898427243645e-05,
"loss": 3.8549,
"step": 1040
},
{
"epoch": 0.13695155972609688,
"grad_norm": 1.5408425331115723,
"learning_rate": 9.99358700855778e-05,
"loss": 3.8422,
"step": 1080
},
{
"epoch": 0.1420238397159523,
"grad_norm": 1.7944141626358032,
"learning_rate": 9.99212584857791e-05,
"loss": 3.838,
"step": 1120
},
{
"epoch": 0.14709611970580777,
"grad_norm": 1.500591516494751,
"learning_rate": 9.990514991119424e-05,
"loss": 3.8274,
"step": 1160
},
{
"epoch": 0.1521683996956632,
"grad_norm": 1.2871513366699219,
"learning_rate": 9.988754484486657e-05,
"loss": 3.8198,
"step": 1200
},
{
"epoch": 0.1521683996956632,
"eval_action_accuracy": 0.133544921875,
"eval_loss": 3.82753849029541,
"eval_runtime": 252.3214,
"eval_samples_per_second": 8.117,
"eval_steps_per_second": 0.507,
"step": 1200
},
{
"epoch": 0.15724067968551864,
"grad_norm": 1.2817726135253906,
"learning_rate": 9.98684438147143e-05,
"loss": 3.8133,
"step": 1240
},
{
"epoch": 0.16231295967537407,
"grad_norm": 1.4230026006698608,
"learning_rate": 9.984784739351463e-05,
"loss": 3.8104,
"step": 1280
},
{
"epoch": 0.16738523966522953,
"grad_norm": 1.4561232328414917,
"learning_rate": 9.982575619888667e-05,
"loss": 3.8052,
"step": 1320
},
{
"epoch": 0.17245751965508496,
"grad_norm": 1.2932708263397217,
"learning_rate": 9.980217089327291e-05,
"loss": 3.8026,
"step": 1360
},
{
"epoch": 0.1775297996449404,
"grad_norm": 1.2332886457443237,
"learning_rate": 9.977709218391926e-05,
"loss": 3.795,
"step": 1400
},
{
"epoch": 0.18260207963479583,
"grad_norm": 1.2658277750015259,
"learning_rate": 9.975052082285399e-05,
"loss": 3.7948,
"step": 1440
},
{
"epoch": 0.1876743596246513,
"grad_norm": 1.2154490947723389,
"learning_rate": 9.972245760686504e-05,
"loss": 3.7817,
"step": 1480
},
{
"epoch": 0.19274663961450672,
"grad_norm": 1.2812718152999878,
"learning_rate": 9.969290337747618e-05,
"loss": 3.7757,
"step": 1520
},
{
"epoch": 0.19781891960436215,
"grad_norm": 1.3050552606582642,
"learning_rate": 9.966185902092184e-05,
"loss": 3.778,
"step": 1560
},
{
"epoch": 0.2028911995942176,
"grad_norm": 1.2343411445617676,
"learning_rate": 9.962932546812042e-05,
"loss": 3.7715,
"step": 1600
},
{
"epoch": 0.2028911995942176,
"eval_action_accuracy": 0.134033203125,
"eval_loss": 3.7644243240356445,
"eval_runtime": 201.2869,
"eval_samples_per_second": 10.175,
"eval_steps_per_second": 0.636,
"step": 1600
},
{
"epoch": 0.20796347958407305,
"grad_norm": 1.263569712638855,
"learning_rate": 9.959530369464645e-05,
"loss": 3.7654,
"step": 1640
},
{
"epoch": 0.21303575957392848,
"grad_norm": 1.182700276374817,
"learning_rate": 9.955979472070133e-05,
"loss": 3.7597,
"step": 1680
},
{
"epoch": 0.2181080395637839,
"grad_norm": 1.1188846826553345,
"learning_rate": 9.952279961108268e-05,
"loss": 3.7569,
"step": 1720
},
{
"epoch": 0.22318031955363937,
"grad_norm": 1.126511573791504,
"learning_rate": 9.94843194751525e-05,
"loss": 3.7503,
"step": 1760
},
{
"epoch": 0.2282525995434948,
"grad_norm": 1.1715574264526367,
"learning_rate": 9.944435546680384e-05,
"loss": 3.7482,
"step": 1800
},
{
"epoch": 0.23332487953335024,
"grad_norm": 1.2102463245391846,
"learning_rate": 9.940290878442622e-05,
"loss": 3.7484,
"step": 1840
},
{
"epoch": 0.23839715952320567,
"grad_norm": 1.0691642761230469,
"learning_rate": 9.935998067086967e-05,
"loss": 3.7423,
"step": 1880
},
{
"epoch": 0.24346943951306113,
"grad_norm": 1.204828143119812,
"learning_rate": 9.931557241340752e-05,
"loss": 3.7366,
"step": 1920
},
{
"epoch": 0.24854171950291656,
"grad_norm": 1.1264965534210205,
"learning_rate": 9.926968534369774e-05,
"loss": 3.7337,
"step": 1960
},
{
"epoch": 0.253613999492772,
"grad_norm": 1.0742850303649902,
"learning_rate": 9.922232083774302e-05,
"loss": 3.7288,
"step": 2000
},
{
"epoch": 0.253613999492772,
"eval_action_accuracy": 0.1356201171875,
"eval_loss": 3.7233614921569824,
"eval_runtime": 192.5766,
"eval_samples_per_second": 10.635,
"eval_steps_per_second": 0.665,
"step": 2000
},
{
"epoch": 0.25868627948262746,
"grad_norm": 1.2803360223770142,
"learning_rate": 9.917348031584952e-05,
"loss": 3.7233,
"step": 2040
},
{
"epoch": 0.2637585594724829,
"grad_norm": 1.0941964387893677,
"learning_rate": 9.91231652425843e-05,
"loss": 3.7238,
"step": 2080
},
{
"epoch": 0.2688308394623383,
"grad_norm": 1.1122888326644897,
"learning_rate": 9.907137712673133e-05,
"loss": 3.7228,
"step": 2120
},
{
"epoch": 0.27390311945219376,
"grad_norm": 1.1875488758087158,
"learning_rate": 9.901811752124635e-05,
"loss": 3.7163,
"step": 2160
},
{
"epoch": 0.2789753994420492,
"grad_norm": 0.9829818606376648,
"learning_rate": 9.896338802321021e-05,
"loss": 3.7091,
"step": 2200
},
{
"epoch": 0.2840476794319046,
"grad_norm": 1.1234384775161743,
"learning_rate": 9.890719027378103e-05,
"loss": 3.7129,
"step": 2240
},
{
"epoch": 0.2891199594217601,
"grad_norm": 0.9604063630104065,
"learning_rate": 9.884952595814497e-05,
"loss": 3.7119,
"step": 2280
},
{
"epoch": 0.29419223941161554,
"grad_norm": 1.0036152601242065,
"learning_rate": 9.87903968054657e-05,
"loss": 3.7038,
"step": 2320
},
{
"epoch": 0.299264519401471,
"grad_norm": 0.9695063829421997,
"learning_rate": 9.872980458883257e-05,
"loss": 3.7012,
"step": 2360
},
{
"epoch": 0.3043367993913264,
"grad_norm": 1.0221703052520752,
"learning_rate": 9.866775112520734e-05,
"loss": 3.6981,
"step": 2400
},
{
"epoch": 0.3043367993913264,
"eval_action_accuracy": 0.1356201171875,
"eval_loss": 3.699704647064209,
"eval_runtime": 94.9575,
"eval_samples_per_second": 21.568,
"eval_steps_per_second": 1.348,
"step": 2400
},
{
"epoch": 0.30940907938118184,
"grad_norm": 0.992612361907959,
"learning_rate": 9.86042382753699e-05,
"loss": 3.6976,
"step": 2440
},
{
"epoch": 0.3144813593710373,
"grad_norm": 1.0093075037002563,
"learning_rate": 9.853926794386223e-05,
"loss": 3.6934,
"step": 2480
},
{
"epoch": 0.3195536393608927,
"grad_norm": 1.016074299812317,
"learning_rate": 9.847284207893149e-05,
"loss": 3.6916,
"step": 2520
},
{
"epoch": 0.32462591935074814,
"grad_norm": 0.9795224070549011,
"learning_rate": 9.840496267247142e-05,
"loss": 3.6842,
"step": 2560
},
{
"epoch": 0.3296981993406036,
"grad_norm": 0.9658825397491455,
"learning_rate": 9.83356317599628e-05,
"loss": 3.6824,
"step": 2600
},
{
"epoch": 0.33477047933045906,
"grad_norm": 0.9373340010643005,
"learning_rate": 9.826485142041225e-05,
"loss": 3.6843,
"step": 2640
},
{
"epoch": 0.3398427593203145,
"grad_norm": 0.9244083166122437,
"learning_rate": 9.819262377628999e-05,
"loss": 3.6786,
"step": 2680
},
{
"epoch": 0.3449150393101699,
"grad_norm": 0.9753099083900452,
"learning_rate": 9.811895099346613e-05,
"loss": 3.6793,
"step": 2720
},
{
"epoch": 0.34998731930002536,
"grad_norm": 0.91774582862854,
"learning_rate": 9.804383528114575e-05,
"loss": 3.6759,
"step": 2760
},
{
"epoch": 0.3550595992898808,
"grad_norm": 0.9600816965103149,
"learning_rate": 9.796727889180268e-05,
"loss": 3.6719,
"step": 2800
},
{
"epoch": 0.3550595992898808,
"eval_action_accuracy": 0.136962890625,
"eval_loss": 3.670527458190918,
"eval_runtime": 255.8314,
"eval_samples_per_second": 8.005,
"eval_steps_per_second": 0.5,
"step": 2800
},
{
"epoch": 0.3601318792797362,
"grad_norm": 0.9501272439956665,
"learning_rate": 9.788928412111189e-05,
"loss": 3.6677,
"step": 2840
},
{
"epoch": 0.36520415926959165,
"grad_norm": 0.9030894041061401,
"learning_rate": 9.780985330788073e-05,
"loss": 3.6643,
"step": 2880
},
{
"epoch": 0.37027643925944714,
"grad_norm": 0.9130657911300659,
"learning_rate": 9.772898883397871e-05,
"loss": 3.6622,
"step": 2920
},
{
"epoch": 0.3753487192493026,
"grad_norm": 0.9452864527702332,
"learning_rate": 9.764669312426617e-05,
"loss": 3.6627,
"step": 2960
},
{
"epoch": 0.380420999239158,
"grad_norm": 0.8997258543968201,
"learning_rate": 9.756296864652149e-05,
"loss": 3.6584,
"step": 3000
},
{
"epoch": 0.38549327922901344,
"grad_norm": 0.9096789956092834,
"learning_rate": 9.74778179113671e-05,
"loss": 3.6542,
"step": 3040
},
{
"epoch": 0.3905655592188689,
"grad_norm": 0.8591508269309998,
"learning_rate": 9.739124347219422e-05,
"loss": 3.6526,
"step": 3080
},
{
"epoch": 0.3956378392087243,
"grad_norm": 0.9069055318832397,
"learning_rate": 9.73032479250863e-05,
"loss": 3.6522,
"step": 3120
},
{
"epoch": 0.40071011919857974,
"grad_norm": 0.8761900067329407,
"learning_rate": 9.721383390874117e-05,
"loss": 3.6511,
"step": 3160
},
{
"epoch": 0.4057823991884352,
"grad_norm": 0.8452698588371277,
"learning_rate": 9.712300410439186e-05,
"loss": 3.6458,
"step": 3200
},
{
"epoch": 0.4057823991884352,
"eval_action_accuracy": 0.1363525390625,
"eval_loss": 3.6467831134796143,
"eval_runtime": 225.4966,
"eval_samples_per_second": 9.082,
"eval_steps_per_second": 0.568,
"step": 3200
},
{
"epoch": 0.41085467917829066,
"grad_norm": 0.9788098931312561,
"learning_rate": 9.703076123572625e-05,
"loss": 3.6447,
"step": 3240
},
{
"epoch": 0.4159269591681461,
"grad_norm": 0.8087634444236755,
"learning_rate": 9.69371080688054e-05,
"loss": 3.6406,
"step": 3280
},
{
"epoch": 0.4209992391580015,
"grad_norm": 0.8617214560508728,
"learning_rate": 9.684204741198056e-05,
"loss": 3.64,
"step": 3320
},
{
"epoch": 0.42607151914785696,
"grad_norm": 0.8623685240745544,
"learning_rate": 9.674558211580902e-05,
"loss": 3.6414,
"step": 3360
},
{
"epoch": 0.4311437991377124,
"grad_norm": 0.8432904481887817,
"learning_rate": 9.664771507296857e-05,
"loss": 3.6386,
"step": 3400
},
{
"epoch": 0.4362160791275678,
"grad_norm": 0.8696951270103455,
"learning_rate": 9.654844921817078e-05,
"loss": 3.6364,
"step": 3440
},
{
"epoch": 0.44128835911742326,
"grad_norm": 0.9080593585968018,
"learning_rate": 9.644778752807306e-05,
"loss": 3.6295,
"step": 3480
},
{
"epoch": 0.44636063910727874,
"grad_norm": 0.8657037019729614,
"learning_rate": 9.634573302118925e-05,
"loss": 3.6337,
"step": 3520
},
{
"epoch": 0.4514329190971342,
"grad_norm": 0.8938254117965698,
"learning_rate": 9.624228875779928e-05,
"loss": 3.6296,
"step": 3560
},
{
"epoch": 0.4565051990869896,
"grad_norm": 0.9272467494010925,
"learning_rate": 9.613745783985724e-05,
"loss": 3.6274,
"step": 3600
},
{
"epoch": 0.4565051990869896,
"eval_action_accuracy": 0.13897705078125,
"eval_loss": 3.62253475189209,
"eval_runtime": 233.2421,
"eval_samples_per_second": 8.781,
"eval_steps_per_second": 0.549,
"step": 3600
},
{
"epoch": 0.46157747907684504,
"grad_norm": 0.9175124168395996,
"learning_rate": 9.603124341089854e-05,
"loss": 3.625,
"step": 3640
},
{
"epoch": 0.4666497590667005,
"grad_norm": 0.8574010133743286,
"learning_rate": 9.592364865594543e-05,
"loss": 3.6249,
"step": 3680
},
{
"epoch": 0.4717220390565559,
"grad_norm": 0.926366925239563,
"learning_rate": 9.58146768014117e-05,
"loss": 3.6219,
"step": 3720
},
{
"epoch": 0.47679431904641134,
"grad_norm": 0.9614307284355164,
"learning_rate": 9.570433111500582e-05,
"loss": 3.6162,
"step": 3760
},
{
"epoch": 0.48186659903626683,
"grad_norm": 0.8699825406074524,
"learning_rate": 9.559261490563294e-05,
"loss": 3.6203,
"step": 3800
},
{
"epoch": 0.48693887902612226,
"grad_norm": 0.9095637202262878,
"learning_rate": 9.547953152329573e-05,
"loss": 3.6145,
"step": 3840
},
{
"epoch": 0.4920111590159777,
"grad_norm": 0.8285691142082214,
"learning_rate": 9.536508435899388e-05,
"loss": 3.6128,
"step": 3880
},
{
"epoch": 0.4970834390058331,
"grad_norm": 0.8655169010162354,
"learning_rate": 9.524927684462242e-05,
"loss": 3.6122,
"step": 3920
},
{
"epoch": 0.5021557189956886,
"grad_norm": 0.7997918128967285,
"learning_rate": 9.513211245286883e-05,
"loss": 3.6087,
"step": 3960
},
{
"epoch": 0.507227998985544,
"grad_norm": 0.8405264616012573,
"learning_rate": 9.501359469710889e-05,
"loss": 3.6071,
"step": 4000
},
{
"epoch": 0.507227998985544,
"eval_action_accuracy": 0.13909912109375,
"eval_loss": 3.599052667617798,
"eval_runtime": 150.3774,
"eval_samples_per_second": 13.619,
"eval_steps_per_second": 0.851,
"step": 4000
},
{
"epoch": 0.5123002789753994,
"grad_norm": 0.8451080918312073,
"learning_rate": 9.489372713130131e-05,
"loss": 3.6066,
"step": 4040
},
{
"epoch": 0.5173725589652549,
"grad_norm": 0.8817796111106873,
"learning_rate": 9.477251334988122e-05,
"loss": 3.6002,
"step": 4080
},
{
"epoch": 0.5224448389551103,
"grad_norm": 0.8508093953132629,
"learning_rate": 9.464995698765227e-05,
"loss": 3.6005,
"step": 4120
},
{
"epoch": 0.5275171189449658,
"grad_norm": 0.8357366919517517,
"learning_rate": 9.452606171967777e-05,
"loss": 3.6006,
"step": 4160
},
{
"epoch": 0.5325893989348212,
"grad_norm": 0.8193793296813965,
"learning_rate": 9.440083126117039e-05,
"loss": 3.5962,
"step": 4200
},
{
"epoch": 0.5376616789246766,
"grad_norm": 0.8299962878227234,
"learning_rate": 9.427426936738078e-05,
"loss": 3.5972,
"step": 4240
},
{
"epoch": 0.5427339589145321,
"grad_norm": 0.8073312640190125,
"learning_rate": 9.414637983348498e-05,
"loss": 3.5979,
"step": 4280
},
{
"epoch": 0.5478062389043875,
"grad_norm": 0.8564472794532776,
"learning_rate": 9.401716649447059e-05,
"loss": 3.597,
"step": 4320
},
{
"epoch": 0.552878518894243,
"grad_norm": 0.8052465319633484,
"learning_rate": 9.388663322502182e-05,
"loss": 3.5952,
"step": 4360
},
{
"epoch": 0.5579507988840984,
"grad_norm": 0.8336448669433594,
"learning_rate": 9.37547839394032e-05,
"loss": 3.5849,
"step": 4400
},
{
"epoch": 0.5579507988840984,
"eval_action_accuracy": 0.13922119140625,
"eval_loss": 3.5810041427612305,
"eval_runtime": 198.5321,
"eval_samples_per_second": 10.316,
"eval_steps_per_second": 0.645,
"step": 4400
},
{
"epoch": 0.5630230788739539,
"grad_norm": 0.7685551047325134,
"learning_rate": 9.362162259134232e-05,
"loss": 3.5918,
"step": 4440
},
{
"epoch": 0.5680953588638092,
"grad_norm": 0.8058731555938721,
"learning_rate": 9.348715317391121e-05,
"loss": 3.5858,
"step": 4480
},
{
"epoch": 0.5731676388536647,
"grad_norm": 0.8100183010101318,
"learning_rate": 9.335137971940659e-05,
"loss": 3.5839,
"step": 4520
},
{
"epoch": 0.5782399188435202,
"grad_norm": 0.781609833240509,
"learning_rate": 9.321430629922897e-05,
"loss": 3.5818,
"step": 4560
},
{
"epoch": 0.5833121988333756,
"grad_norm": 0.8029278516769409,
"learning_rate": 9.307593702376061e-05,
"loss": 3.5768,
"step": 4600
},
{
"epoch": 0.5883844788232311,
"grad_norm": 0.7732046246528625,
"learning_rate": 9.293627604224217e-05,
"loss": 3.5787,
"step": 4640
},
{
"epoch": 0.5934567588130865,
"grad_norm": 0.8962281942367554,
"learning_rate": 9.279532754264837e-05,
"loss": 3.5776,
"step": 4680
},
{
"epoch": 0.598529038802942,
"grad_norm": 0.7350103855133057,
"learning_rate": 9.265309575156235e-05,
"loss": 3.5764,
"step": 4720
},
{
"epoch": 0.6036013187927973,
"grad_norm": 0.7271069288253784,
"learning_rate": 9.250958493404897e-05,
"loss": 3.5736,
"step": 4760
},
{
"epoch": 0.6086735987826528,
"grad_norm": 0.7939120531082153,
"learning_rate": 9.236479939352692e-05,
"loss": 3.5729,
"step": 4800
},
{
"epoch": 0.6086735987826528,
"eval_action_accuracy": 0.138916015625,
"eval_loss": 3.5635457038879395,
"eval_runtime": 240.0733,
"eval_samples_per_second": 8.531,
"eval_steps_per_second": 0.533,
"step": 4800
},
{
"epoch": 0.6137458787725082,
"grad_norm": 0.756108820438385,
"learning_rate": 9.221874347163956e-05,
"loss": 3.573,
"step": 4840
},
{
"epoch": 0.6188181587623637,
"grad_norm": 0.7624183893203735,
"learning_rate": 9.207142154812496e-05,
"loss": 3.5688,
"step": 4880
},
{
"epoch": 0.6238904387522192,
"grad_norm": 0.7523402571678162,
"learning_rate": 9.192283804068427e-05,
"loss": 3.5684,
"step": 4920
},
{
"epoch": 0.6289627187420745,
"grad_norm": 0.780206024646759,
"learning_rate": 9.177299740484952e-05,
"loss": 3.5652,
"step": 4960
},
{
"epoch": 0.63403499873193,
"grad_norm": 0.7238260507583618,
"learning_rate": 9.162190413384988e-05,
"loss": 3.5691,
"step": 5000
},
{
"epoch": 0.6391072787217854,
"grad_norm": 0.8539979457855225,
"learning_rate": 9.146956275847689e-05,
"loss": 3.5625,
"step": 5040
},
{
"epoch": 0.6441795587116409,
"grad_norm": 0.8123564720153809,
"learning_rate": 9.131597784694868e-05,
"loss": 3.5631,
"step": 5080
},
{
"epoch": 0.6492518387014963,
"grad_norm": 0.7440022230148315,
"learning_rate": 9.116115400477294e-05,
"loss": 3.5569,
"step": 5120
},
{
"epoch": 0.6543241186913518,
"grad_norm": 0.7737935185432434,
"learning_rate": 9.100509587460883e-05,
"loss": 3.5575,
"step": 5160
},
{
"epoch": 0.6593963986812073,
"grad_norm": 0.7868227958679199,
"learning_rate": 9.084780813612779e-05,
"loss": 3.5549,
"step": 5200
},
{
"epoch": 0.6593963986812073,
"eval_action_accuracy": 0.138671875,
"eval_loss": 3.54689621925354,
"eval_runtime": 275.3848,
"eval_samples_per_second": 7.437,
"eval_steps_per_second": 0.465,
"step": 5200
},
{
"epoch": 0.6644686786710626,
"grad_norm": 0.7422760128974915,
"learning_rate": 9.06892955058731e-05,
"loss": 3.5545,
"step": 5240
},
{
"epoch": 0.6695409586609181,
"grad_norm": 0.7535783648490906,
"learning_rate": 9.052956273711861e-05,
"loss": 3.5478,
"step": 5280
},
{
"epoch": 0.6746132386507735,
"grad_norm": 0.7200169563293457,
"learning_rate": 9.036861461972607e-05,
"loss": 3.5501,
"step": 5320
},
{
"epoch": 0.679685518640629,
"grad_norm": 0.8011751770973206,
"learning_rate": 9.020645598000158e-05,
"loss": 3.5506,
"step": 5360
},
{
"epoch": 0.6847577986304844,
"grad_norm": 0.7450407147407532,
"learning_rate": 9.004309168055081e-05,
"loss": 3.5499,
"step": 5400
},
{
"epoch": 0.6898300786203398,
"grad_norm": 0.7414825558662415,
"learning_rate": 8.987852662013321e-05,
"loss": 3.5465,
"step": 5440
},
{
"epoch": 0.6949023586101953,
"grad_norm": 0.7712804079055786,
"learning_rate": 8.971276573351513e-05,
"loss": 3.5432,
"step": 5480
},
{
"epoch": 0.6999746386000507,
"grad_norm": 0.745634913444519,
"learning_rate": 8.954581399132183e-05,
"loss": 3.544,
"step": 5520
},
{
"epoch": 0.7050469185899062,
"grad_norm": 0.7983254790306091,
"learning_rate": 8.937767639988839e-05,
"loss": 3.5411,
"step": 5560
},
{
"epoch": 0.7101191985797616,
"grad_norm": 0.7511922717094421,
"learning_rate": 8.920835800110964e-05,
"loss": 3.539,
"step": 5600
},
{
"epoch": 0.7101191985797616,
"eval_action_accuracy": 0.1375732421875,
"eval_loss": 3.526184558868408,
"eval_runtime": 188.9796,
"eval_samples_per_second": 10.837,
"eval_steps_per_second": 0.677,
"step": 5600
},
{
"epoch": 0.7151914785696171,
"grad_norm": 0.74385005235672,
"learning_rate": 8.903786387228895e-05,
"loss": 3.5383,
"step": 5640
},
{
"epoch": 0.7202637585594724,
"grad_norm": 0.7223657369613647,
"learning_rate": 8.886619912598599e-05,
"loss": 3.5411,
"step": 5680
},
{
"epoch": 0.7253360385493279,
"grad_norm": 0.7494250535964966,
"learning_rate": 8.869336890986338e-05,
"loss": 3.5366,
"step": 5720
},
{
"epoch": 0.7304083185391833,
"grad_norm": 0.752515971660614,
"learning_rate": 8.851937840653234e-05,
"loss": 3.5378,
"step": 5760
},
{
"epoch": 0.7354805985290388,
"grad_norm": 0.783234715461731,
"learning_rate": 8.83442328333974e-05,
"loss": 3.5354,
"step": 5800
},
{
"epoch": 0.7405528785188943,
"grad_norm": 0.8204740881919861,
"learning_rate": 8.816793744249971e-05,
"loss": 3.5301,
"step": 5840
},
{
"epoch": 0.7456251585087497,
"grad_norm": 0.7523532509803772,
"learning_rate": 8.799049752035975e-05,
"loss": 3.5323,
"step": 5880
},
{
"epoch": 0.7506974384986052,
"grad_norm": 0.7469347715377808,
"learning_rate": 8.781191838781876e-05,
"loss": 3.5265,
"step": 5920
},
{
"epoch": 0.7557697184884605,
"grad_norm": 0.760491669178009,
"learning_rate": 8.76322053998791e-05,
"loss": 3.525,
"step": 5960
},
{
"epoch": 0.760841998478316,
"grad_norm": 0.739592969417572,
"learning_rate": 8.745136394554381e-05,
"loss": 3.5243,
"step": 6000
},
{
"epoch": 0.760841998478316,
"eval_action_accuracy": 0.140625,
"eval_loss": 3.5108320713043213,
"eval_runtime": 219.3428,
"eval_samples_per_second": 9.337,
"eval_steps_per_second": 0.584,
"step": 6000
},
{
"epoch": 0.7659142784681714,
"grad_norm": 0.7475005388259888,
"learning_rate": 8.726939944765485e-05,
"loss": 3.5238,
"step": 6040
},
{
"epoch": 0.7709865584580269,
"grad_norm": 0.800801157951355,
"learning_rate": 8.708631736273066e-05,
"loss": 3.519,
"step": 6080
},
{
"epoch": 0.7760588384478824,
"grad_norm": 0.7466565370559692,
"learning_rate": 8.690212318080235e-05,
"loss": 3.5242,
"step": 6120
},
{
"epoch": 0.7811311184377377,
"grad_norm": 0.6981168389320374,
"learning_rate": 8.671682242524928e-05,
"loss": 3.5167,
"step": 6160
},
{
"epoch": 0.7862033984275932,
"grad_norm": 0.7748053669929504,
"learning_rate": 8.653042065263326e-05,
"loss": 3.515,
"step": 6200
},
{
"epoch": 0.7912756784174486,
"grad_norm": 0.7502596974372864,
"learning_rate": 8.634292345253198e-05,
"loss": 3.517,
"step": 6240
},
{
"epoch": 0.7963479584073041,
"grad_norm": 0.7047079801559448,
"learning_rate": 8.615433644737143e-05,
"loss": 3.516,
"step": 6280
},
{
"epoch": 0.8014202383971595,
"grad_norm": 0.7337208986282349,
"learning_rate": 8.59646652922573e-05,
"loss": 3.5121,
"step": 6320
},
{
"epoch": 0.806492518387015,
"grad_norm": 0.7593693733215332,
"learning_rate": 8.577391567480533e-05,
"loss": 3.5129,
"step": 6360
},
{
"epoch": 0.8115647983768705,
"grad_norm": 0.7421781420707703,
"learning_rate": 8.558209331497084e-05,
"loss": 3.5113,
"step": 6400
},
{
"epoch": 0.8115647983768705,
"eval_action_accuracy": 0.1082763671875,
"eval_loss": 3.5035910606384277,
"eval_runtime": 197.1129,
"eval_samples_per_second": 10.39,
"eval_steps_per_second": 0.649,
"step": 6400
},
{
"epoch": 0.8166370783667258,
"grad_norm": 0.748630166053772,
"learning_rate": 8.538920396487719e-05,
"loss": 3.5104,
"step": 6440
},
{
"epoch": 0.8217093583565813,
"grad_norm": 0.7239616513252258,
"learning_rate": 8.519525340864324e-05,
"loss": 3.5074,
"step": 6480
},
{
"epoch": 0.8267816383464367,
"grad_norm": 0.7293316721916199,
"learning_rate": 8.500024746220996e-05,
"loss": 3.5049,
"step": 6520
},
{
"epoch": 0.8318539183362922,
"grad_norm": 0.7635191082954407,
"learning_rate": 8.4804191973166e-05,
"loss": 3.5058,
"step": 6560
},
{
"epoch": 0.8369261983261476,
"grad_norm": 0.7280616164207458,
"learning_rate": 8.46070928205724e-05,
"loss": 3.5009,
"step": 6600
},
{
"epoch": 0.841998478316003,
"grad_norm": 0.7468705177307129,
"learning_rate": 8.440895591478614e-05,
"loss": 3.5002,
"step": 6640
},
{
"epoch": 0.8470707583058584,
"grad_norm": 0.7745243310928345,
"learning_rate": 8.420978719728311e-05,
"loss": 3.4971,
"step": 6680
},
{
"epoch": 0.8521430382957139,
"grad_norm": 0.7581838369369507,
"learning_rate": 8.400959264047985e-05,
"loss": 3.5001,
"step": 6720
},
{
"epoch": 0.8572153182855694,
"grad_norm": 0.7228087186813354,
"learning_rate": 8.380837824755439e-05,
"loss": 3.5033,
"step": 6760
},
{
"epoch": 0.8622875982754248,
"grad_norm": 9.996931076049805,
"learning_rate": 8.360615005226632e-05,
"loss": 5.4383,
"step": 6800
},
{
"epoch": 0.8622875982754248,
"eval_action_accuracy": 0.00238037109375,
"eval_loss": 6.149868011474609,
"eval_runtime": 135.6492,
"eval_samples_per_second": 15.098,
"eval_steps_per_second": 0.944,
"step": 6800
},
{
"epoch": 0.8673598782652803,
"grad_norm": 2.9023468494415283,
"learning_rate": 8.340291411877589e-05,
"loss": 5.3437,
"step": 6840
},
{
"epoch": 0.8724321582551356,
"grad_norm": 337.53240966796875,
"learning_rate": 8.319867654146204e-05,
"loss": 4.8335,
"step": 6880
},
{
"epoch": 0.8775044382449911,
"grad_norm": 0.6138980984687805,
"learning_rate": 8.299344344473979e-05,
"loss": 4.7753,
"step": 6920
},
{
"epoch": 0.8825767182348465,
"grad_norm": 0.6990567445755005,
"learning_rate": 8.278722098287644e-05,
"loss": 4.4881,
"step": 6960
},
{
"epoch": 0.887648998224702,
"grad_norm": 0.48511803150177,
"learning_rate": 8.25800153398072e-05,
"loss": 4.4721,
"step": 7000
},
{
"epoch": 0.8927212782145575,
"grad_norm": 2.293332815170288,
"learning_rate": 8.23718327289496e-05,
"loss": 4.4603,
"step": 7040
},
{
"epoch": 0.8977935582044129,
"grad_norm": 2.738636016845703,
"learning_rate": 8.216267939301723e-05,
"loss": 4.4459,
"step": 7080
},
{
"epoch": 0.9028658381942684,
"grad_norm": 4.165946960449219,
"learning_rate": 8.195256160383256e-05,
"loss": 4.4312,
"step": 7120
},
{
"epoch": 0.9079381181841237,
"grad_norm": 2.03474760055542,
"learning_rate": 8.174148566213883e-05,
"loss": 4.4189,
"step": 7160
},
{
"epoch": 0.9130103981739792,
"grad_norm": 2.4315898418426514,
"learning_rate": 8.152945789741115e-05,
"loss": 4.4123,
"step": 7200
},
{
"epoch": 0.9130103981739792,
"eval_action_accuracy": 0.07720947265625,
"eval_loss": 4.408840179443359,
"eval_runtime": 225.9743,
"eval_samples_per_second": 9.063,
"eval_steps_per_second": 0.566,
"step": 7200
},
{
"epoch": 0.9180826781638346,
"grad_norm": 3.4418368339538574,
"learning_rate": 8.13164846676667e-05,
"loss": 4.4018,
"step": 7240
},
{
"epoch": 0.9231549581536901,
"grad_norm": 2.912829637527466,
"learning_rate": 8.110257235927399e-05,
"loss": 4.4006,
"step": 7280
},
{
"epoch": 0.9282272381435456,
"grad_norm": 3.1742701530456543,
"learning_rate": 8.088772738676147e-05,
"loss": 4.3849,
"step": 7320
},
{
"epoch": 0.933299518133401,
"grad_norm": 2.608149528503418,
"learning_rate": 8.06719561926251e-05,
"loss": 4.3848,
"step": 7360
},
{
"epoch": 0.9383717981232564,
"grad_norm": 3.916184425354004,
"learning_rate": 8.045526524713522e-05,
"loss": 4.3698,
"step": 7400
},
{
"epoch": 0.9434440781131118,
"grad_norm": 1.0830720663070679,
"learning_rate": 8.023766104814249e-05,
"loss": 4.3521,
"step": 7440
},
{
"epoch": 0.9485163581029673,
"grad_norm": 4.330826282501221,
"learning_rate": 8.0019150120883e-05,
"loss": 4.3412,
"step": 7480
},
{
"epoch": 0.9535886380928227,
"grad_norm": 5.002548694610596,
"learning_rate": 7.97997390177827e-05,
"loss": 4.33,
"step": 7520
},
{
"epoch": 0.9586609180826782,
"grad_norm": 2.605839729309082,
"learning_rate": 7.957943431826084e-05,
"loss": 4.3083,
"step": 7560
},
{
"epoch": 0.9637331980725337,
"grad_norm": 2.4096832275390625,
"learning_rate": 7.93582426285327e-05,
"loss": 4.2866,
"step": 7600
},
{
"epoch": 0.9637331980725337,
"eval_action_accuracy": 0.12493896484375,
"eval_loss": 4.285426139831543,
"eval_runtime": 231.2686,
"eval_samples_per_second": 8.856,
"eval_steps_per_second": 0.553,
"step": 7600
},
{
"epoch": 0.968805478062389,
"grad_norm": 4.2472243309021,
"learning_rate": 7.913617058141147e-05,
"loss": 4.2785,
"step": 7640
},
{
"epoch": 0.9738777580522445,
"grad_norm": 2.99232816696167,
"learning_rate": 7.89132248361094e-05,
"loss": 4.2632,
"step": 7680
},
{
"epoch": 0.9789500380420999,
"grad_norm": 6.356353282928467,
"learning_rate": 7.868941207803807e-05,
"loss": 4.2352,
"step": 7720
},
{
"epoch": 0.9840223180319554,
"grad_norm": 4.926124095916748,
"learning_rate": 7.846473901860789e-05,
"loss": 4.212,
"step": 7760
},
{
"epoch": 0.9890945980218108,
"grad_norm": 4.553915023803711,
"learning_rate": 7.823921239502695e-05,
"loss": 4.1994,
"step": 7800
},
{
"epoch": 0.9941668780116663,
"grad_norm": 3.6262214183807373,
"learning_rate": 7.80128389700989e-05,
"loss": 4.1624,
"step": 7840
},
{
"epoch": 0.9992391580015216,
"grad_norm": 3.167363166809082,
"learning_rate": 7.778562553202017e-05,
"loss": 4.1414,
"step": 7880
},
{
"epoch": 1.0043114379913771,
"grad_norm": 4.281521797180176,
"learning_rate": 7.755757889417648e-05,
"loss": 4.1162,
"step": 7920
},
{
"epoch": 1.0093837179812326,
"grad_norm": 2.3562097549438477,
"learning_rate": 7.73287058949384e-05,
"loss": 4.0933,
"step": 7960
},
{
"epoch": 1.014455997971088,
"grad_norm": 4.595114231109619,
"learning_rate": 7.709901339745642e-05,
"loss": 4.0731,
"step": 8000
},
{
"epoch": 1.014455997971088,
"eval_action_accuracy": 0.12646484375,
"eval_loss": 4.067330360412598,
"eval_runtime": 32.7296,
"eval_samples_per_second": 62.573,
"eval_steps_per_second": 3.911,
"step": 8000
},
{
"epoch": 1.0195282779609434,
"grad_norm": 2.8510732650756836,
"learning_rate": 7.68685082894551e-05,
"loss": 4.0461,
"step": 8040
},
{
"epoch": 1.0246005579507989,
"grad_norm": 3.8514857292175293,
"learning_rate": 7.663719748302649e-05,
"loss": 4.0303,
"step": 8080
},
{
"epoch": 1.0296728379406543,
"grad_norm": 6.056658744812012,
"learning_rate": 7.640508791442292e-05,
"loss": 4.0205,
"step": 8120
},
{
"epoch": 1.0347451179305098,
"grad_norm": 5.616896629333496,
"learning_rate": 7.617218654384895e-05,
"loss": 3.9963,
"step": 8160
},
{
"epoch": 1.0398173979203653,
"grad_norm": 4.593634128570557,
"learning_rate": 7.59385003552527e-05,
"loss": 3.9845,
"step": 8200
},
{
"epoch": 1.0448896779102206,
"grad_norm": 2.184180974960327,
"learning_rate": 7.570403635611645e-05,
"loss": 3.9796,
"step": 8240
},
{
"epoch": 1.049961957900076,
"grad_norm": 3.9647679328918457,
"learning_rate": 7.546880157724637e-05,
"loss": 3.9604,
"step": 8280
},
{
"epoch": 1.0550342378899316,
"grad_norm": 3.0800178050994873,
"learning_rate": 7.523280307256189e-05,
"loss": 3.9499,
"step": 8320
},
{
"epoch": 1.060106517879787,
"grad_norm": 1.8450560569763184,
"learning_rate": 7.499604791888399e-05,
"loss": 3.9388,
"step": 8360
},
{
"epoch": 1.0651787978696423,
"grad_norm": 2.2816243171691895,
"learning_rate": 7.475854321572311e-05,
"loss": 3.9263,
"step": 8400
},
{
"epoch": 1.0651787978696423,
"eval_action_accuracy": 0.13165283203125,
"eval_loss": 3.927708864212036,
"eval_runtime": 32.2116,
"eval_samples_per_second": 63.58,
"eval_steps_per_second": 3.974,
"step": 8400
},
{
"epoch": 1.0702510778594978,
"grad_norm": 3.747340679168701,
"learning_rate": 7.452029608506625e-05,
"loss": 3.9185,
"step": 8440
},
{
"epoch": 1.0753233578493533,
"grad_norm": 3.5790855884552,
"learning_rate": 7.42813136711633e-05,
"loss": 3.9228,
"step": 8480
},
{
"epoch": 1.0803956378392088,
"grad_norm": 2.9461708068847656,
"learning_rate": 7.404160314031293e-05,
"loss": 3.9258,
"step": 8520
},
{
"epoch": 1.0854679178290643,
"grad_norm": 2.2783358097076416,
"learning_rate": 7.380117168064765e-05,
"loss": 3.8958,
"step": 8560
},
{
"epoch": 1.0905401978189195,
"grad_norm": 2.0436387062072754,
"learning_rate": 7.356002650191826e-05,
"loss": 3.891,
"step": 8600
},
{
"epoch": 1.095612477808775,
"grad_norm": 3.3393843173980713,
"learning_rate": 7.331817483527761e-05,
"loss": 3.8801,
"step": 8640
},
{
"epoch": 1.1006847577986305,
"grad_norm": 1.5066816806793213,
"learning_rate": 7.307562393306388e-05,
"loss": 3.9537,
"step": 8680
},
{
"epoch": 1.105757037788486,
"grad_norm": 3.5615596771240234,
"learning_rate": 7.283238106858299e-05,
"loss": 3.8907,
"step": 8720
},
{
"epoch": 1.1108293177783413,
"grad_norm": 1.8178229331970215,
"learning_rate": 7.258845353589051e-05,
"loss": 3.8741,
"step": 8760
},
{
"epoch": 1.1159015977681968,
"grad_norm": 2.923635721206665,
"learning_rate": 7.234384864957303e-05,
"loss": 3.861,
"step": 8800
},
{
"epoch": 1.1159015977681968,
"eval_action_accuracy": 0.1334228515625,
"eval_loss": 3.8630127906799316,
"eval_runtime": 224.4597,
"eval_samples_per_second": 9.124,
"eval_steps_per_second": 0.57,
"step": 8800
},
{
"epoch": 1.1209738777580522,
"grad_norm": 2.2543115615844727,
"learning_rate": 7.209857374452868e-05,
"loss": 3.8702,
"step": 8840
},
{
"epoch": 1.1260461577479077,
"grad_norm": 4.547013282775879,
"learning_rate": 7.185263617574732e-05,
"loss": 3.8536,
"step": 8880
},
{
"epoch": 1.1311184377377632,
"grad_norm": 2.3391380310058594,
"learning_rate": 7.160604331808988e-05,
"loss": 3.8498,
"step": 8920
},
{
"epoch": 1.1361907177276185,
"grad_norm": 1.3658958673477173,
"learning_rate": 7.135880256606728e-05,
"loss": 3.8391,
"step": 8960
},
{
"epoch": 1.141262997717474,
"grad_norm": 1.8794150352478027,
"learning_rate": 7.111092133361864e-05,
"loss": 3.8323,
"step": 9000
},
{
"epoch": 1.1463352777073295,
"grad_norm": 2.647994041442871,
"learning_rate": 7.0862407053889e-05,
"loss": 3.8246,
"step": 9040
},
{
"epoch": 1.151407557697185,
"grad_norm": 1.9109671115875244,
"learning_rate": 7.061326717900643e-05,
"loss": 3.826,
"step": 9080
},
{
"epoch": 1.1564798376870402,
"grad_norm": 2.2575666904449463,
"learning_rate": 7.036350917985849e-05,
"loss": 3.8153,
"step": 9120
},
{
"epoch": 1.1615521176768957,
"grad_norm": 2.557769298553467,
"learning_rate": 7.011314054586834e-05,
"loss": 3.8129,
"step": 9160
},
{
"epoch": 1.1666243976667512,
"grad_norm": 1.6273854970932007,
"learning_rate": 6.986216878477e-05,
"loss": 3.805,
"step": 9200
},
{
"epoch": 1.1666243976667512,
"eval_action_accuracy": 0.134033203125,
"eval_loss": 3.803765296936035,
"eval_runtime": 214.3228,
"eval_samples_per_second": 9.556,
"eval_steps_per_second": 0.597,
"step": 9200
},
{
"epoch": 1.1716966776566067,
"grad_norm": 2.280379295349121,
"learning_rate": 6.961060142238336e-05,
"loss": 3.7966,
"step": 9240
},
{
"epoch": 1.1767689576464622,
"grad_norm": 1.739105463027954,
"learning_rate": 6.935844600238839e-05,
"loss": 3.7988,
"step": 9280
},
{
"epoch": 1.1818412376363174,
"grad_norm": 1.481899619102478,
"learning_rate": 6.910571008609898e-05,
"loss": 3.7919,
"step": 9320
},
{
"epoch": 1.186913517626173,
"grad_norm": 1.5556796789169312,
"learning_rate": 6.885240125223623e-05,
"loss": 3.7861,
"step": 9360
},
{
"epoch": 1.1919857976160284,
"grad_norm": 2.1016335487365723,
"learning_rate": 6.859852709670113e-05,
"loss": 3.7836,
"step": 9400
},
{
"epoch": 1.197058077605884,
"grad_norm": 1.5450862646102905,
"learning_rate": 6.834409523234685e-05,
"loss": 3.7768,
"step": 9440
},
{
"epoch": 1.2021303575957392,
"grad_norm": 1.5863109827041626,
"learning_rate": 6.808911328875039e-05,
"loss": 3.773,
"step": 9480
},
{
"epoch": 1.2072026375855947,
"grad_norm": 1.5427820682525635,
"learning_rate": 6.783358891198378e-05,
"loss": 3.7688,
"step": 9520
},
{
"epoch": 1.2122749175754501,
"grad_norm": 1.2848241329193115,
"learning_rate": 6.757752976438494e-05,
"loss": 3.7613,
"step": 9560
},
{
"epoch": 1.2173471975653056,
"grad_norm": 1.413462519645691,
"learning_rate": 6.732094352432775e-05,
"loss": 3.7541,
"step": 9600
},
{
"epoch": 1.2173471975653056,
"eval_action_accuracy": 0.13427734375,
"eval_loss": 3.7621703147888184,
"eval_runtime": 298.648,
"eval_samples_per_second": 6.858,
"eval_steps_per_second": 0.429,
"step": 9600
},
{
"epoch": 1.2224194775551611,
"grad_norm": 1.9794578552246094,
"learning_rate": 6.706383788599185e-05,
"loss": 3.7602,
"step": 9640
},
{
"epoch": 1.2274917575450166,
"grad_norm": 1.9551501274108887,
"learning_rate": 6.680622055913198e-05,
"loss": 3.7513,
"step": 9680
},
{
"epoch": 1.2325640375348719,
"grad_norm": 1.13520085811615,
"learning_rate": 6.654809926884667e-05,
"loss": 3.7555,
"step": 9720
},
{
"epoch": 1.2376363175247274,
"grad_norm": 2.2174594402313232,
"learning_rate": 6.628948175534677e-05,
"loss": 3.7478,
"step": 9760
},
{
"epoch": 1.2427085975145828,
"grad_norm": 1.2913436889648438,
"learning_rate": 6.603037577372314e-05,
"loss": 3.7403,
"step": 9800
},
{
"epoch": 1.2477808775044383,
"grad_norm": 1.5598777532577515,
"learning_rate": 6.577078909371421e-05,
"loss": 3.7358,
"step": 9840
},
{
"epoch": 1.2528531574942936,
"grad_norm": 1.298091173171997,
"learning_rate": 6.551072949947304e-05,
"loss": 3.7334,
"step": 9880
},
{
"epoch": 1.257925437484149,
"grad_norm": 1.5344069004058838,
"learning_rate": 6.525020478933375e-05,
"loss": 3.7295,
"step": 9920
},
{
"epoch": 1.2629977174740046,
"grad_norm": 1.6545060873031616,
"learning_rate": 6.498922277557782e-05,
"loss": 3.7278,
"step": 9960
},
{
"epoch": 1.26806999746386,
"grad_norm": 1.0720863342285156,
"learning_rate": 6.472779128419974e-05,
"loss": 3.7316,
"step": 10000
},
{
"epoch": 1.26806999746386,
"eval_action_accuracy": 0.13653564453125,
"eval_loss": 3.7235488891601562,
"eval_runtime": 333.1976,
"eval_samples_per_second": 6.147,
"eval_steps_per_second": 0.384,
"step": 10000
},
{
"epoch": 1.2731422774537156,
"grad_norm": 1.3172401189804077,
"learning_rate": 6.446591815467238e-05,
"loss": 3.721,
"step": 10040
},
{
"epoch": 1.2782145574435708,
"grad_norm": 1.9167495965957642,
"learning_rate": 6.420361123971186e-05,
"loss": 3.7172,
"step": 10080
},
{
"epoch": 1.2832868374334263,
"grad_norm": 0.8781843781471252,
"learning_rate": 6.394087840504213e-05,
"loss": 3.7177,
"step": 10120
},
{
"epoch": 1.2883591174232818,
"grad_norm": 1.2992794513702393,
"learning_rate": 6.367772752915906e-05,
"loss": 3.711,
"step": 10160
},
{
"epoch": 1.2934313974131373,
"grad_norm": 1.4007689952850342,
"learning_rate": 6.341416650309422e-05,
"loss": 3.7257,
"step": 10200
},
{
"epoch": 1.2985036774029926,
"grad_norm": 1.4746208190917969,
"learning_rate": 6.31502032301782e-05,
"loss": 3.7169,
"step": 10240
},
{
"epoch": 1.303575957392848,
"grad_norm": 1.7429252862930298,
"learning_rate": 6.28858456258037e-05,
"loss": 3.7152,
"step": 10280
},
{
"epoch": 1.3086482373827035,
"grad_norm": 1.0173007249832153,
"learning_rate": 6.262110161718812e-05,
"loss": 3.7171,
"step": 10320
},
{
"epoch": 1.313720517372559,
"grad_norm": 1.7349942922592163,
"learning_rate": 6.235597914313582e-05,
"loss": 3.6991,
"step": 10360
},
{
"epoch": 1.3187927973624145,
"grad_norm": 1.29923415184021,
"learning_rate": 6.209048615380012e-05,
"loss": 3.696,
"step": 10400
},
{
"epoch": 1.3187927973624145,
"eval_action_accuracy": 0.1380615234375,
"eval_loss": 3.6903038024902344,
"eval_runtime": 303.8751,
"eval_samples_per_second": 6.74,
"eval_steps_per_second": 0.421,
"step": 10400
},
{
"epoch": 1.3238650773522698,
"grad_norm": 1.0686568021774292,
"learning_rate": 6.18246306104449e-05,
"loss": 3.6912,
"step": 10440
},
{
"epoch": 1.3289373573421253,
"grad_norm": 1.2505710124969482,
"learning_rate": 6.155842048520579e-05,
"loss": 3.6875,
"step": 10480
},
{
"epoch": 1.3340096373319807,
"grad_norm": 1.1971619129180908,
"learning_rate": 6.129186376085124e-05,
"loss": 3.6826,
"step": 10520
},
{
"epoch": 1.3390819173218362,
"grad_norm": 1.4397510290145874,
"learning_rate": 6.1024968430543e-05,
"loss": 3.684,
"step": 10560
},
{
"epoch": 1.3441541973116915,
"grad_norm": 0.9133204817771912,
"learning_rate": 6.0757742497596536e-05,
"loss": 3.701,
"step": 10600
},
{
"epoch": 1.349226477301547,
"grad_norm": 1.0655523538589478,
"learning_rate": 6.0490193975241026e-05,
"loss": 3.6814,
"step": 10640
},
{
"epoch": 1.3542987572914025,
"grad_norm": 1.0900158882141113,
"learning_rate": 6.0222330886379006e-05,
"loss": 3.6751,
"step": 10680
},
{
"epoch": 1.359371037281258,
"grad_norm": 0.8660451769828796,
"learning_rate": 5.995416126334583e-05,
"loss": 3.6737,
"step": 10720
},
{
"epoch": 1.3644433172711135,
"grad_norm": 1.2826400995254517,
"learning_rate": 5.9685693147668855e-05,
"loss": 3.6704,
"step": 10760
},
{
"epoch": 1.369515597260969,
"grad_norm": 0.9602544903755188,
"learning_rate": 5.941693458982619e-05,
"loss": 3.6622,
"step": 10800
},
{
"epoch": 1.369515597260969,
"eval_action_accuracy": 0.137451171875,
"eval_loss": 3.6570138931274414,
"eval_runtime": 330.4329,
"eval_samples_per_second": 6.198,
"eval_steps_per_second": 0.387,
"step": 10800
},
{
"epoch": 1.3745878772508242,
"grad_norm": 0.9139319658279419,
"learning_rate": 5.914789364900537e-05,
"loss": 3.6607,
"step": 10840
},
{
"epoch": 1.3796601572406797,
"grad_norm": 0.9442414045333862,
"learning_rate": 5.8878578392861717e-05,
"loss": 3.6577,
"step": 10880
},
{
"epoch": 1.3847324372305352,
"grad_norm": 1.427770733833313,
"learning_rate": 5.8608996897276316e-05,
"loss": 3.6576,
"step": 10920
},
{
"epoch": 1.3898047172203905,
"grad_norm": 0.9186159372329712,
"learning_rate": 5.8339157246113894e-05,
"loss": 3.6547,
"step": 10960
},
{
"epoch": 1.394876997210246,
"grad_norm": 0.8701043128967285,
"learning_rate": 5.806906753098047e-05,
"loss": 3.649,
"step": 11000
},
{
"epoch": 1.3999492772001014,
"grad_norm": 0.9969470500946045,
"learning_rate": 5.7798735850980624e-05,
"loss": 3.65,
"step": 11040
},
{
"epoch": 1.405021557189957,
"grad_norm": 1.0402882099151611,
"learning_rate": 5.752817031247465e-05,
"loss": 3.6432,
"step": 11080
},
{
"epoch": 1.4100938371798124,
"grad_norm": 0.9199869632720947,
"learning_rate": 5.725737902883556e-05,
"loss": 3.6453,
"step": 11120
},
{
"epoch": 1.415166117169668,
"grad_norm": 1.2621873617172241,
"learning_rate": 5.6986370120205635e-05,
"loss": 3.642,
"step": 11160
},
{
"epoch": 1.4202383971595232,
"grad_norm": 1.1047650575637817,
"learning_rate": 5.671515171325309e-05,
"loss": 3.6424,
"step": 11200
},
{
"epoch": 1.4202383971595232,
"eval_action_accuracy": 0.1365966796875,
"eval_loss": 3.631456136703491,
"eval_runtime": 291.4897,
"eval_samples_per_second": 7.026,
"eval_steps_per_second": 0.439,
"step": 11200
},
{
"epoch": 1.4253106771493786,
"grad_norm": 1.544075846672058,
"learning_rate": 5.6443731940928245e-05,
"loss": 3.6375,
"step": 11240
},
{
"epoch": 1.4303829571392341,
"grad_norm": 1.037488579750061,
"learning_rate": 5.617211894221978e-05,
"loss": 3.6328,
"step": 11280
},
{
"epoch": 1.4354552371290894,
"grad_norm": 1.0984132289886475,
"learning_rate": 5.5900320861910524e-05,
"loss": 3.6335,
"step": 11320
},
{
"epoch": 1.440527517118945,
"grad_norm": 0.8929972052574158,
"learning_rate": 5.562834585033333e-05,
"loss": 3.6311,
"step": 11360
},
{
"epoch": 1.4455997971088004,
"grad_norm": 1.275006890296936,
"learning_rate": 5.535620206312667e-05,
"loss": 3.6271,
"step": 11400
},
{
"epoch": 1.4506720770986559,
"grad_norm": 0.8102837204933167,
"learning_rate": 5.508389766098999e-05,
"loss": 3.6243,
"step": 11440
},
{
"epoch": 1.4557443570885114,
"grad_norm": 0.8364322185516357,
"learning_rate": 5.4811440809439075e-05,
"loss": 3.6244,
"step": 11480
},
{
"epoch": 1.4608166370783668,
"grad_norm": 1.1363658905029297,
"learning_rate": 5.453883967856119e-05,
"loss": 3.6205,
"step": 11520
},
{
"epoch": 1.465888917068222,
"grad_norm": 0.9556430578231812,
"learning_rate": 5.426610244277002e-05,
"loss": 3.6202,
"step": 11560
},
{
"epoch": 1.4709611970580776,
"grad_norm": 1.451727271080017,
"learning_rate": 5.399323728056059e-05,
"loss": 3.6111,
"step": 11600
},
{
"epoch": 1.4709611970580776,
"eval_action_accuracy": 0.138916015625,
"eval_loss": 3.6043567657470703,
"eval_runtime": 213.2403,
"eval_samples_per_second": 9.604,
"eval_steps_per_second": 0.6,
"step": 11600
},
{
"epoch": 1.476033477047933,
"grad_norm": 0.8800786733627319,
"learning_rate": 5.372025237426409e-05,
"loss": 3.611,
"step": 11640
},
{
"epoch": 1.4811057570377884,
"grad_norm": 1.2481964826583862,
"learning_rate": 5.3447155909802374e-05,
"loss": 3.6118,
"step": 11680
},
{
"epoch": 1.4861780370276438,
"grad_norm": 0.8092613816261292,
"learning_rate": 5.3173956076442544e-05,
"loss": 3.6074,
"step": 11720
},
{
"epoch": 1.4912503170174993,
"grad_norm": 0.9781074523925781,
"learning_rate": 5.2900661066551473e-05,
"loss": 3.6103,
"step": 11760
},
{
"epoch": 1.4963225970073548,
"grad_norm": 1.2322132587432861,
"learning_rate": 5.262727907535001e-05,
"loss": 3.6052,
"step": 11800
},
{
"epoch": 1.5013948769972103,
"grad_norm": 0.9783710837364197,
"learning_rate": 5.2353818300667276e-05,
"loss": 3.597,
"step": 11840
},
{
"epoch": 1.5064671569870658,
"grad_norm": 0.7827417850494385,
"learning_rate": 5.208028694269491e-05,
"loss": 3.6,
"step": 11880
},
{
"epoch": 1.5115394369769213,
"grad_norm": 1.0520368814468384,
"learning_rate": 5.180669320374108e-05,
"loss": 3.5987,
"step": 11920
},
{
"epoch": 1.5166117169667765,
"grad_norm": 0.9360650777816772,
"learning_rate": 5.153304528798449e-05,
"loss": 3.6012,
"step": 11960
},
{
"epoch": 1.521683996956632,
"grad_norm": 0.6922670602798462,
"learning_rate": 5.1259351401228575e-05,
"loss": 3.5966,
"step": 12000
},
{
"epoch": 1.521683996956632,
"eval_action_accuracy": 0.14019775390625,
"eval_loss": 3.5850491523742676,
"eval_runtime": 210.2848,
"eval_samples_per_second": 9.739,
"eval_steps_per_second": 0.609,
"step": 12000
},
{
"epoch": 1.5267562769464873,
"grad_norm": 0.9070268273353577,
"learning_rate": 5.0985619750655154e-05,
"loss": 3.5986,
"step": 12040
},
{
"epoch": 1.5318285569363428,
"grad_norm": 0.7900585532188416,
"learning_rate": 5.071185854457852e-05,
"loss": 3.5976,
"step": 12080
},
{
"epoch": 1.5369008369261983,
"grad_norm": 0.9794080257415771,
"learning_rate": 5.043807599219923e-05,
"loss": 3.5902,
"step": 12120
},
{
"epoch": 1.5419731169160538,
"grad_norm": 0.9867932200431824,
"learning_rate": 5.016428030335796e-05,
"loss": 3.5825,
"step": 12160
},
{
"epoch": 1.5470453969059093,
"grad_norm": 0.8320935368537903,
"learning_rate": 4.989047968828926e-05,
"loss": 3.5852,
"step": 12200
},
{
"epoch": 1.5521176768957647,
"grad_norm": 1.1050447225570679,
"learning_rate": 4.961668235737548e-05,
"loss": 3.5841,
"step": 12240
},
{
"epoch": 1.5571899568856202,
"grad_norm": 1.1726595163345337,
"learning_rate": 4.934289652090038e-05,
"loss": 3.5749,
"step": 12280
},
{
"epoch": 1.5622622368754755,
"grad_norm": 1.187261700630188,
"learning_rate": 4.906913038880315e-05,
"loss": 3.575,
"step": 12320
},
{
"epoch": 1.567334516865331,
"grad_norm": 0.957165002822876,
"learning_rate": 4.879539217043203e-05,
"loss": 3.5713,
"step": 12360
},
{
"epoch": 1.5724067968551863,
"grad_norm": 1.2354923486709595,
"learning_rate": 4.852169007429829e-05,
"loss": 3.5709,
"step": 12400
},
{
"epoch": 1.5724067968551863,
"eval_action_accuracy": 0.138671875,
"eval_loss": 3.5651776790618896,
"eval_runtime": 35.8538,
"eval_samples_per_second": 57.121,
"eval_steps_per_second": 3.57,
"step": 12400
},
{
"epoch": 1.5774790768450417,
"grad_norm": 0.7800391912460327,
"learning_rate": 4.824803230782992e-05,
"loss": 3.5684,
"step": 12440
},
{
"epoch": 1.5825513568348972,
"grad_norm": 0.8663883209228516,
"learning_rate": 4.79744270771257e-05,
"loss": 3.564,
"step": 12480
},
{
"epoch": 1.5876236368247527,
"grad_norm": 0.9180625081062317,
"learning_rate": 4.770088258670897e-05,
"loss": 3.5626,
"step": 12520
},
{
"epoch": 1.5926959168146082,
"grad_norm": 1.0298022031784058,
"learning_rate": 4.742740703928172e-05,
"loss": 3.5648,
"step": 12560
},
{
"epoch": 1.5977681968044637,
"grad_norm": 1.177050232887268,
"learning_rate": 4.7154008635478525e-05,
"loss": 3.562,
"step": 12600
},
{
"epoch": 1.6028404767943192,
"grad_norm": 0.8682090044021606,
"learning_rate": 4.688069557362071e-05,
"loss": 3.5629,
"step": 12640
},
{
"epoch": 1.6079127567841744,
"grad_norm": 1.0756064653396606,
"learning_rate": 4.660747604947043e-05,
"loss": 3.5634,
"step": 12680
},
{
"epoch": 1.61298503677403,
"grad_norm": 0.8048375844955444,
"learning_rate": 4.6334358255984985e-05,
"loss": 3.555,
"step": 12720
},
{
"epoch": 1.6180573167638852,
"grad_norm": 0.8976615071296692,
"learning_rate": 4.606135038307109e-05,
"loss": 3.558,
"step": 12760
},
{
"epoch": 1.6231295967537407,
"grad_norm": 0.868787944316864,
"learning_rate": 4.578846061733934e-05,
"loss": 3.5558,
"step": 12800
},
{
"epoch": 1.6231295967537407,
"eval_action_accuracy": 0.1395263671875,
"eval_loss": 3.5358495712280273,
"eval_runtime": 211.9369,
"eval_samples_per_second": 9.663,
"eval_steps_per_second": 0.604,
"step": 12800
},
{
"epoch": 1.6282018767435962,
"grad_norm": 0.7598440647125244,
"learning_rate": 4.551569714185862e-05,
"loss": 3.5466,
"step": 12840
},
{
"epoch": 1.6332741567334517,
"grad_norm": 1.093045711517334,
"learning_rate": 4.524306813591085e-05,
"loss": 3.5519,
"step": 12880
},
{
"epoch": 1.6383464367233072,
"grad_norm": 0.7852573394775391,
"learning_rate": 4.497058177474558e-05,
"loss": 3.5472,
"step": 12920
},
{
"epoch": 1.6434187167131626,
"grad_norm": 0.7886530756950378,
"learning_rate": 4.469824622933498e-05,
"loss": 3.545,
"step": 12960
},
{
"epoch": 1.6484909967030181,
"grad_norm": 1.0125157833099365,
"learning_rate": 4.4426069666128705e-05,
"loss": 3.5428,
"step": 13000
},
{
"epoch": 1.6535632766928734,
"grad_norm": 1.0761477947235107,
"learning_rate": 4.415406024680908e-05,
"loss": 3.5373,
"step": 13040
},
{
"epoch": 1.6586355566827289,
"grad_norm": 1.1242908239364624,
"learning_rate": 4.38822261280463e-05,
"loss": 3.5381,
"step": 13080
},
{
"epoch": 1.6637078366725844,
"grad_norm": 0.8285259008407593,
"learning_rate": 4.361057546125393e-05,
"loss": 3.5401,
"step": 13120
},
{
"epoch": 1.6687801166624396,
"grad_norm": 0.9908625483512878,
"learning_rate": 4.333911639234435e-05,
"loss": 3.5299,
"step": 13160
},
{
"epoch": 1.6738523966522951,
"grad_norm": 0.7884112000465393,
"learning_rate": 4.3067857061484574e-05,
"loss": 3.5271,
"step": 13200
},
{
"epoch": 1.6738523966522951,
"eval_action_accuracy": 0.13958740234375,
"eval_loss": 3.521883487701416,
"eval_runtime": 277.2848,
"eval_samples_per_second": 7.386,
"eval_steps_per_second": 0.462,
"step": 13200
},
{
"epoch": 1.6789246766421506,
"grad_norm": 1.062119483947754,
"learning_rate": 4.279680560285212e-05,
"loss": 3.5305,
"step": 13240
},
{
"epoch": 1.683996956632006,
"grad_norm": 0.9488341212272644,
"learning_rate": 4.2525970144391156e-05,
"loss": 3.5266,
"step": 13280
},
{
"epoch": 1.6890692366218616,
"grad_norm": 0.7958804965019226,
"learning_rate": 4.225535880756858e-05,
"loss": 3.5247,
"step": 13320
},
{
"epoch": 1.694141516611717,
"grad_norm": 0.7736839056015015,
"learning_rate": 4.198497970713079e-05,
"loss": 3.5245,
"step": 13360
},
{
"epoch": 1.6992137966015726,
"grad_norm": 1.1035319566726685,
"learning_rate": 4.171484095086002e-05,
"loss": 3.5214,
"step": 13400
},
{
"epoch": 1.7042860765914278,
"grad_norm": 0.9785400629043579,
"learning_rate": 4.144495063933148e-05,
"loss": 3.5174,
"step": 13440
},
{
"epoch": 1.7093583565812833,
"grad_norm": 0.8356533646583557,
"learning_rate": 4.117531686567028e-05,
"loss": 3.514,
"step": 13480
},
{
"epoch": 1.7144306365711386,
"grad_norm": 0.9696844220161438,
"learning_rate": 4.090594771530882e-05,
"loss": 3.5175,
"step": 13520
},
{
"epoch": 1.719502916560994,
"grad_norm": 0.9695699214935303,
"learning_rate": 4.0636851265744305e-05,
"loss": 3.5108,
"step": 13560
},
{
"epoch": 1.7245751965508496,
"grad_norm": 0.9095252156257629,
"learning_rate": 4.036803558629656e-05,
"loss": 3.513,
"step": 13600
},
{
"epoch": 1.7245751965508496,
"eval_action_accuracy": 0.14178466796875,
"eval_loss": 3.5030040740966797,
"eval_runtime": 228.2505,
"eval_samples_per_second": 8.973,
"eval_steps_per_second": 0.561,
"step": 13600
},
{
"epoch": 1.729647476540705,
"grad_norm": 0.9833335876464844,
"learning_rate": 4.0099508737866006e-05,
"loss": 3.5113,
"step": 13640
},
{
"epoch": 1.7347197565305605,
"grad_norm": 0.8735532164573669,
"learning_rate": 3.983127877269199e-05,
"loss": 3.5082,
"step": 13680
},
{
"epoch": 1.739792036520416,
"grad_norm": 0.843338668346405,
"learning_rate": 3.9563353734111285e-05,
"loss": 3.5023,
"step": 13720
},
{
"epoch": 1.7448643165102715,
"grad_norm": 0.8809418678283691,
"learning_rate": 3.929574165631696e-05,
"loss": 3.5043,
"step": 13760
},
{
"epoch": 1.7499365965001268,
"grad_norm": 1.0490487813949585,
"learning_rate": 3.9028450564117354e-05,
"loss": 3.5026,
"step": 13800
},
{
"epoch": 1.7550088764899823,
"grad_norm": 0.9039780497550964,
"learning_rate": 3.8761488472695544e-05,
"loss": 3.5008,
"step": 13840
},
{
"epoch": 1.7600811564798375,
"grad_norm": 1.1890782117843628,
"learning_rate": 3.849486338736893e-05,
"loss": 3.502,
"step": 13880
},
{
"epoch": 1.765153436469693,
"grad_norm": 0.8219150900840759,
"learning_rate": 3.822858330334923e-05,
"loss": 3.4983,
"step": 13920
},
{
"epoch": 1.7702257164595485,
"grad_norm": 0.8659482002258301,
"learning_rate": 3.796265620550267e-05,
"loss": 3.4919,
"step": 13960
},
{
"epoch": 1.775297996449404,
"grad_norm": 0.8262388706207275,
"learning_rate": 3.769709006811064e-05,
"loss": 3.4927,
"step": 14000
},
{
"epoch": 1.775297996449404,
"eval_action_accuracy": 0.140380859375,
"eval_loss": 3.480764389038086,
"eval_runtime": 201.5749,
"eval_samples_per_second": 10.16,
"eval_steps_per_second": 0.635,
"step": 14000
},
{
"epoch": 1.7803702764392595,
"grad_norm": 0.8509693741798401,
"learning_rate": 3.7431892854630414e-05,
"loss": 3.4891,
"step": 14040
},
{
"epoch": 1.785442556429115,
"grad_norm": 0.9192864894866943,
"learning_rate": 3.7167072517456536e-05,
"loss": 3.4856,
"step": 14080
},
{
"epoch": 1.7905148364189705,
"grad_norm": 0.8431236147880554,
"learning_rate": 3.6902636997682216e-05,
"loss": 3.481,
"step": 14120
},
{
"epoch": 1.7955871164088257,
"grad_norm": 0.8478639721870422,
"learning_rate": 3.663859422486129e-05,
"loss": 3.4844,
"step": 14160
},
{
"epoch": 1.8006593963986812,
"grad_norm": 0.8935319781303406,
"learning_rate": 3.637495211677035e-05,
"loss": 3.4808,
"step": 14200
},
{
"epoch": 1.8057316763885365,
"grad_norm": 0.9666652679443359,
"learning_rate": 3.611171857917144e-05,
"loss": 3.4818,
"step": 14240
},
{
"epoch": 1.810803956378392,
"grad_norm": 0.7934905886650085,
"learning_rate": 3.5848901505574814e-05,
"loss": 3.4783,
"step": 14280
},
{
"epoch": 1.8158762363682475,
"grad_norm": 0.8093165755271912,
"learning_rate": 3.558650877700245e-05,
"loss": 3.4764,
"step": 14320
},
{
"epoch": 1.820948516358103,
"grad_norm": 0.9005354046821594,
"learning_rate": 3.532454826175151e-05,
"loss": 3.4735,
"step": 14360
},
{
"epoch": 1.8260207963479584,
"grad_norm": 0.9937548637390137,
"learning_rate": 3.506302781515859e-05,
"loss": 3.4693,
"step": 14400
},
{
"epoch": 1.8260207963479584,
"eval_action_accuracy": 0.14202880859375,
"eval_loss": 3.4609575271606445,
"eval_runtime": 282.6532,
"eval_samples_per_second": 7.246,
"eval_steps_per_second": 0.453,
"step": 14400
},
{
"epoch": 1.831093076337814,
"grad_norm": 1.0596122741699219,
"learning_rate": 3.4801955279363955e-05,
"loss": 3.4687,
"step": 14440
},
{
"epoch": 1.8361653563276694,
"grad_norm": 1.0068224668502808,
"learning_rate": 3.4541338483076644e-05,
"loss": 3.4676,
"step": 14480
},
{
"epoch": 1.8412376363175247,
"grad_norm": 1.050114393234253,
"learning_rate": 3.428118524133941e-05,
"loss": 3.4682,
"step": 14520
},
{
"epoch": 1.8463099163073802,
"grad_norm": 0.9435964226722717,
"learning_rate": 3.402150335529464e-05,
"loss": 3.4658,
"step": 14560
},
{
"epoch": 1.8513821962972357,
"grad_norm": 0.9514583945274353,
"learning_rate": 3.3762300611950236e-05,
"loss": 3.4611,
"step": 14600
},
{
"epoch": 1.856454476287091,
"grad_norm": 0.9340800642967224,
"learning_rate": 3.350358478394626e-05,
"loss": 3.4592,
"step": 14640
},
{
"epoch": 1.8615267562769464,
"grad_norm": 0.8389933109283447,
"learning_rate": 3.324536362932166e-05,
"loss": 3.4604,
"step": 14680
},
{
"epoch": 1.866599036266802,
"grad_norm": 0.856787919998169,
"learning_rate": 3.29876448912819e-05,
"loss": 3.4564,
"step": 14720
},
{
"epoch": 1.8716713162566574,
"grad_norm": 0.8933659791946411,
"learning_rate": 3.2730436297966485e-05,
"loss": 3.4554,
"step": 14760
},
{
"epoch": 1.8767435962465129,
"grad_norm": 0.8152870535850525,
"learning_rate": 3.247374556221745e-05,
"loss": 3.4543,
"step": 14800
},
{
"epoch": 1.8767435962465129,
"eval_action_accuracy": 0.1414794921875,
"eval_loss": 3.439603805541992,
"eval_runtime": 265.6804,
"eval_samples_per_second": 7.709,
"eval_steps_per_second": 0.482,
"step": 14800
},
{
"epoch": 1.8818158762363684,
"grad_norm": 0.8039724826812744,
"learning_rate": 3.2217580381347914e-05,
"loss": 3.4509,
"step": 14840
},
{
"epoch": 1.8868881562262236,
"grad_norm": 1.140610933303833,
"learning_rate": 3.1961948436911386e-05,
"loss": 3.4495,
"step": 14880
},
{
"epoch": 1.8919604362160791,
"grad_norm": 1.1068456172943115,
"learning_rate": 3.17068573944713e-05,
"loss": 3.4469,
"step": 14920
},
{
"epoch": 1.8970327162059346,
"grad_norm": 0.905805766582489,
"learning_rate": 3.1452314903371286e-05,
"loss": 3.4476,
"step": 14960
},
{
"epoch": 1.9021049961957899,
"grad_norm": 0.8861454725265503,
"learning_rate": 3.1198328596505646e-05,
"loss": 3.4424,
"step": 15000
},
{
"epoch": 1.9071772761856454,
"grad_norm": 0.916279137134552,
"learning_rate": 3.09449060900906e-05,
"loss": 3.4418,
"step": 15040
},
{
"epoch": 1.9122495561755009,
"grad_norm": 0.9068135619163513,
"learning_rate": 3.069205498343579e-05,
"loss": 3.4389,
"step": 15080
},
{
"epoch": 1.9173218361653563,
"grad_norm": 0.9586207866668701,
"learning_rate": 3.0439782858716527e-05,
"loss": 3.4393,
"step": 15120
},
{
"epoch": 1.9223941161552118,
"grad_norm": 0.9862120747566223,
"learning_rate": 3.0188097280746263e-05,
"loss": 3.4332,
"step": 15160
},
{
"epoch": 1.9274663961450673,
"grad_norm": 0.8921750783920288,
"learning_rate": 2.9937005796749905e-05,
"loss": 3.4347,
"step": 15200
},
{
"epoch": 1.9274663961450673,
"eval_action_accuracy": 0.1422119140625,
"eval_loss": 3.418545722961426,
"eval_runtime": 269.9638,
"eval_samples_per_second": 7.586,
"eval_steps_per_second": 0.474,
"step": 15200
},
{
"epoch": 1.9325386761349228,
"grad_norm": 0.9488757848739624,
"learning_rate": 2.9686515936137393e-05,
"loss": 3.4321,
"step": 15240
},
{
"epoch": 1.937610956124778,
"grad_norm": 0.8360820412635803,
"learning_rate": 2.9436635210277987e-05,
"loss": 3.4293,
"step": 15280
},
{
"epoch": 1.9426832361146336,
"grad_norm": 0.8490392565727234,
"learning_rate": 2.9187371112274958e-05,
"loss": 3.4273,
"step": 15320
},
{
"epoch": 1.9477555161044888,
"grad_norm": 0.8668012619018555,
"learning_rate": 2.893873111674097e-05,
"loss": 3.4276,
"step": 15360
},
{
"epoch": 1.9528277960943443,
"grad_norm": 0.933603823184967,
"learning_rate": 2.869072267957385e-05,
"loss": 3.4244,
"step": 15400
},
{
"epoch": 1.9579000760841998,
"grad_norm": 0.9964644908905029,
"learning_rate": 2.8443353237733126e-05,
"loss": 3.422,
"step": 15440
},
{
"epoch": 1.9629723560740553,
"grad_norm": 1.1415656805038452,
"learning_rate": 2.8196630209016878e-05,
"loss": 3.4252,
"step": 15480
},
{
"epoch": 1.9680446360639108,
"grad_norm": 1.0899465084075928,
"learning_rate": 2.7950560991839437e-05,
"loss": 3.4179,
"step": 15520
},
{
"epoch": 1.9731169160537663,
"grad_norm": 1.0375711917877197,
"learning_rate": 2.770515296500944e-05,
"loss": 3.4191,
"step": 15560
},
{
"epoch": 1.9781891960436218,
"grad_norm": 0.9184868335723877,
"learning_rate": 2.7460413487508635e-05,
"loss": 3.415,
"step": 15600
},
{
"epoch": 1.9781891960436218,
"eval_action_accuracy": 0.14495849609375,
"eval_loss": 3.3952927589416504,
"eval_runtime": 265.4459,
"eval_samples_per_second": 7.715,
"eval_steps_per_second": 0.482,
"step": 15600
},
{
"epoch": 1.983261476033477,
"grad_norm": 0.939353883266449,
"learning_rate": 2.7216349898271098e-05,
"loss": 3.4138,
"step": 15640
},
{
"epoch": 1.9883337560233325,
"grad_norm": 0.9041317701339722,
"learning_rate": 2.69729695159633e-05,
"loss": 3.409,
"step": 15680
},
{
"epoch": 1.9934060360131878,
"grad_norm": 0.8650372624397278,
"learning_rate": 2.6730279638764523e-05,
"loss": 3.4096,
"step": 15720
},
{
"epoch": 1.9984783160030433,
"grad_norm": 0.9451566338539124,
"learning_rate": 2.648828754414811e-05,
"loss": 3.4077,
"step": 15760
},
{
"epoch": 2.0035505959928988,
"grad_norm": 1.003873586654663,
"learning_rate": 2.624700048866317e-05,
"loss": 3.3977,
"step": 15800
},
{
"epoch": 2.0086228759827542,
"grad_norm": 0.9338696599006653,
"learning_rate": 2.600642570771703e-05,
"loss": 3.3862,
"step": 15840
},
{
"epoch": 2.0136951559726097,
"grad_norm": 1.1462304592132568,
"learning_rate": 2.5766570415358184e-05,
"loss": 3.3863,
"step": 15880
},
{
"epoch": 2.018767435962465,
"grad_norm": 0.9377365708351135,
"learning_rate": 2.5527441804060087e-05,
"loss": 3.3864,
"step": 15920
},
{
"epoch": 2.0238397159523207,
"grad_norm": 1.0909967422485352,
"learning_rate": 2.5289047044505347e-05,
"loss": 3.3844,
"step": 15960
},
{
"epoch": 2.028911995942176,
"grad_norm": 0.924224317073822,
"learning_rate": 2.505139328537082e-05,
"loss": 3.3818,
"step": 16000
},
{
"epoch": 2.028911995942176,
"eval_action_accuracy": 0.144775390625,
"eval_loss": 3.3769216537475586,
"eval_runtime": 29.3615,
"eval_samples_per_second": 69.751,
"eval_steps_per_second": 4.359,
"step": 16000
},
{
"epoch": 2.0339842759320312,
"grad_norm": 1.0833228826522827,
"learning_rate": 2.4814487653113172e-05,
"loss": 3.3775,
"step": 16040
},
{
"epoch": 2.0390565559218867,
"grad_norm": 1.1306848526000977,
"learning_rate": 2.4578337251755213e-05,
"loss": 3.375,
"step": 16080
},
{
"epoch": 2.044128835911742,
"grad_norm": 0.9547039866447449,
"learning_rate": 2.43429491626728e-05,
"loss": 3.3752,
"step": 16120
},
{
"epoch": 2.0492011159015977,
"grad_norm": 0.886420726776123,
"learning_rate": 2.4108330444382632e-05,
"loss": 3.3733,
"step": 16160
},
{
"epoch": 2.054273395891453,
"grad_norm": 0.9832823276519775,
"learning_rate": 2.387448813233041e-05,
"loss": 3.3715,
"step": 16200
},
{
"epoch": 2.0593456758813087,
"grad_norm": 0.9641093015670776,
"learning_rate": 2.364142923868002e-05,
"loss": 3.3693,
"step": 16240
},
{
"epoch": 2.064417955871164,
"grad_norm": 0.9319785833358765,
"learning_rate": 2.3409160752103183e-05,
"loss": 3.3697,
"step": 16280
},
{
"epoch": 2.0694902358610197,
"grad_norm": 0.9033721089363098,
"learning_rate": 2.3177689637569932e-05,
"loss": 3.3655,
"step": 16320
},
{
"epoch": 2.074562515850875,
"grad_norm": 1.0644468069076538,
"learning_rate": 2.294702283613966e-05,
"loss": 3.3684,
"step": 16360
},
{
"epoch": 2.0796347958407306,
"grad_norm": 0.9422540664672852,
"learning_rate": 2.271716726475312e-05,
"loss": 3.3596,
"step": 16400
},
{
"epoch": 2.0796347958407306,
"eval_action_accuracy": 0.145751953125,
"eval_loss": 3.356306791305542,
"eval_runtime": 33.337,
"eval_samples_per_second": 61.433,
"eval_steps_per_second": 3.84,
"step": 16400
},
{
"epoch": 2.0847070758305857,
"grad_norm": 0.9470515847206116,
"learning_rate": 2.2488129816024882e-05,
"loss": 3.3596,
"step": 16440
},
{
"epoch": 2.089779355820441,
"grad_norm": 0.9862939119338989,
"learning_rate": 2.225991735803673e-05,
"loss": 3.3618,
"step": 16480
},
{
"epoch": 2.0948516358102967,
"grad_norm": 0.9781101942062378,
"learning_rate": 2.2032536734131693e-05,
"loss": 3.3537,
"step": 16520
},
{
"epoch": 2.099923915800152,
"grad_norm": 0.9585676789283752,
"learning_rate": 2.1805994762708816e-05,
"loss": 3.355,
"step": 16560
},
{
"epoch": 2.1049961957900076,
"grad_norm": 1.042656421661377,
"learning_rate": 2.1580298237018665e-05,
"loss": 3.3558,
"step": 16600
},
{
"epoch": 2.110068475779863,
"grad_norm": 1.2123557329177856,
"learning_rate": 2.135545392495974e-05,
"loss": 3.3507,
"step": 16640
},
{
"epoch": 2.1151407557697186,
"grad_norm": 1.0929179191589355,
"learning_rate": 2.1131468568875356e-05,
"loss": 3.3528,
"step": 16680
},
{
"epoch": 2.120213035759574,
"grad_norm": 1.0415383577346802,
"learning_rate": 2.0908348885351643e-05,
"loss": 3.3499,
"step": 16720
},
{
"epoch": 2.125285315749429,
"grad_norm": 0.9704334139823914,
"learning_rate": 2.0686101565016002e-05,
"loss": 3.3477,
"step": 16760
},
{
"epoch": 2.1303575957392846,
"grad_norm": 0.930077075958252,
"learning_rate": 2.046473327233656e-05,
"loss": 3.3447,
"step": 16800
},
{
"epoch": 2.1303575957392846,
"eval_action_accuracy": 0.145751953125,
"eval_loss": 3.337432861328125,
"eval_runtime": 207.6445,
"eval_samples_per_second": 9.863,
"eval_steps_per_second": 0.616,
"step": 16800
},
{
"epoch": 2.13542987572914,
"grad_norm": 1.1401617527008057,
"learning_rate": 2.0244250645422225e-05,
"loss": 3.3486,
"step": 16840
},
{
"epoch": 2.1405021557189956,
"grad_norm": 1.0366841554641724,
"learning_rate": 2.0024660295823765e-05,
"loss": 3.3392,
"step": 16880
},
{
"epoch": 2.145574435708851,
"grad_norm": 1.0000267028808594,
"learning_rate": 1.9805968808335412e-05,
"loss": 3.34,
"step": 16920
},
{
"epoch": 2.1506467156987066,
"grad_norm": 1.0371631383895874,
"learning_rate": 1.9588182740797527e-05,
"loss": 3.3381,
"step": 16960
},
{
"epoch": 2.155718995688562,
"grad_norm": 1.0280026197433472,
"learning_rate": 1.9371308623899882e-05,
"loss": 3.337,
"step": 17000
},
{
"epoch": 2.1607912756784176,
"grad_norm": 1.0037304162979126,
"learning_rate": 1.9155352960985833e-05,
"loss": 3.3357,
"step": 17040
},
{
"epoch": 2.165863555668273,
"grad_norm": 1.1836119890213013,
"learning_rate": 1.89403222278573e-05,
"loss": 3.3354,
"step": 17080
},
{
"epoch": 2.1709358356581285,
"grad_norm": 1.016123652458191,
"learning_rate": 1.8726222872580624e-05,
"loss": 3.3296,
"step": 17120
},
{
"epoch": 2.1760081156479836,
"grad_norm": 1.043012261390686,
"learning_rate": 1.8513061315293145e-05,
"loss": 3.3346,
"step": 17160
},
{
"epoch": 2.181080395637839,
"grad_norm": 0.9651705026626587,
"learning_rate": 1.8300843948010744e-05,
"loss": 3.3248,
"step": 17200
},
{
"epoch": 2.181080395637839,
"eval_action_accuracy": 0.14715576171875,
"eval_loss": 3.3139657974243164,
"eval_runtime": 283.1082,
"eval_samples_per_second": 7.234,
"eval_steps_per_second": 0.452,
"step": 17200
},
{
"epoch": 2.1861526756276946,
"grad_norm": 1.0534641742706299,
"learning_rate": 1.808957713443613e-05,
"loss": 3.3245,
"step": 17240
},
{
"epoch": 2.19122495561755,
"grad_norm": 1.0732275247573853,
"learning_rate": 1.787926720976805e-05,
"loss": 3.3223,
"step": 17280
},
{
"epoch": 2.1962972356074055,
"grad_norm": 0.9667773842811584,
"learning_rate": 1.766992048051126e-05,
"loss": 3.3227,
"step": 17320
},
{
"epoch": 2.201369515597261,
"grad_norm": 1.1117255687713623,
"learning_rate": 1.746154322428742e-05,
"loss": 3.3235,
"step": 17360
},
{
"epoch": 2.2064417955871165,
"grad_norm": 1.161832571029663,
"learning_rate": 1.7254141689646948e-05,
"loss": 3.3192,
"step": 17400
},
{
"epoch": 2.211514075576972,
"grad_norm": 1.0808424949645996,
"learning_rate": 1.7047722095881503e-05,
"loss": 3.3177,
"step": 17440
},
{
"epoch": 2.2165863555668275,
"grad_norm": 0.9801045656204224,
"learning_rate": 1.68422906328376e-05,
"loss": 3.3169,
"step": 17480
},
{
"epoch": 2.2216586355566825,
"grad_norm": 1.0057750940322876,
"learning_rate": 1.663785346073094e-05,
"loss": 3.3187,
"step": 17520
},
{
"epoch": 2.226730915546538,
"grad_norm": 1.193770170211792,
"learning_rate": 1.6434416709961726e-05,
"loss": 3.3113,
"step": 17560
},
{
"epoch": 2.2318031955363935,
"grad_norm": 1.0598925352096558,
"learning_rate": 1.6231986480930743e-05,
"loss": 3.3096,
"step": 17600
},
{
"epoch": 2.2318031955363935,
"eval_action_accuracy": 0.1495361328125,
"eval_loss": 3.299508571624756,
"eval_runtime": 317.2452,
"eval_samples_per_second": 6.456,
"eval_steps_per_second": 0.403,
"step": 17600
},
{
"epoch": 2.236875475526249,
"grad_norm": 1.039414644241333,
"learning_rate": 1.6030568843856568e-05,
"loss": 3.3094,
"step": 17640
},
{
"epoch": 2.2419477555161045,
"grad_norm": 1.2232924699783325,
"learning_rate": 1.58301698385934e-05,
"loss": 3.3062,
"step": 17680
},
{
"epoch": 2.24702003550596,
"grad_norm": 0.9876456260681152,
"learning_rate": 1.5630795474450055e-05,
"loss": 3.3048,
"step": 17720
},
{
"epoch": 2.2520923154958155,
"grad_norm": 1.1325111389160156,
"learning_rate": 1.5432451730009705e-05,
"loss": 3.3004,
"step": 17760
},
{
"epoch": 2.257164595485671,
"grad_norm": 1.1618984937667847,
"learning_rate": 1.5235144552950643e-05,
"loss": 3.3,
"step": 17800
},
{
"epoch": 2.2622368754755264,
"grad_norm": 1.0500718355178833,
"learning_rate": 1.5038879859867855e-05,
"loss": 3.3005,
"step": 17840
},
{
"epoch": 2.267309155465382,
"grad_norm": 1.2077546119689941,
"learning_rate": 1.484366353609571e-05,
"loss": 3.3015,
"step": 17880
},
{
"epoch": 2.272381435455237,
"grad_norm": 1.0480679273605347,
"learning_rate": 1.4649501435531365e-05,
"loss": 3.2968,
"step": 17920
},
{
"epoch": 2.2774537154450925,
"grad_norm": 1.0955984592437744,
"learning_rate": 1.4456399380459324e-05,
"loss": 3.2985,
"step": 17960
},
{
"epoch": 2.282525995434948,
"grad_norm": 1.0870320796966553,
"learning_rate": 1.4264363161376787e-05,
"loss": 3.2914,
"step": 18000
},
{
"epoch": 2.282525995434948,
"eval_action_accuracy": 0.1488037109375,
"eval_loss": 3.281026840209961,
"eval_runtime": 256.6774,
"eval_samples_per_second": 7.979,
"eval_steps_per_second": 0.499,
"step": 18000
},
{
"epoch": 2.2875982754248034,
"grad_norm": 1.224732756614685,
"learning_rate": 1.4073398536820049e-05,
"loss": 3.294,
"step": 18040
},
{
"epoch": 2.292670555414659,
"grad_norm": 1.1285483837127686,
"learning_rate": 1.3883511233191748e-05,
"loss": 3.2926,
"step": 18080
},
{
"epoch": 2.2977428354045144,
"grad_norm": 1.109192132949829,
"learning_rate": 1.3694706944589275e-05,
"loss": 3.2901,
"step": 18120
},
{
"epoch": 2.30281511539437,
"grad_norm": 1.0854719877243042,
"learning_rate": 1.3506991332633879e-05,
"loss": 3.2883,
"step": 18160
},
{
"epoch": 2.3078873953842254,
"grad_norm": 1.1602346897125244,
"learning_rate": 1.332037002630101e-05,
"loss": 3.2878,
"step": 18200
},
{
"epoch": 2.3129596753740804,
"grad_norm": 1.1119505167007446,
"learning_rate": 1.3134848621751483e-05,
"loss": 3.2878,
"step": 18240
},
{
"epoch": 2.318031955363936,
"grad_norm": 1.101004719734192,
"learning_rate": 1.295043268216367e-05,
"loss": 3.2847,
"step": 18280
},
{
"epoch": 2.3231042353537914,
"grad_norm": 1.1685012578964233,
"learning_rate": 1.2767127737566631e-05,
"loss": 3.2793,
"step": 18320
},
{
"epoch": 2.328176515343647,
"grad_norm": 1.1972112655639648,
"learning_rate": 1.2584939284674396e-05,
"loss": 3.2767,
"step": 18360
},
{
"epoch": 2.3332487953335024,
"grad_norm": 1.0951577425003052,
"learning_rate": 1.2403872786721005e-05,
"loss": 3.2748,
"step": 18400
},
{
"epoch": 2.3332487953335024,
"eval_action_accuracy": 0.14862060546875,
"eval_loss": 3.2656493186950684,
"eval_runtime": 340.411,
"eval_samples_per_second": 6.016,
"eval_steps_per_second": 0.376,
"step": 18400
},
{
"epoch": 2.338321075323358,
"grad_norm": 1.3195337057113647,
"learning_rate": 1.2223933673296795e-05,
"loss": 3.2745,
"step": 18440
},
{
"epoch": 2.3433933553132134,
"grad_norm": 1.189841389656067,
"learning_rate": 1.204512734018553e-05,
"loss": 3.2712,
"step": 18480
},
{
"epoch": 2.348465635303069,
"grad_norm": 1.161684513092041,
"learning_rate": 1.186745914920262e-05,
"loss": 3.2738,
"step": 18520
},
{
"epoch": 2.3535379152929243,
"grad_norm": 1.2144806385040283,
"learning_rate": 1.1690934428034283e-05,
"loss": 3.2704,
"step": 18560
},
{
"epoch": 2.35861019528278,
"grad_norm": 1.2111247777938843,
"learning_rate": 1.1515558470077875e-05,
"loss": 3.2673,
"step": 18600
},
{
"epoch": 2.363682475272635,
"grad_norm": 1.0861356258392334,
"learning_rate": 1.1341336534283075e-05,
"loss": 3.2668,
"step": 18640
},
{
"epoch": 2.3687547552624904,
"grad_norm": 1.1214416027069092,
"learning_rate": 1.1168273844994248e-05,
"loss": 3.2679,
"step": 18680
},
{
"epoch": 2.373827035252346,
"grad_norm": 1.1087627410888672,
"learning_rate": 1.099637559179375e-05,
"loss": 3.2662,
"step": 18720
},
{
"epoch": 2.3788993152422013,
"grad_norm": 1.155854344367981,
"learning_rate": 1.082564692934634e-05,
"loss": 3.2599,
"step": 18760
},
{
"epoch": 2.383971595232057,
"grad_norm": 1.2042206525802612,
"learning_rate": 1.0656092977244536e-05,
"loss": 3.2585,
"step": 18800
},
{
"epoch": 2.383971595232057,
"eval_action_accuracy": 0.14996337890625,
"eval_loss": 3.2426531314849854,
"eval_runtime": 306.3779,
"eval_samples_per_second": 6.685,
"eval_steps_per_second": 0.418,
"step": 18800
},
{
"epoch": 2.3890438752219123,
"grad_norm": 1.1641093492507935,
"learning_rate": 1.0487718819855196e-05,
"loss": 3.2621,
"step": 18840
},
{
"epoch": 2.394116155211768,
"grad_norm": 1.1490825414657593,
"learning_rate": 1.032052950616697e-05,
"loss": 3.2599,
"step": 18880
},
{
"epoch": 2.3991884352016233,
"grad_norm": 1.1739047765731812,
"learning_rate": 1.0154530049638954e-05,
"loss": 3.255,
"step": 18920
},
{
"epoch": 2.4042607151914783,
"grad_norm": 1.123138189315796,
"learning_rate": 9.989725428050328e-06,
"loss": 3.2555,
"step": 18960
},
{
"epoch": 2.409332995181334,
"grad_norm": 1.2329597473144531,
"learning_rate": 9.8261205833511e-06,
"loss": 3.2542,
"step": 19000
},
{
"epoch": 2.4144052751711893,
"grad_norm": 1.2946821451187134,
"learning_rate": 9.66372042151386e-06,
"loss": 3.2542,
"step": 19040
},
{
"epoch": 2.419477555161045,
"grad_norm": 1.2074557542800903,
"learning_rate": 9.50252981238678e-06,
"loss": 3.2498,
"step": 19080
},
{
"epoch": 2.4245498351509003,
"grad_norm": 1.1963822841644287,
"learning_rate": 9.342553589547438e-06,
"loss": 3.2502,
"step": 19120
},
{
"epoch": 2.4296221151407558,
"grad_norm": 1.121868371963501,
"learning_rate": 9.183796550158014e-06,
"loss": 3.2495,
"step": 19160
},
{
"epoch": 2.4346943951306113,
"grad_norm": 1.1852304935455322,
"learning_rate": 9.026263454821343e-06,
"loss": 3.2449,
"step": 19200
},
{
"epoch": 2.4346943951306113,
"eval_action_accuracy": 0.14996337890625,
"eval_loss": 3.2290799617767334,
"eval_runtime": 225.6468,
"eval_samples_per_second": 9.076,
"eval_steps_per_second": 0.567,
"step": 19200
},
{
"epoch": 2.4397666751204667,
"grad_norm": 1.200818419456482,
"learning_rate": 8.869959027438219e-06,
"loss": 3.245,
"step": 19240
},
{
"epoch": 2.4448389551103222,
"grad_norm": 1.1980122327804565,
"learning_rate": 8.714887955065665e-06,
"loss": 3.2476,
"step": 19280
},
{
"epoch": 2.4499112351001777,
"grad_norm": 1.1194685697555542,
"learning_rate": 8.561054887776498e-06,
"loss": 3.2424,
"step": 19320
},
{
"epoch": 2.454983515090033,
"grad_norm": 1.2032148838043213,
"learning_rate": 8.408464438519759e-06,
"loss": 3.2398,
"step": 19360
},
{
"epoch": 2.4600557950798883,
"grad_norm": 1.1496555805206299,
"learning_rate": 8.257121182982508e-06,
"loss": 3.2414,
"step": 19400
},
{
"epoch": 2.4651280750697437,
"grad_norm": 1.1436223983764648,
"learning_rate": 8.107029659452498e-06,
"loss": 3.2359,
"step": 19440
},
{
"epoch": 2.4702003550595992,
"grad_norm": 1.1477023363113403,
"learning_rate": 7.958194368682215e-06,
"loss": 3.2399,
"step": 19480
},
{
"epoch": 2.4752726350494547,
"grad_norm": 1.2040438652038574,
"learning_rate": 7.810619773753775e-06,
"loss": 3.2335,
"step": 19520
},
{
"epoch": 2.48034491503931,
"grad_norm": 1.1406538486480713,
"learning_rate": 7.664310299945227e-06,
"loss": 3.2381,
"step": 19560
},
{
"epoch": 2.4854171950291657,
"grad_norm": 1.1902375221252441,
"learning_rate": 7.519270334597717e-06,
"loss": 3.2305,
"step": 19600
},
{
"epoch": 2.4854171950291657,
"eval_action_accuracy": 0.1500244140625,
"eval_loss": 3.2100226879119873,
"eval_runtime": 280.9496,
"eval_samples_per_second": 7.29,
"eval_steps_per_second": 0.456,
"step": 19600
},
{
"epoch": 2.490489475019021,
"grad_norm": 1.202954649925232,
"learning_rate": 7.375504226984059e-06,
"loss": 3.2296,
"step": 19640
},
{
"epoch": 2.4955617550088767,
"grad_norm": 1.4342540502548218,
"learning_rate": 7.233016288178213e-06,
"loss": 3.2288,
"step": 19680
},
{
"epoch": 2.5006340349987317,
"grad_norm": 1.2139471769332886,
"learning_rate": 7.091810790926068e-06,
"loss": 3.2284,
"step": 19720
},
{
"epoch": 2.505706314988587,
"grad_norm": 1.219605565071106,
"learning_rate": 6.9518919695172935e-06,
"loss": 3.2277,
"step": 19760
},
{
"epoch": 2.5107785949784427,
"grad_norm": 1.2192333936691284,
"learning_rate": 6.813264019658377e-06,
"loss": 3.2288,
"step": 19800
},
{
"epoch": 2.515850874968298,
"grad_norm": 1.1787699460983276,
"learning_rate": 6.675931098346783e-06,
"loss": 3.223,
"step": 19840
},
{
"epoch": 2.5209231549581537,
"grad_norm": 1.190075397491455,
"learning_rate": 6.5398973237463415e-06,
"loss": 3.2257,
"step": 19880
},
{
"epoch": 2.525995434948009,
"grad_norm": 1.1866856813430786,
"learning_rate": 6.405166775063709e-06,
"loss": 3.2212,
"step": 19920
},
{
"epoch": 2.5310677149378646,
"grad_norm": 1.2677091360092163,
"learning_rate": 6.271743492426097e-06,
"loss": 3.2219,
"step": 19960
},
{
"epoch": 2.53613999492772,
"grad_norm": 1.1606627702713013,
"learning_rate": 6.139631476760088e-06,
"loss": 3.2219,
"step": 20000
},
{
"epoch": 2.53613999492772,
"eval_action_accuracy": 0.154052734375,
"eval_loss": 3.199657678604126,
"eval_runtime": 148.1379,
"eval_samples_per_second": 13.825,
"eval_steps_per_second": 0.864,
"step": 20000
},
{
"epoch": 2.5412122749175756,
"grad_norm": 1.2358750104904175,
"learning_rate": 6.008834689671672e-06,
"loss": 3.219,
"step": 20040
},
{
"epoch": 2.546284554907431,
"grad_norm": 1.2563279867172241,
"learning_rate": 5.879357053327416e-06,
"loss": 3.2216,
"step": 20080
},
{
"epoch": 2.5513568348972866,
"grad_norm": 1.2597004175186157,
"learning_rate": 5.751202450336951e-06,
"loss": 3.2216,
"step": 20120
},
{
"epoch": 2.5564291148871416,
"grad_norm": 1.2089635133743286,
"learning_rate": 5.624374723636399e-06,
"loss": 3.2151,
"step": 20160
},
{
"epoch": 2.561501394876997,
"grad_norm": 1.2823585271835327,
"learning_rate": 5.49887767637327e-06,
"loss": 3.2148,
"step": 20200
},
{
"epoch": 2.5665736748668526,
"grad_norm": 1.1914616823196411,
"learning_rate": 5.3747150717923465e-06,
"loss": 3.214,
"step": 20240
},
{
"epoch": 2.571645954856708,
"grad_norm": 1.1906664371490479,
"learning_rate": 5.251890633122858e-06,
"loss": 3.2127,
"step": 20280
},
{
"epoch": 2.5767182348465636,
"grad_norm": 1.2275757789611816,
"learning_rate": 5.1304080434668055e-06,
"loss": 3.21,
"step": 20320
},
{
"epoch": 2.581790514836419,
"grad_norm": 1.2065469026565552,
"learning_rate": 5.010270945688572e-06,
"loss": 3.2124,
"step": 20360
},
{
"epoch": 2.5868627948262746,
"grad_norm": 1.2290291786193848,
"learning_rate": 4.891482942305614e-06,
"loss": 3.2085,
"step": 20400
},
{
"epoch": 2.5868627948262746,
"eval_action_accuracy": 0.15435791015625,
"eval_loss": 3.1864919662475586,
"eval_runtime": 282.6311,
"eval_samples_per_second": 7.246,
"eval_steps_per_second": 0.453,
"step": 20400
},
{
"epoch": 2.5919350748161296,
"grad_norm": 1.2060233354568481,
"learning_rate": 4.7740475953805096e-06,
"loss": 3.21,
"step": 20440
},
{
"epoch": 2.597007354805985,
"grad_norm": 1.2422938346862793,
"learning_rate": 4.657968426414095e-06,
"loss": 3.2093,
"step": 20480
},
{
"epoch": 2.6020796347958406,
"grad_norm": 1.2729711532592773,
"learning_rate": 4.543248916239878e-06,
"loss": 3.2056,
"step": 20520
},
{
"epoch": 2.607151914785696,
"grad_norm": 1.3254213333129883,
"learning_rate": 4.429892504919636e-06,
"loss": 3.2064,
"step": 20560
},
{
"epoch": 2.6122241947755516,
"grad_norm": 1.2693305015563965,
"learning_rate": 4.317902591640327e-06,
"loss": 3.204,
"step": 20600
},
{
"epoch": 2.617296474765407,
"grad_norm": 1.2927143573760986,
"learning_rate": 4.207282534612067e-06,
"loss": 3.2015,
"step": 20640
},
{
"epoch": 2.6223687547552625,
"grad_norm": 1.2953568696975708,
"learning_rate": 4.098035650967519e-06,
"loss": 3.2035,
"step": 20680
},
{
"epoch": 2.627441034745118,
"grad_norm": 1.2383949756622314,
"learning_rate": 3.9901652166623615e-06,
"loss": 3.2019,
"step": 20720
},
{
"epoch": 2.6325133147349735,
"grad_norm": 1.2561314105987549,
"learning_rate": 3.883674466377074e-06,
"loss": 3.2014,
"step": 20760
},
{
"epoch": 2.637585594724829,
"grad_norm": 1.3677061796188354,
"learning_rate": 3.778566593419924e-06,
"loss": 3.1998,
"step": 20800
},
{
"epoch": 2.637585594724829,
"eval_action_accuracy": 0.1546630859375,
"eval_loss": 3.1765692234039307,
"eval_runtime": 204.9483,
"eval_samples_per_second": 9.993,
"eval_steps_per_second": 0.625,
"step": 20800
},
{
"epoch": 2.6426578747146845,
"grad_norm": 1.288628101348877,
"learning_rate": 3.6748447496312623e-06,
"loss": 3.2009,
"step": 20840
},
{
"epoch": 2.6477301547045395,
"grad_norm": 1.3819936513900757,
"learning_rate": 3.5725120452889226e-06,
"loss": 3.1966,
"step": 20880
},
{
"epoch": 2.652802434694395,
"grad_norm": 1.3419549465179443,
"learning_rate": 3.4715715490150403e-06,
"loss": 3.1976,
"step": 20920
},
{
"epoch": 2.6578747146842505,
"grad_norm": 1.3197323083877563,
"learning_rate": 3.3720262876839827e-06,
"loss": 3.2006,
"step": 20960
},
{
"epoch": 2.662946994674106,
"grad_norm": 1.2988895177841187,
"learning_rate": 3.273879246331607e-06,
"loss": 3.1958,
"step": 21000
},
{
"epoch": 2.6680192746639615,
"grad_norm": 1.2606120109558105,
"learning_rate": 3.1771333680657157e-06,
"loss": 3.1944,
"step": 21040
},
{
"epoch": 2.673091554653817,
"grad_norm": 1.2874621152877808,
"learning_rate": 3.0817915539778695e-06,
"loss": 3.1949,
"step": 21080
},
{
"epoch": 2.6781638346436725,
"grad_norm": 1.3432596921920776,
"learning_rate": 2.9878566630563042e-06,
"loss": 3.1932,
"step": 21120
},
{
"epoch": 2.6832361146335275,
"grad_norm": 1.2724778652191162,
"learning_rate": 2.895331512100269e-06,
"loss": 3.1964,
"step": 21160
},
{
"epoch": 2.688308394623383,
"grad_norm": 1.2716572284698486,
"learning_rate": 2.804218875635539e-06,
"loss": 3.1928,
"step": 21200
},
{
"epoch": 2.688308394623383,
"eval_action_accuracy": 0.15606689453125,
"eval_loss": 3.165931224822998,
"eval_runtime": 297.7287,
"eval_samples_per_second": 6.879,
"eval_steps_per_second": 0.43,
"step": 21200
},
{
"epoch": 2.6933806746132385,
"grad_norm": 1.2943603992462158,
"learning_rate": 2.714521485831206e-06,
"loss": 3.1892,
"step": 21240
},
{
"epoch": 2.698452954603094,
"grad_norm": 1.3462802171707153,
"learning_rate": 2.6262420324177473e-06,
"loss": 3.1934,
"step": 21280
},
{
"epoch": 2.7035252345929495,
"grad_norm": 1.3464304208755493,
"learning_rate": 2.539383162606407e-06,
"loss": 3.1878,
"step": 21320
},
{
"epoch": 2.708597514582805,
"grad_norm": 1.286362886428833,
"learning_rate": 2.4539474810097495e-06,
"loss": 3.1885,
"step": 21360
},
{
"epoch": 2.7136697945726604,
"grad_norm": 1.3978246450424194,
"learning_rate": 2.3699375495636277e-06,
"loss": 3.1854,
"step": 21400
},
{
"epoch": 2.718742074562516,
"grad_norm": 1.3346283435821533,
"learning_rate": 2.287355887450299e-06,
"loss": 3.1846,
"step": 21440
},
{
"epoch": 2.7238143545523714,
"grad_norm": 1.3086987733840942,
"learning_rate": 2.2062049710229293e-06,
"loss": 3.1911,
"step": 21480
},
{
"epoch": 2.728886634542227,
"grad_norm": 1.2781283855438232,
"learning_rate": 2.126487233731289e-06,
"loss": 3.1883,
"step": 21520
},
{
"epoch": 2.7339589145320824,
"grad_norm": 1.2688366174697876,
"learning_rate": 2.0482050660488417e-06,
"loss": 3.1866,
"step": 21560
},
{
"epoch": 2.739031194521938,
"grad_norm": 1.3789799213409424,
"learning_rate": 1.971360815400991e-06,
"loss": 3.1887,
"step": 21600
},
{
"epoch": 2.739031194521938,
"eval_action_accuracy": 0.15606689453125,
"eval_loss": 3.1581926345825195,
"eval_runtime": 335.7569,
"eval_samples_per_second": 6.1,
"eval_steps_per_second": 0.381,
"step": 21600
},
{
"epoch": 2.744103474511793,
"grad_norm": 1.2935245037078857,
"learning_rate": 1.8959567860947602e-06,
"loss": 3.1853,
"step": 21640
},
{
"epoch": 2.7491757545016484,
"grad_norm": 1.3740415573120117,
"learning_rate": 1.821995239249613e-06,
"loss": 3.1829,
"step": 21680
},
{
"epoch": 2.754248034491504,
"grad_norm": 1.3145182132720947,
"learning_rate": 1.7494783927297643e-06,
"loss": 3.1853,
"step": 21720
},
{
"epoch": 2.7593203144813594,
"grad_norm": 1.4077484607696533,
"learning_rate": 1.678408421077532e-06,
"loss": 3.1842,
"step": 21760
},
{
"epoch": 2.764392594471215,
"grad_norm": 1.2955596446990967,
"learning_rate": 1.6087874554482573e-06,
"loss": 3.1853,
"step": 21800
},
{
"epoch": 2.7694648744610704,
"grad_norm": 1.329830288887024,
"learning_rate": 1.5406175835463111e-06,
"loss": 3.1843,
"step": 21840
},
{
"epoch": 2.7745371544509254,
"grad_norm": 1.343755841255188,
"learning_rate": 1.4739008495625427e-06,
"loss": 3.1841,
"step": 21880
},
{
"epoch": 2.779609434440781,
"grad_norm": 1.320789098739624,
"learning_rate": 1.4086392541129366e-06,
"loss": 3.1815,
"step": 21920
},
{
"epoch": 2.7846817144306364,
"grad_norm": 1.338318109512329,
"learning_rate": 1.344834754178692e-06,
"loss": 3.1815,
"step": 21960
},
{
"epoch": 2.789753994420492,
"grad_norm": 1.3006017208099365,
"learning_rate": 1.2824892630474484e-06,
"loss": 3.1824,
"step": 22000
},
{
"epoch": 2.789753994420492,
"eval_action_accuracy": 0.15509033203125,
"eval_loss": 3.1539883613586426,
"eval_runtime": 99.5814,
"eval_samples_per_second": 20.566,
"eval_steps_per_second": 1.285,
"step": 22000
},
{
"epoch": 2.7948262744103474,
"grad_norm": 1.327972650527954,
"learning_rate": 1.2216046502559753e-06,
"loss": 3.1804,
"step": 22040
},
{
"epoch": 2.799898554400203,
"grad_norm": 1.422127604484558,
"learning_rate": 1.1621827415340958e-06,
"loss": 3.1834,
"step": 22080
},
{
"epoch": 2.8049708343900583,
"grad_norm": 1.318961501121521,
"learning_rate": 1.104225318749924e-06,
"loss": 3.1799,
"step": 22120
},
{
"epoch": 2.810043114379914,
"grad_norm": 1.4058647155761719,
"learning_rate": 1.0477341198564582e-06,
"loss": 3.1802,
"step": 22160
},
{
"epoch": 2.8151153943697693,
"grad_norm": 1.3461483716964722,
"learning_rate": 9.927108388394446e-07,
"loss": 3.178,
"step": 22200
},
{
"epoch": 2.820187674359625,
"grad_norm": 1.2846914529800415,
"learning_rate": 9.391571256665899e-07,
"loss": 3.1817,
"step": 22240
},
{
"epoch": 2.8252599543494803,
"grad_norm": 1.3674983978271484,
"learning_rate": 8.870745862380847e-07,
"loss": 3.1779,
"step": 22280
},
{
"epoch": 2.830332234339336,
"grad_norm": 1.3590912818908691,
"learning_rate": 8.364647823384308e-07,
"loss": 3.1788,
"step": 22320
},
{
"epoch": 2.835404514329191,
"grad_norm": 1.2989791631698608,
"learning_rate": 7.873292315896453e-07,
"loss": 3.1788,
"step": 22360
},
{
"epoch": 2.8404767943190463,
"grad_norm": 1.311061978340149,
"learning_rate": 7.39669407405702e-07,
"loss": 3.1778,
"step": 22400
},
{
"epoch": 2.8404767943190463,
"eval_action_accuracy": 0.1556396484375,
"eval_loss": 3.1505887508392334,
"eval_runtime": 336.5852,
"eval_samples_per_second": 6.085,
"eval_steps_per_second": 0.38,
"step": 22400
},
{
"epoch": 2.845549074308902,
"grad_norm": 1.3574533462524414,
"learning_rate": 6.934867389484013e-07,
"loss": 3.1798,
"step": 22440
},
{
"epoch": 2.8506213542987573,
"grad_norm": 1.4348208904266357,
"learning_rate": 6.487826110844808e-07,
"loss": 3.1808,
"step": 22480
},
{
"epoch": 2.855693634288613,
"grad_norm": 1.377144455909729,
"learning_rate": 6.055583643440776e-07,
"loss": 3.1728,
"step": 22520
},
{
"epoch": 2.8607659142784683,
"grad_norm": 1.2860257625579834,
"learning_rate": 5.638152948805819e-07,
"loss": 3.1738,
"step": 22560
},
{
"epoch": 2.8658381942683238,
"grad_norm": 1.3123292922973633,
"learning_rate": 5.235546544317016e-07,
"loss": 3.1754,
"step": 22600
},
{
"epoch": 2.870910474258179,
"grad_norm": 1.3371970653533936,
"learning_rate": 4.847776502819867e-07,
"loss": 3.174,
"step": 22640
},
{
"epoch": 2.8759827542480343,
"grad_norm": 1.3241571187973022,
"learning_rate": 4.47485445226592e-07,
"loss": 3.1782,
"step": 22680
},
{
"epoch": 2.88105503423789,
"grad_norm": 1.4008209705352783,
"learning_rate": 4.116791575364154e-07,
"loss": 3.1776,
"step": 22720
},
{
"epoch": 2.8861273142277453,
"grad_norm": 1.3333276510238647,
"learning_rate": 3.7735986092457543e-07,
"loss": 3.1787,
"step": 22760
},
{
"epoch": 2.8911995942176008,
"grad_norm": 1.3414257764816284,
"learning_rate": 3.445285845141921e-07,
"loss": 3.1776,
"step": 22800
},
{
"epoch": 2.8911995942176008,
"eval_action_accuracy": 0.15618896484375,
"eval_loss": 3.1486361026763916,
"eval_runtime": 309.6189,
"eval_samples_per_second": 6.615,
"eval_steps_per_second": 0.413,
"step": 22800
},
{
"epoch": 2.8962718742074562,
"grad_norm": 1.3558026552200317,
"learning_rate": 3.1318631280755626e-07,
"loss": 3.1795,
"step": 22840
},
{
"epoch": 2.9013441541973117,
"grad_norm": 1.3386595249176025,
"learning_rate": 2.833339856565753e-07,
"loss": 3.172,
"step": 22880
},
{
"epoch": 2.906416434187167,
"grad_norm": 1.3235087394714355,
"learning_rate": 2.549724982346291e-07,
"loss": 3.177,
"step": 22920
},
{
"epoch": 2.9114887141770227,
"grad_norm": 1.504699945449829,
"learning_rate": 2.2810270100968033e-07,
"loss": 3.1757,
"step": 22960
},
{
"epoch": 2.916560994166878,
"grad_norm": 1.3332788944244385,
"learning_rate": 2.027253997188172e-07,
"loss": 3.1752,
"step": 23000
},
{
"epoch": 2.9216332741567337,
"grad_norm": 1.4046924114227295,
"learning_rate": 1.78841355344056e-07,
"loss": 3.1748,
"step": 23040
},
{
"epoch": 2.9267055541465887,
"grad_norm": 1.3087977170944214,
"learning_rate": 1.5645128408953713e-07,
"loss": 3.1736,
"step": 23080
},
{
"epoch": 2.931777834136444,
"grad_norm": 1.274903416633606,
"learning_rate": 1.355558573600535e-07,
"loss": 3.1749,
"step": 23120
},
{
"epoch": 2.9368501141262997,
"grad_norm": 1.5779058933258057,
"learning_rate": 1.161557017409165e-07,
"loss": 3.1741,
"step": 23160
},
{
"epoch": 2.941922394116155,
"grad_norm": 1.2767544984817505,
"learning_rate": 9.825139897915447e-08,
"loss": 3.1755,
"step": 23200
},
{
"epoch": 2.941922394116155,
"eval_action_accuracy": 0.1553955078125,
"eval_loss": 3.1481196880340576,
"eval_runtime": 226.2846,
"eval_samples_per_second": 9.051,
"eval_steps_per_second": 0.566,
"step": 23200
},
{
"epoch": 2.9469946741060107,
"grad_norm": 1.3390287160873413,
"learning_rate": 8.184348596606551e-08,
"loss": 3.174,
"step": 23240
},
{
"epoch": 2.952066954095866,
"grad_norm": 1.3068482875823975,
"learning_rate": 6.693245472114695e-08,
"loss": 3.1747,
"step": 23280
},
{
"epoch": 2.9571392340857217,
"grad_norm": 1.301686406135559,
"learning_rate": 5.3518752377307304e-08,
"loss": 3.1731,
"step": 23320
},
{
"epoch": 2.9622115140755767,
"grad_norm": 1.3553813695907593,
"learning_rate": 4.1602781167487946e-08,
"loss": 3.175,
"step": 23360
},
{
"epoch": 2.967283794065432,
"grad_norm": 1.3697738647460938,
"learning_rate": 3.1184898412572886e-08,
"loss": 3.1749,
"step": 23400
},
{
"epoch": 2.9723560740552877,
"grad_norm": 1.3143235445022583,
"learning_rate": 2.2265416510691693e-08,
"loss": 3.1737,
"step": 23440
},
{
"epoch": 2.977428354045143,
"grad_norm": 1.4149271249771118,
"learning_rate": 1.4844602927849283e-08,
"loss": 3.1723,
"step": 23480
},
{
"epoch": 2.9825006340349987,
"grad_norm": 1.37666916847229,
"learning_rate": 8.922680189898946e-09,
"loss": 3.1717,
"step": 23520
},
{
"epoch": 2.987572914024854,
"grad_norm": 1.3097264766693115,
"learning_rate": 4.499825875886599e-09,
"loss": 3.1736,
"step": 23560
},
{
"epoch": 2.9926451940147096,
"grad_norm": 1.2627308368682861,
"learning_rate": 1.576172612693938e-09,
"loss": 3.175,
"step": 23600
},
{
"epoch": 2.9926451940147096,
"eval_action_accuracy": 0.15625,
"eval_loss": 3.147233486175537,
"eval_runtime": 261.558,
"eval_samples_per_second": 7.83,
"eval_steps_per_second": 0.489,
"step": 23600
},
{
"epoch": 2.997717474004565,
"grad_norm": 1.4098745584487915,
"learning_rate": 1.5180807110271034e-10,
"loss": 3.1737,
"step": 23640
}
],
"logging_steps": 40,
"max_steps": 23658,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.3886060071684145e+19,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}