R2EGym-32B-Agent / trainer_state.json
lllqaq's picture
Add files using upload-large-folder tool
f811a10 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 808,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.024752475247524754,
"grad_norm": 1.4194453954696655,
"learning_rate": 2.1951219512195125e-06,
"loss": 0.4754,
"step": 10
},
{
"epoch": 0.04950495049504951,
"grad_norm": 1.0042498111724854,
"learning_rate": 4.634146341463416e-06,
"loss": 0.3587,
"step": 20
},
{
"epoch": 0.07425742574257425,
"grad_norm": 0.6140494346618652,
"learning_rate": 7.0731707317073175e-06,
"loss": 0.2705,
"step": 30
},
{
"epoch": 0.09900990099009901,
"grad_norm": 0.4005405604839325,
"learning_rate": 9.51219512195122e-06,
"loss": 0.2262,
"step": 40
},
{
"epoch": 0.12376237623762376,
"grad_norm": 0.5535389184951782,
"learning_rate": 9.99731595284969e-06,
"loss": 0.2024,
"step": 50
},
{
"epoch": 0.1485148514851485,
"grad_norm": 0.45378637313842773,
"learning_rate": 9.986416949868223e-06,
"loss": 0.2047,
"step": 60
},
{
"epoch": 0.17326732673267325,
"grad_norm": 0.46589696407318115,
"learning_rate": 9.967153506514677e-06,
"loss": 0.1903,
"step": 70
},
{
"epoch": 0.19801980198019803,
"grad_norm": 0.4678415358066559,
"learning_rate": 9.939557936156527e-06,
"loss": 0.1865,
"step": 80
},
{
"epoch": 0.22277227722772278,
"grad_norm": 0.5036286115646362,
"learning_rate": 9.903676528846353e-06,
"loss": 0.1973,
"step": 90
},
{
"epoch": 0.24752475247524752,
"grad_norm": 0.7928675413131714,
"learning_rate": 9.859569473672816e-06,
"loss": 0.1838,
"step": 100
},
{
"epoch": 0.2722772277227723,
"grad_norm": 0.4357012212276459,
"learning_rate": 9.807310757796782e-06,
"loss": 0.1902,
"step": 110
},
{
"epoch": 0.297029702970297,
"grad_norm": 0.5225412249565125,
"learning_rate": 9.746988042341907e-06,
"loss": 0.1715,
"step": 120
},
{
"epoch": 0.3217821782178218,
"grad_norm": 0.46918612718582153,
"learning_rate": 9.678702515347937e-06,
"loss": 0.1843,
"step": 130
},
{
"epoch": 0.3465346534653465,
"grad_norm": 0.5186185836791992,
"learning_rate": 9.602568722033325e-06,
"loss": 0.1847,
"step": 140
},
{
"epoch": 0.3712871287128713,
"grad_norm": 0.525906503200531,
"learning_rate": 9.518714372651922e-06,
"loss": 0.1836,
"step": 150
},
{
"epoch": 0.39603960396039606,
"grad_norm": 0.47885432839393616,
"learning_rate": 9.427280128266049e-06,
"loss": 0.1721,
"step": 160
},
{
"epoch": 0.4207920792079208,
"grad_norm": 0.45440953969955444,
"learning_rate": 9.328419364795295e-06,
"loss": 0.1805,
"step": 170
},
{
"epoch": 0.44554455445544555,
"grad_norm": 0.47786083817481995,
"learning_rate": 9.222297915736835e-06,
"loss": 0.1802,
"step": 180
},
{
"epoch": 0.47029702970297027,
"grad_norm": 0.40427038073539734,
"learning_rate": 9.109093793988866e-06,
"loss": 0.1747,
"step": 190
},
{
"epoch": 0.49504950495049505,
"grad_norm": 0.5424014329910278,
"learning_rate": 8.988996893243742e-06,
"loss": 0.1734,
"step": 200
},
{
"epoch": 0.5198019801980198,
"grad_norm": 0.636466383934021,
"learning_rate": 8.862208669451748e-06,
"loss": 0.1726,
"step": 210
},
{
"epoch": 0.5445544554455446,
"grad_norm": 0.45117199420928955,
"learning_rate": 8.728941802889816e-06,
"loss": 0.1742,
"step": 220
},
{
"epoch": 0.5693069306930693,
"grad_norm": 0.459605872631073,
"learning_rate": 8.589419841402046e-06,
"loss": 0.1812,
"step": 230
},
{
"epoch": 0.594059405940594,
"grad_norm": 0.4333467483520508,
"learning_rate": 8.443876825410488e-06,
"loss": 0.1745,
"step": 240
},
{
"epoch": 0.6188118811881188,
"grad_norm": 0.39373937249183655,
"learning_rate": 8.292556895325195e-06,
"loss": 0.165,
"step": 250
},
{
"epoch": 0.6435643564356436,
"grad_norm": 0.4536997377872467,
"learning_rate": 8.135713882012102e-06,
"loss": 0.1669,
"step": 260
},
{
"epoch": 0.6683168316831684,
"grad_norm": 0.4184967577457428,
"learning_rate": 7.973610881005702e-06,
"loss": 0.1628,
"step": 270
},
{
"epoch": 0.693069306930693,
"grad_norm": 0.38848263025283813,
"learning_rate": 7.80651981118075e-06,
"loss": 0.1596,
"step": 280
},
{
"epoch": 0.7178217821782178,
"grad_norm": 0.46365565061569214,
"learning_rate": 7.634720958623287e-06,
"loss": 0.1656,
"step": 290
},
{
"epoch": 0.7425742574257426,
"grad_norm": 0.41299328207969666,
"learning_rate": 7.458502506466146e-06,
"loss": 0.1765,
"step": 300
},
{
"epoch": 0.7673267326732673,
"grad_norm": 0.41087886691093445,
"learning_rate": 7.278160051477574e-06,
"loss": 0.1585,
"step": 310
},
{
"epoch": 0.7920792079207921,
"grad_norm": 0.3603922128677368,
"learning_rate": 7.09399610821391e-06,
"loss": 0.1674,
"step": 320
},
{
"epoch": 0.8168316831683168,
"grad_norm": 0.44449886679649353,
"learning_rate": 6.906319601568039e-06,
"loss": 0.1753,
"step": 330
},
{
"epoch": 0.8415841584158416,
"grad_norm": 0.4169905483722687,
"learning_rate": 6.715445348564863e-06,
"loss": 0.171,
"step": 340
},
{
"epoch": 0.8663366336633663,
"grad_norm": 0.4144372344017029,
"learning_rate": 6.521693530273046e-06,
"loss": 0.1678,
"step": 350
},
{
"epoch": 0.8910891089108911,
"grad_norm": 0.35955941677093506,
"learning_rate": 6.325389154718865e-06,
"loss": 0.1608,
"step": 360
},
{
"epoch": 0.9158415841584159,
"grad_norm": 0.49849727749824524,
"learning_rate": 6.126861511703119e-06,
"loss": 0.1648,
"step": 370
},
{
"epoch": 0.9405940594059405,
"grad_norm": 0.40356525778770447,
"learning_rate": 5.926443620435572e-06,
"loss": 0.1603,
"step": 380
},
{
"epoch": 0.9653465346534653,
"grad_norm": 0.3695359230041504,
"learning_rate": 5.724471670913545e-06,
"loss": 0.1553,
"step": 390
},
{
"epoch": 0.9900990099009901,
"grad_norm": 0.4594876766204834,
"learning_rate": 5.521284459981662e-06,
"loss": 0.1623,
"step": 400
},
{
"epoch": 1.0148514851485149,
"grad_norm": 0.39757996797561646,
"learning_rate": 5.317222823018775e-06,
"loss": 0.1223,
"step": 410
},
{
"epoch": 1.0396039603960396,
"grad_norm": 0.4017312228679657,
"learning_rate": 5.112629062205341e-06,
"loss": 0.0924,
"step": 420
},
{
"epoch": 1.0643564356435644,
"grad_norm": 0.4011599123477936,
"learning_rate": 4.907846372330326e-06,
"loss": 0.0949,
"step": 430
},
{
"epoch": 1.0891089108910892,
"grad_norm": 0.397612601518631,
"learning_rate": 4.7032182651008204e-06,
"loss": 0.0875,
"step": 440
},
{
"epoch": 1.113861386138614,
"grad_norm": 0.5288234949111938,
"learning_rate": 4.4990879929200145e-06,
"loss": 0.0958,
"step": 450
},
{
"epoch": 1.1386138613861387,
"grad_norm": 0.36887308955192566,
"learning_rate": 4.295797973100174e-06,
"loss": 0.0906,
"step": 460
},
{
"epoch": 1.1633663366336633,
"grad_norm": 0.42459458112716675,
"learning_rate": 4.093689213476408e-06,
"loss": 0.0912,
"step": 470
},
{
"epoch": 1.188118811881188,
"grad_norm": 0.43703174591064453,
"learning_rate": 3.893100740384766e-06,
"loss": 0.0945,
"step": 480
},
{
"epoch": 1.2128712871287128,
"grad_norm": 0.4258781969547272,
"learning_rate": 3.6943690299642055e-06,
"loss": 0.0965,
"step": 490
},
{
"epoch": 1.2376237623762376,
"grad_norm": 0.48606401681900024,
"learning_rate": 3.4978274437363447e-06,
"loss": 0.092,
"step": 500
},
{
"epoch": 1.2623762376237624,
"grad_norm": 0.4267478287220001,
"learning_rate": 3.3038056694098485e-06,
"loss": 0.0949,
"step": 510
},
{
"epoch": 1.2871287128712872,
"grad_norm": 0.5156121850013733,
"learning_rate": 3.112629167847409e-06,
"loss": 0.0965,
"step": 520
},
{
"epoch": 1.311881188118812,
"grad_norm": 0.4543267488479614,
"learning_rate": 2.9246186271230335e-06,
"loss": 0.094,
"step": 530
},
{
"epoch": 1.3366336633663367,
"grad_norm": 0.3953835070133209,
"learning_rate": 2.7400894245854327e-06,
"loss": 0.0958,
"step": 540
},
{
"epoch": 1.3613861386138613,
"grad_norm": 0.40146708488464355,
"learning_rate": 2.5593510978298487e-06,
"loss": 0.0941,
"step": 550
},
{
"epoch": 1.386138613861386,
"grad_norm": 0.39063599705696106,
"learning_rate": 2.3827068254657493e-06,
"loss": 0.0899,
"step": 560
},
{
"epoch": 1.4108910891089108,
"grad_norm": 0.38410985469818115,
"learning_rate": 2.2104529185513807e-06,
"loss": 0.0972,
"step": 570
},
{
"epoch": 1.4356435643564356,
"grad_norm": 0.4306909143924713,
"learning_rate": 2.0428783235482423e-06,
"loss": 0.0968,
"step": 580
},
{
"epoch": 1.4603960396039604,
"grad_norm": 0.43735161423683167,
"learning_rate": 1.8802641376292913e-06,
"loss": 0.1037,
"step": 590
},
{
"epoch": 1.4851485148514851,
"grad_norm": 0.38794925808906555,
"learning_rate": 1.722883137153874e-06,
"loss": 0.0936,
"step": 600
},
{
"epoch": 1.50990099009901,
"grad_norm": 0.48267433047294617,
"learning_rate": 1.5709993201003827e-06,
"loss": 0.097,
"step": 610
},
{
"epoch": 1.5346534653465347,
"grad_norm": 0.3811907470226288,
"learning_rate": 1.424867463224147e-06,
"loss": 0.0915,
"step": 620
},
{
"epoch": 1.5594059405940595,
"grad_norm": 0.4866475760936737,
"learning_rate": 1.2847326946834427e-06,
"loss": 0.1023,
"step": 630
},
{
"epoch": 1.5841584158415842,
"grad_norm": 0.4222099483013153,
"learning_rate": 1.1508300828504682e-06,
"loss": 0.0991,
"step": 640
},
{
"epoch": 1.608910891089109,
"grad_norm": 0.37634769082069397,
"learning_rate": 1.0233842419970773e-06,
"loss": 0.0899,
"step": 650
},
{
"epoch": 1.6336633663366338,
"grad_norm": 0.4965452551841736,
"learning_rate": 9.026089555166745e-07,
"loss": 0.1001,
"step": 660
},
{
"epoch": 1.6584158415841586,
"grad_norm": 0.3864215016365051,
"learning_rate": 7.887068173143325e-07,
"loss": 0.0994,
"step": 670
},
{
"epoch": 1.6831683168316833,
"grad_norm": 0.4128170311450958,
"learning_rate": 6.818688919666461e-07,
"loss": 0.0989,
"step": 680
},
{
"epoch": 1.7079207920792079,
"grad_norm": 0.3718600869178772,
"learning_rate": 5.822743942214026e-07,
"loss": 0.0944,
"step": 690
},
{
"epoch": 1.7326732673267327,
"grad_norm": 0.41322892904281616,
"learning_rate": 4.900903883747021e-07,
"loss": 0.0929,
"step": 700
},
{
"epoch": 1.7574257425742574,
"grad_norm": 0.3393838107585907,
"learning_rate": 4.054715080297722e-07,
"loss": 0.097,
"step": 710
},
{
"epoch": 1.7821782178217822,
"grad_norm": 0.3705613911151886,
"learning_rate": 3.285596967076055e-07,
"loss": 0.0956,
"step": 720
},
{
"epoch": 1.806930693069307,
"grad_norm": 0.47960299253463745,
"learning_rate": 2.594839697445017e-07,
"loss": 0.1008,
"step": 730
},
{
"epoch": 1.8316831683168315,
"grad_norm": 0.4418397545814514,
"learning_rate": 1.983601978759292e-07,
"loss": 0.1042,
"step": 740
},
{
"epoch": 1.8564356435643563,
"grad_norm": 0.3197237551212311,
"learning_rate": 1.4529091286973994e-07,
"loss": 0.0925,
"step": 750
},
{
"epoch": 1.881188118811881,
"grad_norm": 0.44026193022727966,
"learning_rate": 1.0036513553476013e-07,
"loss": 0.1043,
"step": 760
},
{
"epoch": 1.9059405940594059,
"grad_norm": 0.5169320702552795,
"learning_rate": 6.365822639327724e-08,
"loss": 0.1028,
"step": 770
},
{
"epoch": 1.9306930693069306,
"grad_norm": 0.5262919068336487,
"learning_rate": 3.523175926790745e-08,
"loss": 0.0945,
"step": 780
},
{
"epoch": 1.9554455445544554,
"grad_norm": 0.374805748462677,
"learning_rate": 1.513341799488921e-08,
"loss": 0.1013,
"step": 790
},
{
"epoch": 1.9801980198019802,
"grad_norm": 0.3962545096874237,
"learning_rate": 3.3969164370722953e-09,
"loss": 0.0913,
"step": 800
},
{
"epoch": 2.0,
"step": 808,
"total_flos": 576827298414592.0,
"train_loss": 0.0689804450710221,
"train_runtime": 41658.2625,
"train_samples_per_second": 0.155,
"train_steps_per_second": 0.019
}
],
"logging_steps": 10,
"max_steps": 808,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 576827298414592.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}