Llama-3.1-8B-S2R-ORL / trainer_state.json

Upload folder using huggingface_hub

55a6de3 verified 8 months ago

25.2 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.16301218161683279,
	"eval_steps": 500,
	"global_step": 92,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0070874861572535995,
	"importance_ratio": 0.9994602799415588,
	"kl_div_avg": 2.587483777460875e-06,
	"learning_rate": 0.0,
	"loss_func": "stage2",
	"step": 1,
	"total_loss": 0.8759250640869141
	},
	{
	"epoch": 0.0070874861572535995,
	"importance_ratio": 0.8117017149925232,
	"kl_div_avg": 7.403145332318672e-07,
	"learning_rate": 4.30676558073393e-07,
	"loss_func": "stage2",
	"step": 2,
	"total_loss": -0.3727513253688812
	},
	{
	"epoch": 0.0070874861572535995,
	"importance_ratio": 0.9883006811141968,
	"kl_div_avg": 2.3012535166344605e-05,
	"learning_rate": 6.826061944859853e-07,
	"loss_func": "stage2",
	"step": 3,
	"total_loss": -0.6280329823493958
	},
	{
	"epoch": 0.0070874861572535995,
	"importance_ratio": 0.5251931548118591,
	"kl_div_avg": 4.927766440232517e-06,
	"learning_rate": 8.61353116146786e-07,
	"loss_func": "stage2",
	"step": 4,
	"total_loss": 0.0813111662864685
	},
	{
	"epoch": 0.014174972314507199,
	"importance_ratio": 0.992071807384491,
	"kl_div_avg": 0.0007429516408592463,
	"learning_rate": 1e-06,
	"loss_func": "stage2",
	"step": 5,
	"total_loss": 0.46373996138572693
	},
	{
	"epoch": 0.014174972314507199,
	"importance_ratio": 0.702412486076355,
	"kl_div_avg": 0.000552409328520298,
	"learning_rate": 1e-06,
	"loss_func": "stage2",
	"step": 6,
	"total_loss": -0.48433077335357666
	},
	{
	"epoch": 0.014174972314507199,
	"importance_ratio": 0.6851906180381775,
	"kl_div_avg": 0.00044042925583198667,
	"learning_rate": 9.99645012424565e-07,
	"loss_func": "stage2",
	"step": 7,
	"total_loss": 0.005272107198834419
	},
	{
	"epoch": 0.014174972314507199,
	"importance_ratio": 1.2104086875915527,
	"kl_div_avg": 0.0008309759432449937,
	"learning_rate": 9.992900248491303e-07,
	"loss_func": "stage2",
	"step": 8,
	"total_loss": 1.175075650215149
	},
	{
	"epoch": 0.0212624584717608,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.0013515137834474444,
	"learning_rate": 9.989350372736954e-07,
	"loss_func": "stage2",
	"step": 9,
	"total_loss": -0.1796875
	},
	{
	"epoch": 0.0212624584717608,
	"importance_ratio": 0.8567007780075073,
	"kl_div_avg": 0.0016049568075686693,
	"learning_rate": 9.985800496982605e-07,
	"loss_func": "stage2",
	"step": 10,
	"total_loss": -0.2918521463871002
	},
	{
	"epoch": 0.0212624584717608,
	"importance_ratio": 1.2928454875946045,
	"kl_div_avg": 0.001749544171616435,
	"learning_rate": 9.982250621228256e-07,
	"loss_func": "stage2",
	"step": 11,
	"total_loss": -1.0593750476837158
	},
	{
	"epoch": 0.0212624584717608,
	"importance_ratio": 1.1015655994415283,
	"kl_div_avg": 0.0019894526340067387,
	"learning_rate": 9.978700745473909e-07,
	"loss_func": "stage2",
	"step": 12,
	"total_loss": 0.5328124761581421
	},
	{
	"epoch": 0.028349944629014398,
	"importance_ratio": 1.01776921749115,
	"kl_div_avg": 0.003757305908948183,
	"learning_rate": 9.97515086971956e-07,
	"loss_func": "stage2",
	"step": 13,
	"total_loss": -0.10959765315055847
	},
	{
	"epoch": 0.028349944629014398,
	"importance_ratio": 0.5146582126617432,
	"kl_div_avg": 0.002980178687721491,
	"learning_rate": 9.97160099396521e-07,
	"loss_func": "stage2",
	"step": 14,
	"total_loss": -0.4417858421802521
	},
	{
	"epoch": 0.028349944629014398,
	"importance_ratio": 0.2671862244606018,
	"kl_div_avg": 0.0029413679149001837,
	"learning_rate": 9.968051118210861e-07,
	"loss_func": "stage2",
	"step": 15,
	"total_loss": 0.20184588432312012
	},
	{
	"epoch": 0.028349944629014398,
	"importance_ratio": 0.2749362587928772,
	"kl_div_avg": 0.0027144472114741802,
	"learning_rate": 9.964501242456512e-07,
	"loss_func": "stage2",
	"step": 16,
	"total_loss": 0.2587171494960785
	},
	{
	"epoch": 0.035437430786267994,
	"importance_ratio": 0.9978752136230469,
	"kl_div_avg": 0.005673160310834646,
	"learning_rate": 9.960951366702165e-07,
	"loss_func": "stage2",
	"step": 17,
	"total_loss": -0.29379865527153015
	},
	{
	"epoch": 0.035437430786267994,
	"importance_ratio": 0.6526201367378235,
	"kl_div_avg": 0.00475339125841856,
	"learning_rate": 9.957401490947816e-07,
	"loss_func": "stage2",
	"step": 18,
	"total_loss": -0.2138231247663498
	},
	{
	"epoch": 0.035437430786267994,
	"importance_ratio": 0.7303690314292908,
	"kl_div_avg": 0.006084108259528875,
	"learning_rate": 9.95385161519347e-07,
	"loss_func": "stage2",
	"step": 19,
	"total_loss": 0.36607834696769714
	},
	{
	"epoch": 0.035437430786267994,
	"importance_ratio": 0.9157307147979736,
	"kl_div_avg": 0.005460726097226143,
	"learning_rate": 9.950301739439118e-07,
	"loss_func": "stage2",
	"step": 20,
	"total_loss": 0.2832968533039093
	},
	{
	"epoch": 0.0425249169435216,
	"importance_ratio": 0.9975234270095825,
	"kl_div_avg": 0.007166164927184582,
	"learning_rate": 9.94675186368477e-07,
	"loss_func": "stage2",
	"step": 21,
	"total_loss": -0.15122467279434204
	},
	{
	"epoch": 0.0425249169435216,
	"importance_ratio": 0.4565725028514862,
	"kl_div_avg": 0.006681992672383785,
	"learning_rate": 9.943201987930422e-07,
	"loss_func": "stage2",
	"step": 22,
	"total_loss": 1.452988862991333
	},
	{
	"epoch": 0.0425249169435216,
	"importance_ratio": 0.3932248651981354,
	"kl_div_avg": 0.0055625829845666885,
	"learning_rate": 9.939652112176073e-07,
	"loss_func": "stage2",
	"step": 23,
	"total_loss": -0.06093749403953552
	},
	{
	"epoch": 0.0425249169435216,
	"importance_ratio": 0.018905332311987877,
	"kl_div_avg": 0.007485650479793549,
	"learning_rate": 9.936102236421726e-07,
	"loss_func": "stage2",
	"step": 24,
	"total_loss": -0.0018584587378427386
	},
	{
	"epoch": 0.04961240310077519,
	"importance_ratio": 1.004979133605957,
	"kl_div_avg": 0.0073785921558737755,
	"learning_rate": 9.932552360667376e-07,
	"loss_func": "stage2",
	"step": 25,
	"total_loss": -0.4546505808830261
	},
	{
	"epoch": 0.04961240310077519,
	"importance_ratio": 0.4345252811908722,
	"kl_div_avg": 0.011518791317939758,
	"learning_rate": 9.929002484913027e-07,
	"loss_func": "stage2",
	"step": 26,
	"total_loss": 0.17052176594734192
	},
	{
	"epoch": 0.04961240310077519,
	"importance_ratio": 0.9074916243553162,
	"kl_div_avg": 0.007095410488545895,
	"learning_rate": 9.925452609158678e-07,
	"loss_func": "stage2",
	"step": 27,
	"total_loss": -0.1952303647994995
	},
	{
	"epoch": 0.04961240310077519,
	"importance_ratio": 0.3815934360027313,
	"kl_div_avg": 0.00811639055609703,
	"learning_rate": 9.921902733404331e-07,
	"loss_func": "stage2",
	"step": 28,
	"total_loss": 0.1002814769744873
	},
	{
	"epoch": 0.056699889258028796,
	"importance_ratio": 0.999740719795227,
	"kl_div_avg": 0.011646868661046028,
	"learning_rate": 9.918352857649982e-07,
	"loss_func": "stage2",
	"step": 29,
	"total_loss": -0.0001066727563738823
	},
	{
	"epoch": 0.056699889258028796,
	"importance_ratio": 1.0092053413391113,
	"kl_div_avg": 0.013259533792734146,
	"learning_rate": 9.914802981895633e-07,
	"loss_func": "stage2",
	"step": 30,
	"total_loss": 1.2793704271316528
	},
	{
	"epoch": 0.056699889258028796,
	"importance_ratio": 1.6615486145019531,
	"kl_div_avg": 0.014400625601410866,
	"learning_rate": 9.911253106141284e-07,
	"loss_func": "stage2",
	"step": 31,
	"total_loss": -0.41830766201019287
	},
	{
	"epoch": 0.056699889258028796,
	"importance_ratio": 1.7373325824737549,
	"kl_div_avg": 0.014305144548416138,
	"learning_rate": 9.907703230386937e-07,
	"loss_func": "stage2",
	"step": 32,
	"total_loss": -0.23473186790943146
	},
	{
	"epoch": 0.06378737541528239,
	"importance_ratio": 0.9925450086593628,
	"kl_div_avg": 0.018568212166428566,
	"learning_rate": 9.904153354632587e-07,
	"loss_func": "stage2",
	"step": 33,
	"total_loss": -0.7624663710594177
	},
	{
	"epoch": 0.06378737541528239,
	"importance_ratio": 0.5851880311965942,
	"kl_div_avg": 0.01448909379541874,
	"learning_rate": 9.900603478878238e-07,
	"loss_func": "stage2",
	"step": 34,
	"total_loss": -0.40603700280189514
	},
	{
	"epoch": 0.06378737541528239,
	"importance_ratio": 0.7805031538009644,
	"kl_div_avg": 0.012439063750207424,
	"learning_rate": 9.897053603123891e-07,
	"loss_func": "stage2",
	"step": 35,
	"total_loss": 0.022615084424614906
	},
	{
	"epoch": 0.06378737541528239,
	"importance_ratio": 0.3514450490474701,
	"kl_div_avg": 0.00605101278051734,
	"learning_rate": 9.893503727369542e-07,
	"loss_func": "stage2",
	"step": 36,
	"total_loss": 1.1687499284744263
	},
	{
	"epoch": 0.07087486157253599,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.02013654261827469,
	"learning_rate": 9.889953851615193e-07,
	"loss_func": "stage2",
	"step": 37,
	"total_loss": 0.6367186903953552
	},
	{
	"epoch": 0.07087486157253599,
	"importance_ratio": 0.5006582140922546,
	"kl_div_avg": 0.02115897834300995,
	"learning_rate": 9.886403975860844e-07,
	"loss_func": "stage2",
	"step": 38,
	"total_loss": 0.5093749761581421
	},
	{
	"epoch": 0.07087486157253599,
	"importance_ratio": 0.7131205797195435,
	"kl_div_avg": 0.01793592795729637,
	"learning_rate": 9.882854100106497e-07,
	"loss_func": "stage2",
	"step": 39,
	"total_loss": -0.2651502192020416
	},
	{
	"epoch": 0.07087486157253599,
	"importance_ratio": 1.344174861907959,
	"kl_div_avg": 0.017681246623396873,
	"learning_rate": 9.879304224352148e-07,
	"loss_func": "stage2",
	"step": 40,
	"total_loss": -1.0468751192092896
	},
	{
	"epoch": 0.07796234772978959,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.018570249900221825,
	"learning_rate": 9.875754348597799e-07,
	"loss_func": "stage2",
	"step": 41,
	"total_loss": 0.2343750298023224
	},
	{
	"epoch": 0.07796234772978959,
	"importance_ratio": 0.5109913349151611,
	"kl_div_avg": 0.021320415660738945,
	"learning_rate": 9.87220447284345e-07,
	"loss_func": "stage2",
	"step": 42,
	"total_loss": 0.1827033907175064
	},
	{
	"epoch": 0.07796234772978959,
	"importance_ratio": 0.7171033620834351,
	"kl_div_avg": 0.02094135992228985,
	"learning_rate": 9.8686545970891e-07,
	"loss_func": "stage2",
	"step": 43,
	"total_loss": -0.2705467939376831
	},
	{
	"epoch": 0.07796234772978959,
	"importance_ratio": 1.52182936668396,
	"kl_div_avg": 0.014365588314831257,
	"learning_rate": 9.865104721334753e-07,
	"loss_func": "stage2",
	"step": 44,
	"total_loss": 2.1366288661956787
	},
	{
	"epoch": 0.0850498338870432,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.016806455329060555,
	"learning_rate": 9.861554845580404e-07,
	"loss_func": "stage2",
	"step": 45
	},
	{
	"epoch": 0.0850498338870432,
	"importance_ratio": 1.0074646472930908,
	"kl_div_avg": 0.017333338037133217,
	"learning_rate": 9.858004969826055e-07,
	"loss_func": "stage2",
	"step": 46
	},
	{
	"epoch": 0.0850498338870432,
	"importance_ratio": 1.1291639804840088,
	"kl_div_avg": 0.020245596766471863,
	"learning_rate": 9.854455094071708e-07,
	"loss_func": "stage2",
	"step": 47
	},
	{
	"epoch": 0.0850498338870432,
	"importance_ratio": 1.1358391046524048,
	"kl_div_avg": 0.020210057497024536,
	"learning_rate": 9.850905218317359e-07,
	"loss_func": "stage2",
	"step": 48
	},
	{
	"epoch": 0.09213732004429678,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.02534015104174614,
	"learning_rate": 9.84735534256301e-07,
	"loss_func": "stage2",
	"step": 49,
	"total_loss": 0.9980467557907104
	},
	{
	"epoch": 0.09213732004429678,
	"importance_ratio": 0.9623415470123291,
	"kl_div_avg": 0.019495680928230286,
	"learning_rate": 9.84380546680866e-07,
	"loss_func": "stage2",
	"step": 50,
	"total_loss": -1.5856661796569824
	},
	{
	"epoch": 0.09213732004429678,
	"importance_ratio": 0.8258026242256165,
	"kl_div_avg": 0.01926257833838463,
	"learning_rate": 9.840255591054313e-07,
	"loss_func": "stage2",
	"step": 51,
	"total_loss": 0.9310374855995178
	},
	{
	"epoch": 0.09213732004429678,
	"importance_ratio": 1.4061079025268555,
	"kl_div_avg": 0.023863907903432846,
	"learning_rate": 9.836705715299964e-07,
	"loss_func": "stage2",
	"step": 52,
	"total_loss": -0.44497591257095337
	},
	{
	"epoch": 0.09922480620155039,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.02894335240125656,
	"learning_rate": 9.833155839545615e-07,
	"loss_func": "stage2",
	"step": 53,
	"total_loss": 0.216145858168602
	},
	{
	"epoch": 0.09922480620155039,
	"importance_ratio": 1.144304633140564,
	"kl_div_avg": 0.023014899343252182,
	"learning_rate": 9.829605963791266e-07,
	"loss_func": "stage2",
	"step": 54,
	"total_loss": 0.273160457611084
	},
	{
	"epoch": 0.09922480620155039,
	"importance_ratio": 1.070732593536377,
	"kl_div_avg": 0.02296144887804985,
	"learning_rate": 9.82605608803692e-07,
	"loss_func": "stage2",
	"step": 55,
	"total_loss": 1.7041797637939453
	},
	{
	"epoch": 0.09922480620155039,
	"importance_ratio": 0.6545230150222778,
	"kl_div_avg": 0.029904816299676895,
	"learning_rate": 9.82250621228257e-07,
	"loss_func": "stage2",
	"step": 56,
	"total_loss": -0.8607988357543945
	},
	{
	"epoch": 0.10631229235880399,
	"importance_ratio": 0.9810570478439331,
	"kl_div_avg": 0.03316723555326462,
	"learning_rate": 9.81895633652822e-07,
	"loss_func": "stage2",
	"step": 57,
	"total_loss": 0.35031646490097046
	},
	{
	"epoch": 0.10631229235880399,
	"importance_ratio": 0.6538653373718262,
	"kl_div_avg": 0.03011605143547058,
	"learning_rate": 9.815406460773874e-07,
	"loss_func": "stage2",
	"step": 58,
	"total_loss": 0.20810076594352722
	},
	{
	"epoch": 0.10631229235880399,
	"importance_ratio": 0.4686840772628784,
	"kl_div_avg": 0.02438838593661785,
	"learning_rate": 9.811856585019522e-07,
	"loss_func": "stage2",
	"step": 59,
	"total_loss": -0.08729205280542374
	},
	{
	"epoch": 0.10631229235880399,
	"importance_ratio": 0.5834656357765198,
	"kl_div_avg": 0.021679764613509178,
	"learning_rate": 9.808306709265175e-07,
	"loss_func": "stage2",
	"step": 60,
	"total_loss": -0.21443292498588562
	},
	{
	"epoch": 0.11339977851605759,
	"importance_ratio": 0.9842519760131836,
	"kl_div_avg": 0.031047554686665535,
	"learning_rate": 9.804756833510826e-07,
	"loss_func": "stage2",
	"step": 61,
	"total_loss": 0.5077100992202759
	},
	{
	"epoch": 0.11339977851605759,
	"importance_ratio": 0.8212682008743286,
	"kl_div_avg": 0.02822922170162201,
	"learning_rate": 9.80120695775648e-07,
	"loss_func": "stage2",
	"step": 62,
	"total_loss": -0.017715616151690483
	},
	{
	"epoch": 0.11339977851605759,
	"importance_ratio": 0.6670711040496826,
	"kl_div_avg": 0.026936011388897896,
	"learning_rate": 9.79765708200213e-07,
	"loss_func": "stage2",
	"step": 63,
	"total_loss": 0.19557428359985352
	},
	{
	"epoch": 0.11339977851605759,
	"importance_ratio": 0.8946332931518555,
	"kl_div_avg": 0.027077743783593178,
	"learning_rate": 9.79410720624778e-07,
	"loss_func": "stage2",
	"step": 64,
	"total_loss": 0.0037591925356537104
	},
	{
	"epoch": 0.12048726467331118,
	"importance_ratio": 1.0073069334030151,
	"kl_div_avg": 0.04011579975485802,
	"learning_rate": 9.790557330493432e-07,
	"loss_func": "stage2",
	"step": 65,
	"total_loss": 0.23827587068080902
	},
	{
	"epoch": 0.12048726467331118,
	"importance_ratio": 0.9326987266540527,
	"kl_div_avg": 0.03654163330793381,
	"learning_rate": 9.787007454739083e-07,
	"loss_func": "stage2",
	"step": 66,
	"total_loss": -0.17129582166671753
	},
	{
	"epoch": 0.12048726467331118,
	"importance_ratio": 0.446792870759964,
	"kl_div_avg": 0.032430682331323624,
	"learning_rate": 9.783457578984736e-07,
	"loss_func": "stage2",
	"step": 67,
	"total_loss": 0.657434344291687
	},
	{
	"epoch": 0.12048726467331118,
	"importance_ratio": 0.7778047323226929,
	"kl_div_avg": 0.030584165826439857,
	"learning_rate": 9.779907703230386e-07,
	"loss_func": "stage2",
	"step": 68,
	"total_loss": -0.47304269671440125
	},
	{
	"epoch": 0.12757475083056477,
	"importance_ratio": 1.0042237043380737,
	"kl_div_avg": 0.029375018551945686,
	"learning_rate": 9.776357827476037e-07,
	"loss_func": "stage2",
	"step": 69,
	"total_loss": -0.3034500181674957
	},
	{
	"epoch": 0.12757475083056477,
	"importance_ratio": 0.9879562854766846,
	"kl_div_avg": 0.02754083275794983,
	"learning_rate": 9.772807951721688e-07,
	"loss_func": "stage2",
	"step": 70,
	"total_loss": 0.734062910079956
	},
	{
	"epoch": 0.12757475083056477,
	"importance_ratio": 0.9155079126358032,
	"kl_div_avg": 0.038585443049669266,
	"learning_rate": 9.769258075967341e-07,
	"loss_func": "stage2",
	"step": 71,
	"total_loss": -0.24773724377155304
	},
	{
	"epoch": 0.12757475083056477,
	"importance_ratio": 0.18012166023254395,
	"kl_div_avg": 0.029818303883075714,
	"learning_rate": 9.765708200212992e-07,
	"loss_func": "stage2",
	"step": 72,
	"total_loss": 0.18523141741752625
	},
	{
	"epoch": 0.13466223698781837,
	"importance_ratio": 1.011343002319336,
	"kl_div_avg": 0.03788266330957413,
	"learning_rate": 9.762158324458643e-07,
	"loss_func": "stage2",
	"step": 73,
	"total_loss": 1.4373860359191895
	},
	{
	"epoch": 0.13466223698781837,
	"importance_ratio": 0.22852367162704468,
	"kl_div_avg": 0.03607971966266632,
	"learning_rate": 9.758608448704296e-07,
	"loss_func": "stage2",
	"step": 74,
	"total_loss": 0.4375
	},
	{
	"epoch": 0.13466223698781837,
	"importance_ratio": 0.3465356230735779,
	"kl_div_avg": 0.04296875,
	"learning_rate": 9.755058572949947e-07,
	"loss_func": "stage2",
	"step": 75,
	"total_loss": -0.32148972153663635
	},
	{
	"epoch": 0.13466223698781837,
	"importance_ratio": 0.5801489353179932,
	"kl_div_avg": 0.03706703335046768,
	"learning_rate": 9.751508697195598e-07,
	"loss_func": "stage2",
	"step": 76,
	"total_loss": 0.02452419139444828
	},
	{
	"epoch": 0.14174972314507198,
	"importance_ratio": 1.0018153190612793,
	"kl_div_avg": 0.03378720581531525,
	"learning_rate": 9.747958821441248e-07,
	"loss_func": "stage2",
	"step": 77,
	"total_loss": 0.22162221372127533
	},
	{
	"epoch": 0.14174972314507198,
	"importance_ratio": 0.3109496533870697,
	"kl_div_avg": 0.03676421567797661,
	"learning_rate": 9.744408945686901e-07,
	"loss_func": "stage2",
	"step": 78,
	"total_loss": 0.010569405741989613
	},
	{
	"epoch": 0.14174972314507198,
	"importance_ratio": 0.2784914970397949,
	"kl_div_avg": 0.030661292374134064,
	"learning_rate": 9.740859069932552e-07,
	"loss_func": "stage2",
	"step": 79,
	"total_loss": 0.104059599339962
	},
	{
	"epoch": 0.14174972314507198,
	"importance_ratio": 0.2472458779811859,
	"kl_div_avg": 0.03948213905096054,
	"learning_rate": 9.737309194178203e-07,
	"loss_func": "stage2",
	"step": 80,
	"total_loss": 0.031388457864522934
	},
	{
	"epoch": 0.14883720930232558,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.04266459494829178,
	"learning_rate": 9.733759318423854e-07,
	"loss_func": "stage2",
	"step": 81,
	"total_loss": 0.7539063096046448
	},
	{
	"epoch": 0.14883720930232558,
	"importance_ratio": 0.49731332063674927,
	"kl_div_avg": 0.038056716322898865,
	"learning_rate": 9.730209442669507e-07,
	"loss_func": "stage2",
	"step": 82,
	"total_loss": -0.053492218255996704
	},
	{
	"epoch": 0.14883720930232558,
	"importance_ratio": 1.0697609186172485,
	"kl_div_avg": 0.032977428287267685,
	"learning_rate": 9.726659566915158e-07,
	"loss_func": "stage2",
	"step": 83,
	"total_loss": -1.6140198707580566
	},
	{
	"epoch": 0.14883720930232558,
	"importance_ratio": 1.6595419645309448,
	"kl_div_avg": 0.03368090093135834,
	"learning_rate": 9.723109691160809e-07,
	"loss_func": "stage2",
	"step": 84,
	"total_loss": 6.782899379730225
	},
	{
	"epoch": 0.15592469545957918,
	"importance_ratio": 1.0058972835540771,
	"kl_div_avg": 0.03366704285144806,
	"learning_rate": 9.719559815406462e-07,
	"loss_func": "stage2",
	"step": 85,
	"total_loss": 0.2537820637226105
	},
	{
	"epoch": 0.15592469545957918,
	"importance_ratio": 0.47804346680641174,
	"kl_div_avg": 0.03480301797389984,
	"learning_rate": 9.716009939652112e-07,
	"loss_func": "stage2",
	"step": 86,
	"total_loss": -0.41820311546325684
	},
	{
	"epoch": 0.15592469545957918,
	"importance_ratio": 0.21213342249393463,
	"kl_div_avg": 0.0428822860121727,
	"learning_rate": 9.712460063897763e-07,
	"loss_func": "stage2",
	"step": 87,
	"total_loss": 0.922656238079071
	},
	{
	"epoch": 0.15592469545957918,
	"importance_ratio": 0.44658419489860535,
	"kl_div_avg": 0.04085192829370499,
	"learning_rate": 9.708910188143414e-07,
	"loss_func": "stage2",
	"step": 88,
	"total_loss": -0.24729368090629578
	},
	{
	"epoch": 0.16301218161683279,
	"importance_ratio": 1.0,
	"kl_div_avg": 0.047017261385917664,
	"learning_rate": 9.705360312389065e-07,
	"loss_func": "stage2",
	"step": 89,
	"total_loss": -0.4453124701976776
	},
	{
	"epoch": 0.16301218161683279,
	"importance_ratio": 0.7418298721313477,
	"kl_div_avg": 0.04112287610769272,
	"learning_rate": 9.701810436634718e-07,
	"loss_func": "stage2",
	"step": 90,
	"total_loss": 1.0582090616226196
	},
	{
	"epoch": 0.16301218161683279,
	"importance_ratio": 2.016894817352295,
	"kl_div_avg": 0.0498749241232872,
	"learning_rate": 9.698260560880369e-07,
	"loss_func": "stage2",
	"step": 91,
	"total_loss": -0.4754679799079895
	},
	{
	"epoch": 0.16301218161683279,
	"importance_ratio": 1.4249356985092163,
	"kl_div_avg": 0.04099587723612785,
	"learning_rate": 9.69471068512602e-07,
	"loss_func": "stage2",
	"step": 92,
	"total_loss": 1.3416054248809814
	}
	],
	"logging_steps": 1.0,
	"max_steps": 2822,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 20.0,
	"save_steps": 50,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": false,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 0,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}