Llama-3.1-8B-S2R-ORL / trainer_state.json
S2R-data's picture
Upload folder using huggingface_hub
55a6de3 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.16301218161683279,
"eval_steps": 500,
"global_step": 92,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0070874861572535995,
"importance_ratio": 0.9994602799415588,
"kl_div_avg": 2.587483777460875e-06,
"learning_rate": 0.0,
"loss_func": "stage2",
"step": 1,
"total_loss": 0.8759250640869141
},
{
"epoch": 0.0070874861572535995,
"importance_ratio": 0.8117017149925232,
"kl_div_avg": 7.403145332318672e-07,
"learning_rate": 4.30676558073393e-07,
"loss_func": "stage2",
"step": 2,
"total_loss": -0.3727513253688812
},
{
"epoch": 0.0070874861572535995,
"importance_ratio": 0.9883006811141968,
"kl_div_avg": 2.3012535166344605e-05,
"learning_rate": 6.826061944859853e-07,
"loss_func": "stage2",
"step": 3,
"total_loss": -0.6280329823493958
},
{
"epoch": 0.0070874861572535995,
"importance_ratio": 0.5251931548118591,
"kl_div_avg": 4.927766440232517e-06,
"learning_rate": 8.61353116146786e-07,
"loss_func": "stage2",
"step": 4,
"total_loss": 0.0813111662864685
},
{
"epoch": 0.014174972314507199,
"importance_ratio": 0.992071807384491,
"kl_div_avg": 0.0007429516408592463,
"learning_rate": 1e-06,
"loss_func": "stage2",
"step": 5,
"total_loss": 0.46373996138572693
},
{
"epoch": 0.014174972314507199,
"importance_ratio": 0.702412486076355,
"kl_div_avg": 0.000552409328520298,
"learning_rate": 1e-06,
"loss_func": "stage2",
"step": 6,
"total_loss": -0.48433077335357666
},
{
"epoch": 0.014174972314507199,
"importance_ratio": 0.6851906180381775,
"kl_div_avg": 0.00044042925583198667,
"learning_rate": 9.99645012424565e-07,
"loss_func": "stage2",
"step": 7,
"total_loss": 0.005272107198834419
},
{
"epoch": 0.014174972314507199,
"importance_ratio": 1.2104086875915527,
"kl_div_avg": 0.0008309759432449937,
"learning_rate": 9.992900248491303e-07,
"loss_func": "stage2",
"step": 8,
"total_loss": 1.175075650215149
},
{
"epoch": 0.0212624584717608,
"importance_ratio": 1.0,
"kl_div_avg": 0.0013515137834474444,
"learning_rate": 9.989350372736954e-07,
"loss_func": "stage2",
"step": 9,
"total_loss": -0.1796875
},
{
"epoch": 0.0212624584717608,
"importance_ratio": 0.8567007780075073,
"kl_div_avg": 0.0016049568075686693,
"learning_rate": 9.985800496982605e-07,
"loss_func": "stage2",
"step": 10,
"total_loss": -0.2918521463871002
},
{
"epoch": 0.0212624584717608,
"importance_ratio": 1.2928454875946045,
"kl_div_avg": 0.001749544171616435,
"learning_rate": 9.982250621228256e-07,
"loss_func": "stage2",
"step": 11,
"total_loss": -1.0593750476837158
},
{
"epoch": 0.0212624584717608,
"importance_ratio": 1.1015655994415283,
"kl_div_avg": 0.0019894526340067387,
"learning_rate": 9.978700745473909e-07,
"loss_func": "stage2",
"step": 12,
"total_loss": 0.5328124761581421
},
{
"epoch": 0.028349944629014398,
"importance_ratio": 1.01776921749115,
"kl_div_avg": 0.003757305908948183,
"learning_rate": 9.97515086971956e-07,
"loss_func": "stage2",
"step": 13,
"total_loss": -0.10959765315055847
},
{
"epoch": 0.028349944629014398,
"importance_ratio": 0.5146582126617432,
"kl_div_avg": 0.002980178687721491,
"learning_rate": 9.97160099396521e-07,
"loss_func": "stage2",
"step": 14,
"total_loss": -0.4417858421802521
},
{
"epoch": 0.028349944629014398,
"importance_ratio": 0.2671862244606018,
"kl_div_avg": 0.0029413679149001837,
"learning_rate": 9.968051118210861e-07,
"loss_func": "stage2",
"step": 15,
"total_loss": 0.20184588432312012
},
{
"epoch": 0.028349944629014398,
"importance_ratio": 0.2749362587928772,
"kl_div_avg": 0.0027144472114741802,
"learning_rate": 9.964501242456512e-07,
"loss_func": "stage2",
"step": 16,
"total_loss": 0.2587171494960785
},
{
"epoch": 0.035437430786267994,
"importance_ratio": 0.9978752136230469,
"kl_div_avg": 0.005673160310834646,
"learning_rate": 9.960951366702165e-07,
"loss_func": "stage2",
"step": 17,
"total_loss": -0.29379865527153015
},
{
"epoch": 0.035437430786267994,
"importance_ratio": 0.6526201367378235,
"kl_div_avg": 0.00475339125841856,
"learning_rate": 9.957401490947816e-07,
"loss_func": "stage2",
"step": 18,
"total_loss": -0.2138231247663498
},
{
"epoch": 0.035437430786267994,
"importance_ratio": 0.7303690314292908,
"kl_div_avg": 0.006084108259528875,
"learning_rate": 9.95385161519347e-07,
"loss_func": "stage2",
"step": 19,
"total_loss": 0.36607834696769714
},
{
"epoch": 0.035437430786267994,
"importance_ratio": 0.9157307147979736,
"kl_div_avg": 0.005460726097226143,
"learning_rate": 9.950301739439118e-07,
"loss_func": "stage2",
"step": 20,
"total_loss": 0.2832968533039093
},
{
"epoch": 0.0425249169435216,
"importance_ratio": 0.9975234270095825,
"kl_div_avg": 0.007166164927184582,
"learning_rate": 9.94675186368477e-07,
"loss_func": "stage2",
"step": 21,
"total_loss": -0.15122467279434204
},
{
"epoch": 0.0425249169435216,
"importance_ratio": 0.4565725028514862,
"kl_div_avg": 0.006681992672383785,
"learning_rate": 9.943201987930422e-07,
"loss_func": "stage2",
"step": 22,
"total_loss": 1.452988862991333
},
{
"epoch": 0.0425249169435216,
"importance_ratio": 0.3932248651981354,
"kl_div_avg": 0.0055625829845666885,
"learning_rate": 9.939652112176073e-07,
"loss_func": "stage2",
"step": 23,
"total_loss": -0.06093749403953552
},
{
"epoch": 0.0425249169435216,
"importance_ratio": 0.018905332311987877,
"kl_div_avg": 0.007485650479793549,
"learning_rate": 9.936102236421726e-07,
"loss_func": "stage2",
"step": 24,
"total_loss": -0.0018584587378427386
},
{
"epoch": 0.04961240310077519,
"importance_ratio": 1.004979133605957,
"kl_div_avg": 0.0073785921558737755,
"learning_rate": 9.932552360667376e-07,
"loss_func": "stage2",
"step": 25,
"total_loss": -0.4546505808830261
},
{
"epoch": 0.04961240310077519,
"importance_ratio": 0.4345252811908722,
"kl_div_avg": 0.011518791317939758,
"learning_rate": 9.929002484913027e-07,
"loss_func": "stage2",
"step": 26,
"total_loss": 0.17052176594734192
},
{
"epoch": 0.04961240310077519,
"importance_ratio": 0.9074916243553162,
"kl_div_avg": 0.007095410488545895,
"learning_rate": 9.925452609158678e-07,
"loss_func": "stage2",
"step": 27,
"total_loss": -0.1952303647994995
},
{
"epoch": 0.04961240310077519,
"importance_ratio": 0.3815934360027313,
"kl_div_avg": 0.00811639055609703,
"learning_rate": 9.921902733404331e-07,
"loss_func": "stage2",
"step": 28,
"total_loss": 0.1002814769744873
},
{
"epoch": 0.056699889258028796,
"importance_ratio": 0.999740719795227,
"kl_div_avg": 0.011646868661046028,
"learning_rate": 9.918352857649982e-07,
"loss_func": "stage2",
"step": 29,
"total_loss": -0.0001066727563738823
},
{
"epoch": 0.056699889258028796,
"importance_ratio": 1.0092053413391113,
"kl_div_avg": 0.013259533792734146,
"learning_rate": 9.914802981895633e-07,
"loss_func": "stage2",
"step": 30,
"total_loss": 1.2793704271316528
},
{
"epoch": 0.056699889258028796,
"importance_ratio": 1.6615486145019531,
"kl_div_avg": 0.014400625601410866,
"learning_rate": 9.911253106141284e-07,
"loss_func": "stage2",
"step": 31,
"total_loss": -0.41830766201019287
},
{
"epoch": 0.056699889258028796,
"importance_ratio": 1.7373325824737549,
"kl_div_avg": 0.014305144548416138,
"learning_rate": 9.907703230386937e-07,
"loss_func": "stage2",
"step": 32,
"total_loss": -0.23473186790943146
},
{
"epoch": 0.06378737541528239,
"importance_ratio": 0.9925450086593628,
"kl_div_avg": 0.018568212166428566,
"learning_rate": 9.904153354632587e-07,
"loss_func": "stage2",
"step": 33,
"total_loss": -0.7624663710594177
},
{
"epoch": 0.06378737541528239,
"importance_ratio": 0.5851880311965942,
"kl_div_avg": 0.01448909379541874,
"learning_rate": 9.900603478878238e-07,
"loss_func": "stage2",
"step": 34,
"total_loss": -0.40603700280189514
},
{
"epoch": 0.06378737541528239,
"importance_ratio": 0.7805031538009644,
"kl_div_avg": 0.012439063750207424,
"learning_rate": 9.897053603123891e-07,
"loss_func": "stage2",
"step": 35,
"total_loss": 0.022615084424614906
},
{
"epoch": 0.06378737541528239,
"importance_ratio": 0.3514450490474701,
"kl_div_avg": 0.00605101278051734,
"learning_rate": 9.893503727369542e-07,
"loss_func": "stage2",
"step": 36,
"total_loss": 1.1687499284744263
},
{
"epoch": 0.07087486157253599,
"importance_ratio": 1.0,
"kl_div_avg": 0.02013654261827469,
"learning_rate": 9.889953851615193e-07,
"loss_func": "stage2",
"step": 37,
"total_loss": 0.6367186903953552
},
{
"epoch": 0.07087486157253599,
"importance_ratio": 0.5006582140922546,
"kl_div_avg": 0.02115897834300995,
"learning_rate": 9.886403975860844e-07,
"loss_func": "stage2",
"step": 38,
"total_loss": 0.5093749761581421
},
{
"epoch": 0.07087486157253599,
"importance_ratio": 0.7131205797195435,
"kl_div_avg": 0.01793592795729637,
"learning_rate": 9.882854100106497e-07,
"loss_func": "stage2",
"step": 39,
"total_loss": -0.2651502192020416
},
{
"epoch": 0.07087486157253599,
"importance_ratio": 1.344174861907959,
"kl_div_avg": 0.017681246623396873,
"learning_rate": 9.879304224352148e-07,
"loss_func": "stage2",
"step": 40,
"total_loss": -1.0468751192092896
},
{
"epoch": 0.07796234772978959,
"importance_ratio": 1.0,
"kl_div_avg": 0.018570249900221825,
"learning_rate": 9.875754348597799e-07,
"loss_func": "stage2",
"step": 41,
"total_loss": 0.2343750298023224
},
{
"epoch": 0.07796234772978959,
"importance_ratio": 0.5109913349151611,
"kl_div_avg": 0.021320415660738945,
"learning_rate": 9.87220447284345e-07,
"loss_func": "stage2",
"step": 42,
"total_loss": 0.1827033907175064
},
{
"epoch": 0.07796234772978959,
"importance_ratio": 0.7171033620834351,
"kl_div_avg": 0.02094135992228985,
"learning_rate": 9.8686545970891e-07,
"loss_func": "stage2",
"step": 43,
"total_loss": -0.2705467939376831
},
{
"epoch": 0.07796234772978959,
"importance_ratio": 1.52182936668396,
"kl_div_avg": 0.014365588314831257,
"learning_rate": 9.865104721334753e-07,
"loss_func": "stage2",
"step": 44,
"total_loss": 2.1366288661956787
},
{
"epoch": 0.0850498338870432,
"importance_ratio": 1.0,
"kl_div_avg": 0.016806455329060555,
"learning_rate": 9.861554845580404e-07,
"loss_func": "stage2",
"step": 45
},
{
"epoch": 0.0850498338870432,
"importance_ratio": 1.0074646472930908,
"kl_div_avg": 0.017333338037133217,
"learning_rate": 9.858004969826055e-07,
"loss_func": "stage2",
"step": 46
},
{
"epoch": 0.0850498338870432,
"importance_ratio": 1.1291639804840088,
"kl_div_avg": 0.020245596766471863,
"learning_rate": 9.854455094071708e-07,
"loss_func": "stage2",
"step": 47
},
{
"epoch": 0.0850498338870432,
"importance_ratio": 1.1358391046524048,
"kl_div_avg": 0.020210057497024536,
"learning_rate": 9.850905218317359e-07,
"loss_func": "stage2",
"step": 48
},
{
"epoch": 0.09213732004429678,
"importance_ratio": 1.0,
"kl_div_avg": 0.02534015104174614,
"learning_rate": 9.84735534256301e-07,
"loss_func": "stage2",
"step": 49,
"total_loss": 0.9980467557907104
},
{
"epoch": 0.09213732004429678,
"importance_ratio": 0.9623415470123291,
"kl_div_avg": 0.019495680928230286,
"learning_rate": 9.84380546680866e-07,
"loss_func": "stage2",
"step": 50,
"total_loss": -1.5856661796569824
},
{
"epoch": 0.09213732004429678,
"importance_ratio": 0.8258026242256165,
"kl_div_avg": 0.01926257833838463,
"learning_rate": 9.840255591054313e-07,
"loss_func": "stage2",
"step": 51,
"total_loss": 0.9310374855995178
},
{
"epoch": 0.09213732004429678,
"importance_ratio": 1.4061079025268555,
"kl_div_avg": 0.023863907903432846,
"learning_rate": 9.836705715299964e-07,
"loss_func": "stage2",
"step": 52,
"total_loss": -0.44497591257095337
},
{
"epoch": 0.09922480620155039,
"importance_ratio": 1.0,
"kl_div_avg": 0.02894335240125656,
"learning_rate": 9.833155839545615e-07,
"loss_func": "stage2",
"step": 53,
"total_loss": 0.216145858168602
},
{
"epoch": 0.09922480620155039,
"importance_ratio": 1.144304633140564,
"kl_div_avg": 0.023014899343252182,
"learning_rate": 9.829605963791266e-07,
"loss_func": "stage2",
"step": 54,
"total_loss": 0.273160457611084
},
{
"epoch": 0.09922480620155039,
"importance_ratio": 1.070732593536377,
"kl_div_avg": 0.02296144887804985,
"learning_rate": 9.82605608803692e-07,
"loss_func": "stage2",
"step": 55,
"total_loss": 1.7041797637939453
},
{
"epoch": 0.09922480620155039,
"importance_ratio": 0.6545230150222778,
"kl_div_avg": 0.029904816299676895,
"learning_rate": 9.82250621228257e-07,
"loss_func": "stage2",
"step": 56,
"total_loss": -0.8607988357543945
},
{
"epoch": 0.10631229235880399,
"importance_ratio": 0.9810570478439331,
"kl_div_avg": 0.03316723555326462,
"learning_rate": 9.81895633652822e-07,
"loss_func": "stage2",
"step": 57,
"total_loss": 0.35031646490097046
},
{
"epoch": 0.10631229235880399,
"importance_ratio": 0.6538653373718262,
"kl_div_avg": 0.03011605143547058,
"learning_rate": 9.815406460773874e-07,
"loss_func": "stage2",
"step": 58,
"total_loss": 0.20810076594352722
},
{
"epoch": 0.10631229235880399,
"importance_ratio": 0.4686840772628784,
"kl_div_avg": 0.02438838593661785,
"learning_rate": 9.811856585019522e-07,
"loss_func": "stage2",
"step": 59,
"total_loss": -0.08729205280542374
},
{
"epoch": 0.10631229235880399,
"importance_ratio": 0.5834656357765198,
"kl_div_avg": 0.021679764613509178,
"learning_rate": 9.808306709265175e-07,
"loss_func": "stage2",
"step": 60,
"total_loss": -0.21443292498588562
},
{
"epoch": 0.11339977851605759,
"importance_ratio": 0.9842519760131836,
"kl_div_avg": 0.031047554686665535,
"learning_rate": 9.804756833510826e-07,
"loss_func": "stage2",
"step": 61,
"total_loss": 0.5077100992202759
},
{
"epoch": 0.11339977851605759,
"importance_ratio": 0.8212682008743286,
"kl_div_avg": 0.02822922170162201,
"learning_rate": 9.80120695775648e-07,
"loss_func": "stage2",
"step": 62,
"total_loss": -0.017715616151690483
},
{
"epoch": 0.11339977851605759,
"importance_ratio": 0.6670711040496826,
"kl_div_avg": 0.026936011388897896,
"learning_rate": 9.79765708200213e-07,
"loss_func": "stage2",
"step": 63,
"total_loss": 0.19557428359985352
},
{
"epoch": 0.11339977851605759,
"importance_ratio": 0.8946332931518555,
"kl_div_avg": 0.027077743783593178,
"learning_rate": 9.79410720624778e-07,
"loss_func": "stage2",
"step": 64,
"total_loss": 0.0037591925356537104
},
{
"epoch": 0.12048726467331118,
"importance_ratio": 1.0073069334030151,
"kl_div_avg": 0.04011579975485802,
"learning_rate": 9.790557330493432e-07,
"loss_func": "stage2",
"step": 65,
"total_loss": 0.23827587068080902
},
{
"epoch": 0.12048726467331118,
"importance_ratio": 0.9326987266540527,
"kl_div_avg": 0.03654163330793381,
"learning_rate": 9.787007454739083e-07,
"loss_func": "stage2",
"step": 66,
"total_loss": -0.17129582166671753
},
{
"epoch": 0.12048726467331118,
"importance_ratio": 0.446792870759964,
"kl_div_avg": 0.032430682331323624,
"learning_rate": 9.783457578984736e-07,
"loss_func": "stage2",
"step": 67,
"total_loss": 0.657434344291687
},
{
"epoch": 0.12048726467331118,
"importance_ratio": 0.7778047323226929,
"kl_div_avg": 0.030584165826439857,
"learning_rate": 9.779907703230386e-07,
"loss_func": "stage2",
"step": 68,
"total_loss": -0.47304269671440125
},
{
"epoch": 0.12757475083056477,
"importance_ratio": 1.0042237043380737,
"kl_div_avg": 0.029375018551945686,
"learning_rate": 9.776357827476037e-07,
"loss_func": "stage2",
"step": 69,
"total_loss": -0.3034500181674957
},
{
"epoch": 0.12757475083056477,
"importance_ratio": 0.9879562854766846,
"kl_div_avg": 0.02754083275794983,
"learning_rate": 9.772807951721688e-07,
"loss_func": "stage2",
"step": 70,
"total_loss": 0.734062910079956
},
{
"epoch": 0.12757475083056477,
"importance_ratio": 0.9155079126358032,
"kl_div_avg": 0.038585443049669266,
"learning_rate": 9.769258075967341e-07,
"loss_func": "stage2",
"step": 71,
"total_loss": -0.24773724377155304
},
{
"epoch": 0.12757475083056477,
"importance_ratio": 0.18012166023254395,
"kl_div_avg": 0.029818303883075714,
"learning_rate": 9.765708200212992e-07,
"loss_func": "stage2",
"step": 72,
"total_loss": 0.18523141741752625
},
{
"epoch": 0.13466223698781837,
"importance_ratio": 1.011343002319336,
"kl_div_avg": 0.03788266330957413,
"learning_rate": 9.762158324458643e-07,
"loss_func": "stage2",
"step": 73,
"total_loss": 1.4373860359191895
},
{
"epoch": 0.13466223698781837,
"importance_ratio": 0.22852367162704468,
"kl_div_avg": 0.03607971966266632,
"learning_rate": 9.758608448704296e-07,
"loss_func": "stage2",
"step": 74,
"total_loss": 0.4375
},
{
"epoch": 0.13466223698781837,
"importance_ratio": 0.3465356230735779,
"kl_div_avg": 0.04296875,
"learning_rate": 9.755058572949947e-07,
"loss_func": "stage2",
"step": 75,
"total_loss": -0.32148972153663635
},
{
"epoch": 0.13466223698781837,
"importance_ratio": 0.5801489353179932,
"kl_div_avg": 0.03706703335046768,
"learning_rate": 9.751508697195598e-07,
"loss_func": "stage2",
"step": 76,
"total_loss": 0.02452419139444828
},
{
"epoch": 0.14174972314507198,
"importance_ratio": 1.0018153190612793,
"kl_div_avg": 0.03378720581531525,
"learning_rate": 9.747958821441248e-07,
"loss_func": "stage2",
"step": 77,
"total_loss": 0.22162221372127533
},
{
"epoch": 0.14174972314507198,
"importance_ratio": 0.3109496533870697,
"kl_div_avg": 0.03676421567797661,
"learning_rate": 9.744408945686901e-07,
"loss_func": "stage2",
"step": 78,
"total_loss": 0.010569405741989613
},
{
"epoch": 0.14174972314507198,
"importance_ratio": 0.2784914970397949,
"kl_div_avg": 0.030661292374134064,
"learning_rate": 9.740859069932552e-07,
"loss_func": "stage2",
"step": 79,
"total_loss": 0.104059599339962
},
{
"epoch": 0.14174972314507198,
"importance_ratio": 0.2472458779811859,
"kl_div_avg": 0.03948213905096054,
"learning_rate": 9.737309194178203e-07,
"loss_func": "stage2",
"step": 80,
"total_loss": 0.031388457864522934
},
{
"epoch": 0.14883720930232558,
"importance_ratio": 1.0,
"kl_div_avg": 0.04266459494829178,
"learning_rate": 9.733759318423854e-07,
"loss_func": "stage2",
"step": 81,
"total_loss": 0.7539063096046448
},
{
"epoch": 0.14883720930232558,
"importance_ratio": 0.49731332063674927,
"kl_div_avg": 0.038056716322898865,
"learning_rate": 9.730209442669507e-07,
"loss_func": "stage2",
"step": 82,
"total_loss": -0.053492218255996704
},
{
"epoch": 0.14883720930232558,
"importance_ratio": 1.0697609186172485,
"kl_div_avg": 0.032977428287267685,
"learning_rate": 9.726659566915158e-07,
"loss_func": "stage2",
"step": 83,
"total_loss": -1.6140198707580566
},
{
"epoch": 0.14883720930232558,
"importance_ratio": 1.6595419645309448,
"kl_div_avg": 0.03368090093135834,
"learning_rate": 9.723109691160809e-07,
"loss_func": "stage2",
"step": 84,
"total_loss": 6.782899379730225
},
{
"epoch": 0.15592469545957918,
"importance_ratio": 1.0058972835540771,
"kl_div_avg": 0.03366704285144806,
"learning_rate": 9.719559815406462e-07,
"loss_func": "stage2",
"step": 85,
"total_loss": 0.2537820637226105
},
{
"epoch": 0.15592469545957918,
"importance_ratio": 0.47804346680641174,
"kl_div_avg": 0.03480301797389984,
"learning_rate": 9.716009939652112e-07,
"loss_func": "stage2",
"step": 86,
"total_loss": -0.41820311546325684
},
{
"epoch": 0.15592469545957918,
"importance_ratio": 0.21213342249393463,
"kl_div_avg": 0.0428822860121727,
"learning_rate": 9.712460063897763e-07,
"loss_func": "stage2",
"step": 87,
"total_loss": 0.922656238079071
},
{
"epoch": 0.15592469545957918,
"importance_ratio": 0.44658419489860535,
"kl_div_avg": 0.04085192829370499,
"learning_rate": 9.708910188143414e-07,
"loss_func": "stage2",
"step": 88,
"total_loss": -0.24729368090629578
},
{
"epoch": 0.16301218161683279,
"importance_ratio": 1.0,
"kl_div_avg": 0.047017261385917664,
"learning_rate": 9.705360312389065e-07,
"loss_func": "stage2",
"step": 89,
"total_loss": -0.4453124701976776
},
{
"epoch": 0.16301218161683279,
"importance_ratio": 0.7418298721313477,
"kl_div_avg": 0.04112287610769272,
"learning_rate": 9.701810436634718e-07,
"loss_func": "stage2",
"step": 90,
"total_loss": 1.0582090616226196
},
{
"epoch": 0.16301218161683279,
"importance_ratio": 2.016894817352295,
"kl_div_avg": 0.0498749241232872,
"learning_rate": 9.698260560880369e-07,
"loss_func": "stage2",
"step": 91,
"total_loss": -0.4754679799079895
},
{
"epoch": 0.16301218161683279,
"importance_ratio": 1.4249356985092163,
"kl_div_avg": 0.04099587723612785,
"learning_rate": 9.69471068512602e-07,
"loss_func": "stage2",
"step": 92,
"total_loss": 1.3416054248809814
}
],
"logging_steps": 1.0,
"max_steps": 2822,
"num_input_tokens_seen": 0,
"num_train_epochs": 20.0,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}