{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 2000,
"global_step": 264,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011363636363636364,
"grad_norm": 5.703509592126629,
"kl": 0.0,
"learning_rate": 5.000000000000001e-07,
"loss": 0.5688,
"step": 1,
"step_loss": 0.5533416867256165
},
{
"epoch": 0.022727272727272728,
"grad_norm": 2.414278384928731,
"kl": 0.1508263200521469,
"learning_rate": 2.438044511330269e-06,
"loss": 0.4403,
"step": 2,
"step_loss": 0.5107974410057068
},
{
"epoch": 0.03409090909090909,
"grad_norm": 2.5451897972697393,
"kl": 0.19495707750320435,
"learning_rate": 3.5717278751869343e-06,
"loss": 0.4802,
"step": 3,
"step_loss": 0.5942587852478027
},
{
"epoch": 0.045454545454545456,
"grad_norm": 1.362435064605772,
"kl": 0.14722466468811035,
"learning_rate": 4.376089022660538e-06,
"loss": 0.4463,
"step": 4,
"step_loss": 0.460592657327652
},
{
"epoch": 0.056818181818181816,
"grad_norm": 1.1743592442810231,
"kl": 0.12074092030525208,
"learning_rate": 5e-06,
"loss": 0.4001,
"step": 5,
"step_loss": 0.39433369040489197
},
{
"epoch": 0.06818181818181818,
"grad_norm": 1.2999424793282481,
"kl": 0.1568446159362793,
"learning_rate": 4.999941322464311e-06,
"loss": 0.4067,
"step": 6,
"step_loss": 0.356352835893631
},
{
"epoch": 0.07954545454545454,
"grad_norm": 1.160541252191944,
"kl": 0.23409639298915863,
"learning_rate": 4.999765292917736e-06,
"loss": 0.3998,
"step": 7,
"step_loss": 0.4068807065486908
},
{
"epoch": 0.09090909090909091,
"grad_norm": 1.227986620612679,
"kl": 0.24431678652763367,
"learning_rate": 4.999471920541589e-06,
"loss": 0.368,
"step": 8,
"step_loss": 0.41308918595314026
},
{
"epoch": 0.10227272727272728,
"grad_norm": 1.118558082955084,
"kl": 0.17894110083580017,
"learning_rate": 4.999061220637533e-06,
"loss": 0.3714,
"step": 9,
"step_loss": 0.2875981032848358
},
{
"epoch": 0.11363636363636363,
"grad_norm": 1.044241767662719,
"kl": 0.2728317379951477,
"learning_rate": 4.998533214626773e-06,
"loss": 0.3746,
"step": 10,
"step_loss": 0.381584107875824
},
{
"epoch": 0.125,
"grad_norm": 1.012833206073384,
"kl": 0.24114784598350525,
"learning_rate": 4.9978879300489484e-06,
"loss": 0.3425,
"step": 11,
"step_loss": 0.3758848309516907
},
{
"epoch": 0.13636363636363635,
"grad_norm": 0.9561570826746042,
"kl": 0.24886220693588257,
"learning_rate": 4.9971254005606865e-06,
"loss": 0.3832,
"step": 12,
"step_loss": 0.40528154373168945
},
{
"epoch": 0.14772727272727273,
"grad_norm": 0.884172522884705,
"kl": 0.23464588820934296,
"learning_rate": 4.996245665933857e-06,
"loss": 0.3678,
"step": 13,
"step_loss": 0.3059731721878052
},
{
"epoch": 0.1590909090909091,
"grad_norm": 1.0353201682385085,
"kl": 0.1602363884449005,
"learning_rate": 4.9952487720534905e-06,
"loss": 0.3777,
"step": 14,
"step_loss": 0.3509451448917389
},
{
"epoch": 0.17045454545454544,
"grad_norm": 0.8535378320105195,
"kl": 0.2152172029018402,
"learning_rate": 4.9941347709153875e-06,
"loss": 0.3539,
"step": 15,
"step_loss": 0.36119481921195984
},
{
"epoch": 0.18181818181818182,
"grad_norm": 1.0118408129491079,
"kl": 0.2640606164932251,
"learning_rate": 4.992903720623408e-06,
"loss": 0.3701,
"step": 16,
"step_loss": 0.43707239627838135
},
{
"epoch": 0.19318181818181818,
"grad_norm": 0.9252651401652947,
"kl": 0.24001294374465942,
"learning_rate": 4.991555685386438e-06,
"loss": 0.3464,
"step": 17,
"step_loss": 0.39861610531806946
},
{
"epoch": 0.20454545454545456,
"grad_norm": 0.9546545294579731,
"kl": 0.2848950922489166,
"learning_rate": 4.990090735515043e-06,
"loss": 0.3729,
"step": 18,
"step_loss": 0.5053370594978333
},
{
"epoch": 0.2159090909090909,
"grad_norm": 0.8545398633463176,
"kl": 0.161998450756073,
"learning_rate": 4.988508947417799e-06,
"loss": 0.359,
"step": 19,
"step_loss": 0.3288613259792328
},
{
"epoch": 0.22727272727272727,
"grad_norm": 0.8025839263192421,
"kl": 0.2056322991847992,
"learning_rate": 4.98681040359731e-06,
"loss": 0.3439,
"step": 20,
"step_loss": 0.35805100202560425
},
{
"epoch": 0.23863636363636365,
"grad_norm": 0.8744693236774967,
"kl": 0.2929481267929077,
"learning_rate": 4.984995192645897e-06,
"loss": 0.3792,
"step": 21,
"step_loss": 0.45748788118362427
},
{
"epoch": 0.25,
"grad_norm": 0.8729052332734021,
"kl": 0.2217060774564743,
"learning_rate": 4.983063409240992e-06,
"loss": 0.3505,
"step": 22,
"step_loss": 0.35472381114959717
},
{
"epoch": 0.26136363636363635,
"grad_norm": 0.8762945408187117,
"kl": 0.2572169601917267,
"learning_rate": 4.981015154140181e-06,
"loss": 0.3625,
"step": 23,
"step_loss": 0.4360867738723755
},
{
"epoch": 0.2727272727272727,
"grad_norm": 0.8055639586247896,
"kl": 0.3179939091205597,
"learning_rate": 4.978850534175967e-06,
"loss": 0.3703,
"step": 24,
"step_loss": 0.4685637056827545
},
{
"epoch": 0.2840909090909091,
"grad_norm": 0.7776274043752576,
"kl": 0.21350209414958954,
"learning_rate": 4.976569662250185e-06,
"loss": 0.3606,
"step": 25,
"step_loss": 0.3343837857246399
},
{
"epoch": 0.29545454545454547,
"grad_norm": 0.8563133494756662,
"kl": 0.23110716044902802,
"learning_rate": 4.974172657328117e-06,
"loss": 0.3468,
"step": 26,
"step_loss": 0.27567654848098755
},
{
"epoch": 0.3068181818181818,
"grad_norm": 0.8318009751857389,
"kl": 0.23791126906871796,
"learning_rate": 4.97165964443229e-06,
"loss": 0.3766,
"step": 27,
"step_loss": 0.43361616134643555
},
{
"epoch": 0.3181818181818182,
"grad_norm": 0.8256316165817771,
"kl": 0.2149941325187683,
"learning_rate": 4.96903075463595e-06,
"loss": 0.3507,
"step": 28,
"step_loss": 0.41853615641593933
},
{
"epoch": 0.32954545454545453,
"grad_norm": 0.897517985957999,
"kl": 0.24494698643684387,
"learning_rate": 4.966286125056234e-06,
"loss": 0.3647,
"step": 29,
"step_loss": 0.3512318730354309
},
{
"epoch": 0.3409090909090909,
"grad_norm": 0.9026937755400121,
"kl": 0.184300035238266,
"learning_rate": 4.963425898847006e-06,
"loss": 0.3518,
"step": 30,
"step_loss": 0.3595576882362366
},
{
"epoch": 0.3522727272727273,
"grad_norm": 0.85148287759613,
"kl": 0.24502253532409668,
"learning_rate": 4.960450225191402e-06,
"loss": 0.3627,
"step": 31,
"step_loss": 0.35294580459594727
},
{
"epoch": 0.36363636363636365,
"grad_norm": 0.7962414335930865,
"kl": 0.2057955265045166,
"learning_rate": 4.957359259294038e-06,
"loss": 0.3458,
"step": 32,
"step_loss": 0.34822237491607666
},
{
"epoch": 0.375,
"grad_norm": 0.800931144269417,
"kl": 0.21041449904441833,
"learning_rate": 4.954153162372928e-06,
"loss": 0.377,
"step": 33,
"step_loss": 0.40625178813934326
},
{
"epoch": 0.38636363636363635,
"grad_norm": 0.8558589097843969,
"kl": 0.2527558505535126,
"learning_rate": 4.950832101651063e-06,
"loss": 0.3845,
"step": 34,
"step_loss": 0.38101768493652344
},
{
"epoch": 0.3977272727272727,
"grad_norm": 0.8387953491003776,
"kl": 0.2718813717365265,
"learning_rate": 4.947396250347695e-06,
"loss": 0.3527,
"step": 35,
"step_loss": 0.3798375129699707
},
{
"epoch": 0.4090909090909091,
"grad_norm": 0.9507472286861052,
"kl": 0.1885739266872406,
"learning_rate": 4.943845787669303e-06,
"loss": 0.3589,
"step": 36,
"step_loss": 0.3146296441555023
},
{
"epoch": 0.42045454545454547,
"grad_norm": 0.8385876510679799,
"kl": 0.16516371071338654,
"learning_rate": 4.9401808988002425e-06,
"loss": 0.3578,
"step": 37,
"step_loss": 0.2985036373138428
},
{
"epoch": 0.4318181818181818,
"grad_norm": 0.8166686735971924,
"kl": 0.24522481858730316,
"learning_rate": 4.936401774893088e-06,
"loss": 0.3593,
"step": 38,
"step_loss": 0.4174098074436188
},
{
"epoch": 0.4431818181818182,
"grad_norm": 0.9075664298384456,
"kl": 0.23953908681869507,
"learning_rate": 4.932508613058665e-06,
"loss": 0.3711,
"step": 39,
"step_loss": 0.3471056818962097
},
{
"epoch": 0.45454545454545453,
"grad_norm": 0.8181070744566579,
"kl": 0.30609607696533203,
"learning_rate": 4.928501616355768e-06,
"loss": 0.3599,
"step": 40,
"step_loss": 0.33987393975257874
},
{
"epoch": 0.4659090909090909,
"grad_norm": 0.7674769366399429,
"kl": 0.3086016774177551,
"learning_rate": 4.924380993780566e-06,
"loss": 0.3434,
"step": 41,
"step_loss": 0.3478284776210785
},
{
"epoch": 0.4772727272727273,
"grad_norm": 0.796828960509896,
"kl": 0.2363354116678238,
"learning_rate": 4.920146960255707e-06,
"loss": 0.3458,
"step": 42,
"step_loss": 0.3613693118095398
},
{
"epoch": 0.48863636363636365,
"grad_norm": 0.8201944175381407,
"kl": 0.21424362063407898,
"learning_rate": 4.915799736619105e-06,
"loss": 0.3572,
"step": 43,
"step_loss": 0.33215075731277466
},
{
"epoch": 0.5,
"grad_norm": 0.846105466640005,
"kl": 0.17349004745483398,
"learning_rate": 4.911339549612422e-06,
"loss": 0.3782,
"step": 44,
"step_loss": 0.3519684672355652
},
{
"epoch": 0.5113636363636364,
"grad_norm": 0.8219513643708384,
"kl": 0.26255759596824646,
"learning_rate": 4.906766631869243e-06,
"loss": 0.362,
"step": 45,
"step_loss": 0.34211111068725586
},
{
"epoch": 0.5227272727272727,
"grad_norm": 0.8293883565647935,
"kl": 0.3788855969905853,
"learning_rate": 4.90208122190294e-06,
"loss": 0.3497,
"step": 46,
"step_loss": 0.40293213725090027
},
{
"epoch": 0.5340909090909091,
"grad_norm": 0.8369516655788055,
"kl": 0.27136877179145813,
"learning_rate": 4.897283564094233e-06,
"loss": 0.3761,
"step": 47,
"step_loss": 0.3964707851409912
},
{
"epoch": 0.5454545454545454,
"grad_norm": 0.8300152564082867,
"kl": 0.25666913390159607,
"learning_rate": 4.892373908678445e-06,
"loss": 0.3539,
"step": 48,
"step_loss": 0.3369652330875397
},
{
"epoch": 0.5568181818181818,
"grad_norm": 0.9104721437976564,
"kl": 0.274257630109787,
"learning_rate": 4.887352511732447e-06,
"loss": 0.389,
"step": 49,
"step_loss": 0.42897558212280273
},
{
"epoch": 0.5681818181818182,
"grad_norm": 0.8195403324929418,
"kl": 0.32372036576271057,
"learning_rate": 4.882219635161306e-06,
"loss": 0.3518,
"step": 50,
"step_loss": 0.3887690007686615
},
{
"epoch": 0.5795454545454546,
"grad_norm": 0.9168553194353436,
"kl": 0.28414681553840637,
"learning_rate": 4.876975546684619e-06,
"loss": 0.3581,
"step": 51,
"step_loss": 0.40320277214050293
},
{
"epoch": 0.5909090909090909,
"grad_norm": 0.8617294656704618,
"kl": 0.2004740983247757,
"learning_rate": 4.8716205198225525e-06,
"loss": 0.3805,
"step": 52,
"step_loss": 0.38499268889427185
},
{
"epoch": 0.6022727272727273,
"grad_norm": 0.7697913025754929,
"kl": 0.2655254900455475,
"learning_rate": 4.866154833881579e-06,
"loss": 0.3375,
"step": 53,
"step_loss": 0.3799101710319519
},
{
"epoch": 0.6136363636363636,
"grad_norm": 0.860433412224209,
"kl": 0.1964799165725708,
"learning_rate": 4.8605787739399055e-06,
"loss": 0.3614,
"step": 54,
"step_loss": 0.3518129587173462
},
{
"epoch": 0.625,
"grad_norm": 0.8208795676466634,
"kl": 0.2083766907453537,
"learning_rate": 4.8548926308326025e-06,
"loss": 0.3497,
"step": 55,
"step_loss": 0.36417239904403687
},
{
"epoch": 0.6363636363636364,
"grad_norm": 0.8494420730908946,
"kl": 0.24671347439289093,
"learning_rate": 4.84909670113644e-06,
"loss": 0.3585,
"step": 56,
"step_loss": 0.28457289934158325
},
{
"epoch": 0.6477272727272727,
"grad_norm": 0.8001936082214277,
"kl": 0.20547156035900116,
"learning_rate": 4.843191287154415e-06,
"loss": 0.3387,
"step": 57,
"step_loss": 0.31630027294158936
},
{
"epoch": 0.6590909090909091,
"grad_norm": 0.8349937030988813,
"kl": 0.24216747283935547,
"learning_rate": 4.837176696899984e-06,
"loss": 0.3562,
"step": 58,
"step_loss": 0.31798893213272095
},
{
"epoch": 0.6704545454545454,
"grad_norm": 0.7997137630513312,
"kl": 0.22188925743103027,
"learning_rate": 4.8310532440810005e-06,
"loss": 0.3563,
"step": 59,
"step_loss": 0.30022716522216797
},
{
"epoch": 0.6818181818181818,
"grad_norm": 0.7732049611711472,
"kl": 0.2248784899711609,
"learning_rate": 4.82482124808335e-06,
"loss": 0.3191,
"step": 60,
"step_loss": 0.36109238862991333
},
{
"epoch": 0.6931818181818182,
"grad_norm": 0.8067392653775698,
"kl": 0.24406936764717102,
"learning_rate": 4.8184810339542925e-06,
"loss": 0.3501,
"step": 61,
"step_loss": 0.3913516402244568
},
{
"epoch": 0.7045454545454546,
"grad_norm": 0.7792045901924106,
"kl": 0.19702410697937012,
"learning_rate": 4.812032932385509e-06,
"loss": 0.3238,
"step": 62,
"step_loss": 0.2769385576248169
},
{
"epoch": 0.7159090909090909,
"grad_norm": 0.8595897752967626,
"kl": 0.2642907202243805,
"learning_rate": 4.805477279695852e-06,
"loss": 0.3538,
"step": 63,
"step_loss": 0.4088588356971741
},
{
"epoch": 0.7272727272727273,
"grad_norm": 0.8639662428592647,
"kl": 0.22237904369831085,
"learning_rate": 4.798814417813807e-06,
"loss": 0.3576,
"step": 64,
"step_loss": 0.3004940152168274
},
{
"epoch": 0.7386363636363636,
"grad_norm": 0.7875668542731125,
"kl": 0.22941315174102783,
"learning_rate": 4.7920446942596535e-06,
"loss": 0.3314,
"step": 65,
"step_loss": 0.2649787664413452
},
{
"epoch": 0.75,
"grad_norm": 0.825774798553009,
"kl": 0.27141043543815613,
"learning_rate": 4.7851684621273435e-06,
"loss": 0.3694,
"step": 66,
"step_loss": 0.4899372458457947
},
{
"epoch": 0.7613636363636364,
"grad_norm": 0.7704537376313643,
"kl": 0.2449103444814682,
"learning_rate": 4.7781860800660836e-06,
"loss": 0.365,
"step": 67,
"step_loss": 0.41683661937713623
},
{
"epoch": 0.7727272727272727,
"grad_norm": 0.7904020967751078,
"kl": 0.30710798501968384,
"learning_rate": 4.771097912261626e-06,
"loss": 0.3577,
"step": 68,
"step_loss": 0.39199817180633545
},
{
"epoch": 0.7840909090909091,
"grad_norm": 0.7703764480757949,
"kl": 0.21822941303253174,
"learning_rate": 4.763904328417276e-06,
"loss": 0.3568,
"step": 69,
"step_loss": 0.3460182249546051
},
{
"epoch": 0.7954545454545454,
"grad_norm": 0.764160468250735,
"kl": 0.24635545909404755,
"learning_rate": 4.756605703734611e-06,
"loss": 0.3519,
"step": 70,
"step_loss": 0.4064783453941345
},
{
"epoch": 0.8068181818181818,
"grad_norm": 0.7481024101498955,
"kl": 0.14451748132705688,
"learning_rate": 4.7492024188939055e-06,
"loss": 0.3391,
"step": 71,
"step_loss": 0.25107917189598083
},
{
"epoch": 0.8181818181818182,
"grad_norm": 0.7802950172161581,
"kl": 0.1930258721113205,
"learning_rate": 4.741694860034281e-06,
"loss": 0.3564,
"step": 72,
"step_loss": 0.34083372354507446
},
{
"epoch": 0.8295454545454546,
"grad_norm": 0.7769810199285,
"kl": 0.23945628106594086,
"learning_rate": 4.734083418733563e-06,
"loss": 0.3725,
"step": 73,
"step_loss": 0.2961326241493225
},
{
"epoch": 0.8409090909090909,
"grad_norm": 0.825406272151327,
"kl": 0.23796045780181885,
"learning_rate": 4.726368491987854e-06,
"loss": 0.3376,
"step": 74,
"step_loss": 0.35123029351234436
},
{
"epoch": 0.8522727272727273,
"grad_norm": 0.7306727160298511,
"kl": 0.13507644832134247,
"learning_rate": 4.718550482190837e-06,
"loss": 0.3445,
"step": 75,
"step_loss": 0.2955048680305481
},
{
"epoch": 0.8636363636363636,
"grad_norm": 0.7579927918209679,
"kl": 0.24724549055099487,
"learning_rate": 4.7106297971127755e-06,
"loss": 0.349,
"step": 76,
"step_loss": 0.3554742932319641
},
{
"epoch": 0.875,
"grad_norm": 0.805419144828077,
"kl": 0.20437797904014587,
"learning_rate": 4.7026068498792535e-06,
"loss": 0.3439,
"step": 77,
"step_loss": 0.30100610852241516
},
{
"epoch": 0.8863636363636364,
"grad_norm": 0.8550394104357324,
"kl": 0.16827110946178436,
"learning_rate": 4.694482058949624e-06,
"loss": 0.3512,
"step": 78,
"step_loss": 0.2656687796115875
},
{
"epoch": 0.8977272727272727,
"grad_norm": 0.7699285730450894,
"kl": 0.17964152991771698,
"learning_rate": 4.686255848095186e-06,
"loss": 0.3407,
"step": 79,
"step_loss": 0.23917566239833832
},
{
"epoch": 0.9090909090909091,
"grad_norm": 0.7978865990480669,
"kl": 0.27529603242874146,
"learning_rate": 4.677928646377076e-06,
"loss": 0.3433,
"step": 80,
"step_loss": 0.33308878540992737
},
{
"epoch": 0.9204545454545454,
"grad_norm": 0.7840380701095863,
"kl": 0.22533753514289856,
"learning_rate": 4.669500888123897e-06,
"loss": 0.344,
"step": 81,
"step_loss": 0.33834126591682434
},
{
"epoch": 0.9318181818181818,
"grad_norm": 0.856812662574151,
"kl": 0.2573813498020172,
"learning_rate": 4.660973012909057e-06,
"loss": 0.359,
"step": 82,
"step_loss": 0.3473721146583557
},
{
"epoch": 0.9431818181818182,
"grad_norm": 0.7265171734197157,
"kl": 0.2186812162399292,
"learning_rate": 4.652345465527847e-06,
"loss": 0.3341,
"step": 83,
"step_loss": 0.3598743975162506
},
{
"epoch": 0.9545454545454546,
"grad_norm": 0.8049253093828008,
"kl": 0.2602262794971466,
"learning_rate": 4.64361869597424e-06,
"loss": 0.3464,
"step": 84,
"step_loss": 0.3444975018501282
},
{
"epoch": 0.9659090909090909,
"grad_norm": 0.7680402534237456,
"kl": 0.2810295522212982,
"learning_rate": 4.634793159417421e-06,
"loss": 0.3373,
"step": 85,
"step_loss": 0.26042619347572327
},
{
"epoch": 0.9772727272727273,
"grad_norm": 0.8011651956250969,
"kl": 0.24380852282047272,
"learning_rate": 4.625869316178043e-06,
"loss": 0.3676,
"step": 86,
"step_loss": 0.4119197428226471
},
{
"epoch": 0.9886363636363636,
"grad_norm": 0.7751572880029601,
"kl": 0.25345954298973083,
"learning_rate": 4.6168476317042224e-06,
"loss": 0.3449,
"step": 87,
"step_loss": 0.38151469826698303
},
{
"epoch": 1.0,
"grad_norm": 0.7947674725517713,
"kl": 0.2837441861629486,
"learning_rate": 4.60772857654726e-06,
"loss": 0.3646,
"step": 88,
"step_loss": 0.4021923542022705
},
{
"epoch": 1.0,
"eval_test_transformed.json_loss": 0.6147641539573669,
"eval_test_transformed.json_runtime": 57.2166,
"eval_test_transformed.json_samples_per_second": 8.739,
"eval_test_transformed.json_steps_per_second": 0.559,
"step": 88
},
{
"epoch": 1.0113636363636365,
"grad_norm": 0.8300174107506423,
"kl": 0.08372548967599869,
"learning_rate": 4.598512626337096e-06,
"loss": 0.2738,
"step": 89,
"step_loss": 0.5384058952331543
},
{
"epoch": 1.0227272727272727,
"grad_norm": 0.77573239267709,
"kl": 0.2220146656036377,
"learning_rate": 4.589200261757507e-06,
"loss": 0.2835,
"step": 90,
"step_loss": 0.23898854851722717
},
{
"epoch": 1.0340909090909092,
"grad_norm": 0.724932328716244,
"kl": 0.3047032654285431,
"learning_rate": 4.5797919685210306e-06,
"loss": 0.2681,
"step": 91,
"step_loss": 0.30961817502975464
},
{
"epoch": 1.0454545454545454,
"grad_norm": 0.6895314893413088,
"kl": 0.24007435142993927,
"learning_rate": 4.570288237343632e-06,
"loss": 0.2602,
"step": 92,
"step_loss": 0.29084306955337524
},
{
"epoch": 1.0568181818181819,
"grad_norm": 0.8345390101480717,
"kl": 0.2587500810623169,
"learning_rate": 4.560689563919113e-06,
"loss": 0.2838,
"step": 93,
"step_loss": 0.2709003686904907
},
{
"epoch": 1.0681818181818181,
"grad_norm": 0.8688624933533857,
"kl": 0.26922354102134705,
"learning_rate": 4.550996448893253e-06,
"loss": 0.2866,
"step": 94,
"step_loss": 0.3056609034538269
},
{
"epoch": 1.0795454545454546,
"grad_norm": 0.7821685731421445,
"kl": 0.25485578179359436,
"learning_rate": 4.5412093978376986e-06,
"loss": 0.2694,
"step": 95,
"step_loss": 0.22295084595680237
},
{
"epoch": 1.0909090909090908,
"grad_norm": 0.8329103508983827,
"kl": 0.3171152174472809,
"learning_rate": 4.531328921223596e-06,
"loss": 0.2676,
"step": 96,
"step_loss": 0.24555529654026031
},
{
"epoch": 1.1022727272727273,
"grad_norm": 0.8278389202666937,
"kl": 0.268743634223938,
"learning_rate": 4.521355534394959e-06,
"loss": 0.2806,
"step": 97,
"step_loss": 0.2850462794303894
},
{
"epoch": 1.1136363636363635,
"grad_norm": 0.7515633501773713,
"kl": 0.21348519623279572,
"learning_rate": 4.511289757541801e-06,
"loss": 0.2714,
"step": 98,
"step_loss": 0.22777102887630463
},
{
"epoch": 1.125,
"grad_norm": 0.7901926539707923,
"kl": 0.2479882836341858,
"learning_rate": 4.501132115672992e-06,
"loss": 0.2621,
"step": 99,
"step_loss": 0.27556145191192627
},
{
"epoch": 1.1363636363636362,
"grad_norm": 0.7721311538003944,
"kl": 0.2527960538864136,
"learning_rate": 4.490883138588882e-06,
"loss": 0.2673,
"step": 100,
"step_loss": 0.2762888967990875
},
{
"epoch": 1.1477272727272727,
"grad_norm": 0.7810424529002389,
"kl": 0.26764342188835144,
"learning_rate": 4.4805433608536655e-06,
"loss": 0.269,
"step": 101,
"step_loss": 0.2725462317466736
},
{
"epoch": 1.1590909090909092,
"grad_norm": 0.7955281338155472,
"kl": 0.3150593042373657,
"learning_rate": 4.470113321767499e-06,
"loss": 0.2761,
"step": 102,
"step_loss": 0.311062216758728
},
{
"epoch": 1.1704545454545454,
"grad_norm": 0.7565768284407913,
"kl": 0.2376776784658432,
"learning_rate": 4.459593565338376e-06,
"loss": 0.2851,
"step": 103,
"step_loss": 0.2452271282672882
},
{
"epoch": 1.1818181818181819,
"grad_norm": 0.8304952134574923,
"kl": 0.2546381950378418,
"learning_rate": 4.448984640253747e-06,
"loss": 0.2818,
"step": 104,
"step_loss": 0.3039626479148865
},
{
"epoch": 1.1931818181818181,
"grad_norm": 0.8207694461248964,
"kl": 0.41062766313552856,
"learning_rate": 4.438287099851905e-06,
"loss": 0.2698,
"step": 105,
"step_loss": 0.3451313078403473
},
{
"epoch": 1.2045454545454546,
"grad_norm": 0.763176799019057,
"kl": 0.24396805465221405,
"learning_rate": 4.427501502093126e-06,
"loss": 0.2666,
"step": 106,
"step_loss": 0.24862422049045563
},
{
"epoch": 1.2159090909090908,
"grad_norm": 0.7653625194049708,
"kl": 0.3014935255050659,
"learning_rate": 4.416628409530563e-06,
"loss": 0.2615,
"step": 107,
"step_loss": 0.23431876301765442
},
{
"epoch": 1.2272727272727273,
"grad_norm": 0.7932503486822488,
"kl": 0.23210333287715912,
"learning_rate": 4.405668389280906e-06,
"loss": 0.2615,
"step": 108,
"step_loss": 0.239517942070961
},
{
"epoch": 1.2386363636363638,
"grad_norm": 0.8303257471879192,
"kl": 0.25863131880760193,
"learning_rate": 4.394622012994803e-06,
"loss": 0.2733,
"step": 109,
"step_loss": 0.2567521035671234
},
{
"epoch": 1.25,
"grad_norm": 0.7822296104419395,
"kl": 0.18686552345752716,
"learning_rate": 4.383489856827045e-06,
"loss": 0.2772,
"step": 110,
"step_loss": 0.23640470206737518
},
{
"epoch": 1.2613636363636362,
"grad_norm": 0.7749159198189497,
"kl": 0.26931869983673096,
"learning_rate": 4.372272501406511e-06,
"loss": 0.2754,
"step": 111,
"step_loss": 0.29603561758995056
},
{
"epoch": 1.2727272727272727,
"grad_norm": 0.8008832913025072,
"kl": 0.2696314752101898,
"learning_rate": 4.360970531805891e-06,
"loss": 0.2646,
"step": 112,
"step_loss": 0.2858111262321472
},
{
"epoch": 1.2840909090909092,
"grad_norm": 0.8001913735849326,
"kl": 0.2806592881679535,
"learning_rate": 4.34958453751116e-06,
"loss": 0.2662,
"step": 113,
"step_loss": 0.2791585922241211
},
{
"epoch": 1.2954545454545454,
"grad_norm": 0.8024500136687336,
"kl": 0.24742648005485535,
"learning_rate": 4.338115112390839e-06,
"loss": 0.2777,
"step": 114,
"step_loss": 0.27738770842552185
},
{
"epoch": 1.3068181818181819,
"grad_norm": 0.7743853146387183,
"kl": 0.24617239832878113,
"learning_rate": 4.32656285466502e-06,
"loss": 0.272,
"step": 115,
"step_loss": 0.2725653350353241
},
{
"epoch": 1.3181818181818181,
"grad_norm": 0.8260411729526528,
"kl": 0.24938064813613892,
"learning_rate": 4.314928366874162e-06,
"loss": 0.2709,
"step": 116,
"step_loss": 0.23085035383701324
},
{
"epoch": 1.3295454545454546,
"grad_norm": 0.7593579608277142,
"kl": 0.2912304401397705,
"learning_rate": 4.30321225584766e-06,
"loss": 0.2595,
"step": 117,
"step_loss": 0.2982204258441925
},
{
"epoch": 1.3409090909090908,
"grad_norm": 0.7590166337347966,
"kl": 0.3144587576389313,
"learning_rate": 4.291415132672202e-06,
"loss": 0.2715,
"step": 118,
"step_loss": 0.2491433173418045
},
{
"epoch": 1.3522727272727273,
"grad_norm": 0.8096110997434255,
"kl": 0.3628700077533722,
"learning_rate": 4.279537612659893e-06,
"loss": 0.2747,
"step": 119,
"step_loss": 0.29215359687805176
},
{
"epoch": 1.3636363636363638,
"grad_norm": 0.7676948672131104,
"kl": 0.32500603795051575,
"learning_rate": 4.267580315316156e-06,
"loss": 0.2692,
"step": 120,
"step_loss": 0.2846797704696655
},
{
"epoch": 1.375,
"grad_norm": 0.7763972054682284,
"kl": 0.3070172071456909,
"learning_rate": 4.2555438643074315e-06,
"loss": 0.2748,
"step": 121,
"step_loss": 0.29722610116004944
},
{
"epoch": 1.3863636363636362,
"grad_norm": 0.765562794552249,
"kl": 0.24159571528434753,
"learning_rate": 4.243428887428635e-06,
"loss": 0.2648,
"step": 122,
"step_loss": 0.20421911776065826
},
{
"epoch": 1.3977272727272727,
"grad_norm": 0.7829115174919172,
"kl": 0.1845472753047943,
"learning_rate": 4.231236016570425e-06,
"loss": 0.2671,
"step": 123,
"step_loss": 0.24147802591323853
},
{
"epoch": 1.4090909090909092,
"grad_norm": 0.8644726847122038,
"kl": 0.24288871884346008,
"learning_rate": 4.218965887686236e-06,
"loss": 0.2914,
"step": 124,
"step_loss": 0.25647714734077454
},
{
"epoch": 1.4204545454545454,
"grad_norm": 0.8339544286421284,
"kl": 0.231180340051651,
"learning_rate": 4.206619140759113e-06,
"loss": 0.2704,
"step": 125,
"step_loss": 0.22522114217281342
},
{
"epoch": 1.4318181818181819,
"grad_norm": 0.8221761695605967,
"kl": 0.2961800992488861,
"learning_rate": 4.1941964197683294e-06,
"loss": 0.2769,
"step": 126,
"step_loss": 0.3132496774196625
},
{
"epoch": 1.4431818181818181,
"grad_norm": 0.8281811749101126,
"kl": 0.228164404630661,
"learning_rate": 4.181698372655802e-06,
"loss": 0.296,
"step": 127,
"step_loss": 0.2789953351020813
},
{
"epoch": 1.4545454545454546,
"grad_norm": 0.7805168144692464,
"kl": 0.29118138551712036,
"learning_rate": 4.16912565129229e-06,
"loss": 0.2691,
"step": 128,
"step_loss": 0.2938961982727051
},
{
"epoch": 1.4659090909090908,
"grad_norm": 0.764790917540533,
"kl": 0.21116551756858826,
"learning_rate": 4.156478911443399e-06,
"loss": 0.2728,
"step": 129,
"step_loss": 0.19998157024383545
},
{
"epoch": 1.4772727272727273,
"grad_norm": 0.8182095775205943,
"kl": 0.2947465479373932,
"learning_rate": 4.143758812735377e-06,
"loss": 0.276,
"step": 130,
"step_loss": 0.23230603337287903
},
{
"epoch": 1.4886363636363638,
"grad_norm": 0.7882946546339218,
"kl": 0.24360516667366028,
"learning_rate": 4.130966018620709e-06,
"loss": 0.2677,
"step": 131,
"step_loss": 0.2349737137556076
},
{
"epoch": 1.5,
"grad_norm": 0.8656008871968565,
"kl": 0.35519665479660034,
"learning_rate": 4.118101196343515e-06,
"loss": 0.2729,
"step": 132,
"step_loss": 0.26551371812820435
},
{
"epoch": 1.5113636363636362,
"grad_norm": 0.8133559486757417,
"kl": 0.31251227855682373,
"learning_rate": 4.105165016904744e-06,
"loss": 0.2703,
"step": 133,
"step_loss": 0.30454349517822266
},
{
"epoch": 1.5227272727272727,
"grad_norm": 0.7341218877408442,
"kl": 0.3486112952232361,
"learning_rate": 4.092158155027177e-06,
"loss": 0.2778,
"step": 134,
"step_loss": 0.3166317045688629
},
{
"epoch": 1.5340909090909092,
"grad_norm": 0.8500600386901499,
"kl": 0.26721635460853577,
"learning_rate": 4.07908128912024e-06,
"loss": 0.2822,
"step": 135,
"step_loss": 0.31385499238967896
},
{
"epoch": 1.5454545454545454,
"grad_norm": 0.7849382316075052,
"kl": 0.25328248739242554,
"learning_rate": 4.065935101244614e-06,
"loss": 0.2798,
"step": 136,
"step_loss": 0.3054334223270416
},
{
"epoch": 1.5568181818181817,
"grad_norm": 0.8045542338117103,
"kl": 0.22720679640769958,
"learning_rate": 4.05272027707666e-06,
"loss": 0.2688,
"step": 137,
"step_loss": 0.2093893587589264
},
{
"epoch": 1.5681818181818183,
"grad_norm": 0.8059325354646184,
"kl": 0.33818334341049194,
"learning_rate": 4.039437505872661e-06,
"loss": 0.2755,
"step": 138,
"step_loss": 0.25241315364837646
},
{
"epoch": 1.5795454545454546,
"grad_norm": 0.7947964956021049,
"kl": 0.2659541666507721,
"learning_rate": 4.026087480432867e-06,
"loss": 0.2714,
"step": 139,
"step_loss": 0.22505882382392883
},
{
"epoch": 1.5909090909090908,
"grad_norm": 0.7798080488748865,
"kl": 0.3098128139972687,
"learning_rate": 4.012670897065361e-06,
"loss": 0.2645,
"step": 140,
"step_loss": 0.30545055866241455
},
{
"epoch": 1.6022727272727273,
"grad_norm": 0.775197771433234,
"kl": 0.22397831082344055,
"learning_rate": 3.999188455549743e-06,
"loss": 0.2666,
"step": 141,
"step_loss": 0.23903930187225342
},
{
"epoch": 1.6136363636363638,
"grad_norm": 0.8077084029693482,
"kl": 0.27500656247138977,
"learning_rate": 3.98564085910063e-06,
"loss": 0.2722,
"step": 142,
"step_loss": 0.35808488726615906
},
{
"epoch": 1.625,
"grad_norm": 0.8147386009423194,
"kl": 0.267314612865448,
"learning_rate": 3.972028814330974e-06,
"loss": 0.2723,
"step": 143,
"step_loss": 0.22990119457244873
},
{
"epoch": 1.6363636363636362,
"grad_norm": 0.8458877608603808,
"kl": 0.23753684759140015,
"learning_rate": 3.958353031215216e-06,
"loss": 0.2876,
"step": 144,
"step_loss": 0.2537928521633148
},
{
"epoch": 1.6477272727272727,
"grad_norm": 0.8197579462143825,
"kl": 0.23161743581295013,
"learning_rate": 3.944614223052246e-06,
"loss": 0.2801,
"step": 145,
"step_loss": 0.2637965679168701
},
{
"epoch": 1.6590909090909092,
"grad_norm": 0.8314592069504814,
"kl": 0.24659006297588348,
"learning_rate": 3.930813106428202e-06,
"loss": 0.2817,
"step": 146,
"step_loss": 0.2755964696407318
},
{
"epoch": 1.6704545454545454,
"grad_norm": 0.7638963920533027,
"kl": 0.20747093856334686,
"learning_rate": 3.916950401179096e-06,
"loss": 0.2635,
"step": 147,
"step_loss": 0.22498083114624023
},
{
"epoch": 1.6818181818181817,
"grad_norm": 0.8172422080755228,
"kl": 0.3187533915042877,
"learning_rate": 3.903026830353268e-06,
"loss": 0.273,
"step": 148,
"step_loss": 0.3263009190559387
},
{
"epoch": 1.6931818181818183,
"grad_norm": 0.8620536672122645,
"kl": 0.27937936782836914,
"learning_rate": 3.8890431201736715e-06,
"loss": 0.2776,
"step": 149,
"step_loss": 0.3091718554496765
},
{
"epoch": 1.7045454545454546,
"grad_norm": 0.8127977485887442,
"kl": 0.3105573356151581,
"learning_rate": 3.875e-06,
"loss": 0.2668,
"step": 150,
"step_loss": 0.3028494715690613
},
{
"epoch": 1.7159090909090908,
"grad_norm": 0.8201951442514309,
"kl": 0.3465476334095001,
"learning_rate": 3.86089820229064e-06,
"loss": 0.2791,
"step": 151,
"step_loss": 0.31831085681915283
},
{
"epoch": 1.7272727272727273,
"grad_norm": 0.835516351157446,
"kl": 0.21952536702156067,
"learning_rate": 3.846738462564469e-06,
"loss": 0.2721,
"step": 152,
"step_loss": 0.2234291136264801
},
{
"epoch": 1.7386363636363638,
"grad_norm": 0.8665975677625136,
"kl": 0.29508858919143677,
"learning_rate": 3.8325215193624945e-06,
"loss": 0.2802,
"step": 153,
"step_loss": 0.3162890076637268
},
{
"epoch": 1.75,
"grad_norm": 0.8064955980472845,
"kl": 0.32985877990722656,
"learning_rate": 3.8182481142093315e-06,
"loss": 0.2566,
"step": 154,
"step_loss": 0.2641649842262268
},
{
"epoch": 1.7613636363636362,
"grad_norm": 0.826451499999824,
"kl": 0.2728135287761688,
"learning_rate": 3.8039189915745286e-06,
"loss": 0.292,
"step": 155,
"step_loss": 0.32532957196235657
},
{
"epoch": 1.7727272727272727,
"grad_norm": 0.8336078231467312,
"kl": 0.38967686891555786,
"learning_rate": 3.789534898833732e-06,
"loss": 0.278,
"step": 156,
"step_loss": 0.3321007192134857
},
{
"epoch": 1.7840909090909092,
"grad_norm": 0.7893976689434017,
"kl": 0.2202274650335312,
"learning_rate": 3.775096586229713e-06,
"loss": 0.2696,
"step": 157,
"step_loss": 0.2540920376777649
},
{
"epoch": 1.7954545454545454,
"grad_norm": 0.9319185366269114,
"kl": 0.4245775043964386,
"learning_rate": 3.760604806833228e-06,
"loss": 0.288,
"step": 158,
"step_loss": 0.35646578669548035
},
{
"epoch": 1.8068181818181817,
"grad_norm": 0.744000278965067,
"kl": 0.21765930950641632,
"learning_rate": 3.7460603165037475e-06,
"loss": 0.262,
"step": 159,
"step_loss": 0.2947049140930176
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.7890825930010562,
"kl": 0.2992360591888428,
"learning_rate": 3.7314638738500275e-06,
"loss": 0.2797,
"step": 160,
"step_loss": 0.30612078309059143
},
{
"epoch": 1.8295454545454546,
"grad_norm": 0.806422206319446,
"kl": 0.2850240468978882,
"learning_rate": 3.7168162401905415e-06,
"loss": 0.2643,
"step": 161,
"step_loss": 0.25350436568260193
},
{
"epoch": 1.8409090909090908,
"grad_norm": 0.7772092463231446,
"kl": 0.294902503490448,
"learning_rate": 3.7021181795137777e-06,
"loss": 0.2752,
"step": 162,
"step_loss": 0.3195292353630066
},
{
"epoch": 1.8522727272727273,
"grad_norm": 0.8137138542667633,
"kl": 0.3464955985546112,
"learning_rate": 3.6873704584383834e-06,
"loss": 0.2879,
"step": 163,
"step_loss": 0.3319052755832672
},
{
"epoch": 1.8636363636363638,
"grad_norm": 0.8158519268417352,
"kl": 0.2651825547218323,
"learning_rate": 3.6725738461731865e-06,
"loss": 0.2698,
"step": 164,
"step_loss": 0.3068053722381592
},
{
"epoch": 1.875,
"grad_norm": 0.7751669380913654,
"kl": 0.2937452793121338,
"learning_rate": 3.65772911447707e-06,
"loss": 0.2701,
"step": 165,
"step_loss": 0.2569812834262848
},
{
"epoch": 1.8863636363636362,
"grad_norm": 0.7494161240442716,
"kl": 0.27101945877075195,
"learning_rate": 3.6428370376187212e-06,
"loss": 0.2756,
"step": 166,
"step_loss": 0.3467923402786255
},
{
"epoch": 1.8977272727272727,
"grad_norm": 0.8176861818718908,
"kl": 0.27005308866500854,
"learning_rate": 3.627898392336248e-06,
"loss": 0.2652,
"step": 167,
"step_loss": 0.27606719732284546
},
{
"epoch": 1.9090909090909092,
"grad_norm": 0.7568912608938045,
"kl": 0.23509696125984192,
"learning_rate": 3.6129139577966665e-06,
"loss": 0.2764,
"step": 168,
"step_loss": 0.27201592922210693
},
{
"epoch": 1.9204545454545454,
"grad_norm": 0.8229709961416729,
"kl": 0.28099095821380615,
"learning_rate": 3.597884515555258e-06,
"loss": 0.2685,
"step": 169,
"step_loss": 0.29871946573257446
},
{
"epoch": 1.9318181818181817,
"grad_norm": 0.7594413163226815,
"kl": 0.2222573161125183,
"learning_rate": 3.5828108495148077e-06,
"loss": 0.2601,
"step": 170,
"step_loss": 0.262919157743454
},
{
"epoch": 1.9431818181818183,
"grad_norm": 0.7658440412811187,
"kl": 0.2369879186153412,
"learning_rate": 3.5676937458847177e-06,
"loss": 0.2898,
"step": 171,
"step_loss": 0.2267894744873047
},
{
"epoch": 1.9545454545454546,
"grad_norm": 0.7855144082392459,
"kl": 0.24906377494335175,
"learning_rate": 3.55253399314e-06,
"loss": 0.2927,
"step": 172,
"step_loss": 0.33061373233795166
},
{
"epoch": 1.9659090909090908,
"grad_norm": 0.793142140040205,
"kl": 0.24075445532798767,
"learning_rate": 3.5373323819801493e-06,
"loss": 0.2788,
"step": 173,
"step_loss": 0.3006718158721924
},
{
"epoch": 1.9772727272727273,
"grad_norm": 0.7402321203997657,
"kl": 0.2078879177570343,
"learning_rate": 3.5220897052879047e-06,
"loss": 0.283,
"step": 174,
"step_loss": 0.2104087769985199
},
{
"epoch": 1.9886363636363638,
"grad_norm": 0.7547346712459372,
"kl": 0.20877879858016968,
"learning_rate": 3.5068067580878947e-06,
"loss": 0.2724,
"step": 175,
"step_loss": 0.23320713639259338
},
{
"epoch": 2.0,
"grad_norm": 0.7771753483379468,
"kl": 0.26442065834999084,
"learning_rate": 3.491484337505166e-06,
"loss": 0.268,
"step": 176,
"step_loss": 0.2616370916366577
},
{
"epoch": 2.0,
"eval_test_transformed.json_loss": 0.6573941707611084,
"eval_test_transformed.json_runtime": 57.072,
"eval_test_transformed.json_samples_per_second": 8.761,
"eval_test_transformed.json_steps_per_second": 0.561,
"step": 176
},
{
"epoch": 2.0113636363636362,
"grad_norm": 0.8267860023888801,
"kl": 0.10590674728155136,
"learning_rate": 3.476123242723614e-06,
"loss": 0.2097,
"step": 177,
"step_loss": 0.5581972599029541
},
{
"epoch": 2.022727272727273,
"grad_norm": 0.856141962071338,
"kl": 0.3062549829483032,
"learning_rate": 3.460724274944294e-06,
"loss": 0.2173,
"step": 178,
"step_loss": 0.23314087092876434
},
{
"epoch": 2.034090909090909,
"grad_norm": 0.7361155640769321,
"kl": 0.3152432143688202,
"learning_rate": 3.445288237343632e-06,
"loss": 0.1978,
"step": 179,
"step_loss": 0.20342102646827698
},
{
"epoch": 2.0454545454545454,
"grad_norm": 0.732358232912068,
"kl": 0.382126122713089,
"learning_rate": 3.4298159350315398e-06,
"loss": 0.2032,
"step": 180,
"step_loss": 0.24822968244552612
},
{
"epoch": 2.0568181818181817,
"grad_norm": 0.7942400733536357,
"kl": 0.3069852590560913,
"learning_rate": 3.4143081750094107e-06,
"loss": 0.1976,
"step": 181,
"step_loss": 0.21024365723133087
},
{
"epoch": 2.0681818181818183,
"grad_norm": 0.9225206215680616,
"kl": 0.4743329882621765,
"learning_rate": 3.3987657661280393e-06,
"loss": 0.2133,
"step": 182,
"step_loss": 0.22629733383655548
},
{
"epoch": 2.0795454545454546,
"grad_norm": 0.9790953481687389,
"kl": 0.31061556935310364,
"learning_rate": 3.383189519045428e-06,
"loss": 0.2031,
"step": 183,
"step_loss": 0.21077898144721985
},
{
"epoch": 2.090909090909091,
"grad_norm": 0.9744424313842567,
"kl": 0.380555659532547,
"learning_rate": 3.3675802461845036e-06,
"loss": 0.2004,
"step": 184,
"step_loss": 0.22242428362369537
},
{
"epoch": 2.102272727272727,
"grad_norm": 0.916179520314943,
"kl": 0.43431419134140015,
"learning_rate": 3.351938761690747e-06,
"loss": 0.1966,
"step": 185,
"step_loss": 0.2207513451576233
},
{
"epoch": 2.1136363636363638,
"grad_norm": 0.8004844301509703,
"kl": 0.28789108991622925,
"learning_rate": 3.3362658813897274e-06,
"loss": 0.1942,
"step": 186,
"step_loss": 0.26408788561820984
},
{
"epoch": 2.125,
"grad_norm": 0.8955852845271478,
"kl": 0.28126734495162964,
"learning_rate": 3.320562422744548e-06,
"loss": 0.2053,
"step": 187,
"step_loss": 0.2404702603816986
},
{
"epoch": 2.1363636363636362,
"grad_norm": 0.8109199492626206,
"kl": 0.31442955136299133,
"learning_rate": 3.3048292048132153e-06,
"loss": 0.2011,
"step": 188,
"step_loss": 0.21545666456222534
},
{
"epoch": 2.147727272727273,
"grad_norm": 0.8741546702827006,
"kl": 0.2675096094608307,
"learning_rate": 3.289067048205912e-06,
"loss": 0.2088,
"step": 189,
"step_loss": 0.1575331836938858
},
{
"epoch": 2.159090909090909,
"grad_norm": 0.8080813493615443,
"kl": 0.3244311213493347,
"learning_rate": 3.273276775042199e-06,
"loss": 0.2003,
"step": 190,
"step_loss": 0.187534362077713
},
{
"epoch": 2.1704545454545454,
"grad_norm": 0.8677916956527729,
"kl": 0.2513880729675293,
"learning_rate": 3.2574592089081375e-06,
"loss": 0.1989,
"step": 191,
"step_loss": 0.17420931160449982
},
{
"epoch": 2.1818181818181817,
"grad_norm": 0.798718321325761,
"kl": 0.291971892118454,
"learning_rate": 3.241615174813327e-06,
"loss": 0.207,
"step": 192,
"step_loss": 0.19951367378234863
},
{
"epoch": 2.1931818181818183,
"grad_norm": 0.8266125308725523,
"kl": 0.3032814562320709,
"learning_rate": 3.2257454991478825e-06,
"loss": 0.1893,
"step": 193,
"step_loss": 0.17084187269210815
},
{
"epoch": 2.2045454545454546,
"grad_norm": 0.7812137976841603,
"kl": 0.38334983587265015,
"learning_rate": 3.2098510096393214e-06,
"loss": 0.2027,
"step": 194,
"step_loss": 0.20809711515903473
},
{
"epoch": 2.215909090909091,
"grad_norm": 0.9554319759849772,
"kl": 0.3265298306941986,
"learning_rate": 3.1939325353094004e-06,
"loss": 0.2052,
"step": 195,
"step_loss": 0.19203175604343414
},
{
"epoch": 2.227272727272727,
"grad_norm": 0.8249089393067537,
"kl": 0.35938209295272827,
"learning_rate": 3.177990906430871e-06,
"loss": 0.1967,
"step": 196,
"step_loss": 0.2233177125453949
},
{
"epoch": 2.2386363636363638,
"grad_norm": 0.7933030950436119,
"kl": 0.32349923253059387,
"learning_rate": 3.1620269544841764e-06,
"loss": 0.1913,
"step": 197,
"step_loss": 0.1729939877986908
},
{
"epoch": 2.25,
"grad_norm": 0.8197164640788506,
"kl": 0.3547500669956207,
"learning_rate": 3.1460415121140804e-06,
"loss": 0.2006,
"step": 198,
"step_loss": 0.2586993873119354
},
{
"epoch": 2.2613636363636362,
"grad_norm": 0.8495735950170649,
"kl": 0.3509565591812134,
"learning_rate": 3.1300354130862404e-06,
"loss": 0.2045,
"step": 199,
"step_loss": 0.21880128979682922
},
{
"epoch": 2.2727272727272725,
"grad_norm": 0.7758237844371901,
"kl": 0.3566822409629822,
"learning_rate": 3.1140094922437213e-06,
"loss": 0.1906,
"step": 200,
"step_loss": 0.15825271606445312
},
{
"epoch": 2.284090909090909,
"grad_norm": 0.7748859598486534,
"kl": 0.23674319684505463,
"learning_rate": 3.097964585463449e-06,
"loss": 0.1904,
"step": 201,
"step_loss": 0.15398302674293518
},
{
"epoch": 2.2954545454545454,
"grad_norm": 0.8150881114674452,
"kl": 0.32412102818489075,
"learning_rate": 3.081901529612614e-06,
"loss": 0.2015,
"step": 202,
"step_loss": 0.1894824057817459
},
{
"epoch": 2.3068181818181817,
"grad_norm": 0.777056310216207,
"kl": 0.33722126483917236,
"learning_rate": 3.065821162505024e-06,
"loss": 0.2094,
"step": 203,
"step_loss": 0.24027691781520844
},
{
"epoch": 2.3181818181818183,
"grad_norm": 0.7521221020569652,
"kl": 0.30370503664016724,
"learning_rate": 3.0497243228574054e-06,
"loss": 0.2033,
"step": 204,
"step_loss": 0.2159876674413681
},
{
"epoch": 2.3295454545454546,
"grad_norm": 0.8056198200446986,
"kl": 0.27457597851753235,
"learning_rate": 3.0336118502456515e-06,
"loss": 0.2057,
"step": 205,
"step_loss": 0.1882285177707672
},
{
"epoch": 2.340909090909091,
"grad_norm": 0.7864857515183022,
"kl": 0.35345762968063354,
"learning_rate": 3.0174845850610395e-06,
"loss": 0.1896,
"step": 206,
"step_loss": 0.1616518199443817
},
{
"epoch": 2.3522727272727275,
"grad_norm": 0.7347377124860899,
"kl": 0.26013386249542236,
"learning_rate": 3.001343368466396e-06,
"loss": 0.1931,
"step": 207,
"step_loss": 0.15433846414089203
},
{
"epoch": 2.3636363636363638,
"grad_norm": 0.8262277498582172,
"kl": 0.41462039947509766,
"learning_rate": 2.9851890423522205e-06,
"loss": 0.1946,
"step": 208,
"step_loss": 0.17125703394412994
},
{
"epoch": 2.375,
"grad_norm": 0.8440169159473123,
"kl": 0.30189239978790283,
"learning_rate": 2.9690224492927777e-06,
"loss": 0.1972,
"step": 209,
"step_loss": 0.1643592119216919
},
{
"epoch": 2.3863636363636362,
"grad_norm": 0.7902010267144333,
"kl": 0.30659422278404236,
"learning_rate": 2.9528444325021475e-06,
"loss": 0.196,
"step": 210,
"step_loss": 0.16432619094848633
},
{
"epoch": 2.3977272727272725,
"grad_norm": 0.7897263557587403,
"kl": 0.35797473788261414,
"learning_rate": 2.9366558357902485e-06,
"loss": 0.2104,
"step": 211,
"step_loss": 0.18551866710186005
},
{
"epoch": 2.409090909090909,
"grad_norm": 0.8139541524586551,
"kl": 0.5444263815879822,
"learning_rate": 2.920457503518824e-06,
"loss": 0.2149,
"step": 212,
"step_loss": 0.2275647521018982
},
{
"epoch": 2.4204545454545454,
"grad_norm": 0.779793908267749,
"kl": 0.26403847336769104,
"learning_rate": 2.9042502805574022e-06,
"loss": 0.195,
"step": 213,
"step_loss": 0.16895370185375214
},
{
"epoch": 2.4318181818181817,
"grad_norm": 0.8129577814286397,
"kl": 0.3593398630619049,
"learning_rate": 2.8880350122392327e-06,
"loss": 0.1991,
"step": 214,
"step_loss": 0.19971898198127747
},
{
"epoch": 2.4431818181818183,
"grad_norm": 0.819141896928592,
"kl": 0.40604791045188904,
"learning_rate": 2.87181254431719e-06,
"loss": 0.1916,
"step": 215,
"step_loss": 0.19662940502166748
},
{
"epoch": 2.4545454545454546,
"grad_norm": 0.8326566611196083,
"kl": 0.30143654346466064,
"learning_rate": 2.8555837229196654e-06,
"loss": 0.2012,
"step": 216,
"step_loss": 0.19791531562805176
},
{
"epoch": 2.465909090909091,
"grad_norm": 0.7971460218117707,
"kl": 0.28109458088874817,
"learning_rate": 2.839349394506435e-06,
"loss": 0.2047,
"step": 217,
"step_loss": 0.1851027011871338
},
{
"epoch": 2.4772727272727275,
"grad_norm": 0.8160202720148206,
"kl": 0.3231932520866394,
"learning_rate": 2.8231104058245066e-06,
"loss": 0.2046,
"step": 218,
"step_loss": 0.1588630974292755
},
{
"epoch": 2.4886363636363638,
"grad_norm": 0.8870641900955384,
"kl": 0.39819225668907166,
"learning_rate": 2.8068676038639584e-06,
"loss": 0.2045,
"step": 219,
"step_loss": 0.21049059927463531
},
{
"epoch": 2.5,
"grad_norm": 0.8580982781435952,
"kl": 0.21048355102539062,
"learning_rate": 2.790621835813761e-06,
"loss": 0.1954,
"step": 220,
"step_loss": 0.15752212703227997
},
{
"epoch": 2.5113636363636362,
"grad_norm": 0.8332029161126598,
"kl": 0.3022167980670929,
"learning_rate": 2.7743739490175902e-06,
"loss": 0.1993,
"step": 221,
"step_loss": 0.18184006214141846
},
{
"epoch": 2.5227272727272725,
"grad_norm": 0.7915764221822466,
"kl": 0.3532565236091614,
"learning_rate": 2.75812479092963e-06,
"loss": 0.2028,
"step": 222,
"step_loss": 0.2090645432472229
},
{
"epoch": 2.534090909090909,
"grad_norm": 0.816127163580774,
"kl": 0.3357287645339966,
"learning_rate": 2.7418752090703716e-06,
"loss": 0.1993,
"step": 223,
"step_loss": 0.22028760612010956
},
{
"epoch": 2.5454545454545454,
"grad_norm": 0.8169798227897933,
"kl": 0.3894638121128082,
"learning_rate": 2.72562605098241e-06,
"loss": 0.2095,
"step": 224,
"step_loss": 0.3154832422733307
},
{
"epoch": 2.5568181818181817,
"grad_norm": 0.8178116233042594,
"kl": 0.4051194190979004,
"learning_rate": 2.7093781641862387e-06,
"loss": 0.215,
"step": 225,
"step_loss": 0.21542856097221375
},
{
"epoch": 2.5681818181818183,
"grad_norm": 0.7927440832417105,
"kl": 0.48752185702323914,
"learning_rate": 2.6931323961360423e-06,
"loss": 0.1982,
"step": 226,
"step_loss": 0.2092168629169464
},
{
"epoch": 2.5795454545454546,
"grad_norm": 0.8036623600570535,
"kl": 0.25240644812583923,
"learning_rate": 2.6768895941754945e-06,
"loss": 0.1875,
"step": 227,
"step_loss": 0.1589215099811554
},
{
"epoch": 2.590909090909091,
"grad_norm": 0.8374058314659513,
"kl": 0.3620951473712921,
"learning_rate": 2.660650605493566e-06,
"loss": 0.1882,
"step": 228,
"step_loss": 0.19974978268146515
},
{
"epoch": 2.6022727272727275,
"grad_norm": 0.8335380700060886,
"kl": 0.41594362258911133,
"learning_rate": 2.644416277080335e-06,
"loss": 0.2165,
"step": 229,
"step_loss": 0.24422059953212738
},
{
"epoch": 2.6136363636363638,
"grad_norm": 0.8100345325173146,
"kl": 0.4320451319217682,
"learning_rate": 2.6281874556828106e-06,
"loss": 0.2252,
"step": 230,
"step_loss": 0.2085280418395996
},
{
"epoch": 2.625,
"grad_norm": 0.8530910666847069,
"kl": 0.3395635783672333,
"learning_rate": 2.6119649877607684e-06,
"loss": 0.2006,
"step": 231,
"step_loss": 0.18190529942512512
},
{
"epoch": 2.6363636363636362,
"grad_norm": 0.7566154700308811,
"kl": 0.3144618272781372,
"learning_rate": 2.5957497194425985e-06,
"loss": 0.1887,
"step": 232,
"step_loss": 0.18467919528484344
},
{
"epoch": 2.6477272727272725,
"grad_norm": 0.7310945410825066,
"kl": 0.3082126975059509,
"learning_rate": 2.579542496481178e-06,
"loss": 0.1922,
"step": 233,
"step_loss": 0.21097740530967712
},
{
"epoch": 2.659090909090909,
"grad_norm": 0.7837603862506226,
"kl": 0.30413469672203064,
"learning_rate": 2.5633441642097527e-06,
"loss": 0.204,
"step": 234,
"step_loss": 0.2184579074382782
},
{
"epoch": 2.6704545454545454,
"grad_norm": 0.7849444203230034,
"kl": 0.32521089911460876,
"learning_rate": 2.5471555674978536e-06,
"loss": 0.1902,
"step": 235,
"step_loss": 0.1784965693950653
},
{
"epoch": 2.6818181818181817,
"grad_norm": 0.8978400192531096,
"kl": 0.4359068274497986,
"learning_rate": 2.530977550707223e-06,
"loss": 0.1998,
"step": 236,
"step_loss": 0.24568131566047668
},
{
"epoch": 2.6931818181818183,
"grad_norm": 0.8296558697377163,
"kl": 0.34526827931404114,
"learning_rate": 2.5148109576477798e-06,
"loss": 0.2051,
"step": 237,
"step_loss": 0.1845228672027588
},
{
"epoch": 2.7045454545454546,
"grad_norm": 0.8456638318243491,
"kl": 0.4374793767929077,
"learning_rate": 2.4986566315336047e-06,
"loss": 0.2099,
"step": 238,
"step_loss": 0.2226126492023468
},
{
"epoch": 2.715909090909091,
"grad_norm": 0.7855160396523918,
"kl": 0.35535740852355957,
"learning_rate": 2.4825154149389608e-06,
"loss": 0.2064,
"step": 239,
"step_loss": 0.2260066717863083
},
{
"epoch": 2.7272727272727275,
"grad_norm": 0.7687481608795249,
"kl": 0.3926181495189667,
"learning_rate": 2.4663881497543497e-06,
"loss": 0.1978,
"step": 240,
"step_loss": 0.183305025100708
},
{
"epoch": 2.7386363636363638,
"grad_norm": 0.8743610433319449,
"kl": 0.3105931580066681,
"learning_rate": 2.450275677142595e-06,
"loss": 0.1982,
"step": 241,
"step_loss": 0.1785978525876999
},
{
"epoch": 2.75,
"grad_norm": 0.8103030286089026,
"kl": 0.28116321563720703,
"learning_rate": 2.4341788374949753e-06,
"loss": 0.2028,
"step": 242,
"step_loss": 0.16758720576763153
},
{
"epoch": 2.7613636363636362,
"grad_norm": 0.7780046576774838,
"kl": 0.31231576204299927,
"learning_rate": 2.418098470387387e-06,
"loss": 0.204,
"step": 243,
"step_loss": 0.21919600665569305
},
{
"epoch": 2.7727272727272725,
"grad_norm": 0.7900342339932793,
"kl": 0.36211156845092773,
"learning_rate": 2.4020354145365526e-06,
"loss": 0.1925,
"step": 244,
"step_loss": 0.22099542617797852
},
{
"epoch": 2.784090909090909,
"grad_norm": 0.7782246567974114,
"kl": 0.24455486238002777,
"learning_rate": 2.3859905077562802e-06,
"loss": 0.1871,
"step": 245,
"step_loss": 0.13276106119155884
},
{
"epoch": 2.7954545454545454,
"grad_norm": 0.7780058068595647,
"kl": 0.3040383756160736,
"learning_rate": 2.36996458691376e-06,
"loss": 0.203,
"step": 246,
"step_loss": 0.18567970395088196
},
{
"epoch": 2.8068181818181817,
"grad_norm": 0.797452119268861,
"kl": 0.3017336428165436,
"learning_rate": 2.35395848788592e-06,
"loss": 0.2045,
"step": 247,
"step_loss": 0.18115977942943573
},
{
"epoch": 2.8181818181818183,
"grad_norm": 0.8291981778954278,
"kl": 0.31822529435157776,
"learning_rate": 2.337973045515824e-06,
"loss": 0.2063,
"step": 248,
"step_loss": 0.18958526849746704
},
{
"epoch": 2.8295454545454546,
"grad_norm": 0.8192468169601297,
"kl": 0.36042526364326477,
"learning_rate": 2.3220090935691284e-06,
"loss": 0.2078,
"step": 249,
"step_loss": 0.198805570602417
},
{
"epoch": 2.840909090909091,
"grad_norm": 0.8386142691384693,
"kl": 0.3129850924015045,
"learning_rate": 2.3060674646906007e-06,
"loss": 0.1976,
"step": 250,
"step_loss": 0.15486395359039307
},
{
"epoch": 2.8522727272727275,
"grad_norm": 0.8542094258716569,
"kl": 0.328252911567688,
"learning_rate": 2.2901489903606793e-06,
"loss": 0.2094,
"step": 251,
"step_loss": 0.22234201431274414
},
{
"epoch": 2.8636363636363638,
"grad_norm": 0.8235027946049533,
"kl": 0.33431679010391235,
"learning_rate": 2.274254500852118e-06,
"loss": 0.2108,
"step": 252,
"step_loss": 0.23872962594032288
},
{
"epoch": 2.875,
"grad_norm": 0.7826979380094895,
"kl": 0.35612499713897705,
"learning_rate": 2.258384825186673e-06,
"loss": 0.2018,
"step": 253,
"step_loss": 0.22147932648658752
},
{
"epoch": 2.8863636363636362,
"grad_norm": 0.7727887293657175,
"kl": 0.4257417321205139,
"learning_rate": 2.2425407910918632e-06,
"loss": 0.1996,
"step": 254,
"step_loss": 0.25106555223464966
},
{
"epoch": 2.8977272727272725,
"grad_norm": 0.7880191514694174,
"kl": 0.3257736265659332,
"learning_rate": 2.2267232249578026e-06,
"loss": 0.1997,
"step": 255,
"step_loss": 0.17997223138809204
},
{
"epoch": 2.909090909090909,
"grad_norm": 0.8229368102750961,
"kl": 0.32289835810661316,
"learning_rate": 2.2109329517940897e-06,
"loss": 0.2049,
"step": 256,
"step_loss": 0.19980621337890625
},
{
"epoch": 2.9204545454545454,
"grad_norm": 0.8649527821304737,
"kl": 0.4314434826374054,
"learning_rate": 2.1951707951867862e-06,
"loss": 0.2071,
"step": 257,
"step_loss": 0.19861182570457458
},
{
"epoch": 2.9318181818181817,
"grad_norm": 0.7631295075779413,
"kl": 0.2881592810153961,
"learning_rate": 2.1794375772554525e-06,
"loss": 0.195,
"step": 258,
"step_loss": 0.17643983662128448
},
{
"epoch": 2.9431818181818183,
"grad_norm": 0.8176775074066636,
"kl": 0.2888239324092865,
"learning_rate": 2.163734118610274e-06,
"loss": 0.2033,
"step": 259,
"step_loss": 0.22650499641895294
},
{
"epoch": 2.9545454545454546,
"grad_norm": 0.7857239601852497,
"kl": 0.39291509985923767,
"learning_rate": 2.148061238309253e-06,
"loss": 0.1934,
"step": 260,
"step_loss": 0.19493892788887024
},
{
"epoch": 2.965909090909091,
"grad_norm": 0.773095874284012,
"kl": 0.37562623620033264,
"learning_rate": 2.132419753815497e-06,
"loss": 0.1945,
"step": 261,
"step_loss": 0.2021428793668747
},
{
"epoch": 2.9772727272727275,
"grad_norm": 0.8313683748212116,
"kl": 0.3755533695220947,
"learning_rate": 2.116810480954573e-06,
"loss": 0.2116,
"step": 262,
"step_loss": 0.2229767143726349
},
{
"epoch": 2.9886363636363638,
"grad_norm": 0.7820076892211333,
"kl": 0.32565972208976746,
"learning_rate": 2.101234233871961e-06,
"loss": 0.2024,
"step": 263,
"step_loss": 0.2286100536584854
},
{
"epoch": 3.0,
"grad_norm": 0.7846662975927288,
"kl": 0.4572798013687134,
"learning_rate": 2.0856918249905904e-06,
"loss": 0.1966,
"step": 264,
"step_loss": 0.2366623431444168
},
{
"epoch": 3.0,
"eval_test_transformed.json_loss": 0.7202348113059998,
"eval_test_transformed.json_runtime": 57.1103,
"eval_test_transformed.json_samples_per_second": 8.755,
"eval_test_transformed.json_steps_per_second": 0.56,
"step": 264
}
],
"logging_steps": 1.0,
"max_steps": 440,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 50.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 105951870517248.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}