{
"best_global_step": 6000,
"best_metric": 0.20116083323955536,
"best_model_checkpoint": "/content/drive/MyDrive/UC DAVIS/ECS289A-LLM/prm_project/run-2/checkpoints/checkpoint-6000",
"epoch": 0.8204286739821557,
"eval_steps": 2000,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006836905616517964,
"grad_norm": 1.572303056716919,
"learning_rate": 2.232346241457859e-05,
"loss": 2.3604,
"step": 50
},
{
"epoch": 0.013673811233035928,
"grad_norm": 5.201236248016357,
"learning_rate": 4.510250569476082e-05,
"loss": 2.1118,
"step": 100
},
{
"epoch": 0.02051071684955389,
"grad_norm": 9.312570571899414,
"learning_rate": 6.788154897494306e-05,
"loss": 1.8332,
"step": 150
},
{
"epoch": 0.027347622466071857,
"grad_norm": 8.565587043762207,
"learning_rate": 9.066059225512529e-05,
"loss": 1.9173,
"step": 200
},
{
"epoch": 0.03418452808258982,
"grad_norm": 3.824556350708008,
"learning_rate": 0.00011343963553530752,
"loss": 1.6633,
"step": 250
},
{
"epoch": 0.04102143369910778,
"grad_norm": 5.49424934387207,
"learning_rate": 0.00013621867881548976,
"loss": 1.6122,
"step": 300
},
{
"epoch": 0.04785833931562575,
"grad_norm": 6.3185038566589355,
"learning_rate": 0.000158997722095672,
"loss": 1.5782,
"step": 350
},
{
"epoch": 0.05469524493214371,
"grad_norm": 3.980173349380493,
"learning_rate": 0.00018177676537585422,
"loss": 1.444,
"step": 400
},
{
"epoch": 0.06153215054866167,
"grad_norm": 5.797272682189941,
"learning_rate": 0.00019999975488719786,
"loss": 1.5752,
"step": 450
},
{
"epoch": 0.06836905616517965,
"grad_norm": 11.263846397399902,
"learning_rate": 0.0001999911760652904,
"loss": 1.3607,
"step": 500
},
{
"epoch": 0.0752059617816976,
"grad_norm": 4.273462772369385,
"learning_rate": 0.0001999703428048544,
"loss": 1.5023,
"step": 550
},
{
"epoch": 0.08204286739821556,
"grad_norm": 2.9854705333709717,
"learning_rate": 0.00019993725765911436,
"loss": 1.3747,
"step": 600
},
{
"epoch": 0.08887977301473353,
"grad_norm": 2.9444832801818848,
"learning_rate": 0.0001998919246828268,
"loss": 1.4708,
"step": 650
},
{
"epoch": 0.0957166786312515,
"grad_norm": 3.348857879638672,
"learning_rate": 0.00019983434943178372,
"loss": 1.439,
"step": 700
},
{
"epoch": 0.10255358424776946,
"grad_norm": 5.90728759765625,
"learning_rate": 0.00019976453896213152,
"loss": 1.5048,
"step": 750
},
{
"epoch": 0.10939048986428743,
"grad_norm": 2.6572535037994385,
"learning_rate": 0.0001996825018295062,
"loss": 1.5023,
"step": 800
},
{
"epoch": 0.11622739548080539,
"grad_norm": 4.219803810119629,
"learning_rate": 0.00019958824808798494,
"loss": 1.5814,
"step": 850
},
{
"epoch": 0.12306430109732334,
"grad_norm": 5.457417964935303,
"learning_rate": 0.00019948178928885378,
"loss": 1.4203,
"step": 900
},
{
"epoch": 0.1299012067138413,
"grad_norm": 5.302417278289795,
"learning_rate": 0.00019936313847919218,
"loss": 1.3299,
"step": 950
},
{
"epoch": 0.1367381123303593,
"grad_norm": 4.385361194610596,
"learning_rate": 0.00019923231020027368,
"loss": 1.3468,
"step": 1000
},
{
"epoch": 0.14357501794687724,
"grad_norm": 4.836021423339844,
"learning_rate": 0.00019908932048578416,
"loss": 1.2813,
"step": 1050
},
{
"epoch": 0.1504119235633952,
"grad_norm": 4.949122905731201,
"learning_rate": 0.00019893418685985658,
"loss": 1.311,
"step": 1100
},
{
"epoch": 0.15724882917991317,
"grad_norm": 6.123111248016357,
"learning_rate": 0.00019876692833492343,
"loss": 1.342,
"step": 1150
},
{
"epoch": 0.16408573479643113,
"grad_norm": 5.803433418273926,
"learning_rate": 0.0001985875654093866,
"loss": 1.2384,
"step": 1200
},
{
"epoch": 0.1709226404129491,
"grad_norm": 3.196314811706543,
"learning_rate": 0.00019839612006510517,
"loss": 1.3117,
"step": 1250
},
{
"epoch": 0.17775954602946706,
"grad_norm": 6.21234130859375,
"learning_rate": 0.00019819261576470152,
"loss": 1.2307,
"step": 1300
},
{
"epoch": 0.18459645164598504,
"grad_norm": 3.274829149246216,
"learning_rate": 0.00019797707744868582,
"loss": 1.2408,
"step": 1350
},
{
"epoch": 0.191433357262503,
"grad_norm": 5.5120320320129395,
"learning_rate": 0.0001977495315323993,
"loss": 1.324,
"step": 1400
},
{
"epoch": 0.19827026287902094,
"grad_norm": 7.289828777313232,
"learning_rate": 0.0001975100059027772,
"loss": 1.2039,
"step": 1450
},
{
"epoch": 0.20510716849553892,
"grad_norm": 4.040754795074463,
"learning_rate": 0.00019725852991493083,
"loss": 1.3405,
"step": 1500
},
{
"epoch": 0.21194407411205687,
"grad_norm": 52.13080596923828,
"learning_rate": 0.00019699513438854995,
"loss": 1.2005,
"step": 1550
},
{
"epoch": 0.21878097972857485,
"grad_norm": 5.0520429611206055,
"learning_rate": 0.00019671985160412593,
"loss": 1.0046,
"step": 1600
},
{
"epoch": 0.2256178853450928,
"grad_norm": 1.7626160383224487,
"learning_rate": 0.00019643271529899532,
"loss": 1.1398,
"step": 1650
},
{
"epoch": 0.23245479096161079,
"grad_norm": 2.1751222610473633,
"learning_rate": 0.00019613376066320525,
"loss": 1.1519,
"step": 1700
},
{
"epoch": 0.23929169657812874,
"grad_norm": 4.483262062072754,
"learning_rate": 0.00019582302433520074,
"loss": 1.144,
"step": 1750
},
{
"epoch": 0.2461286021946467,
"grad_norm": 2.494478702545166,
"learning_rate": 0.00019550054439733449,
"loss": 1.1908,
"step": 1800
},
{
"epoch": 0.25296550781116467,
"grad_norm": 14.6198091506958,
"learning_rate": 0.00019516636037119952,
"loss": 1.0791,
"step": 1850
},
{
"epoch": 0.2598024134276826,
"grad_norm": 1.5368318557739258,
"learning_rate": 0.00019482051321278592,
"loss": 1.1994,
"step": 1900
},
{
"epoch": 0.2666393190442006,
"grad_norm": 6.854203701019287,
"learning_rate": 0.00019446304530746112,
"loss": 1.1871,
"step": 1950
},
{
"epoch": 0.2734762246607186,
"grad_norm": 3.686593770980835,
"learning_rate": 0.00019409400046477559,
"loss": 1.0619,
"step": 2000
},
{
"epoch": 0.2734762246607186,
"eval_loss": 0.3232106864452362,
"eval_runtime": 301.3298,
"eval_samples_per_second": 26.801,
"eval_steps_per_second": 3.352,
"step": 2000
},
{
"epoch": 0.28031313027723653,
"grad_norm": 2.84173321723938,
"learning_rate": 0.00019371342391309363,
"loss": 1.1769,
"step": 2050
},
{
"epoch": 0.2871500358937545,
"grad_norm": 6.158025741577148,
"learning_rate": 0.00019332136229405043,
"loss": 1.1985,
"step": 2100
},
{
"epoch": 0.29398694151027244,
"grad_norm": 1.3917083740234375,
"learning_rate": 0.00019291786365683599,
"loss": 1.2915,
"step": 2150
},
{
"epoch": 0.3008238471267904,
"grad_norm": 6.717157363891602,
"learning_rate": 0.00019250297745230615,
"loss": 0.9168,
"step": 2200
},
{
"epoch": 0.3076607527433084,
"grad_norm": 7.835381507873535,
"learning_rate": 0.00019207675452692259,
"loss": 1.0267,
"step": 2250
},
{
"epoch": 0.31449765835982635,
"grad_norm": 4.236868858337402,
"learning_rate": 0.00019163924711652092,
"loss": 1.1836,
"step": 2300
},
{
"epoch": 0.3213345639763443,
"grad_norm": 4.367033004760742,
"learning_rate": 0.00019119050883990903,
"loss": 1.1023,
"step": 2350
},
{
"epoch": 0.32817146959286225,
"grad_norm": 8.43916130065918,
"learning_rate": 0.00019073059469229602,
"loss": 1.1884,
"step": 2400
},
{
"epoch": 0.33500837520938026,
"grad_norm": 7.896825790405273,
"learning_rate": 0.0001902595610385519,
"loss": 1.1764,
"step": 2450
},
{
"epoch": 0.3418452808258982,
"grad_norm": 3.5363454818725586,
"learning_rate": 0.00018977746560630012,
"loss": 1.1172,
"step": 2500
},
{
"epoch": 0.34868218644241616,
"grad_norm": 12.307855606079102,
"learning_rate": 0.00018928436747884253,
"loss": 1.078,
"step": 2550
},
{
"epoch": 0.3555190920589341,
"grad_norm": 8.765337944030762,
"learning_rate": 0.00018878032708791854,
"loss": 1.1449,
"step": 2600
},
{
"epoch": 0.36235599767545207,
"grad_norm": 11.366116523742676,
"learning_rate": 0.00018826540620629873,
"loss": 1.1117,
"step": 2650
},
{
"epoch": 0.3691929032919701,
"grad_norm": 3.603243112564087,
"learning_rate": 0.0001877396679402145,
"loss": 1.1138,
"step": 2700
},
{
"epoch": 0.37602980890848803,
"grad_norm": 8.020549774169922,
"learning_rate": 0.00018720317672162392,
"loss": 1.0474,
"step": 2750
},
{
"epoch": 0.382866714525006,
"grad_norm": 4.786285877227783,
"learning_rate": 0.00018665599830031533,
"loss": 1.1041,
"step": 2800
},
{
"epoch": 0.38970362014152393,
"grad_norm": 7.1555633544921875,
"learning_rate": 0.00018609819973584924,
"loss": 1.0623,
"step": 2850
},
{
"epoch": 0.3965405257580419,
"grad_norm": 6.989715576171875,
"learning_rate": 0.00018552984938934006,
"loss": 0.9318,
"step": 2900
},
{
"epoch": 0.4033774313745599,
"grad_norm": 7.150449752807617,
"learning_rate": 0.00018495101691507783,
"loss": 1.132,
"step": 2950
},
{
"epoch": 0.41021433699107784,
"grad_norm": 4.584231853485107,
"learning_rate": 0.00018436177325199192,
"loss": 1.1382,
"step": 3000
},
{
"epoch": 0.4170512426075958,
"grad_norm": 5.139730930328369,
"learning_rate": 0.00018376219061495694,
"loss": 1.0452,
"step": 3050
},
{
"epoch": 0.42388814822411375,
"grad_norm": 15.497014999389648,
"learning_rate": 0.00018315234248594264,
"loss": 1.0451,
"step": 3100
},
{
"epoch": 0.43072505384063176,
"grad_norm": 3.4872303009033203,
"learning_rate": 0.0001825323036050081,
"loss": 1.131,
"step": 3150
},
{
"epoch": 0.4375619594571497,
"grad_norm": 11.307365417480469,
"learning_rate": 0.00018190214996114206,
"loss": 1.1382,
"step": 3200
},
{
"epoch": 0.44439886507366766,
"grad_norm": 5.577065467834473,
"learning_rate": 0.00018126195878295006,
"loss": 1.1045,
"step": 3250
},
{
"epoch": 0.4512357706901856,
"grad_norm": 14.33316421508789,
"learning_rate": 0.0001806118085291896,
"loss": 1.0887,
"step": 3300
},
{
"epoch": 0.45807267630670356,
"grad_norm": 15.240452766418457,
"learning_rate": 0.00017995177887915475,
"loss": 1.0171,
"step": 3350
},
{
"epoch": 0.46490958192322157,
"grad_norm": 10.07467269897461,
"learning_rate": 0.00017928195072291093,
"loss": 1.0966,
"step": 3400
},
{
"epoch": 0.4717464875397395,
"grad_norm": 2.930840253829956,
"learning_rate": 0.00017860240615138142,
"loss": 1.0418,
"step": 3450
},
{
"epoch": 0.4785833931562575,
"grad_norm": 30.01850700378418,
"learning_rate": 0.00017791322844628677,
"loss": 0.9635,
"step": 3500
},
{
"epoch": 0.4854202987727754,
"grad_norm": 5.433286666870117,
"learning_rate": 0.0001772145020699381,
"loss": 1.0108,
"step": 3550
},
{
"epoch": 0.4922572043892934,
"grad_norm": 3.0814309120178223,
"learning_rate": 0.0001765063126548858,
"loss": 1.1257,
"step": 3600
},
{
"epoch": 0.4990941100058114,
"grad_norm": 79.82017517089844,
"learning_rate": 0.00017578874699342493,
"loss": 1.1214,
"step": 3650
},
{
"epoch": 0.5059310156223293,
"grad_norm": 8.51614761352539,
"learning_rate": 0.00017506189302695827,
"loss": 0.8635,
"step": 3700
},
{
"epoch": 0.5127679212388473,
"grad_norm": 8.251550674438477,
"learning_rate": 0.0001743258398352187,
"loss": 0.9361,
"step": 3750
},
{
"epoch": 0.5196048268553652,
"grad_norm": 3.81523060798645,
"learning_rate": 0.00017358067762535186,
"loss": 1.066,
"step": 3800
},
{
"epoch": 0.5264417324718832,
"grad_norm": 15.210460662841797,
"learning_rate": 0.00017282649772086114,
"loss": 0.9778,
"step": 3850
},
{
"epoch": 0.5332786380884011,
"grad_norm": 5.145527362823486,
"learning_rate": 0.0001720633925504151,
"loss": 1.0966,
"step": 3900
},
{
"epoch": 0.5401155437049191,
"grad_norm": 3.485656261444092,
"learning_rate": 0.00017129145563652014,
"loss": 0.6889,
"step": 3950
},
{
"epoch": 0.5469524493214372,
"grad_norm": 7.915320873260498,
"learning_rate": 0.00017051078158405872,
"loss": 0.9154,
"step": 4000
},
{
"epoch": 0.5469524493214372,
"eval_loss": 0.24666446447372437,
"eval_runtime": 301.8017,
"eval_samples_per_second": 26.759,
"eval_steps_per_second": 3.347,
"step": 4000
},
{
"epoch": 0.5537893549379551,
"grad_norm": 12.610590934753418,
"learning_rate": 0.00016972146606869507,
"loss": 0.8612,
"step": 4050
},
{
"epoch": 0.5606262605544731,
"grad_norm": 34.93125915527344,
"learning_rate": 0.00016892360582514967,
"loss": 1.0867,
"step": 4100
},
{
"epoch": 0.567463166170991,
"grad_norm": 7.39677095413208,
"learning_rate": 0.00016811729863534377,
"loss": 1.1106,
"step": 4150
},
{
"epoch": 0.574300071787509,
"grad_norm": 2.4880149364471436,
"learning_rate": 0.00016730264331641585,
"loss": 0.9142,
"step": 4200
},
{
"epoch": 0.5811369774040269,
"grad_norm": 19.268964767456055,
"learning_rate": 0.00016647973970861104,
"loss": 0.9408,
"step": 4250
},
{
"epoch": 0.5879738830205449,
"grad_norm": 62.558837890625,
"learning_rate": 0.00016564868866304517,
"loss": 1.1798,
"step": 4300
},
{
"epoch": 0.5948107886370628,
"grad_norm": 12.449636459350586,
"learning_rate": 0.00016480959202934487,
"loss": 0.9386,
"step": 4350
},
{
"epoch": 0.6016476942535808,
"grad_norm": 9.708828926086426,
"learning_rate": 0.00016396255264316547,
"loss": 1.0766,
"step": 4400
},
{
"epoch": 0.6084845998700988,
"grad_norm": 4.00963020324707,
"learning_rate": 0.0001631076743135879,
"loss": 0.9953,
"step": 4450
},
{
"epoch": 0.6153215054866168,
"grad_norm": 14.70906925201416,
"learning_rate": 0.0001622450618103964,
"loss": 1.1006,
"step": 4500
},
{
"epoch": 0.6221584111031347,
"grad_norm": 2.471301317214966,
"learning_rate": 0.00016137482085123832,
"loss": 0.7397,
"step": 4550
},
{
"epoch": 0.6289953167196527,
"grad_norm": 0.671847939491272,
"learning_rate": 0.00016049705808866805,
"loss": 1.1298,
"step": 4600
},
{
"epoch": 0.6358322223361706,
"grad_norm": 11.712217330932617,
"learning_rate": 0.000159611881097076,
"loss": 0.8828,
"step": 4650
},
{
"epoch": 0.6426691279526886,
"grad_norm": 90.13214111328125,
"learning_rate": 0.00015871939835950503,
"loss": 1.085,
"step": 4700
},
{
"epoch": 0.6495060335692066,
"grad_norm": 2.1299564838409424,
"learning_rate": 0.00015781971925435498,
"loss": 1.0104,
"step": 4750
},
{
"epoch": 0.6563429391857245,
"grad_norm": 44.118778228759766,
"learning_rate": 0.0001569129540419781,
"loss": 0.8905,
"step": 4800
},
{
"epoch": 0.6631798448022425,
"grad_norm": 20.966922760009766,
"learning_rate": 0.00015599921385116582,
"loss": 0.9239,
"step": 4850
},
{
"epoch": 0.6700167504187605,
"grad_norm": 13.358034133911133,
"learning_rate": 0.00015507861066552955,
"loss": 0.8589,
"step": 4900
},
{
"epoch": 0.6768536560352785,
"grad_norm": 5.739938259124756,
"learning_rate": 0.00015415125730977626,
"loss": 1.0661,
"step": 4950
},
{
"epoch": 0.6836905616517964,
"grad_norm": 25.265790939331055,
"learning_rate": 0.00015321726743588155,
"loss": 0.9046,
"step": 5000
},
{
"epoch": 0.6905274672683144,
"grad_norm": 22.772367477416992,
"learning_rate": 0.00015227675550916073,
"loss": 1.0174,
"step": 5050
},
{
"epoch": 0.6973643728848323,
"grad_norm": 4.18620491027832,
"learning_rate": 0.0001513298367942405,
"loss": 0.9916,
"step": 5100
},
{
"epoch": 0.7042012785013503,
"grad_norm": 10.113117218017578,
"learning_rate": 0.00015037662734093286,
"loss": 0.9635,
"step": 5150
},
{
"epoch": 0.7110381841178682,
"grad_norm": 1.7103244066238403,
"learning_rate": 0.0001494172439700126,
"loss": 0.8927,
"step": 5200
},
{
"epoch": 0.7178750897343862,
"grad_norm": 24.236433029174805,
"learning_rate": 0.0001484518042589,
"loss": 0.9438,
"step": 5250
},
{
"epoch": 0.7247119953509041,
"grad_norm": 2.4070262908935547,
"learning_rate": 0.00014748042652725152,
"loss": 1.095,
"step": 5300
},
{
"epoch": 0.7315489009674222,
"grad_norm": 4.471241474151611,
"learning_rate": 0.0001465032298224588,
"loss": 0.8205,
"step": 5350
},
{
"epoch": 0.7383858065839402,
"grad_norm": 1.757636547088623,
"learning_rate": 0.0001455203339050589,
"loss": 0.9177,
"step": 5400
},
{
"epoch": 0.7452227122004581,
"grad_norm": 1.5365773439407349,
"learning_rate": 0.0001445318592340571,
"loss": 0.7696,
"step": 5450
},
{
"epoch": 0.7520596178169761,
"grad_norm": 1.7077670097351074,
"learning_rate": 0.00014353792695216382,
"loss": 0.9342,
"step": 5500
},
{
"epoch": 0.758896523433494,
"grad_norm": 28.525236129760742,
"learning_rate": 0.00014253865887094817,
"loss": 0.9897,
"step": 5550
},
{
"epoch": 0.765733429050012,
"grad_norm": 15.281404495239258,
"learning_rate": 0.00014153417745590914,
"loss": 0.8873,
"step": 5600
},
{
"epoch": 0.7725703346665299,
"grad_norm": 1.1002103090286255,
"learning_rate": 0.00014052460581146696,
"loss": 0.7727,
"step": 5650
},
{
"epoch": 0.7794072402830479,
"grad_norm": 4.395946025848389,
"learning_rate": 0.00013951006766587586,
"loss": 0.8922,
"step": 5700
},
{
"epoch": 0.7862441458995658,
"grad_norm": 5.225406169891357,
"learning_rate": 0.0001384906873560607,
"loss": 0.9766,
"step": 5750
},
{
"epoch": 0.7930810515160838,
"grad_norm": 6.0966315269470215,
"learning_rate": 0.00013746658981237867,
"loss": 1.1373,
"step": 5800
},
{
"epoch": 0.7999179571326018,
"grad_norm": 14.155887603759766,
"learning_rate": 0.00013643790054330846,
"loss": 0.8954,
"step": 5850
},
{
"epoch": 0.8067548627491198,
"grad_norm": 2.6549534797668457,
"learning_rate": 0.0001354047456200687,
"loss": 1.0428,
"step": 5900
},
{
"epoch": 0.8135917683656377,
"grad_norm": 7.79277229309082,
"learning_rate": 0.0001343672516611671,
"loss": 0.8715,
"step": 5950
},
{
"epoch": 0.8204286739821557,
"grad_norm": 17.183149337768555,
"learning_rate": 0.00013332554581688271,
"loss": 1.0601,
"step": 6000
},
{
"epoch": 0.8204286739821557,
"eval_loss": 0.20116083323955536,
"eval_runtime": 301.512,
"eval_samples_per_second": 26.785,
"eval_steps_per_second": 3.35,
"step": 6000
}
],
"logging_steps": 50,
"max_steps": 14628,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.5820248412832317e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}