| { |
| "best_global_step": 6000, |
| "best_metric": 0.20116083323955536, |
| "best_model_checkpoint": "/content/drive/MyDrive/UC DAVIS/ECS289A-LLM/prm_project/run-2/checkpoints/checkpoint-6000", |
| "epoch": 0.8204286739821557, |
| "eval_steps": 2000, |
| "global_step": 6000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006836905616517964, |
| "grad_norm": 1.572303056716919, |
| "learning_rate": 2.232346241457859e-05, |
| "loss": 2.3604, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.013673811233035928, |
| "grad_norm": 5.201236248016357, |
| "learning_rate": 4.510250569476082e-05, |
| "loss": 2.1118, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02051071684955389, |
| "grad_norm": 9.312570571899414, |
| "learning_rate": 6.788154897494306e-05, |
| "loss": 1.8332, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.027347622466071857, |
| "grad_norm": 8.565587043762207, |
| "learning_rate": 9.066059225512529e-05, |
| "loss": 1.9173, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.03418452808258982, |
| "grad_norm": 3.824556350708008, |
| "learning_rate": 0.00011343963553530752, |
| "loss": 1.6633, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.04102143369910778, |
| "grad_norm": 5.49424934387207, |
| "learning_rate": 0.00013621867881548976, |
| "loss": 1.6122, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.04785833931562575, |
| "grad_norm": 6.3185038566589355, |
| "learning_rate": 0.000158997722095672, |
| "loss": 1.5782, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.05469524493214371, |
| "grad_norm": 3.980173349380493, |
| "learning_rate": 0.00018177676537585422, |
| "loss": 1.444, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.06153215054866167, |
| "grad_norm": 5.797272682189941, |
| "learning_rate": 0.00019999975488719786, |
| "loss": 1.5752, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.06836905616517965, |
| "grad_norm": 11.263846397399902, |
| "learning_rate": 0.0001999911760652904, |
| "loss": 1.3607, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0752059617816976, |
| "grad_norm": 4.273462772369385, |
| "learning_rate": 0.0001999703428048544, |
| "loss": 1.5023, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.08204286739821556, |
| "grad_norm": 2.9854705333709717, |
| "learning_rate": 0.00019993725765911436, |
| "loss": 1.3747, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.08887977301473353, |
| "grad_norm": 2.9444832801818848, |
| "learning_rate": 0.0001998919246828268, |
| "loss": 1.4708, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0957166786312515, |
| "grad_norm": 3.348857879638672, |
| "learning_rate": 0.00019983434943178372, |
| "loss": 1.439, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.10255358424776946, |
| "grad_norm": 5.90728759765625, |
| "learning_rate": 0.00019976453896213152, |
| "loss": 1.5048, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.10939048986428743, |
| "grad_norm": 2.6572535037994385, |
| "learning_rate": 0.0001996825018295062, |
| "loss": 1.5023, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.11622739548080539, |
| "grad_norm": 4.219803810119629, |
| "learning_rate": 0.00019958824808798494, |
| "loss": 1.5814, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.12306430109732334, |
| "grad_norm": 5.457417964935303, |
| "learning_rate": 0.00019948178928885378, |
| "loss": 1.4203, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.1299012067138413, |
| "grad_norm": 5.302417278289795, |
| "learning_rate": 0.00019936313847919218, |
| "loss": 1.3299, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.1367381123303593, |
| "grad_norm": 4.385361194610596, |
| "learning_rate": 0.00019923231020027368, |
| "loss": 1.3468, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.14357501794687724, |
| "grad_norm": 4.836021423339844, |
| "learning_rate": 0.00019908932048578416, |
| "loss": 1.2813, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.1504119235633952, |
| "grad_norm": 4.949122905731201, |
| "learning_rate": 0.00019893418685985658, |
| "loss": 1.311, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.15724882917991317, |
| "grad_norm": 6.123111248016357, |
| "learning_rate": 0.00019876692833492343, |
| "loss": 1.342, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.16408573479643113, |
| "grad_norm": 5.803433418273926, |
| "learning_rate": 0.0001985875654093866, |
| "loss": 1.2384, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.1709226404129491, |
| "grad_norm": 3.196314811706543, |
| "learning_rate": 0.00019839612006510517, |
| "loss": 1.3117, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.17775954602946706, |
| "grad_norm": 6.21234130859375, |
| "learning_rate": 0.00019819261576470152, |
| "loss": 1.2307, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.18459645164598504, |
| "grad_norm": 3.274829149246216, |
| "learning_rate": 0.00019797707744868582, |
| "loss": 1.2408, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.191433357262503, |
| "grad_norm": 5.5120320320129395, |
| "learning_rate": 0.0001977495315323993, |
| "loss": 1.324, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.19827026287902094, |
| "grad_norm": 7.289828777313232, |
| "learning_rate": 0.0001975100059027772, |
| "loss": 1.2039, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.20510716849553892, |
| "grad_norm": 4.040754795074463, |
| "learning_rate": 0.00019725852991493083, |
| "loss": 1.3405, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.21194407411205687, |
| "grad_norm": 52.13080596923828, |
| "learning_rate": 0.00019699513438854995, |
| "loss": 1.2005, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.21878097972857485, |
| "grad_norm": 5.0520429611206055, |
| "learning_rate": 0.00019671985160412593, |
| "loss": 1.0046, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.2256178853450928, |
| "grad_norm": 1.7626160383224487, |
| "learning_rate": 0.00019643271529899532, |
| "loss": 1.1398, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.23245479096161079, |
| "grad_norm": 2.1751222610473633, |
| "learning_rate": 0.00019613376066320525, |
| "loss": 1.1519, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.23929169657812874, |
| "grad_norm": 4.483262062072754, |
| "learning_rate": 0.00019582302433520074, |
| "loss": 1.144, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.2461286021946467, |
| "grad_norm": 2.494478702545166, |
| "learning_rate": 0.00019550054439733449, |
| "loss": 1.1908, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.25296550781116467, |
| "grad_norm": 14.6198091506958, |
| "learning_rate": 0.00019516636037119952, |
| "loss": 1.0791, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.2598024134276826, |
| "grad_norm": 1.5368318557739258, |
| "learning_rate": 0.00019482051321278592, |
| "loss": 1.1994, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2666393190442006, |
| "grad_norm": 6.854203701019287, |
| "learning_rate": 0.00019446304530746112, |
| "loss": 1.1871, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.2734762246607186, |
| "grad_norm": 3.686593770980835, |
| "learning_rate": 0.00019409400046477559, |
| "loss": 1.0619, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.2734762246607186, |
| "eval_loss": 0.3232106864452362, |
| "eval_runtime": 301.3298, |
| "eval_samples_per_second": 26.801, |
| "eval_steps_per_second": 3.352, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.28031313027723653, |
| "grad_norm": 2.84173321723938, |
| "learning_rate": 0.00019371342391309363, |
| "loss": 1.1769, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.2871500358937545, |
| "grad_norm": 6.158025741577148, |
| "learning_rate": 0.00019332136229405043, |
| "loss": 1.1985, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.29398694151027244, |
| "grad_norm": 1.3917083740234375, |
| "learning_rate": 0.00019291786365683599, |
| "loss": 1.2915, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.3008238471267904, |
| "grad_norm": 6.717157363891602, |
| "learning_rate": 0.00019250297745230615, |
| "loss": 0.9168, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.3076607527433084, |
| "grad_norm": 7.835381507873535, |
| "learning_rate": 0.00019207675452692259, |
| "loss": 1.0267, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.31449765835982635, |
| "grad_norm": 4.236868858337402, |
| "learning_rate": 0.00019163924711652092, |
| "loss": 1.1836, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.3213345639763443, |
| "grad_norm": 4.367033004760742, |
| "learning_rate": 0.00019119050883990903, |
| "loss": 1.1023, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.32817146959286225, |
| "grad_norm": 8.43916130065918, |
| "learning_rate": 0.00019073059469229602, |
| "loss": 1.1884, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.33500837520938026, |
| "grad_norm": 7.896825790405273, |
| "learning_rate": 0.0001902595610385519, |
| "loss": 1.1764, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.3418452808258982, |
| "grad_norm": 3.5363454818725586, |
| "learning_rate": 0.00018977746560630012, |
| "loss": 1.1172, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.34868218644241616, |
| "grad_norm": 12.307855606079102, |
| "learning_rate": 0.00018928436747884253, |
| "loss": 1.078, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.3555190920589341, |
| "grad_norm": 8.765337944030762, |
| "learning_rate": 0.00018878032708791854, |
| "loss": 1.1449, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.36235599767545207, |
| "grad_norm": 11.366116523742676, |
| "learning_rate": 0.00018826540620629873, |
| "loss": 1.1117, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.3691929032919701, |
| "grad_norm": 3.603243112564087, |
| "learning_rate": 0.0001877396679402145, |
| "loss": 1.1138, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.37602980890848803, |
| "grad_norm": 8.020549774169922, |
| "learning_rate": 0.00018720317672162392, |
| "loss": 1.0474, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.382866714525006, |
| "grad_norm": 4.786285877227783, |
| "learning_rate": 0.00018665599830031533, |
| "loss": 1.1041, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.38970362014152393, |
| "grad_norm": 7.1555633544921875, |
| "learning_rate": 0.00018609819973584924, |
| "loss": 1.0623, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.3965405257580419, |
| "grad_norm": 6.989715576171875, |
| "learning_rate": 0.00018552984938934006, |
| "loss": 0.9318, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.4033774313745599, |
| "grad_norm": 7.150449752807617, |
| "learning_rate": 0.00018495101691507783, |
| "loss": 1.132, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.41021433699107784, |
| "grad_norm": 4.584231853485107, |
| "learning_rate": 0.00018436177325199192, |
| "loss": 1.1382, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.4170512426075958, |
| "grad_norm": 5.139730930328369, |
| "learning_rate": 0.00018376219061495694, |
| "loss": 1.0452, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.42388814822411375, |
| "grad_norm": 15.497014999389648, |
| "learning_rate": 0.00018315234248594264, |
| "loss": 1.0451, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.43072505384063176, |
| "grad_norm": 3.4872303009033203, |
| "learning_rate": 0.0001825323036050081, |
| "loss": 1.131, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.4375619594571497, |
| "grad_norm": 11.307365417480469, |
| "learning_rate": 0.00018190214996114206, |
| "loss": 1.1382, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.44439886507366766, |
| "grad_norm": 5.577065467834473, |
| "learning_rate": 0.00018126195878295006, |
| "loss": 1.1045, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.4512357706901856, |
| "grad_norm": 14.33316421508789, |
| "learning_rate": 0.0001806118085291896, |
| "loss": 1.0887, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.45807267630670356, |
| "grad_norm": 15.240452766418457, |
| "learning_rate": 0.00017995177887915475, |
| "loss": 1.0171, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.46490958192322157, |
| "grad_norm": 10.07467269897461, |
| "learning_rate": 0.00017928195072291093, |
| "loss": 1.0966, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.4717464875397395, |
| "grad_norm": 2.930840253829956, |
| "learning_rate": 0.00017860240615138142, |
| "loss": 1.0418, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.4785833931562575, |
| "grad_norm": 30.01850700378418, |
| "learning_rate": 0.00017791322844628677, |
| "loss": 0.9635, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.4854202987727754, |
| "grad_norm": 5.433286666870117, |
| "learning_rate": 0.0001772145020699381, |
| "loss": 1.0108, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.4922572043892934, |
| "grad_norm": 3.0814309120178223, |
| "learning_rate": 0.0001765063126548858, |
| "loss": 1.1257, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.4990941100058114, |
| "grad_norm": 79.82017517089844, |
| "learning_rate": 0.00017578874699342493, |
| "loss": 1.1214, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.5059310156223293, |
| "grad_norm": 8.51614761352539, |
| "learning_rate": 0.00017506189302695827, |
| "loss": 0.8635, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.5127679212388473, |
| "grad_norm": 8.251550674438477, |
| "learning_rate": 0.0001743258398352187, |
| "loss": 0.9361, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.5196048268553652, |
| "grad_norm": 3.81523060798645, |
| "learning_rate": 0.00017358067762535186, |
| "loss": 1.066, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.5264417324718832, |
| "grad_norm": 15.210460662841797, |
| "learning_rate": 0.00017282649772086114, |
| "loss": 0.9778, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.5332786380884011, |
| "grad_norm": 5.145527362823486, |
| "learning_rate": 0.0001720633925504151, |
| "loss": 1.0966, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.5401155437049191, |
| "grad_norm": 3.485656261444092, |
| "learning_rate": 0.00017129145563652014, |
| "loss": 0.6889, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.5469524493214372, |
| "grad_norm": 7.915320873260498, |
| "learning_rate": 0.00017051078158405872, |
| "loss": 0.9154, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5469524493214372, |
| "eval_loss": 0.24666446447372437, |
| "eval_runtime": 301.8017, |
| "eval_samples_per_second": 26.759, |
| "eval_steps_per_second": 3.347, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5537893549379551, |
| "grad_norm": 12.610590934753418, |
| "learning_rate": 0.00016972146606869507, |
| "loss": 0.8612, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.5606262605544731, |
| "grad_norm": 34.93125915527344, |
| "learning_rate": 0.00016892360582514967, |
| "loss": 1.0867, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.567463166170991, |
| "grad_norm": 7.39677095413208, |
| "learning_rate": 0.00016811729863534377, |
| "loss": 1.1106, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.574300071787509, |
| "grad_norm": 2.4880149364471436, |
| "learning_rate": 0.00016730264331641585, |
| "loss": 0.9142, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.5811369774040269, |
| "grad_norm": 19.268964767456055, |
| "learning_rate": 0.00016647973970861104, |
| "loss": 0.9408, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.5879738830205449, |
| "grad_norm": 62.558837890625, |
| "learning_rate": 0.00016564868866304517, |
| "loss": 1.1798, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.5948107886370628, |
| "grad_norm": 12.449636459350586, |
| "learning_rate": 0.00016480959202934487, |
| "loss": 0.9386, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.6016476942535808, |
| "grad_norm": 9.708828926086426, |
| "learning_rate": 0.00016396255264316547, |
| "loss": 1.0766, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.6084845998700988, |
| "grad_norm": 4.00963020324707, |
| "learning_rate": 0.0001631076743135879, |
| "loss": 0.9953, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.6153215054866168, |
| "grad_norm": 14.70906925201416, |
| "learning_rate": 0.0001622450618103964, |
| "loss": 1.1006, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.6221584111031347, |
| "grad_norm": 2.471301317214966, |
| "learning_rate": 0.00016137482085123832, |
| "loss": 0.7397, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.6289953167196527, |
| "grad_norm": 0.671847939491272, |
| "learning_rate": 0.00016049705808866805, |
| "loss": 1.1298, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.6358322223361706, |
| "grad_norm": 11.712217330932617, |
| "learning_rate": 0.000159611881097076, |
| "loss": 0.8828, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.6426691279526886, |
| "grad_norm": 90.13214111328125, |
| "learning_rate": 0.00015871939835950503, |
| "loss": 1.085, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.6495060335692066, |
| "grad_norm": 2.1299564838409424, |
| "learning_rate": 0.00015781971925435498, |
| "loss": 1.0104, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.6563429391857245, |
| "grad_norm": 44.118778228759766, |
| "learning_rate": 0.0001569129540419781, |
| "loss": 0.8905, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.6631798448022425, |
| "grad_norm": 20.966922760009766, |
| "learning_rate": 0.00015599921385116582, |
| "loss": 0.9239, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.6700167504187605, |
| "grad_norm": 13.358034133911133, |
| "learning_rate": 0.00015507861066552955, |
| "loss": 0.8589, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.6768536560352785, |
| "grad_norm": 5.739938259124756, |
| "learning_rate": 0.00015415125730977626, |
| "loss": 1.0661, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.6836905616517964, |
| "grad_norm": 25.265790939331055, |
| "learning_rate": 0.00015321726743588155, |
| "loss": 0.9046, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6905274672683144, |
| "grad_norm": 22.772367477416992, |
| "learning_rate": 0.00015227675550916073, |
| "loss": 1.0174, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.6973643728848323, |
| "grad_norm": 4.18620491027832, |
| "learning_rate": 0.0001513298367942405, |
| "loss": 0.9916, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.7042012785013503, |
| "grad_norm": 10.113117218017578, |
| "learning_rate": 0.00015037662734093286, |
| "loss": 0.9635, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.7110381841178682, |
| "grad_norm": 1.7103244066238403, |
| "learning_rate": 0.0001494172439700126, |
| "loss": 0.8927, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.7178750897343862, |
| "grad_norm": 24.236433029174805, |
| "learning_rate": 0.0001484518042589, |
| "loss": 0.9438, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.7247119953509041, |
| "grad_norm": 2.4070262908935547, |
| "learning_rate": 0.00014748042652725152, |
| "loss": 1.095, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.7315489009674222, |
| "grad_norm": 4.471241474151611, |
| "learning_rate": 0.0001465032298224588, |
| "loss": 0.8205, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.7383858065839402, |
| "grad_norm": 1.757636547088623, |
| "learning_rate": 0.0001455203339050589, |
| "loss": 0.9177, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.7452227122004581, |
| "grad_norm": 1.5365773439407349, |
| "learning_rate": 0.0001445318592340571, |
| "loss": 0.7696, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.7520596178169761, |
| "grad_norm": 1.7077670097351074, |
| "learning_rate": 0.00014353792695216382, |
| "loss": 0.9342, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.758896523433494, |
| "grad_norm": 28.525236129760742, |
| "learning_rate": 0.00014253865887094817, |
| "loss": 0.9897, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.765733429050012, |
| "grad_norm": 15.281404495239258, |
| "learning_rate": 0.00014153417745590914, |
| "loss": 0.8873, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7725703346665299, |
| "grad_norm": 1.1002103090286255, |
| "learning_rate": 0.00014052460581146696, |
| "loss": 0.7727, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.7794072402830479, |
| "grad_norm": 4.395946025848389, |
| "learning_rate": 0.00013951006766587586, |
| "loss": 0.8922, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.7862441458995658, |
| "grad_norm": 5.225406169891357, |
| "learning_rate": 0.0001384906873560607, |
| "loss": 0.9766, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.7930810515160838, |
| "grad_norm": 6.0966315269470215, |
| "learning_rate": 0.00013746658981237867, |
| "loss": 1.1373, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.7999179571326018, |
| "grad_norm": 14.155887603759766, |
| "learning_rate": 0.00013643790054330846, |
| "loss": 0.8954, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.8067548627491198, |
| "grad_norm": 2.6549534797668457, |
| "learning_rate": 0.0001354047456200687, |
| "loss": 1.0428, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.8135917683656377, |
| "grad_norm": 7.79277229309082, |
| "learning_rate": 0.0001343672516611671, |
| "loss": 0.8715, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.8204286739821557, |
| "grad_norm": 17.183149337768555, |
| "learning_rate": 0.00013332554581688271, |
| "loss": 1.0601, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.8204286739821557, |
| "eval_loss": 0.20116083323955536, |
| "eval_runtime": 301.512, |
| "eval_samples_per_second": 26.785, |
| "eval_steps_per_second": 3.35, |
| "step": 6000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 14628, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5820248412832317e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|