{
  "best_metric": 2.9280490707606077e-05,
  "best_model_checkpoint": "./results/models/checkpoint-156240",
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 156240,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025601638504864313,
      "grad_norm": 0.0004177093505859375,
      "learning_rate": 0.0009994879672299028,
      "loss": 0.023,
      "step": 500
    },
    {
      "epoch": 0.051203277009728626,
      "grad_norm": 0.000274658203125,
      "learning_rate": 0.0009989759344598056,
      "loss": 0.0,
      "step": 1000
    },
    {
      "epoch": 0.07680491551459294,
      "grad_norm": 0.000270843505859375,
      "learning_rate": 0.0009984639016897081,
      "loss": 0.0,
      "step": 1500
    },
    {
      "epoch": 0.10240655401945725,
      "grad_norm": 0.0002231597900390625,
      "learning_rate": 0.000997951868919611,
      "loss": 0.0,
      "step": 2000
    },
    {
      "epoch": 0.12800819252432155,
      "grad_norm": 0.00021457672119140625,
      "learning_rate": 0.0009974398361495137,
      "loss": 0.0,
      "step": 2500
    },
    {
      "epoch": 0.15360983102918588,
      "grad_norm": 0.00020694732666015625,
      "learning_rate": 0.0009969278033794163,
      "loss": 0.0,
      "step": 3000
    },
    {
      "epoch": 0.17921146953405018,
      "grad_norm": 0.00020313262939453125,
      "learning_rate": 0.000996415770609319,
      "loss": 0.0,
      "step": 3500
    },
    {
      "epoch": 0.2048131080389145,
      "grad_norm": 0.000202178955078125,
      "learning_rate": 0.0009959037378392218,
      "loss": 0.0,
      "step": 4000
    },
    {
      "epoch": 0.2304147465437788,
      "grad_norm": 0.0001926422119140625,
      "learning_rate": 0.0009953917050691244,
      "loss": 0.0,
      "step": 4500
    },
    {
      "epoch": 0.2560163850486431,
      "grad_norm": 0.0024566650390625,
      "learning_rate": 0.0009948796722990272,
      "loss": 0.0813,
      "step": 5000
    },
    {
      "epoch": 0.2816180235535074,
      "grad_norm": 0.0025482177734375,
      "learning_rate": 0.00099436763952893,
      "loss": 0.001,
      "step": 5500
    },
    {
      "epoch": 0.30721966205837176,
      "grad_norm": 0.00048828125,
      "learning_rate": 0.0009938556067588325,
      "loss": 0.0005,
      "step": 6000
    },
    {
      "epoch": 0.33282130056323606,
      "grad_norm": 0.00058746337890625,
      "learning_rate": 0.0009933435739887353,
      "loss": 0.0004,
      "step": 6500
    },
    {
      "epoch": 0.35842293906810035,
      "grad_norm": 0.000614166259765625,
      "learning_rate": 0.000992831541218638,
      "loss": 0.0004,
      "step": 7000
    },
    {
      "epoch": 0.38402457757296465,
      "grad_norm": 0.014404296875,
      "learning_rate": 0.0009923195084485406,
      "loss": 0.0002,
      "step": 7500
    },
    {
      "epoch": 0.409626216077829,
      "grad_norm": 0.000820159912109375,
      "learning_rate": 0.0009918074756784434,
      "loss": 0.0002,
      "step": 8000
    },
    {
      "epoch": 0.4352278545826933,
      "grad_norm": 0.000713348388671875,
      "learning_rate": 0.0009912954429083462,
      "loss": 0.0002,
      "step": 8500
    },
    {
      "epoch": 0.4608294930875576,
      "grad_norm": 0.00057220458984375,
      "learning_rate": 0.000990783410138249,
      "loss": 0.0002,
      "step": 9000
    },
    {
      "epoch": 0.4864311315924219,
      "grad_norm": 0.0026702880859375,
      "learning_rate": 0.0009902713773681515,
      "loss": 0.0002,
      "step": 9500
    },
    {
      "epoch": 0.5120327700972862,
      "grad_norm": 0.0026702880859375,
      "learning_rate": 0.0009897593445980543,
      "loss": 0.0001,
      "step": 10000
    },
    {
      "epoch": 0.5376344086021505,
      "grad_norm": 0.0005035400390625,
      "learning_rate": 0.000989247311827957,
      "loss": 0.0001,
      "step": 10500
    },
    {
      "epoch": 0.5632360471070148,
      "grad_norm": 0.00052642822265625,
      "learning_rate": 0.0009887352790578599,
      "loss": 0.0002,
      "step": 11000
    },
    {
      "epoch": 0.5888376856118792,
      "grad_norm": 0.000667572021484375,
      "learning_rate": 0.0009882232462877624,
      "loss": 0.0001,
      "step": 11500
    },
    {
      "epoch": 0.6144393241167435,
      "grad_norm": 0.0003376007080078125,
      "learning_rate": 0.0009877112135176652,
      "loss": 0.0001,
      "step": 12000
    },
    {
      "epoch": 0.6400409626216078,
      "grad_norm": 0.00029754638671875,
      "learning_rate": 0.000987199180747568,
      "loss": 0.0001,
      "step": 12500
    },
    {
      "epoch": 0.6656426011264721,
      "grad_norm": 0.000308990478515625,
      "learning_rate": 0.0009866871479774705,
      "loss": 0.0001,
      "step": 13000
    },
    {
      "epoch": 0.6912442396313364,
      "grad_norm": 0.0003147125244140625,
      "learning_rate": 0.0009861751152073733,
      "loss": 0.0001,
      "step": 13500
    },
    {
      "epoch": 0.7168458781362007,
      "grad_norm": 0.0004367828369140625,
      "learning_rate": 0.0009856630824372759,
      "loss": 0.0001,
      "step": 14000
    },
    {
      "epoch": 0.742447516641065,
      "grad_norm": 0.0002880096435546875,
      "learning_rate": 0.0009851510496671787,
      "loss": 0.0001,
      "step": 14500
    },
    {
      "epoch": 0.7680491551459293,
      "grad_norm": 0.000274658203125,
      "learning_rate": 0.0009846390168970814,
      "loss": 0.0001,
      "step": 15000
    },
    {
      "epoch": 0.7936507936507936,
      "grad_norm": 0.0003528594970703125,
      "learning_rate": 0.000984126984126984,
      "loss": 0.0001,
      "step": 15500
    },
    {
      "epoch": 0.819252432155658,
      "grad_norm": 0.0002880096435546875,
      "learning_rate": 0.0009836149513568868,
      "loss": 0.0001,
      "step": 16000
    },
    {
      "epoch": 0.8448540706605223,
      "grad_norm": 0.0002536773681640625,
      "learning_rate": 0.0009831029185867896,
      "loss": 0.0001,
      "step": 16500
    },
    {
      "epoch": 0.8704557091653866,
      "grad_norm": 0.0003509521484375,
      "learning_rate": 0.0009825908858166923,
      "loss": 0.0001,
      "step": 17000
    },
    {
      "epoch": 0.8960573476702509,
      "grad_norm": 0.000244140625,
      "learning_rate": 0.0009820788530465951,
      "loss": 0.0,
      "step": 17500
    },
    {
      "epoch": 0.9216589861751152,
      "grad_norm": 0.000568389892578125,
      "learning_rate": 0.0009815668202764977,
      "loss": 0.0007,
      "step": 18000
    },
    {
      "epoch": 0.9472606246799795,
      "grad_norm": 0.0005645751953125,
      "learning_rate": 0.0009810547875064005,
      "loss": 0.0001,
      "step": 18500
    },
    {
      "epoch": 0.9728622631848438,
      "grad_norm": 0.00037384033203125,
      "learning_rate": 0.0009805427547363032,
      "loss": 0.0001,
      "step": 19000
    },
    {
      "epoch": 0.9984639016897081,
      "grad_norm": 0.000507354736328125,
      "learning_rate": 0.000980030721966206,
      "loss": 0.0001,
      "step": 19500
    },
    {
      "epoch": 1.0,
      "eval_loss": 6.09988892392721e-05,
      "eval_runtime": 0.5651,
      "eval_samples_per_second": 1769.543,
      "eval_steps_per_second": 3.539,
      "step": 19530
    },
    {
      "epoch": 1.0240655401945724,
      "grad_norm": 0.00133514404296875,
      "learning_rate": 0.0009795186891961086,
      "loss": 0.0001,
      "step": 20000
    },
    {
      "epoch": 1.0496671786994367,
      "grad_norm": 0.0004024505615234375,
      "learning_rate": 0.0009790066564260114,
      "loss": 0.0001,
      "step": 20500
    },
    {
      "epoch": 1.075268817204301,
      "grad_norm": 0.00031280517578125,
      "learning_rate": 0.000978494623655914,
      "loss": 0.0002,
      "step": 21000
    },
    {
      "epoch": 1.1008704557091653,
      "grad_norm": 0.0002689361572265625,
      "learning_rate": 0.0009779825908858167,
      "loss": 0.0001,
      "step": 21500
    },
    {
      "epoch": 1.1264720942140296,
      "grad_norm": 0.00023365020751953125,
      "learning_rate": 0.0009774705581157195,
      "loss": 0.0001,
      "step": 22000
    },
    {
      "epoch": 1.1520737327188941,
      "grad_norm": 0.00032806396484375,
      "learning_rate": 0.000976958525345622,
      "loss": 0.0001,
      "step": 22500
    },
    {
      "epoch": 1.1776753712237582,
      "grad_norm": 0.000274658203125,
      "learning_rate": 0.0009764464925755249,
      "loss": 0.0001,
      "step": 23000
    },
    {
      "epoch": 1.2032770097286227,
      "grad_norm": 0.0002651214599609375,
      "learning_rate": 0.0009759344598054276,
      "loss": 0.0001,
      "step": 23500
    },
    {
      "epoch": 1.228878648233487,
      "grad_norm": 0.00029754638671875,
      "learning_rate": 0.0009754224270353303,
      "loss": 0.0001,
      "step": 24000
    },
    {
      "epoch": 1.2544802867383513,
      "grad_norm": 0.000308990478515625,
      "learning_rate": 0.0009749103942652329,
      "loss": 0.0001,
      "step": 24500
    },
    {
      "epoch": 1.2800819252432156,
      "grad_norm": 0.000263214111328125,
      "learning_rate": 0.0009743983614951357,
      "loss": 0.0001,
      "step": 25000
    },
    {
      "epoch": 1.30568356374808,
      "grad_norm": 0.00074005126953125,
      "learning_rate": 0.0009738863287250385,
      "loss": 0.0001,
      "step": 25500
    },
    {
      "epoch": 1.3312852022529442,
      "grad_norm": 0.000354766845703125,
      "learning_rate": 0.000973374295954941,
      "loss": 0.0001,
      "step": 26000
    },
    {
      "epoch": 1.3568868407578085,
      "grad_norm": 0.0003108978271484375,
      "learning_rate": 0.0009728622631848438,
      "loss": 0.0001,
      "step": 26500
    },
    {
      "epoch": 1.3824884792626728,
      "grad_norm": 0.000278472900390625,
      "learning_rate": 0.0009723502304147466,
      "loss": 0.0001,
      "step": 27000
    },
    {
      "epoch": 1.4080901177675371,
      "grad_norm": 0.000293731689453125,
      "learning_rate": 0.0009718381976446493,
      "loss": 0.0,
      "step": 27500
    },
    {
      "epoch": 1.4336917562724014,
      "grad_norm": 0.000263214111328125,
      "learning_rate": 0.000971326164874552,
      "loss": 0.0,
      "step": 28000
    },
    {
      "epoch": 1.4592933947772657,
      "grad_norm": 0.000293731689453125,
      "learning_rate": 0.0009708141321044547,
      "loss": 0.0,
      "step": 28500
    },
    {
      "epoch": 1.48489503328213,
      "grad_norm": 0.000278472900390625,
      "learning_rate": 0.0009703020993343574,
      "loss": 0.0001,
      "step": 29000
    },
    {
      "epoch": 1.5104966717869943,
      "grad_norm": 0.00030517578125,
      "learning_rate": 0.0009697900665642602,
      "loss": 0.0001,
      "step": 29500
    },
    {
      "epoch": 1.5360983102918588,
      "grad_norm": 0.0002899169921875,
      "learning_rate": 0.0009692780337941628,
      "loss": 0.0,
      "step": 30000
    },
    {
      "epoch": 1.561699948796723,
      "grad_norm": 0.000766754150390625,
      "learning_rate": 0.0009687660010240655,
      "loss": 0.0,
      "step": 30500
    },
    {
      "epoch": 1.5873015873015874,
      "grad_norm": 0.00032806396484375,
      "learning_rate": 0.0009682539682539683,
      "loss": 0.0001,
      "step": 31000
    },
    {
      "epoch": 1.6129032258064515,
      "grad_norm": 0.000301361083984375,
      "learning_rate": 0.000967741935483871,
      "loss": 0.0001,
      "step": 31500
    },
    {
      "epoch": 1.638504864311316,
      "grad_norm": 0.003997802734375,
      "learning_rate": 0.0009672299027137736,
      "loss": 0.0001,
      "step": 32000
    },
    {
      "epoch": 1.66410650281618,
      "grad_norm": 0.0003452301025390625,
      "learning_rate": 0.0009667178699436764,
      "loss": 0.0001,
      "step": 32500
    },
    {
      "epoch": 1.6897081413210446,
      "grad_norm": 0.0002918243408203125,
      "learning_rate": 0.0009662058371735791,
      "loss": 0.0001,
      "step": 33000
    },
    {
      "epoch": 1.7153097798259087,
      "grad_norm": 0.0002422332763671875,
      "learning_rate": 0.0009656938044034819,
      "loss": 0.0001,
      "step": 33500
    },
    {
      "epoch": 1.7409114183307732,
      "grad_norm": 0.00124359130859375,
      "learning_rate": 0.0009651817716333846,
      "loss": 0.0,
      "step": 34000
    },
    {
      "epoch": 1.7665130568356375,
      "grad_norm": 0.0009002685546875,
      "learning_rate": 0.0009646697388632872,
      "loss": 0.0001,
      "step": 34500
    },
    {
      "epoch": 1.7921146953405018,
      "grad_norm": 0.00030517578125,
      "learning_rate": 0.00096415770609319,
      "loss": 0.0,
      "step": 35000
    },
    {
      "epoch": 1.8177163338453661,
      "grad_norm": 0.00041961669921875,
      "learning_rate": 0.0009636456733230928,
      "loss": 0.0001,
      "step": 35500
    },
    {
      "epoch": 1.8433179723502304,
      "grad_norm": 0.0003108978271484375,
      "learning_rate": 0.0009631336405529954,
      "loss": 0.0,
      "step": 36000
    },
    {
      "epoch": 1.8689196108550947,
      "grad_norm": 0.0002727508544921875,
      "learning_rate": 0.0009626216077828981,
      "loss": 0.0,
      "step": 36500
    },
    {
      "epoch": 1.894521249359959,
      "grad_norm": 0.0002613067626953125,
      "learning_rate": 0.0009621095750128009,
      "loss": 0.0,
      "step": 37000
    },
    {
      "epoch": 1.9201228878648233,
      "grad_norm": 0.0003204345703125,
      "learning_rate": 0.0009615975422427036,
      "loss": 0.0,
      "step": 37500
    },
    {
      "epoch": 1.9457245263696876,
      "grad_norm": 0.000255584716796875,
      "learning_rate": 0.0009610855094726063,
      "loss": 0.0,
      "step": 38000
    },
    {
      "epoch": 1.971326164874552,
      "grad_norm": 0.000263214111328125,
      "learning_rate": 0.0009605734767025089,
      "loss": 0.0,
      "step": 38500
    },
    {
      "epoch": 1.9969278033794162,
      "grad_norm": 0.00048828125,
      "learning_rate": 0.0009600614439324117,
      "loss": 0.0001,
      "step": 39000
    },
    {
      "epoch": 2.0,
      "eval_loss": 3.46598717442248e-05,
      "eval_runtime": 0.5684,
      "eval_samples_per_second": 1759.346,
      "eval_steps_per_second": 3.519,
      "step": 39060
    },
    {
      "epoch": 2.0225294418842807,
      "grad_norm": 0.000423431396484375,
      "learning_rate": 0.0009595494111623145,
      "loss": 0.0,
      "step": 39500
    },
    {
      "epoch": 2.048131080389145,
      "grad_norm": 0.0002899169921875,
      "learning_rate": 0.0009590373783922171,
      "loss": 0.0,
      "step": 40000
    },
    {
      "epoch": 2.0737327188940093,
      "grad_norm": 0.0002593994140625,
      "learning_rate": 0.0009585253456221198,
      "loss": 0.0,
      "step": 40500
    },
    {
      "epoch": 2.0993343573988734,
      "grad_norm": 0.00042724609375,
      "learning_rate": 0.0009580133128520226,
      "loss": 0.0,
      "step": 41000
    },
    {
      "epoch": 2.124935995903738,
      "grad_norm": 0.00032806396484375,
      "learning_rate": 0.0009575012800819252,
      "loss": 0.0,
      "step": 41500
    },
    {
      "epoch": 2.150537634408602,
      "grad_norm": 0.0002918243408203125,
      "learning_rate": 0.000956989247311828,
      "loss": 0.0,
      "step": 42000
    },
    {
      "epoch": 2.1761392729134665,
      "grad_norm": 0.00250244140625,
      "learning_rate": 0.0009564772145417307,
      "loss": 0.0,
      "step": 42500
    },
    {
      "epoch": 2.2017409114183306,
      "grad_norm": 0.0002803802490234375,
      "learning_rate": 0.0009559651817716334,
      "loss": 0.0,
      "step": 43000
    },
    {
      "epoch": 2.227342549923195,
      "grad_norm": 0.0003681182861328125,
      "learning_rate": 0.0009554531490015361,
      "loss": 0.0,
      "step": 43500
    },
    {
      "epoch": 2.252944188428059,
      "grad_norm": 0.0002689361572265625,
      "learning_rate": 0.0009549411162314389,
      "loss": 0.0,
      "step": 44000
    },
    {
      "epoch": 2.2785458269329237,
      "grad_norm": 0.000370025634765625,
      "learning_rate": 0.0009544290834613415,
      "loss": 0.0001,
      "step": 44500
    },
    {
      "epoch": 2.3041474654377883,
      "grad_norm": 0.0002536773681640625,
      "learning_rate": 0.0009539170506912443,
      "loss": 0.0,
      "step": 45000
    },
    {
      "epoch": 2.3297491039426523,
      "grad_norm": 0.000591278076171875,
      "learning_rate": 0.0009534050179211469,
      "loss": 0.0,
      "step": 45500
    },
    {
      "epoch": 2.3553507424475164,
      "grad_norm": 0.000514984130859375,
      "learning_rate": 0.0009528929851510497,
      "loss": 0.0,
      "step": 46000
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 0.00025177001953125,
      "learning_rate": 0.0009523809523809524,
      "loss": 0.0,
      "step": 46500
    },
    {
      "epoch": 2.4065540194572455,
      "grad_norm": 0.0002384185791015625,
      "learning_rate": 0.000951868919610855,
      "loss": 0.0,
      "step": 47000
    },
    {
      "epoch": 2.4321556579621095,
      "grad_norm": 0.000247955322265625,
      "learning_rate": 0.0009513568868407578,
      "loss": 0.0,
      "step": 47500
    },
    {
      "epoch": 2.457757296466974,
      "grad_norm": 0.000240325927734375,
      "learning_rate": 0.0009508448540706606,
      "loss": 0.0,
      "step": 48000
    },
    {
      "epoch": 2.483358934971838,
      "grad_norm": 0.000274658203125,
      "learning_rate": 0.0009503328213005633,
      "loss": 0.0,
      "step": 48500
    },
    {
      "epoch": 2.5089605734767026,
      "grad_norm": 0.0002269744873046875,
      "learning_rate": 0.000949820788530466,
      "loss": 0.0,
      "step": 49000
    },
    {
      "epoch": 2.5345622119815667,
      "grad_norm": 0.0003204345703125,
      "learning_rate": 0.0009493087557603687,
      "loss": 0.0,
      "step": 49500
    },
    {
      "epoch": 2.5601638504864312,
      "grad_norm": 0.00787353515625,
      "learning_rate": 0.0009487967229902714,
      "loss": 0.0,
      "step": 50000
    },
    {
      "epoch": 2.5857654889912953,
      "grad_norm": 0.00022983551025390625,
      "learning_rate": 0.0009482846902201742,
      "loss": 0.0,
      "step": 50500
    },
    {
      "epoch": 2.61136712749616,
      "grad_norm": 0.0002651214599609375,
      "learning_rate": 0.0009477726574500767,
      "loss": 0.0,
      "step": 51000
    },
    {
      "epoch": 2.636968766001024,
      "grad_norm": 0.0002346038818359375,
      "learning_rate": 0.0009472606246799795,
      "loss": 0.0,
      "step": 51500
    },
    {
      "epoch": 2.6625704045058884,
      "grad_norm": 0.00021839141845703125,
      "learning_rate": 0.0009467485919098823,
      "loss": 0.0,
      "step": 52000
    },
    {
      "epoch": 2.688172043010753,
      "grad_norm": 0.0004177093505859375,
      "learning_rate": 0.000946236559139785,
      "loss": 0.0,
      "step": 52500
    },
    {
      "epoch": 2.713773681515617,
      "grad_norm": 0.000247955322265625,
      "learning_rate": 0.0009457245263696876,
      "loss": 0.0,
      "step": 53000
    },
    {
      "epoch": 2.739375320020481,
      "grad_norm": 0.0002269744873046875,
      "learning_rate": 0.0009452124935995904,
      "loss": 0.0,
      "step": 53500
    },
    {
      "epoch": 2.7649769585253456,
      "grad_norm": 0.00025177001953125,
      "learning_rate": 0.0009447004608294931,
      "loss": 0.0,
      "step": 54000
    },
    {
      "epoch": 2.79057859703021,
      "grad_norm": 0.0002536773681640625,
      "learning_rate": 0.0009441884280593959,
      "loss": 0.0,
      "step": 54500
    },
    {
      "epoch": 2.8161802355350742,
      "grad_norm": 0.0002346038818359375,
      "learning_rate": 0.0009436763952892985,
      "loss": 0.0,
      "step": 55000
    },
    {
      "epoch": 2.8417818740399383,
      "grad_norm": 0.0013427734375,
      "learning_rate": 0.0009431643625192012,
      "loss": 0.0,
      "step": 55500
    },
    {
      "epoch": 2.867383512544803,
      "grad_norm": 0.0003528594970703125,
      "learning_rate": 0.000942652329749104,
      "loss": 0.0,
      "step": 56000
    },
    {
      "epoch": 2.8929851510496674,
      "grad_norm": 0.00023746490478515625,
      "learning_rate": 0.0009421402969790068,
      "loss": 0.0,
      "step": 56500
    },
    {
      "epoch": 2.9185867895545314,
      "grad_norm": 0.00055694580078125,
      "learning_rate": 0.0009416282642089093,
      "loss": 0.0,
      "step": 57000
    },
    {
      "epoch": 2.944188428059396,
      "grad_norm": 0.0002307891845703125,
      "learning_rate": 0.0009411162314388121,
      "loss": 0.0,
      "step": 57500
    },
    {
      "epoch": 2.96979006656426,
      "grad_norm": 0.000286102294921875,
      "learning_rate": 0.0009406041986687148,
      "loss": 0.0,
      "step": 58000
    },
    {
      "epoch": 2.9953917050691246,
      "grad_norm": 0.00019550323486328125,
      "learning_rate": 0.0009400921658986176,
      "loss": 0.0,
      "step": 58500
    },
    {
      "epoch": 3.0,
      "eval_loss": 3.896626367350109e-05,
      "eval_runtime": 0.5618,
      "eval_samples_per_second": 1780.014,
      "eval_steps_per_second": 3.56,
      "step": 58590
    },
    {
      "epoch": 3.0209933435739886,
      "grad_norm": 0.000255584716796875,
      "learning_rate": 0.0009395801331285202,
      "loss": 0.0,
      "step": 59000
    },
    {
      "epoch": 3.046594982078853,
      "grad_norm": 0.000209808349609375,
      "learning_rate": 0.0009390681003584229,
      "loss": 0.0,
      "step": 59500
    },
    {
      "epoch": 3.0721966205837172,
      "grad_norm": 0.00021457672119140625,
      "learning_rate": 0.0009385560675883257,
      "loss": 0.0,
      "step": 60000
    },
    {
      "epoch": 3.0977982590885818,
      "grad_norm": 0.0013580322265625,
      "learning_rate": 0.0009380440348182285,
      "loss": 0.0,
      "step": 60500
    },
    {
      "epoch": 3.123399897593446,
      "grad_norm": 0.0002002716064453125,
      "learning_rate": 0.000937532002048131,
      "loss": 0.0,
      "step": 61000
    },
    {
      "epoch": 3.1490015360983103,
      "grad_norm": 0.0002727508544921875,
      "learning_rate": 0.0009370199692780338,
      "loss": 0.0,
      "step": 61500
    },
    {
      "epoch": 3.1746031746031744,
      "grad_norm": 0.0003662109375,
      "learning_rate": 0.0009365079365079366,
      "loss": 0.0,
      "step": 62000
    },
    {
      "epoch": 3.200204813108039,
      "grad_norm": 0.0002040863037109375,
      "learning_rate": 0.0009359959037378392,
      "loss": 0.0,
      "step": 62500
    },
    {
      "epoch": 3.225806451612903,
      "grad_norm": 0.00020694732666015625,
      "learning_rate": 0.0009354838709677419,
      "loss": 0.0,
      "step": 63000
    },
    {
      "epoch": 3.2514080901177675,
      "grad_norm": 0.000213623046875,
      "learning_rate": 0.0009349718381976447,
      "loss": 0.0,
      "step": 63500
    },
    {
      "epoch": 3.277009728622632,
      "grad_norm": 0.00020313262939453125,
      "learning_rate": 0.0009344598054275474,
      "loss": 0.0,
      "step": 64000
    },
    {
      "epoch": 3.302611367127496,
      "grad_norm": 0.0001983642578125,
      "learning_rate": 0.0009339477726574501,
      "loss": 0.0,
      "step": 64500
    },
    {
      "epoch": 3.32821300563236,
      "grad_norm": 0.0002117156982421875,
      "learning_rate": 0.0009334357398873528,
      "loss": 0.0,
      "step": 65000
    },
    {
      "epoch": 3.3538146441372247,
      "grad_norm": 0.0004444122314453125,
      "learning_rate": 0.0009329237071172555,
      "loss": 0.0,
      "step": 65500
    },
    {
      "epoch": 3.3794162826420893,
      "grad_norm": 0.00024318695068359375,
      "learning_rate": 0.0009324116743471583,
      "loss": 0.0,
      "step": 66000
    },
    {
      "epoch": 3.4050179211469533,
      "grad_norm": 0.004608154296875,
      "learning_rate": 0.0009318996415770609,
      "loss": 0.0,
      "step": 66500
    },
    {
      "epoch": 3.430619559651818,
      "grad_norm": 0.00022029876708984375,
      "learning_rate": 0.0009313876088069637,
      "loss": 0.0,
      "step": 67000
    },
    {
      "epoch": 3.456221198156682,
      "grad_norm": 0.00021648406982421875,
      "learning_rate": 0.0009308755760368664,
      "loss": 0.0,
      "step": 67500
    },
    {
      "epoch": 3.4818228366615465,
      "grad_norm": 0.0002841949462890625,
      "learning_rate": 0.000930363543266769,
      "loss": 0.0,
      "step": 68000
    },
    {
      "epoch": 3.5074244751664105,
      "grad_norm": 0.0002269744873046875,
      "learning_rate": 0.0009298515104966718,
      "loss": 0.0,
      "step": 68500
    },
    {
      "epoch": 3.533026113671275,
      "grad_norm": 0.00021648406982421875,
      "learning_rate": 0.0009293394777265746,
      "loss": 0.0,
      "step": 69000
    },
    {
      "epoch": 3.558627752176139,
      "grad_norm": 0.0002079010009765625,
      "learning_rate": 0.0009288274449564772,
      "loss": 0.0,
      "step": 69500
    },
    {
      "epoch": 3.5842293906810037,
      "grad_norm": 0.00022125244140625,
      "learning_rate": 0.00092831541218638,
      "loss": 0.0,
      "step": 70000
    },
    {
      "epoch": 3.6098310291858677,
      "grad_norm": 0.00021457672119140625,
      "learning_rate": 0.0009278033794162827,
      "loss": 0.0,
      "step": 70500
    },
    {
      "epoch": 3.6354326676907323,
      "grad_norm": 0.000209808349609375,
      "learning_rate": 0.0009272913466461854,
      "loss": 0.0,
      "step": 71000
    },
    {
      "epoch": 3.6610343061955968,
      "grad_norm": 0.0002727508544921875,
      "learning_rate": 0.0009267793138760881,
      "loss": 0.0,
      "step": 71500
    },
    {
      "epoch": 3.686635944700461,
      "grad_norm": 0.00020313262939453125,
      "learning_rate": 0.0009262672811059907,
      "loss": 0.0,
      "step": 72000
    },
    {
      "epoch": 3.712237583205325,
      "grad_norm": 0.0002288818359375,
      "learning_rate": 0.0009257552483358935,
      "loss": 0.0,
      "step": 72500
    },
    {
      "epoch": 3.7378392217101895,
      "grad_norm": 0.0010528564453125,
      "learning_rate": 0.0009252432155657963,
      "loss": 0.0,
      "step": 73000
    },
    {
      "epoch": 3.763440860215054,
      "grad_norm": 0.00019550323486328125,
      "learning_rate": 0.0009247311827956989,
      "loss": 0.0,
      "step": 73500
    },
    {
      "epoch": 3.789042498719918,
      "grad_norm": 0.00021266937255859375,
      "learning_rate": 0.0009242191500256016,
      "loss": 0.0,
      "step": 74000
    },
    {
      "epoch": 3.814644137224782,
      "grad_norm": 0.0003509521484375,
      "learning_rate": 0.0009237071172555044,
      "loss": 0.0,
      "step": 74500
    },
    {
      "epoch": 3.8402457757296466,
      "grad_norm": 0.00020599365234375,
      "learning_rate": 0.0009231950844854071,
      "loss": 0.0,
      "step": 75000
    },
    {
      "epoch": 3.865847414234511,
      "grad_norm": 0.0002002716064453125,
      "learning_rate": 0.0009226830517153098,
      "loss": 0.0,
      "step": 75500
    },
    {
      "epoch": 3.8914490527393752,
      "grad_norm": 0.00021076202392578125,
      "learning_rate": 0.0009221710189452125,
      "loss": 0.0,
      "step": 76000
    },
    {
      "epoch": 3.9170506912442398,
      "grad_norm": 0.0002918243408203125,
      "learning_rate": 0.0009216589861751152,
      "loss": 0.0,
      "step": 76500
    },
    {
      "epoch": 3.942652329749104,
      "grad_norm": 0.0003223419189453125,
      "learning_rate": 0.000921146953405018,
      "loss": 0.0,
      "step": 77000
    },
    {
      "epoch": 3.9682539682539684,
      "grad_norm": 0.0003032684326171875,
      "learning_rate": 0.0009206349206349207,
      "loss": 0.0,
      "step": 77500
    },
    {
      "epoch": 3.9938556067588324,
      "grad_norm": 0.000698089599609375,
      "learning_rate": 0.0009201228878648233,
      "loss": 0.0,
      "step": 78000
    },
    {
      "epoch": 4.0,
      "eval_loss": 3.7267222069203854e-05,
      "eval_runtime": 0.546,
      "eval_samples_per_second": 1831.41,
      "eval_steps_per_second": 3.663,
      "step": 78120
    },
    {
      "epoch": 4.0194572452636965,
      "grad_norm": 0.0002651214599609375,
      "learning_rate": 0.0009196108550947261,
      "loss": 0.0,
      "step": 78500
    },
    {
      "epoch": 4.0450588837685615,
      "grad_norm": 0.00020885467529296875,
      "learning_rate": 0.0009190988223246288,
      "loss": 0.0,
      "step": 79000
    },
    {
      "epoch": 4.070660522273426,
      "grad_norm": 0.00019741058349609375,
      "learning_rate": 0.0009185867895545314,
      "loss": 0.0,
      "step": 79500
    },
    {
      "epoch": 4.09626216077829,
      "grad_norm": 0.00021839141845703125,
      "learning_rate": 0.0009180747567844342,
      "loss": 0.0,
      "step": 80000
    },
    {
      "epoch": 4.121863799283154,
      "grad_norm": 0.00020313262939453125,
      "learning_rate": 0.0009175627240143369,
      "loss": 0.0,
      "step": 80500
    },
    {
      "epoch": 4.147465437788019,
      "grad_norm": 0.00023555755615234375,
      "learning_rate": 0.0009170506912442397,
      "loss": 0.0,
      "step": 81000
    },
    {
      "epoch": 4.173067076292883,
      "grad_norm": 0.0003204345703125,
      "learning_rate": 0.0009165386584741425,
      "loss": 0.0,
      "step": 81500
    },
    {
      "epoch": 4.198668714797747,
      "grad_norm": 0.00022411346435546875,
      "learning_rate": 0.000916026625704045,
      "loss": 0.0,
      "step": 82000
    },
    {
      "epoch": 4.224270353302611,
      "grad_norm": 0.0004634857177734375,
      "learning_rate": 0.0009155145929339478,
      "loss": 0.0,
      "step": 82500
    },
    {
      "epoch": 4.249871991807476,
      "grad_norm": 0.00023651123046875,
      "learning_rate": 0.0009150025601638506,
      "loss": 0.0,
      "step": 83000
    },
    {
      "epoch": 4.27547363031234,
      "grad_norm": 0.000202178955078125,
      "learning_rate": 0.0009144905273937532,
      "loss": 0.0,
      "step": 83500
    },
    {
      "epoch": 4.301075268817204,
      "grad_norm": 0.0003528594970703125,
      "learning_rate": 0.0009139784946236559,
      "loss": 0.0,
      "step": 84000
    },
    {
      "epoch": 4.326676907322069,
      "grad_norm": 0.0003204345703125,
      "learning_rate": 0.0009134664618535587,
      "loss": 0.0,
      "step": 84500
    },
    {
      "epoch": 4.352278545826933,
      "grad_norm": 0.000598907470703125,
      "learning_rate": 0.0009129544290834614,
      "loss": 0.0,
      "step": 85000
    },
    {
      "epoch": 4.377880184331797,
      "grad_norm": 0.000286102294921875,
      "learning_rate": 0.0009124423963133641,
      "loss": 0.0,
      "step": 85500
    },
    {
      "epoch": 4.403481822836661,
      "grad_norm": 0.00021839141845703125,
      "learning_rate": 0.0009119303635432667,
      "loss": 0.0,
      "step": 86000
    },
    {
      "epoch": 4.429083461341526,
      "grad_norm": 0.000225067138671875,
      "learning_rate": 0.0009114183307731695,
      "loss": 0.0,
      "step": 86500
    },
    {
      "epoch": 4.45468509984639,
      "grad_norm": 0.00141143798828125,
      "learning_rate": 0.0009109062980030723,
      "loss": 0.0,
      "step": 87000
    },
    {
      "epoch": 4.480286738351254,
      "grad_norm": 0.00022602081298828125,
      "learning_rate": 0.0009103942652329749,
      "loss": 0.0,
      "step": 87500
    },
    {
      "epoch": 4.505888376856118,
      "grad_norm": 0.000335693359375,
      "learning_rate": 0.0009098822324628776,
      "loss": 0.0,
      "step": 88000
    },
    {
      "epoch": 4.531490015360983,
      "grad_norm": 0.00019073486328125,
      "learning_rate": 0.0009093701996927804,
      "loss": 0.0,
      "step": 88500
    },
    {
      "epoch": 4.5570916538658475,
      "grad_norm": 0.00019931793212890625,
      "learning_rate": 0.000908858166922683,
      "loss": 0.0,
      "step": 89000
    },
    {
      "epoch": 4.5826932923707115,
      "grad_norm": 0.00021839141845703125,
      "learning_rate": 0.0009083461341525858,
      "loss": 0.0,
      "step": 89500
    },
    {
      "epoch": 4.6082949308755765,
      "grad_norm": 0.001983642578125,
      "learning_rate": 0.0009078341013824885,
      "loss": 0.0,
      "step": 90000
    },
    {
      "epoch": 4.633896569380441,
      "grad_norm": 0.00020503997802734375,
      "learning_rate": 0.0009073220686123912,
      "loss": 0.0,
      "step": 90500
    },
    {
      "epoch": 4.659498207885305,
      "grad_norm": 0.0002689361572265625,
      "learning_rate": 0.000906810035842294,
      "loss": 0.0,
      "step": 91000
    },
    {
      "epoch": 4.685099846390169,
      "grad_norm": 0.0002307891845703125,
      "learning_rate": 0.0009062980030721967,
      "loss": 0.0,
      "step": 91500
    },
    {
      "epoch": 4.710701484895033,
      "grad_norm": 0.00020885467529296875,
      "learning_rate": 0.0009057859703020993,
      "loss": 0.0,
      "step": 92000
    },
    {
      "epoch": 4.736303123399898,
      "grad_norm": 0.0002536773681640625,
      "learning_rate": 0.0009052739375320021,
      "loss": 0.0,
      "step": 92500
    },
    {
      "epoch": 4.761904761904762,
      "grad_norm": 0.0002040863037109375,
      "learning_rate": 0.0009047619047619047,
      "loss": 0.0,
      "step": 93000
    },
    {
      "epoch": 4.787506400409626,
      "grad_norm": 0.00174713134765625,
      "learning_rate": 0.0009042498719918075,
      "loss": 0.0,
      "step": 93500
    },
    {
      "epoch": 4.813108038914491,
      "grad_norm": 0.00020313262939453125,
      "learning_rate": 0.0009037378392217102,
      "loss": 0.0,
      "step": 94000
    },
    {
      "epoch": 4.838709677419355,
      "grad_norm": 0.0001926422119140625,
      "learning_rate": 0.0009032258064516129,
      "loss": 0.0,
      "step": 94500
    },
    {
      "epoch": 4.864311315924219,
      "grad_norm": 0.000186920166015625,
      "learning_rate": 0.0009027137736815156,
      "loss": 0.0,
      "step": 95000
    },
    {
      "epoch": 4.889912954429083,
      "grad_norm": 0.00019359588623046875,
      "learning_rate": 0.0009022017409114184,
      "loss": 0.0,
      "step": 95500
    },
    {
      "epoch": 4.915514592933948,
      "grad_norm": 0.00018310546875,
      "learning_rate": 0.000901689708141321,
      "loss": 0.0,
      "step": 96000
    },
    {
      "epoch": 4.941116231438812,
      "grad_norm": 0.000579833984375,
      "learning_rate": 0.0009011776753712238,
      "loss": 0.0,
      "step": 96500
    },
    {
      "epoch": 4.966717869943676,
      "grad_norm": 0.00017833709716796875,
      "learning_rate": 0.0009006656426011265,
      "loss": 0.0,
      "step": 97000
    },
    {
      "epoch": 4.99231950844854,
      "grad_norm": 0.000507354736328125,
      "learning_rate": 0.0009001536098310292,
      "loss": 0.0,
      "step": 97500
    },
    {
      "epoch": 5.0,
      "eval_loss": 3.1982614018488675e-05,
      "eval_runtime": 0.5464,
      "eval_samples_per_second": 1830.037,
      "eval_steps_per_second": 3.66,
      "step": 97650
    },
    {
      "epoch": 5.017921146953405,
      "grad_norm": 0.00020313262939453125,
      "learning_rate": 0.000899641577060932,
      "loss": 0.0,
      "step": 98000
    },
    {
      "epoch": 5.043522785458269,
      "grad_norm": 0.0002651214599609375,
      "learning_rate": 0.0008991295442908345,
      "loss": 0.0,
      "step": 98500
    },
    {
      "epoch": 5.0691244239631335,
      "grad_norm": 0.000335693359375,
      "learning_rate": 0.0008986175115207373,
      "loss": 0.0,
      "step": 99000
    },
    {
      "epoch": 5.0947260624679975,
      "grad_norm": 0.000186920166015625,
      "learning_rate": 0.0008981054787506401,
      "loss": 0.0,
      "step": 99500
    },
    {
      "epoch": 5.1203277009728625,
      "grad_norm": 0.0001850128173828125,
      "learning_rate": 0.0008975934459805428,
      "loss": 0.0,
      "step": 100000
    },
    {
      "epoch": 5.145929339477727,
      "grad_norm": 0.00022411346435546875,
      "learning_rate": 0.0008970814132104454,
      "loss": 0.0,
      "step": 100500
    },
    {
      "epoch": 5.171530977982591,
      "grad_norm": 0.0002002716064453125,
      "learning_rate": 0.0008965693804403482,
      "loss": 0.0,
      "step": 101000
    },
    {
      "epoch": 5.197132616487456,
      "grad_norm": 0.00019550323486328125,
      "learning_rate": 0.0008960573476702509,
      "loss": 0.0,
      "step": 101500
    },
    {
      "epoch": 5.22273425499232,
      "grad_norm": 0.00019931793212890625,
      "learning_rate": 0.0008955453149001537,
      "loss": 0.0,
      "step": 102000
    },
    {
      "epoch": 5.248335893497184,
      "grad_norm": 0.0002918243408203125,
      "learning_rate": 0.0008950332821300563,
      "loss": 0.0,
      "step": 102500
    },
    {
      "epoch": 5.273937532002048,
      "grad_norm": 0.000217437744140625,
      "learning_rate": 0.000894521249359959,
      "loss": 0.0,
      "step": 103000
    },
    {
      "epoch": 5.299539170506913,
      "grad_norm": 0.00018405914306640625,
      "learning_rate": 0.0008940092165898618,
      "loss": 0.0,
      "step": 103500
    },
    {
      "epoch": 5.325140809011777,
      "grad_norm": 0.0009918212890625,
      "learning_rate": 0.0008934971838197646,
      "loss": 0.0,
      "step": 104000
    },
    {
      "epoch": 5.350742447516641,
      "grad_norm": 0.00086212158203125,
      "learning_rate": 0.0008929851510496671,
      "loss": 0.0,
      "step": 104500
    },
    {
      "epoch": 5.376344086021505,
      "grad_norm": 0.00020122528076171875,
      "learning_rate": 0.0008924731182795699,
      "loss": 0.0,
      "step": 105000
    },
    {
      "epoch": 5.40194572452637,
      "grad_norm": 0.00019168853759765625,
      "learning_rate": 0.0008919610855094726,
      "loss": 0.0,
      "step": 105500
    },
    {
      "epoch": 5.427547363031234,
      "grad_norm": 0.0001964569091796875,
      "learning_rate": 0.0008914490527393754,
      "loss": 0.0,
      "step": 106000
    },
    {
      "epoch": 5.453149001536098,
      "grad_norm": 0.0001811981201171875,
      "learning_rate": 0.000890937019969278,
      "loss": 0.0,
      "step": 106500
    },
    {
      "epoch": 5.478750640040962,
      "grad_norm": 0.000179290771484375,
      "learning_rate": 0.0008904249871991807,
      "loss": 0.0,
      "step": 107000
    },
    {
      "epoch": 5.504352278545827,
      "grad_norm": 0.0001926422119140625,
      "learning_rate": 0.0008899129544290835,
      "loss": 0.0,
      "step": 107500
    },
    {
      "epoch": 5.529953917050691,
      "grad_norm": 0.00018310546875,
      "learning_rate": 0.0008894009216589863,
      "loss": 0.0,
      "step": 108000
    },
    {
      "epoch": 5.555555555555555,
      "grad_norm": 0.00018596649169921875,
      "learning_rate": 0.0008888888888888888,
      "loss": 0.0,
      "step": 108500
    },
    {
      "epoch": 5.58115719406042,
      "grad_norm": 0.0002689361572265625,
      "learning_rate": 0.0008883768561187916,
      "loss": 0.0,
      "step": 109000
    },
    {
      "epoch": 5.606758832565284,
      "grad_norm": 0.00018215179443359375,
      "learning_rate": 0.0008878648233486944,
      "loss": 0.0,
      "step": 109500
    },
    {
      "epoch": 5.6323604710701485,
      "grad_norm": 0.0002613067626953125,
      "learning_rate": 0.000887352790578597,
      "loss": 0.0,
      "step": 110000
    },
    {
      "epoch": 5.6579621095750126,
      "grad_norm": 0.0002613067626953125,
      "learning_rate": 0.0008868407578084997,
      "loss": 0.0,
      "step": 110500
    },
    {
      "epoch": 5.683563748079877,
      "grad_norm": 0.0001926422119140625,
      "learning_rate": 0.0008863287250384025,
      "loss": 0.0,
      "step": 111000
    },
    {
      "epoch": 5.709165386584742,
      "grad_norm": 0.00018024444580078125,
      "learning_rate": 0.0008858166922683052,
      "loss": 0.0,
      "step": 111500
    },
    {
      "epoch": 5.734767025089606,
      "grad_norm": 0.000186920166015625,
      "learning_rate": 0.000885304659498208,
      "loss": 0.0,
      "step": 112000
    },
    {
      "epoch": 5.76036866359447,
      "grad_norm": 0.00018024444580078125,
      "learning_rate": 0.0008847926267281106,
      "loss": 0.0,
      "step": 112500
    },
    {
      "epoch": 5.785970302099335,
      "grad_norm": 0.000347137451171875,
      "learning_rate": 0.0008842805939580133,
      "loss": 0.0,
      "step": 113000
    },
    {
      "epoch": 5.811571940604199,
      "grad_norm": 0.00023174285888671875,
      "learning_rate": 0.0008837685611879161,
      "loss": 0.0,
      "step": 113500
    },
    {
      "epoch": 5.837173579109063,
      "grad_norm": 0.000392913818359375,
      "learning_rate": 0.0008832565284178187,
      "loss": 0.0,
      "step": 114000
    },
    {
      "epoch": 5.862775217613927,
      "grad_norm": 0.0003032684326171875,
      "learning_rate": 0.0008827444956477215,
      "loss": 0.0,
      "step": 114500
    },
    {
      "epoch": 5.888376856118792,
      "grad_norm": 0.0001964569091796875,
      "learning_rate": 0.0008822324628776242,
      "loss": 0.0,
      "step": 115000
    },
    {
      "epoch": 5.913978494623656,
      "grad_norm": 0.00020122528076171875,
      "learning_rate": 0.0008817204301075269,
      "loss": 0.0,
      "step": 115500
    },
    {
      "epoch": 5.93958013312852,
      "grad_norm": 0.00019550323486328125,
      "learning_rate": 0.0008812083973374296,
      "loss": 0.0,
      "step": 116000
    },
    {
      "epoch": 5.965181771633384,
      "grad_norm": 0.000179290771484375,
      "learning_rate": 0.0008806963645673324,
      "loss": 0.0,
      "step": 116500
    },
    {
      "epoch": 5.990783410138249,
      "grad_norm": 0.00018024444580078125,
      "learning_rate": 0.000880184331797235,
      "loss": 0.0,
      "step": 117000
    },
    {
      "epoch": 6.0,
      "eval_loss": 3.100566391367465e-05,
      "eval_runtime": 0.5504,
      "eval_samples_per_second": 1816.722,
      "eval_steps_per_second": 3.633,
      "step": 117180
    },
    {
      "epoch": 6.016385048643113,
      "grad_norm": 0.0001983642578125,
      "learning_rate": 0.0008796722990271378,
      "loss": 0.0,
      "step": 117500
    },
    {
      "epoch": 6.041986687147977,
      "grad_norm": 0.00018978118896484375,
      "learning_rate": 0.0008791602662570405,
      "loss": 0.0,
      "step": 118000
    },
    {
      "epoch": 6.067588325652842,
      "grad_norm": 0.00017833709716796875,
      "learning_rate": 0.0008786482334869432,
      "loss": 0.0,
      "step": 118500
    },
    {
      "epoch": 6.093189964157706,
      "grad_norm": 0.00019168853759765625,
      "learning_rate": 0.0008781362007168459,
      "loss": 0.0,
      "step": 119000
    },
    {
      "epoch": 6.11879160266257,
      "grad_norm": 0.000209808349609375,
      "learning_rate": 0.0008776241679467485,
      "loss": 0.0,
      "step": 119500
    },
    {
      "epoch": 6.1443932411674345,
      "grad_norm": 0.0001964569091796875,
      "learning_rate": 0.0008771121351766513,
      "loss": 0.0,
      "step": 120000
    },
    {
      "epoch": 6.169994879672299,
      "grad_norm": 0.0002002716064453125,
      "learning_rate": 0.0008766001024065541,
      "loss": 0.0,
      "step": 120500
    },
    {
      "epoch": 6.1955965181771635,
      "grad_norm": 0.0001773834228515625,
      "learning_rate": 0.0008760880696364567,
      "loss": 0.0,
      "step": 121000
    },
    {
      "epoch": 6.221198156682028,
      "grad_norm": 0.0001773834228515625,
      "learning_rate": 0.0008755760368663594,
      "loss": 0.0,
      "step": 121500
    },
    {
      "epoch": 6.246799795186892,
      "grad_norm": 0.0002040863037109375,
      "learning_rate": 0.0008750640040962622,
      "loss": 0.0,
      "step": 122000
    },
    {
      "epoch": 6.272401433691757,
      "grad_norm": 0.0002498626708984375,
      "learning_rate": 0.0008745519713261649,
      "loss": 0.0,
      "step": 122500
    },
    {
      "epoch": 6.298003072196621,
      "grad_norm": 0.000179290771484375,
      "learning_rate": 0.0008740399385560676,
      "loss": 0.0,
      "step": 123000
    },
    {
      "epoch": 6.323604710701485,
      "grad_norm": 0.00018405914306640625,
      "learning_rate": 0.0008735279057859703,
      "loss": 0.0,
      "step": 123500
    },
    {
      "epoch": 6.349206349206349,
      "grad_norm": 0.00017452239990234375,
      "learning_rate": 0.000873015873015873,
      "loss": 0.0,
      "step": 124000
    },
    {
      "epoch": 6.374807987711214,
      "grad_norm": 0.00018024444580078125,
      "learning_rate": 0.0008725038402457758,
      "loss": 0.0,
      "step": 124500
    },
    {
      "epoch": 6.400409626216078,
      "grad_norm": 0.00026702880859375,
      "learning_rate": 0.0008719918074756785,
      "loss": 0.0,
      "step": 125000
    },
    {
      "epoch": 6.426011264720942,
      "grad_norm": 0.00018978118896484375,
      "learning_rate": 0.0008714797747055811,
      "loss": 0.0,
      "step": 125500
    },
    {
      "epoch": 6.451612903225806,
      "grad_norm": 0.00018405914306640625,
      "learning_rate": 0.0008709677419354839,
      "loss": 0.0,
      "step": 126000
    },
    {
      "epoch": 6.477214541730671,
      "grad_norm": 0.00018596649169921875,
      "learning_rate": 0.0008704557091653866,
      "loss": 0.0,
      "step": 126500
    },
    {
      "epoch": 6.502816180235535,
      "grad_norm": 0.000568389892578125,
      "learning_rate": 0.0008699436763952893,
      "loss": 0.0,
      "step": 127000
    },
    {
      "epoch": 6.528417818740399,
      "grad_norm": 0.00060272216796875,
      "learning_rate": 0.000869431643625192,
      "loss": 0.0,
      "step": 127500
    },
    {
      "epoch": 6.554019457245264,
      "grad_norm": 0.000179290771484375,
      "learning_rate": 0.0008689196108550947,
      "loss": 0.0,
      "step": 128000
    },
    {
      "epoch": 6.579621095750128,
      "grad_norm": 0.000255584716796875,
      "learning_rate": 0.0008684075780849975,
      "loss": 0.0,
      "step": 128500
    },
    {
      "epoch": 6.605222734254992,
      "grad_norm": 0.00018978118896484375,
      "learning_rate": 0.0008678955453149003,
      "loss": 0.0,
      "step": 129000
    },
    {
      "epoch": 6.630824372759856,
      "grad_norm": 0.00019550323486328125,
      "learning_rate": 0.0008673835125448028,
      "loss": 0.0,
      "step": 129500
    },
    {
      "epoch": 6.65642601126472,
      "grad_norm": 0.0004329681396484375,
      "learning_rate": 0.0008668714797747056,
      "loss": 0.0,
      "step": 130000
    },
    {
      "epoch": 6.682027649769585,
      "grad_norm": 0.00040435791015625,
      "learning_rate": 0.0008663594470046084,
      "loss": 0.0,
      "step": 130500
    },
    {
      "epoch": 6.7076292882744495,
      "grad_norm": 0.000392913818359375,
      "learning_rate": 0.000865847414234511,
      "loss": 0.0,
      "step": 131000
    },
    {
      "epoch": 6.733230926779314,
      "grad_norm": 0.0001811981201171875,
      "learning_rate": 0.0008653353814644137,
      "loss": 0.0,
      "step": 131500
    },
    {
      "epoch": 6.7588325652841785,
      "grad_norm": 0.00018310546875,
      "learning_rate": 0.0008648233486943165,
      "loss": 0.0,
      "step": 132000
    },
    {
      "epoch": 6.784434203789043,
      "grad_norm": 0.00022125244140625,
      "learning_rate": 0.0008643113159242192,
      "loss": 0.0,
      "step": 132500
    },
    {
      "epoch": 6.810035842293907,
      "grad_norm": 0.00018310546875,
      "learning_rate": 0.000863799283154122,
      "loss": 0.0,
      "step": 133000
    },
    {
      "epoch": 6.835637480798771,
      "grad_norm": 0.00022125244140625,
      "learning_rate": 0.0008632872503840245,
      "loss": 0.0,
      "step": 133500
    },
    {
      "epoch": 6.861239119303636,
      "grad_norm": 0.0001926422119140625,
      "learning_rate": 0.0008627752176139273,
      "loss": 0.0,
      "step": 134000
    },
    {
      "epoch": 6.8868407578085,
      "grad_norm": 0.00023365020751953125,
      "learning_rate": 0.0008622631848438301,
      "loss": 0.0,
      "step": 134500
    },
    {
      "epoch": 6.912442396313364,
      "grad_norm": 0.00018787384033203125,
      "learning_rate": 0.0008617511520737327,
      "loss": 0.0,
      "step": 135000
    },
    {
      "epoch": 6.938044034818228,
      "grad_norm": 0.0003833770751953125,
      "learning_rate": 0.0008612391193036354,
      "loss": 0.0,
      "step": 135500
    },
    {
      "epoch": 6.963645673323093,
      "grad_norm": 0.0003833770751953125,
      "learning_rate": 0.0008607270865335382,
      "loss": 0.0,
      "step": 136000
    },
    {
      "epoch": 6.989247311827957,
      "grad_norm": 0.0004425048828125,
      "learning_rate": 0.0008602150537634409,
      "loss": 0.0,
      "step": 136500
    },
    {
      "epoch": 7.0,
      "eval_loss": 3.138924512313679e-05,
      "eval_runtime": 0.5584,
      "eval_samples_per_second": 1790.97,
      "eval_steps_per_second": 3.582,
      "step": 136710
    },
    {
      "epoch": 7.014848950332821,
      "grad_norm": 0.0001811981201171875,
      "learning_rate": 0.0008597030209933436,
      "loss": 0.0,
      "step": 137000
    },
    {
      "epoch": 7.040450588837686,
      "grad_norm": 0.0002536773681640625,
      "learning_rate": 0.0008591909882232463,
      "loss": 0.0,
      "step": 137500
    },
    {
      "epoch": 7.06605222734255,
      "grad_norm": 0.0004558563232421875,
      "learning_rate": 0.000858678955453149,
      "loss": 0.0,
      "step": 138000
    },
    {
      "epoch": 7.091653865847414,
      "grad_norm": 0.00017833709716796875,
      "learning_rate": 0.0008581669226830518,
      "loss": 0.0,
      "step": 138500
    },
    {
      "epoch": 7.117255504352278,
      "grad_norm": 0.0001773834228515625,
      "learning_rate": 0.0008576548899129545,
      "loss": 0.0,
      "step": 139000
    },
    {
      "epoch": 7.142857142857143,
      "grad_norm": 0.0004482269287109375,
      "learning_rate": 0.0008571428571428571,
      "loss": 0.0,
      "step": 139500
    },
    {
      "epoch": 7.168458781362007,
      "grad_norm": 0.00019359588623046875,
      "learning_rate": 0.0008566308243727599,
      "loss": 0.0,
      "step": 140000
    },
    {
      "epoch": 7.194060419866871,
      "grad_norm": 0.0001811981201171875,
      "learning_rate": 0.0008561187916026625,
      "loss": 0.0,
      "step": 140500
    },
    {
      "epoch": 7.2196620583717355,
      "grad_norm": 0.000576019287109375,
      "learning_rate": 0.0008556067588325653,
      "loss": 0.0,
      "step": 141000
    },
    {
      "epoch": 7.2452636968766,
      "grad_norm": 0.00018787384033203125,
      "learning_rate": 0.000855094726062468,
      "loss": 0.0,
      "step": 141500
    },
    {
      "epoch": 7.2708653353814645,
      "grad_norm": 0.0002155303955078125,
      "learning_rate": 0.0008545826932923707,
      "loss": 0.0,
      "step": 142000
    },
    {
      "epoch": 7.296466973886329,
      "grad_norm": 0.00018215179443359375,
      "learning_rate": 0.0008540706605222734,
      "loss": 0.0,
      "step": 142500
    },
    {
      "epoch": 7.322068612391193,
      "grad_norm": 0.0002346038818359375,
      "learning_rate": 0.0008535586277521762,
      "loss": 0.0,
      "step": 143000
    },
    {
      "epoch": 7.347670250896058,
      "grad_norm": 0.00023937225341796875,
      "learning_rate": 0.0008530465949820788,
      "loss": 0.0,
      "step": 143500
    },
    {
      "epoch": 7.373271889400922,
      "grad_norm": 0.0002460479736328125,
      "learning_rate": 0.0008525345622119816,
      "loss": 0.0,
      "step": 144000
    },
    {
      "epoch": 7.398873527905786,
      "grad_norm": 0.0002613067626953125,
      "learning_rate": 0.0008520225294418843,
      "loss": 0.0,
      "step": 144500
    },
    {
      "epoch": 7.42447516641065,
      "grad_norm": 0.00017642974853515625,
      "learning_rate": 0.000851510496671787,
      "loss": 0.0,
      "step": 145000
    },
    {
      "epoch": 7.450076804915515,
      "grad_norm": 0.000232696533203125,
      "learning_rate": 0.0008509984639016898,
      "loss": 0.0,
      "step": 145500
    },
    {
      "epoch": 7.475678443420379,
      "grad_norm": 0.0001697540283203125,
      "learning_rate": 0.0008504864311315924,
      "loss": 0.0,
      "step": 146000
    },
    {
      "epoch": 7.501280081925243,
      "grad_norm": 0.00017833709716796875,
      "learning_rate": 0.0008499743983614951,
      "loss": 0.0,
      "step": 146500
    },
    {
      "epoch": 7.526881720430108,
      "grad_norm": 0.000377655029296875,
      "learning_rate": 0.0008494623655913979,
      "loss": 0.0,
      "step": 147000
    },
    {
      "epoch": 7.552483358934972,
      "grad_norm": 0.000194549560546875,
      "learning_rate": 0.0008489503328213006,
      "loss": 0.0,
      "step": 147500
    },
    {
      "epoch": 7.578084997439836,
      "grad_norm": 0.0004825592041015625,
      "learning_rate": 0.0008484383000512033,
      "loss": 0.0,
      "step": 148000
    },
    {
      "epoch": 7.6036866359447,
      "grad_norm": 0.0001773834228515625,
      "learning_rate": 0.000847926267281106,
      "loss": 0.0,
      "step": 148500
    },
    {
      "epoch": 7.629288274449565,
      "grad_norm": 0.0001926422119140625,
      "learning_rate": 0.0008474142345110087,
      "loss": 0.0,
      "step": 149000
    },
    {
      "epoch": 7.654889912954429,
      "grad_norm": 0.00018215179443359375,
      "learning_rate": 0.0008469022017409115,
      "loss": 0.0,
      "step": 149500
    },
    {
      "epoch": 7.680491551459293,
      "grad_norm": 0.00019073486328125,
      "learning_rate": 0.0008463901689708142,
      "loss": 0.0,
      "step": 150000
    },
    {
      "epoch": 7.706093189964157,
      "grad_norm": 0.00017833709716796875,
      "learning_rate": 0.0008458781362007168,
      "loss": 0.0,
      "step": 150500
    },
    {
      "epoch": 7.731694828469022,
      "grad_norm": 0.0001926422119140625,
      "learning_rate": 0.0008453661034306196,
      "loss": 0.0,
      "step": 151000
    },
    {
      "epoch": 7.757296466973886,
      "grad_norm": 0.000274658203125,
      "learning_rate": 0.0008448540706605224,
      "loss": 0.0,
      "step": 151500
    },
    {
      "epoch": 7.7828981054787505,
      "grad_norm": 0.00017452239990234375,
      "learning_rate": 0.0008443420378904249,
      "loss": 0.0,
      "step": 152000
    },
    {
      "epoch": 7.808499743983615,
      "grad_norm": 0.00017833709716796875,
      "learning_rate": 0.0008438300051203277,
      "loss": 0.0,
      "step": 152500
    },
    {
      "epoch": 7.8341013824884795,
      "grad_norm": 0.000301361083984375,
      "learning_rate": 0.0008433179723502304,
      "loss": 0.0,
      "step": 153000
    },
    {
      "epoch": 7.859703020993344,
      "grad_norm": 0.0001697540283203125,
      "learning_rate": 0.0008428059395801332,
      "loss": 0.0,
      "step": 153500
    },
    {
      "epoch": 7.885304659498208,
      "grad_norm": 0.00020313262939453125,
      "learning_rate": 0.0008422939068100358,
      "loss": 0.0,
      "step": 154000
    },
    {
      "epoch": 7.910906298003072,
      "grad_norm": 0.0004425048828125,
      "learning_rate": 0.0008417818740399385,
      "loss": 0.0,
      "step": 154500
    },
    {
      "epoch": 7.936507936507937,
      "grad_norm": 0.00017452239990234375,
      "learning_rate": 0.0008412698412698413,
      "loss": 0.0,
      "step": 155000
    },
    {
      "epoch": 7.962109575012801,
      "grad_norm": 0.0002346038818359375,
      "learning_rate": 0.0008407578084997441,
      "loss": 0.0,
      "step": 155500
    },
    {
      "epoch": 7.987711213517665,
      "grad_norm": 0.0001773834228515625,
      "learning_rate": 0.0008402457757296466,
      "loss": 0.0,
      "step": 156000
    },
    {
      "epoch": 8.0,
      "eval_loss": 2.9280490707606077e-05,
      "eval_runtime": 0.5361,
      "eval_samples_per_second": 1865.365,
      "eval_steps_per_second": 3.731,
      "step": 156240
    }
  ],
  "logging_steps": 500,
  "max_steps": 976500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4.132510754955264e+18,
  "train_batch_size": 512,
  "trial_name": null,
  "trial_params": null
}