{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 534,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01876172607879925,
      "grad_norm": 1.1220635175704956,
      "learning_rate": 1.791044776119403e-06,
      "loss": 1.3297,
      "step": 5
    },
    {
      "epoch": 0.0375234521575985,
      "grad_norm": 0.7967448234558105,
      "learning_rate": 4.029850746268657e-06,
      "loss": 1.3423,
      "step": 10
    },
    {
      "epoch": 0.05628517823639775,
      "grad_norm": 0.6561905145645142,
      "learning_rate": 6.268656716417911e-06,
      "loss": 1.3378,
      "step": 15
    },
    {
      "epoch": 0.075046904315197,
      "grad_norm": 0.5100926160812378,
      "learning_rate": 8.507462686567164e-06,
      "loss": 1.2788,
      "step": 20
    },
    {
      "epoch": 0.09380863039399624,
      "grad_norm": 0.5022882223129272,
      "learning_rate": 1.0746268656716418e-05,
      "loss": 1.2822,
      "step": 25
    },
    {
      "epoch": 0.1125703564727955,
      "grad_norm": 0.5697208642959595,
      "learning_rate": 1.2985074626865672e-05,
      "loss": 1.2556,
      "step": 30
    },
    {
      "epoch": 0.13133208255159476,
      "grad_norm": 0.5847183465957642,
      "learning_rate": 1.5223880597014927e-05,
      "loss": 1.2403,
      "step": 35
    },
    {
      "epoch": 0.150093808630394,
      "grad_norm": 0.47744235396385193,
      "learning_rate": 1.746268656716418e-05,
      "loss": 1.2096,
      "step": 40
    },
    {
      "epoch": 0.16885553470919323,
      "grad_norm": 0.47178810834884644,
      "learning_rate": 1.9701492537313435e-05,
      "loss": 1.2533,
      "step": 45
    },
    {
      "epoch": 0.18761726078799248,
      "grad_norm": 0.44852447509765625,
      "learning_rate": 2.194029850746269e-05,
      "loss": 1.2555,
      "step": 50
    },
    {
      "epoch": 0.20637898686679174,
      "grad_norm": 0.44277751445770264,
      "learning_rate": 2.417910447761194e-05,
      "loss": 1.1888,
      "step": 55
    },
    {
      "epoch": 0.225140712945591,
      "grad_norm": 0.4661575257778168,
      "learning_rate": 2.6417910447761193e-05,
      "loss": 1.2168,
      "step": 60
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 0.5210767984390259,
      "learning_rate": 2.8656716417910447e-05,
      "loss": 1.1508,
      "step": 65
    },
    {
      "epoch": 0.2626641651031895,
      "grad_norm": 0.5324119925498962,
      "learning_rate": 2.9999815845810124e-05,
      "loss": 1.1554,
      "step": 70
    },
    {
      "epoch": 0.28142589118198874,
      "grad_norm": 0.5865416526794434,
      "learning_rate": 2.9997744163102546e-05,
      "loss": 1.1346,
      "step": 75
    },
    {
      "epoch": 0.300187617260788,
      "grad_norm": 0.5545316338539124,
      "learning_rate": 2.9993370923930618e-05,
      "loss": 1.1047,
      "step": 80
    },
    {
      "epoch": 0.31894934333958724,
      "grad_norm": 0.6227930784225464,
      "learning_rate": 2.9986696799412394e-05,
      "loss": 1.1855,
      "step": 85
    },
    {
      "epoch": 0.33771106941838647,
      "grad_norm": 0.5252574682235718,
      "learning_rate": 2.9977722813760195e-05,
      "loss": 1.1261,
      "step": 90
    },
    {
      "epoch": 0.35647279549718575,
      "grad_norm": 0.6158416867256165,
      "learning_rate": 2.9966450344123444e-05,
      "loss": 1.1142,
      "step": 95
    },
    {
      "epoch": 0.37523452157598497,
      "grad_norm": 0.7427587509155273,
      "learning_rate": 2.9952881120377314e-05,
      "loss": 1.0015,
      "step": 100
    },
    {
      "epoch": 0.39399624765478425,
      "grad_norm": 0.6637995839118958,
      "learning_rate": 2.993701722485728e-05,
      "loss": 1.0785,
      "step": 105
    },
    {
      "epoch": 0.41275797373358347,
      "grad_norm": 0.7029932737350464,
      "learning_rate": 2.9918861092039548e-05,
      "loss": 1.057,
      "step": 110
    },
    {
      "epoch": 0.43151969981238275,
      "grad_norm": 0.6611917018890381,
      "learning_rate": 2.9898415508167465e-05,
      "loss": 1.0607,
      "step": 115
    },
    {
      "epoch": 0.450281425891182,
      "grad_norm": 0.539310872554779,
      "learning_rate": 2.987568361082392e-05,
      "loss": 1.0257,
      "step": 120
    },
    {
      "epoch": 0.46904315196998125,
      "grad_norm": 0.6124554872512817,
      "learning_rate": 2.9850668888449912e-05,
      "loss": 0.9757,
      "step": 125
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 0.6248557567596436,
      "learning_rate": 2.9823375179809135e-05,
      "loss": 1.0127,
      "step": 130
    },
    {
      "epoch": 0.5065666041275797,
      "grad_norm": 0.993503987789154,
      "learning_rate": 2.9793806673398934e-05,
      "loss": 1.0222,
      "step": 135
    },
    {
      "epoch": 0.525328330206379,
      "grad_norm": 0.7925176620483398,
      "learning_rate": 2.9761967906807514e-05,
      "loss": 0.9856,
      "step": 140
    },
    {
      "epoch": 0.5440900562851783,
      "grad_norm": 0.7589050531387329,
      "learning_rate": 2.9727863766017616e-05,
      "loss": 0.9949,
      "step": 145
    },
    {
      "epoch": 0.5628517823639775,
      "grad_norm": 0.7123038172721863,
      "learning_rate": 2.9691499484656697e-05,
      "loss": 0.939,
      "step": 150
    },
    {
      "epoch": 0.5816135084427767,
      "grad_norm": 0.8624748587608337,
      "learning_rate": 2.9652880643193788e-05,
      "loss": 0.9972,
      "step": 155
    },
    {
      "epoch": 0.600375234521576,
      "grad_norm": 0.732760488986969,
      "learning_rate": 2.9612013168083113e-05,
      "loss": 1.005,
      "step": 160
    },
    {
      "epoch": 0.6191369606003753,
      "grad_norm": 0.820073127746582,
      "learning_rate": 2.9568903330854612e-05,
      "loss": 0.9217,
      "step": 165
    },
    {
      "epoch": 0.6378986866791745,
      "grad_norm": 0.8361268043518066,
      "learning_rate": 2.9523557747151505e-05,
      "loss": 0.8491,
      "step": 170
    },
    {
      "epoch": 0.6566604127579737,
      "grad_norm": 0.8558568954467773,
      "learning_rate": 2.9475983375715068e-05,
      "loss": 0.8825,
      "step": 175
    },
    {
      "epoch": 0.6754221388367729,
      "grad_norm": 0.8939546942710876,
      "learning_rate": 2.9426187517316723e-05,
      "loss": 0.8438,
      "step": 180
    },
    {
      "epoch": 0.6941838649155723,
      "grad_norm": 0.8571194410324097,
      "learning_rate": 2.93741778136377e-05,
      "loss": 0.8708,
      "step": 185
    },
    {
      "epoch": 0.7129455909943715,
      "grad_norm": 0.906139075756073,
      "learning_rate": 2.931996224609628e-05,
      "loss": 0.8978,
      "step": 190
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 1.2007263898849487,
      "learning_rate": 2.926354913462303e-05,
      "loss": 0.8055,
      "step": 195
    },
    {
      "epoch": 0.7504690431519699,
      "grad_norm": 1.034984827041626,
      "learning_rate": 2.9204947136383984e-05,
      "loss": 0.8532,
      "step": 200
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.9235329031944275,
      "learning_rate": 2.9144165244452125e-05,
      "loss": 0.7931,
      "step": 205
    },
    {
      "epoch": 0.7879924953095685,
      "grad_norm": 0.8924856781959534,
      "learning_rate": 2.9081212786427314e-05,
      "loss": 0.8213,
      "step": 210
    },
    {
      "epoch": 0.8067542213883677,
      "grad_norm": 0.9607811570167542,
      "learning_rate": 2.9016099423004854e-05,
      "loss": 0.801,
      "step": 215
    },
    {
      "epoch": 0.8255159474671669,
      "grad_norm": 1.1123484373092651,
      "learning_rate": 2.8948835146492985e-05,
      "loss": 0.8307,
      "step": 220
    },
    {
      "epoch": 0.8442776735459663,
      "grad_norm": 0.9834749698638916,
      "learning_rate": 2.8879430279279443e-05,
      "loss": 0.7948,
      "step": 225
    },
    {
      "epoch": 0.8630393996247655,
      "grad_norm": 1.0126004219055176,
      "learning_rate": 2.8807895472247393e-05,
      "loss": 0.8065,
      "step": 230
    },
    {
      "epoch": 0.8818011257035647,
      "grad_norm": 1.1847103834152222,
      "learning_rate": 2.873424170314093e-05,
      "loss": 0.7728,
      "step": 235
    },
    {
      "epoch": 0.900562851782364,
      "grad_norm": 0.9110663533210754,
      "learning_rate": 2.8658480274880462e-05,
      "loss": 0.7374,
      "step": 240
    },
    {
      "epoch": 0.9193245778611632,
      "grad_norm": 1.0707428455352783,
      "learning_rate": 2.8580622813828123e-05,
      "loss": 0.7819,
      "step": 245
    },
    {
      "epoch": 0.9380863039399625,
      "grad_norm": 1.02041494846344,
      "learning_rate": 2.8500681268003624e-05,
      "loss": 0.8076,
      "step": 250
    },
    {
      "epoch": 0.9568480300187617,
      "grad_norm": 1.0658625364303589,
      "learning_rate": 2.8418667905250683e-05,
      "loss": 0.7103,
      "step": 255
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.9454028606414795,
      "learning_rate": 2.833459531135441e-05,
      "loss": 0.7534,
      "step": 260
    },
    {
      "epoch": 0.9943714821763602,
      "grad_norm": 1.212715744972229,
      "learning_rate": 2.8248476388109897e-05,
      "loss": 0.7299,
      "step": 265
    },
    {
      "epoch": 1.0112570356472796,
      "grad_norm": 1.012231469154358,
      "learning_rate": 2.8160324351342288e-05,
      "loss": 0.6632,
      "step": 270
    },
    {
      "epoch": 1.0300187617260788,
      "grad_norm": 1.054259181022644,
      "learning_rate": 2.80701527288787e-05,
      "loss": 0.6126,
      "step": 275
    },
    {
      "epoch": 1.048780487804878,
      "grad_norm": 1.0729856491088867,
      "learning_rate": 2.797797535847221e-05,
      "loss": 0.6336,
      "step": 280
    },
    {
      "epoch": 1.0675422138836772,
      "grad_norm": 1.1964389085769653,
      "learning_rate": 2.788380638567835e-05,
      "loss": 0.663,
      "step": 285
    },
    {
      "epoch": 1.0863039399624765,
      "grad_norm": 1.1786532402038574,
      "learning_rate": 2.778766026168428e-05,
      "loss": 0.5962,
      "step": 290
    },
    {
      "epoch": 1.1050656660412759,
      "grad_norm": 1.171257495880127,
      "learning_rate": 2.768955174109115e-05,
      "loss": 0.5722,
      "step": 295
    },
    {
      "epoch": 1.123827392120075,
      "grad_norm": 1.0211284160614014,
      "learning_rate": 2.758949587964983e-05,
      "loss": 0.5923,
      "step": 300
    },
    {
      "epoch": 1.1425891181988743,
      "grad_norm": 1.18953275680542,
      "learning_rate": 2.748750803195047e-05,
      "loss": 0.597,
      "step": 305
    },
    {
      "epoch": 1.1613508442776737,
      "grad_norm": 1.2630354166030884,
      "learning_rate": 2.7383603849066167e-05,
      "loss": 0.5956,
      "step": 310
    },
    {
      "epoch": 1.1801125703564728,
      "grad_norm": 1.1608085632324219,
      "learning_rate": 2.7277799276151137e-05,
      "loss": 0.6085,
      "step": 315
    },
    {
      "epoch": 1.198874296435272,
      "grad_norm": 1.1372995376586914,
      "learning_rate": 2.7170110549993817e-05,
      "loss": 0.5848,
      "step": 320
    },
    {
      "epoch": 1.2176360225140712,
      "grad_norm": 1.081286072731018,
      "learning_rate": 2.7060554196525105e-05,
      "loss": 0.5666,
      "step": 325
    },
    {
      "epoch": 1.2363977485928705,
      "grad_norm": 1.418300747871399,
      "learning_rate": 2.694914702828233e-05,
      "loss": 0.5474,
      "step": 330
    },
    {
      "epoch": 1.2551594746716699,
      "grad_norm": 1.1424428224563599,
      "learning_rate": 2.6835906141829173e-05,
      "loss": 0.5782,
      "step": 335
    },
    {
      "epoch": 1.273921200750469,
      "grad_norm": 1.208847165107727,
      "learning_rate": 2.6720848915132053e-05,
      "loss": 0.5132,
      "step": 340
    },
    {
      "epoch": 1.2926829268292683,
      "grad_norm": 1.0693706274032593,
      "learning_rate": 2.660399300489326e-05,
      "loss": 0.5607,
      "step": 345
    },
    {
      "epoch": 1.3114446529080674,
      "grad_norm": 1.2664192914962769,
      "learning_rate": 2.6485356343841395e-05,
      "loss": 0.5611,
      "step": 350
    },
    {
      "epoch": 1.3302063789868668,
      "grad_norm": 1.3361153602600098,
      "learning_rate": 2.636495713797938e-05,
      "loss": 0.5923,
      "step": 355
    },
    {
      "epoch": 1.3489681050656661,
      "grad_norm": 1.1549376249313354,
      "learning_rate": 2.624281386379059e-05,
      "loss": 0.5911,
      "step": 360
    },
    {
      "epoch": 1.3677298311444652,
      "grad_norm": 1.3522552251815796,
      "learning_rate": 2.6118945265403413e-05,
      "loss": 0.5859,
      "step": 365
    },
    {
      "epoch": 1.3864915572232646,
      "grad_norm": 1.36016047000885,
      "learning_rate": 2.599337035171481e-05,
      "loss": 0.5155,
      "step": 370
    },
    {
      "epoch": 1.4052532833020637,
      "grad_norm": 1.268315076828003,
      "learning_rate": 2.5866108393473182e-05,
      "loss": 0.4985,
      "step": 375
    },
    {
      "epoch": 1.424015009380863,
      "grad_norm": 1.075866937637329,
      "learning_rate": 2.573717892032109e-05,
      "loss": 0.4856,
      "step": 380
    },
    {
      "epoch": 1.4427767354596623,
      "grad_norm": 1.2592428922653198,
      "learning_rate": 2.5606601717798212e-05,
      "loss": 0.5281,
      "step": 385
    },
    {
      "epoch": 1.4615384615384617,
      "grad_norm": 1.2148767709732056,
      "learning_rate": 2.547439682430509e-05,
      "loss": 0.5133,
      "step": 390
    },
    {
      "epoch": 1.4803001876172608,
      "grad_norm": 1.2412337064743042,
      "learning_rate": 2.534058452802798e-05,
      "loss": 0.5065,
      "step": 395
    },
    {
      "epoch": 1.49906191369606,
      "grad_norm": 1.2149518728256226,
      "learning_rate": 2.5205185363825482e-05,
      "loss": 0.4689,
      "step": 400
    },
    {
      "epoch": 1.5178236397748592,
      "grad_norm": 1.579555869102478,
      "learning_rate": 2.506822011007721e-05,
      "loss": 0.5183,
      "step": 405
    },
    {
      "epoch": 1.5365853658536586,
      "grad_norm": 1.134460687637329,
      "learning_rate": 2.492970978549515e-05,
      "loss": 0.5026,
      "step": 410
    },
    {
      "epoch": 1.555347091932458,
      "grad_norm": 1.2218743562698364,
      "learning_rate": 2.478967564589816e-05,
      "loss": 0.5281,
      "step": 415
    },
    {
      "epoch": 1.574108818011257,
      "grad_norm": 1.143458366394043,
      "learning_rate": 2.464813918094999e-05,
      "loss": 0.4916,
      "step": 420
    },
    {
      "epoch": 1.5928705440900561,
      "grad_norm": 1.1755683422088623,
      "learning_rate": 2.450512211086151e-05,
      "loss": 0.5052,
      "step": 425
    },
    {
      "epoch": 1.6116322701688555,
      "grad_norm": 1.4553024768829346,
      "learning_rate": 2.4360646383057522e-05,
      "loss": 0.4152,
      "step": 430
    },
    {
      "epoch": 1.6303939962476548,
      "grad_norm": 1.1782385110855103,
      "learning_rate": 2.4214734168808693e-05,
      "loss": 0.4832,
      "step": 435
    },
    {
      "epoch": 1.6491557223264541,
      "grad_norm": 1.4304338693618774,
      "learning_rate": 2.4067407859829157e-05,
      "loss": 0.459,
      "step": 440
    },
    {
      "epoch": 1.6679174484052532,
      "grad_norm": 1.1110352277755737,
      "learning_rate": 2.3918690064840282e-05,
      "loss": 0.464,
      "step": 445
    },
    {
      "epoch": 1.6866791744840526,
      "grad_norm": 1.200399398803711,
      "learning_rate": 2.37686036061011e-05,
      "loss": 0.4133,
      "step": 450
    },
    {
      "epoch": 1.7054409005628517,
      "grad_norm": 1.1901819705963135,
      "learning_rate": 2.361717151590605e-05,
      "loss": 0.4412,
      "step": 455
    },
    {
      "epoch": 1.724202626641651,
      "grad_norm": 1.2039285898208618,
      "learning_rate": 2.3464417033050394e-05,
      "loss": 0.4763,
      "step": 460
    },
    {
      "epoch": 1.7429643527204504,
      "grad_norm": 1.3061219453811646,
      "learning_rate": 2.331036359926402e-05,
      "loss": 0.4514,
      "step": 465
    },
    {
      "epoch": 1.7617260787992497,
      "grad_norm": 1.2160491943359375,
      "learning_rate": 2.315503485561404e-05,
      "loss": 0.4169,
      "step": 470
    },
    {
      "epoch": 1.7804878048780488,
      "grad_norm": 1.1981985569000244,
      "learning_rate": 2.299845463887685e-05,
      "loss": 0.4207,
      "step": 475
    },
    {
      "epoch": 1.799249530956848,
      "grad_norm": 1.164445400238037,
      "learning_rate": 2.2840646977880132e-05,
      "loss": 0.4327,
      "step": 480
    },
    {
      "epoch": 1.8180112570356473,
      "grad_norm": 1.268277883529663,
      "learning_rate": 2.268163608981536e-05,
      "loss": 0.434,
      "step": 485
    },
    {
      "epoch": 1.8367729831144466,
      "grad_norm": 1.2116129398345947,
      "learning_rate": 2.2521446376521476e-05,
      "loss": 0.396,
      "step": 490
    },
    {
      "epoch": 1.855534709193246,
      "grad_norm": 1.208999752998352,
      "learning_rate": 2.236010242074013e-05,
      "loss": 0.472,
      "step": 495
    },
    {
      "epoch": 1.874296435272045,
      "grad_norm": 1.2668207883834839,
      "learning_rate": 2.2197628982343224e-05,
      "loss": 0.3733,
      "step": 500
    },
    {
      "epoch": 1.8930581613508441,
      "grad_norm": 1.347996711730957,
      "learning_rate": 2.2034050994533275e-05,
      "loss": 0.407,
      "step": 505
    },
    {
      "epoch": 1.9118198874296435,
      "grad_norm": 1.42654287815094,
      "learning_rate": 2.1869393560017143e-05,
      "loss": 0.3818,
      "step": 510
    },
    {
      "epoch": 1.9305816135084428,
      "grad_norm": 1.1722033023834229,
      "learning_rate": 2.1703681947153768e-05,
      "loss": 0.4143,
      "step": 515
    },
    {
      "epoch": 1.9493433395872422,
      "grad_norm": 1.4157124757766724,
      "learning_rate": 2.153694158607648e-05,
      "loss": 0.4325,
      "step": 520
    },
    {
      "epoch": 1.9681050656660413,
      "grad_norm": 1.2454453706741333,
      "learning_rate": 2.136919806479051e-05,
      "loss": 0.3982,
      "step": 525
    },
    {
      "epoch": 1.9868667917448404,
      "grad_norm": 1.444549798965454,
      "learning_rate": 2.1200477125246203e-05,
      "loss": 0.3563,
      "step": 530
    }
  ],
  "logging_steps": 5,
  "max_steps": 1335,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.513490532954276e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}