| { | |
| "best_metric": 0.244761124253273, | |
| "best_model_checkpoint": "Classifier-Intent-snowflake/checkpoint-803", | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 803, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0012453300124533001, | |
| "grad_norm": 14.392992973327637, | |
| "learning_rate": 3.1133250311332504e-08, | |
| "loss": 1.3872, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0024906600249066002, | |
| "grad_norm": 16.613218307495117, | |
| "learning_rate": 6.226650062266501e-08, | |
| "loss": 1.4209, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0037359900373599006, | |
| "grad_norm": 14.957581520080566, | |
| "learning_rate": 9.339975093399752e-08, | |
| "loss": 1.5269, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0049813200498132005, | |
| "grad_norm": 14.315893173217773, | |
| "learning_rate": 1.2453300124533001e-07, | |
| "loss": 1.3745, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0062266500622665, | |
| "grad_norm": 17.72991371154785, | |
| "learning_rate": 1.556662515566625e-07, | |
| "loss": 1.2588, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.007471980074719801, | |
| "grad_norm": 15.170116424560547, | |
| "learning_rate": 1.8679950186799505e-07, | |
| "loss": 1.4722, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.008717310087173101, | |
| "grad_norm": 14.7129487991333, | |
| "learning_rate": 2.1793275217932754e-07, | |
| "loss": 1.4404, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.009962640099626401, | |
| "grad_norm": 19.042442321777344, | |
| "learning_rate": 2.4906600249066003e-07, | |
| "loss": 1.5845, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0112079701120797, | |
| "grad_norm": 14.830946922302246, | |
| "learning_rate": 2.801992528019925e-07, | |
| "loss": 1.3213, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.012453300124533, | |
| "grad_norm": 15.1524076461792, | |
| "learning_rate": 3.11332503113325e-07, | |
| "loss": 1.2402, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0136986301369863, | |
| "grad_norm": 15.068155288696289, | |
| "learning_rate": 3.4246575342465755e-07, | |
| "loss": 1.3062, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.014943960149439602, | |
| "grad_norm": 17.31379508972168, | |
| "learning_rate": 3.735990037359901e-07, | |
| "loss": 1.6055, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0161892901618929, | |
| "grad_norm": 15.690240859985352, | |
| "learning_rate": 4.0473225404732254e-07, | |
| "loss": 1.4761, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.017434620174346202, | |
| "grad_norm": 14.473444938659668, | |
| "learning_rate": 4.358655043586551e-07, | |
| "loss": 1.4365, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0186799501867995, | |
| "grad_norm": 15.4556884765625, | |
| "learning_rate": 4.669987546699875e-07, | |
| "loss": 1.5645, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.019925280199252802, | |
| "grad_norm": 16.610450744628906, | |
| "learning_rate": 4.981320049813201e-07, | |
| "loss": 1.3652, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.021170610211706103, | |
| "grad_norm": Infinity, | |
| "learning_rate": 4.981320049813201e-07, | |
| "loss": 1.5137, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0224159402241594, | |
| "grad_norm": 16.464548110961914, | |
| "learning_rate": 5.292652552926527e-07, | |
| "loss": 1.2983, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.023661270236612703, | |
| "grad_norm": 13.879263877868652, | |
| "learning_rate": 5.60398505603985e-07, | |
| "loss": 1.3018, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.024906600249066, | |
| "grad_norm": 18.191198348999023, | |
| "learning_rate": 5.915317559153176e-07, | |
| "loss": 1.5151, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.026151930261519303, | |
| "grad_norm": 14.711188316345215, | |
| "learning_rate": 6.2266500622665e-07, | |
| "loss": 1.4517, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.0273972602739726, | |
| "grad_norm": 20.39883804321289, | |
| "learning_rate": 6.537982565379826e-07, | |
| "loss": 1.5142, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.028642590286425903, | |
| "grad_norm": 17.874603271484375, | |
| "learning_rate": 6.849315068493151e-07, | |
| "loss": 1.4731, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.029887920298879204, | |
| "grad_norm": 15.248433113098145, | |
| "learning_rate": 7.160647571606476e-07, | |
| "loss": 1.4927, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.031133250311332503, | |
| "grad_norm": 14.43382453918457, | |
| "learning_rate": 7.471980074719802e-07, | |
| "loss": 1.2744, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0323785803237858, | |
| "grad_norm": 20.193641662597656, | |
| "learning_rate": 7.783312577833126e-07, | |
| "loss": 1.5669, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.033623910336239106, | |
| "grad_norm": 16.741762161254883, | |
| "learning_rate": 8.094645080946451e-07, | |
| "loss": 1.5303, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.034869240348692404, | |
| "grad_norm": 15.6235933303833, | |
| "learning_rate": 8.405977584059777e-07, | |
| "loss": 1.3936, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0361145703611457, | |
| "grad_norm": 14.727874755859375, | |
| "learning_rate": 8.717310087173102e-07, | |
| "loss": 1.4126, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.037359900373599, | |
| "grad_norm": 16.20413589477539, | |
| "learning_rate": 9.028642590286426e-07, | |
| "loss": 1.4624, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.038605230386052306, | |
| "grad_norm": 20.796939849853516, | |
| "learning_rate": 9.33997509339975e-07, | |
| "loss": 1.3433, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.039850560398505604, | |
| "grad_norm": 16.971792221069336, | |
| "learning_rate": 9.651307596513077e-07, | |
| "loss": 1.3628, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0410958904109589, | |
| "grad_norm": 14.428796768188477, | |
| "learning_rate": 9.962640099626401e-07, | |
| "loss": 1.2837, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.04234122042341221, | |
| "grad_norm": 15.790252685546875, | |
| "learning_rate": 1.0273972602739725e-06, | |
| "loss": 1.4268, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.043586550435865505, | |
| "grad_norm": 16.02347183227539, | |
| "learning_rate": 1.0585305105853053e-06, | |
| "loss": 1.4766, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0448318804483188, | |
| "grad_norm": 15.317863464355469, | |
| "learning_rate": 1.0896637608966377e-06, | |
| "loss": 1.3018, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0460772104607721, | |
| "grad_norm": 22.28313636779785, | |
| "learning_rate": 1.12079701120797e-06, | |
| "loss": 1.4688, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.047322540473225407, | |
| "grad_norm": 15.996356964111328, | |
| "learning_rate": 1.1519302615193027e-06, | |
| "loss": 1.1543, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.048567870485678705, | |
| "grad_norm": 15.208770751953125, | |
| "learning_rate": 1.1830635118306353e-06, | |
| "loss": 1.4375, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.049813200498132, | |
| "grad_norm": 15.227863311767578, | |
| "learning_rate": 1.2141967621419677e-06, | |
| "loss": 1.4365, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05105853051058531, | |
| "grad_norm": 14.673625946044922, | |
| "learning_rate": 1.2453300124533e-06, | |
| "loss": 1.2534, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.052303860523038606, | |
| "grad_norm": 17.28438949584961, | |
| "learning_rate": 1.2764632627646329e-06, | |
| "loss": 1.5381, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.053549190535491904, | |
| "grad_norm": 16.5577449798584, | |
| "learning_rate": 1.3075965130759652e-06, | |
| "loss": 1.5459, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.0547945205479452, | |
| "grad_norm": 18.29193687438965, | |
| "learning_rate": 1.3387297633872976e-06, | |
| "loss": 1.1919, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.05603985056039851, | |
| "grad_norm": 15.694727897644043, | |
| "learning_rate": 1.3698630136986302e-06, | |
| "loss": 1.4409, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.057285180572851806, | |
| "grad_norm": 14.10815715789795, | |
| "learning_rate": 1.4009962640099628e-06, | |
| "loss": 1.2461, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.058530510585305104, | |
| "grad_norm": 14.045819282531738, | |
| "learning_rate": 1.4321295143212952e-06, | |
| "loss": 1.4111, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.05977584059775841, | |
| "grad_norm": 19.675201416015625, | |
| "learning_rate": 1.4632627646326276e-06, | |
| "loss": 1.4072, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.06102117061021171, | |
| "grad_norm": 14.410515785217285, | |
| "learning_rate": 1.4943960149439604e-06, | |
| "loss": 1.23, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.062266500622665005, | |
| "grad_norm": 16.496902465820312, | |
| "learning_rate": 1.5255292652552928e-06, | |
| "loss": 1.3691, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06351183063511831, | |
| "grad_norm": 14.99001407623291, | |
| "learning_rate": 1.5566625155666252e-06, | |
| "loss": 1.2393, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.0647571606475716, | |
| "grad_norm": 14.407447814941406, | |
| "learning_rate": 1.5877957658779578e-06, | |
| "loss": 1.3501, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0660024906600249, | |
| "grad_norm": 15.634856224060059, | |
| "learning_rate": 1.6189290161892901e-06, | |
| "loss": 1.5059, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.06724782067247821, | |
| "grad_norm": 13.683075904846191, | |
| "learning_rate": 1.6500622665006227e-06, | |
| "loss": 1.2251, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0684931506849315, | |
| "grad_norm": 15.530966758728027, | |
| "learning_rate": 1.6811955168119553e-06, | |
| "loss": 1.229, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06973848069738481, | |
| "grad_norm": 14.17822265625, | |
| "learning_rate": 1.7123287671232877e-06, | |
| "loss": 1.2646, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.07098381070983811, | |
| "grad_norm": 14.06949234008789, | |
| "learning_rate": 1.7434620174346203e-06, | |
| "loss": 1.1851, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.0722291407222914, | |
| "grad_norm": 13.386149406433105, | |
| "learning_rate": 1.774595267745953e-06, | |
| "loss": 1.1406, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07347447073474471, | |
| "grad_norm": 15.319520950317383, | |
| "learning_rate": 1.8057285180572853e-06, | |
| "loss": 1.2173, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.074719800747198, | |
| "grad_norm": 14.985965728759766, | |
| "learning_rate": 1.8368617683686179e-06, | |
| "loss": 1.3159, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0759651307596513, | |
| "grad_norm": 17.426523208618164, | |
| "learning_rate": 1.86799501867995e-06, | |
| "loss": 1.0112, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.07721046077210461, | |
| "grad_norm": 15.114604949951172, | |
| "learning_rate": 1.8991282689912827e-06, | |
| "loss": 1.3403, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0784557907845579, | |
| "grad_norm": 16.03323745727539, | |
| "learning_rate": 1.9302615193026155e-06, | |
| "loss": 1.2666, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.07970112079701121, | |
| "grad_norm": 13.463469505310059, | |
| "learning_rate": 1.9613947696139476e-06, | |
| "loss": 1.106, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.08094645080946451, | |
| "grad_norm": 15.67467212677002, | |
| "learning_rate": 1.9925280199252802e-06, | |
| "loss": 1.3604, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0821917808219178, | |
| "grad_norm": 16.3656063079834, | |
| "learning_rate": 2.023661270236613e-06, | |
| "loss": 1.0149, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.08343711083437111, | |
| "grad_norm": 18.009429931640625, | |
| "learning_rate": 2.054794520547945e-06, | |
| "loss": 1.2056, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.08468244084682441, | |
| "grad_norm": 17.479284286499023, | |
| "learning_rate": 2.085927770859278e-06, | |
| "loss": 0.96, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.0859277708592777, | |
| "grad_norm": 18.173294067382812, | |
| "learning_rate": 2.1170610211706106e-06, | |
| "loss": 1.0894, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.08717310087173101, | |
| "grad_norm": 13.998863220214844, | |
| "learning_rate": 2.148194271481943e-06, | |
| "loss": 1.1992, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08841843088418432, | |
| "grad_norm": 20.954397201538086, | |
| "learning_rate": 2.1793275217932754e-06, | |
| "loss": 1.2236, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0896637608966376, | |
| "grad_norm": 15.964156150817871, | |
| "learning_rate": 2.210460772104608e-06, | |
| "loss": 1.4097, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.09090909090909091, | |
| "grad_norm": 15.810689926147461, | |
| "learning_rate": 2.24159402241594e-06, | |
| "loss": 0.8547, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.0921544209215442, | |
| "grad_norm": 17.040708541870117, | |
| "learning_rate": 2.2727272727272728e-06, | |
| "loss": 1.4102, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.09339975093399751, | |
| "grad_norm": 14.936725616455078, | |
| "learning_rate": 2.3038605230386054e-06, | |
| "loss": 1.249, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.09464508094645081, | |
| "grad_norm": 15.473489761352539, | |
| "learning_rate": 2.334993773349938e-06, | |
| "loss": 0.833, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.0958904109589041, | |
| "grad_norm": 20.1041259765625, | |
| "learning_rate": 2.3661270236612705e-06, | |
| "loss": 1.4458, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.09713574097135741, | |
| "grad_norm": 13.799981117248535, | |
| "learning_rate": 2.3972602739726027e-06, | |
| "loss": 1.0784, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.09838107098381071, | |
| "grad_norm": 17.304981231689453, | |
| "learning_rate": 2.4283935242839353e-06, | |
| "loss": 1.5112, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.099626400996264, | |
| "grad_norm": 13.382006645202637, | |
| "learning_rate": 2.459526774595268e-06, | |
| "loss": 1.063, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10087173100871731, | |
| "grad_norm": 14.760406494140625, | |
| "learning_rate": 2.4906600249066e-06, | |
| "loss": 1.1277, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.10211706102117062, | |
| "grad_norm": 13.276914596557617, | |
| "learning_rate": 2.5217932752179327e-06, | |
| "loss": 0.9333, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.10336239103362391, | |
| "grad_norm": 21.620939254760742, | |
| "learning_rate": 2.5529265255292657e-06, | |
| "loss": 1.7554, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.10460772104607721, | |
| "grad_norm": 18.264217376708984, | |
| "learning_rate": 2.584059775840598e-06, | |
| "loss": 1.2744, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.10585305105853052, | |
| "grad_norm": 15.217682838439941, | |
| "learning_rate": 2.6151930261519305e-06, | |
| "loss": 1.2827, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.10709838107098381, | |
| "grad_norm": 18.51647186279297, | |
| "learning_rate": 2.646326276463263e-06, | |
| "loss": 1.5586, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.10834371108343711, | |
| "grad_norm": 15.398965835571289, | |
| "learning_rate": 2.6774595267745952e-06, | |
| "loss": 0.916, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.1095890410958904, | |
| "grad_norm": 14.449968338012695, | |
| "learning_rate": 2.708592777085928e-06, | |
| "loss": 0.6475, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.11083437110834371, | |
| "grad_norm": 15.214373588562012, | |
| "learning_rate": 2.7397260273972604e-06, | |
| "loss": 1.1885, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.11207970112079702, | |
| "grad_norm": 21.287311553955078, | |
| "learning_rate": 2.770859277708593e-06, | |
| "loss": 1.3501, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1133250311332503, | |
| "grad_norm": 14.835405349731445, | |
| "learning_rate": 2.8019925280199256e-06, | |
| "loss": 0.9062, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.11457036114570361, | |
| "grad_norm": 16.75213050842285, | |
| "learning_rate": 2.833125778331258e-06, | |
| "loss": 1.1338, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.11581569115815692, | |
| "grad_norm": 14.93796157836914, | |
| "learning_rate": 2.8642590286425904e-06, | |
| "loss": 0.9265, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.11706102117061021, | |
| "grad_norm": 15.707828521728516, | |
| "learning_rate": 2.895392278953923e-06, | |
| "loss": 1.0312, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.11830635118306351, | |
| "grad_norm": 15.904691696166992, | |
| "learning_rate": 2.926525529265255e-06, | |
| "loss": 0.9608, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.11955168119551682, | |
| "grad_norm": 13.52252197265625, | |
| "learning_rate": 2.9576587795765878e-06, | |
| "loss": 0.6462, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.12079701120797011, | |
| "grad_norm": 15.788945198059082, | |
| "learning_rate": 2.9887920298879208e-06, | |
| "loss": 1.2263, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.12204234122042341, | |
| "grad_norm": 15.971314430236816, | |
| "learning_rate": 3.019925280199253e-06, | |
| "loss": 0.6865, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.1232876712328767, | |
| "grad_norm": 16.350345611572266, | |
| "learning_rate": 3.0510585305105856e-06, | |
| "loss": 0.9343, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.12453300124533001, | |
| "grad_norm": 23.604875564575195, | |
| "learning_rate": 3.0821917808219177e-06, | |
| "loss": 1.2271, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12577833125778332, | |
| "grad_norm": 16.765127182006836, | |
| "learning_rate": 3.1133250311332503e-06, | |
| "loss": 0.9685, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.12702366127023662, | |
| "grad_norm": 19.068199157714844, | |
| "learning_rate": 3.144458281444583e-06, | |
| "loss": 1.4028, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.12826899128268993, | |
| "grad_norm": 23.64339828491211, | |
| "learning_rate": 3.1755915317559155e-06, | |
| "loss": 1.1528, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.1295143212951432, | |
| "grad_norm": 17.963857650756836, | |
| "learning_rate": 3.206724782067248e-06, | |
| "loss": 1.2183, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.1307596513075965, | |
| "grad_norm": 24.50640106201172, | |
| "learning_rate": 3.2378580323785803e-06, | |
| "loss": 1.1194, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1320049813200498, | |
| "grad_norm": 13.496341705322266, | |
| "learning_rate": 3.268991282689913e-06, | |
| "loss": 0.8138, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.13325031133250312, | |
| "grad_norm": 13.470151901245117, | |
| "learning_rate": 3.3001245330012455e-06, | |
| "loss": 0.4418, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.13449564134495642, | |
| "grad_norm": 15.696036338806152, | |
| "learning_rate": 3.331257783312578e-06, | |
| "loss": 0.9106, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1357409713574097, | |
| "grad_norm": 15.40795612335205, | |
| "learning_rate": 3.3623910336239107e-06, | |
| "loss": 0.8492, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.136986301369863, | |
| "grad_norm": 14.989590644836426, | |
| "learning_rate": 3.393524283935243e-06, | |
| "loss": 0.6815, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1382316313823163, | |
| "grad_norm": 11.08140754699707, | |
| "learning_rate": 3.4246575342465754e-06, | |
| "loss": 0.3635, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.13947696139476962, | |
| "grad_norm": 13.492122650146484, | |
| "learning_rate": 3.455790784557908e-06, | |
| "loss": 0.4391, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.14072229140722292, | |
| "grad_norm": 24.947566986083984, | |
| "learning_rate": 3.4869240348692406e-06, | |
| "loss": 1.6245, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.14196762141967623, | |
| "grad_norm": 21.374814987182617, | |
| "learning_rate": 3.5180572851805732e-06, | |
| "loss": 1.0759, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.1432129514321295, | |
| "grad_norm": 12.628018379211426, | |
| "learning_rate": 3.549190535491906e-06, | |
| "loss": 0.3741, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1444582814445828, | |
| "grad_norm": 28.174150466918945, | |
| "learning_rate": 3.5803237858032376e-06, | |
| "loss": 1.8252, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.14570361145703611, | |
| "grad_norm": 29.708969116210938, | |
| "learning_rate": 3.6114570361145706e-06, | |
| "loss": 1.6035, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.14694894146948942, | |
| "grad_norm": 14.904471397399902, | |
| "learning_rate": 3.642590286425903e-06, | |
| "loss": 0.693, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.14819427148194272, | |
| "grad_norm": 19.106191635131836, | |
| "learning_rate": 3.6737235367372358e-06, | |
| "loss": 0.7761, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.149439601494396, | |
| "grad_norm": 21.0386905670166, | |
| "learning_rate": 3.7048567870485684e-06, | |
| "loss": 1.1099, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1506849315068493, | |
| "grad_norm": 11.261611938476562, | |
| "learning_rate": 3.7359900373599e-06, | |
| "loss": 0.3363, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.1519302615193026, | |
| "grad_norm": 21.45566749572754, | |
| "learning_rate": 3.7671232876712327e-06, | |
| "loss": 1.1392, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.15317559153175592, | |
| "grad_norm": 23.72317123413086, | |
| "learning_rate": 3.7982565379825653e-06, | |
| "loss": 1.2175, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.15442092154420922, | |
| "grad_norm": 9.110578536987305, | |
| "learning_rate": 3.829389788293898e-06, | |
| "loss": 0.2401, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.15566625155666253, | |
| "grad_norm": 10.689005851745605, | |
| "learning_rate": 3.860523038605231e-06, | |
| "loss": 0.2262, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1569115815691158, | |
| "grad_norm": 18.003347396850586, | |
| "learning_rate": 3.8916562889165635e-06, | |
| "loss": 0.8304, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1581569115815691, | |
| "grad_norm": 16.37116241455078, | |
| "learning_rate": 3.922789539227895e-06, | |
| "loss": 0.6732, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.15940224159402241, | |
| "grad_norm": 20.549619674682617, | |
| "learning_rate": 3.953922789539228e-06, | |
| "loss": 0.7898, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.16064757160647572, | |
| "grad_norm": 27.759565353393555, | |
| "learning_rate": 3.9850560398505605e-06, | |
| "loss": 1.6685, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.16189290161892902, | |
| "grad_norm": 10.014034271240234, | |
| "learning_rate": 4.016189290161893e-06, | |
| "loss": 0.2059, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.16313823163138233, | |
| "grad_norm": 18.375551223754883, | |
| "learning_rate": 4.047322540473226e-06, | |
| "loss": 0.5604, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1643835616438356, | |
| "grad_norm": 23.120948791503906, | |
| "learning_rate": 4.078455790784558e-06, | |
| "loss": 1.2139, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.1656288916562889, | |
| "grad_norm": 20.939762115478516, | |
| "learning_rate": 4.10958904109589e-06, | |
| "loss": 0.8262, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.16687422166874222, | |
| "grad_norm": 39.98530578613281, | |
| "learning_rate": 4.140722291407223e-06, | |
| "loss": 1.2119, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.16811955168119552, | |
| "grad_norm": 16.684823989868164, | |
| "learning_rate": 4.171855541718556e-06, | |
| "loss": 0.7434, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.16936488169364883, | |
| "grad_norm": 8.765166282653809, | |
| "learning_rate": 4.202988792029889e-06, | |
| "loss": 0.1506, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1706102117061021, | |
| "grad_norm": 20.599409103393555, | |
| "learning_rate": 4.234122042341221e-06, | |
| "loss": 0.8276, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.1718555417185554, | |
| "grad_norm": 27.572763442993164, | |
| "learning_rate": 4.265255292652553e-06, | |
| "loss": 1.0833, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.17310087173100872, | |
| "grad_norm": 18.92407989501953, | |
| "learning_rate": 4.296388542963886e-06, | |
| "loss": 0.4558, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.17434620174346202, | |
| "grad_norm": 17.19509506225586, | |
| "learning_rate": 4.327521793275218e-06, | |
| "loss": 0.2935, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.17559153175591533, | |
| "grad_norm": 24.49059295654297, | |
| "learning_rate": 4.358655043586551e-06, | |
| "loss": 0.7617, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.17683686176836863, | |
| "grad_norm": 10.664165496826172, | |
| "learning_rate": 4.389788293897883e-06, | |
| "loss": 0.2395, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.1780821917808219, | |
| "grad_norm": 25.44748878479004, | |
| "learning_rate": 4.420921544209216e-06, | |
| "loss": 0.9827, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1793275217932752, | |
| "grad_norm": 15.069397926330566, | |
| "learning_rate": 4.452054794520548e-06, | |
| "loss": 0.631, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.18057285180572852, | |
| "grad_norm": 18.701967239379883, | |
| "learning_rate": 4.48318804483188e-06, | |
| "loss": 0.8523, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 29.00722885131836, | |
| "learning_rate": 4.514321295143213e-06, | |
| "loss": 1.2954, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.18306351183063513, | |
| "grad_norm": 9.37511157989502, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 0.2489, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.1843088418430884, | |
| "grad_norm": 6.786942005157471, | |
| "learning_rate": 4.576587795765878e-06, | |
| "loss": 0.1326, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.1855541718555417, | |
| "grad_norm": 28.655126571655273, | |
| "learning_rate": 4.607721046077211e-06, | |
| "loss": 0.9426, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.18679950186799502, | |
| "grad_norm": 6.270091533660889, | |
| "learning_rate": 4.638854296388543e-06, | |
| "loss": 0.203, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.18804483188044832, | |
| "grad_norm": 24.001052856445312, | |
| "learning_rate": 4.669987546699876e-06, | |
| "loss": 0.6611, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.18929016189290163, | |
| "grad_norm": 5.734297275543213, | |
| "learning_rate": 4.7011207970112085e-06, | |
| "loss": 0.1378, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.19053549190535493, | |
| "grad_norm": 10.421098709106445, | |
| "learning_rate": 4.732254047322541e-06, | |
| "loss": 0.1292, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.1917808219178082, | |
| "grad_norm": 6.499827861785889, | |
| "learning_rate": 4.763387297633874e-06, | |
| "loss": 0.1825, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.1930261519302615, | |
| "grad_norm": 7.8410563468933105, | |
| "learning_rate": 4.7945205479452054e-06, | |
| "loss": 0.2148, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.19427148194271482, | |
| "grad_norm": 21.975595474243164, | |
| "learning_rate": 4.825653798256538e-06, | |
| "loss": 0.3541, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.19551681195516812, | |
| "grad_norm": Infinity, | |
| "learning_rate": 4.825653798256538e-06, | |
| "loss": 0.611, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.19676214196762143, | |
| "grad_norm": 41.450469970703125, | |
| "learning_rate": 4.856787048567871e-06, | |
| "loss": 0.7124, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.1980074719800747, | |
| "grad_norm": 11.570192337036133, | |
| "learning_rate": 4.887920298879203e-06, | |
| "loss": 0.2204, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.199252801992528, | |
| "grad_norm": 9.37869930267334, | |
| "learning_rate": 4.919053549190536e-06, | |
| "loss": 0.2504, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.20049813200498132, | |
| "grad_norm": 10.956586837768555, | |
| "learning_rate": 4.950186799501868e-06, | |
| "loss": 0.2246, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.20174346201743462, | |
| "grad_norm": 6.231212139129639, | |
| "learning_rate": 4.9813200498132e-06, | |
| "loss": 0.1144, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.20298879202988793, | |
| "grad_norm": 7.454379558563232, | |
| "learning_rate": 5.012453300124533e-06, | |
| "loss": 0.1583, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.20423412204234123, | |
| "grad_norm": 4.702846050262451, | |
| "learning_rate": 5.043586550435865e-06, | |
| "loss": 0.0929, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2054794520547945, | |
| "grad_norm": 35.559165954589844, | |
| "learning_rate": 5.074719800747199e-06, | |
| "loss": 0.4275, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.20672478206724781, | |
| "grad_norm": 2.42557430267334, | |
| "learning_rate": 5.105853051058531e-06, | |
| "loss": 0.0526, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.20797011207970112, | |
| "grad_norm": 1.8609647750854492, | |
| "learning_rate": 5.136986301369863e-06, | |
| "loss": 0.0334, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.20921544209215442, | |
| "grad_norm": 4.347940921783447, | |
| "learning_rate": 5.168119551681196e-06, | |
| "loss": 0.095, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.21046077210460773, | |
| "grad_norm": 7.721733093261719, | |
| "learning_rate": 5.199252801992528e-06, | |
| "loss": 0.1641, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.21170610211706103, | |
| "grad_norm": 42.037933349609375, | |
| "learning_rate": 5.230386052303861e-06, | |
| "loss": 0.4911, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2129514321295143, | |
| "grad_norm": 15.133713722229004, | |
| "learning_rate": 5.2615193026151935e-06, | |
| "loss": 0.1069, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.21419676214196762, | |
| "grad_norm": 3.205000638961792, | |
| "learning_rate": 5.292652552926526e-06, | |
| "loss": 0.0497, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.21544209215442092, | |
| "grad_norm": 1.0115067958831787, | |
| "learning_rate": 5.323785803237858e-06, | |
| "loss": 0.0211, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.21668742216687423, | |
| "grad_norm": Infinity, | |
| "learning_rate": 5.323785803237858e-06, | |
| "loss": 0.6843, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.21793275217932753, | |
| "grad_norm": 2.7913990020751953, | |
| "learning_rate": 5.3549190535491905e-06, | |
| "loss": 0.03, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.2191780821917808, | |
| "grad_norm": 14.680956840515137, | |
| "learning_rate": 5.386052303860523e-06, | |
| "loss": 0.0976, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.22042341220423411, | |
| "grad_norm": 5.276736736297607, | |
| "learning_rate": 5.417185554171856e-06, | |
| "loss": 0.0715, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.22166874221668742, | |
| "grad_norm": 2.4684441089630127, | |
| "learning_rate": 5.448318804483188e-06, | |
| "loss": 0.0288, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.22291407222914073, | |
| "grad_norm": 1.0922425985336304, | |
| "learning_rate": 5.479452054794521e-06, | |
| "loss": 0.0211, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.22415940224159403, | |
| "grad_norm": 9.240842819213867, | |
| "learning_rate": 5.5105853051058535e-06, | |
| "loss": 0.0652, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.22540473225404734, | |
| "grad_norm": 38.4419059753418, | |
| "learning_rate": 5.541718555417186e-06, | |
| "loss": 0.685, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.2266500622665006, | |
| "grad_norm": 15.644163131713867, | |
| "learning_rate": 5.572851805728519e-06, | |
| "loss": 0.4103, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.22789539227895392, | |
| "grad_norm": 2.4954333305358887, | |
| "learning_rate": 5.603985056039851e-06, | |
| "loss": 0.0449, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.22914072229140722, | |
| "grad_norm": 18.7884521484375, | |
| "learning_rate": 5.635118306351184e-06, | |
| "loss": 0.3378, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.23038605230386053, | |
| "grad_norm": null, | |
| "learning_rate": 5.635118306351184e-06, | |
| "loss": 0.8211, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.23163138231631383, | |
| "grad_norm": 2.243523359298706, | |
| "learning_rate": 5.666251556662516e-06, | |
| "loss": 0.0479, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2328767123287671, | |
| "grad_norm": 3.3581135272979736, | |
| "learning_rate": 5.697384806973848e-06, | |
| "loss": 0.0505, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.23412204234122042, | |
| "grad_norm": 1.6243762969970703, | |
| "learning_rate": 5.728518057285181e-06, | |
| "loss": 0.0287, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.23536737235367372, | |
| "grad_norm": 55.31060791015625, | |
| "learning_rate": 5.759651307596513e-06, | |
| "loss": 0.2187, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.23661270236612703, | |
| "grad_norm": 0.3759680986404419, | |
| "learning_rate": 5.790784557907846e-06, | |
| "loss": 0.0085, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.23785803237858033, | |
| "grad_norm": 10.535552978515625, | |
| "learning_rate": 5.821917808219178e-06, | |
| "loss": 0.1855, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.23910336239103364, | |
| "grad_norm": 11.76515007019043, | |
| "learning_rate": 5.85305105853051e-06, | |
| "loss": 0.0808, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2403486924034869, | |
| "grad_norm": 16.85251808166504, | |
| "learning_rate": 5.884184308841843e-06, | |
| "loss": 0.2412, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.24159402241594022, | |
| "grad_norm": 0.46440303325653076, | |
| "learning_rate": 5.9153175591531755e-06, | |
| "loss": 0.008, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.24283935242839352, | |
| "grad_norm": 0.7289634943008423, | |
| "learning_rate": 5.946450809464509e-06, | |
| "loss": 0.013, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.24408468244084683, | |
| "grad_norm": 11.138826370239258, | |
| "learning_rate": 5.9775840597758416e-06, | |
| "loss": 0.1779, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.24533001245330013, | |
| "grad_norm": 1.223634123802185, | |
| "learning_rate": 6.008717310087173e-06, | |
| "loss": 0.0177, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.2465753424657534, | |
| "grad_norm": 3.939805507659912, | |
| "learning_rate": 6.039850560398506e-06, | |
| "loss": 0.0818, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.24782067247820672, | |
| "grad_norm": 137.29930114746094, | |
| "learning_rate": 6.0709838107098385e-06, | |
| "loss": 3.1221, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.24906600249066002, | |
| "grad_norm": 3.8515782356262207, | |
| "learning_rate": 6.102117061021171e-06, | |
| "loss": 0.0835, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2503113325031133, | |
| "grad_norm": 1.5677456855773926, | |
| "learning_rate": 6.133250311332504e-06, | |
| "loss": 0.0312, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.25155666251556663, | |
| "grad_norm": 1.6086269617080688, | |
| "learning_rate": 6.1643835616438354e-06, | |
| "loss": 0.0299, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.25280199252801994, | |
| "grad_norm": 0.9720219969749451, | |
| "learning_rate": 6.195516811955168e-06, | |
| "loss": 0.0152, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.25404732254047324, | |
| "grad_norm": 29.63043212890625, | |
| "learning_rate": 6.226650062266501e-06, | |
| "loss": 0.1063, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.25529265255292655, | |
| "grad_norm": 0.7106034159660339, | |
| "learning_rate": 6.257783312577833e-06, | |
| "loss": 0.0128, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.25653798256537985, | |
| "grad_norm": 0.7417896389961243, | |
| "learning_rate": 6.288916562889166e-06, | |
| "loss": 0.0138, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.2577833125778331, | |
| "grad_norm": 2.157313823699951, | |
| "learning_rate": 6.3200498132004984e-06, | |
| "loss": 0.0267, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.2590286425902864, | |
| "grad_norm": 0.8388156294822693, | |
| "learning_rate": 6.351183063511831e-06, | |
| "loss": 0.0125, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.2602739726027397, | |
| "grad_norm": 0.33427631855010986, | |
| "learning_rate": 6.382316313823164e-06, | |
| "loss": 0.0067, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.261519302615193, | |
| "grad_norm": 0.7715888023376465, | |
| "learning_rate": 6.413449564134496e-06, | |
| "loss": 0.0112, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2627646326276463, | |
| "grad_norm": 0.23136259615421295, | |
| "learning_rate": 6.444582814445828e-06, | |
| "loss": 0.0052, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.2640099626400996, | |
| "grad_norm": 149.45394897460938, | |
| "learning_rate": 6.4757160647571606e-06, | |
| "loss": 0.3285, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.26525529265255293, | |
| "grad_norm": 2.4453482627868652, | |
| "learning_rate": 6.506849315068493e-06, | |
| "loss": 0.0472, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.26650062266500624, | |
| "grad_norm": 2.4057695865631104, | |
| "learning_rate": 6.537982565379826e-06, | |
| "loss": 0.033, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.26774595267745954, | |
| "grad_norm": 0.2910887598991394, | |
| "learning_rate": 6.569115815691158e-06, | |
| "loss": 0.0054, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.26899128268991285, | |
| "grad_norm": 0.9707146286964417, | |
| "learning_rate": 6.600249066002491e-06, | |
| "loss": 0.0173, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.27023661270236615, | |
| "grad_norm": 0.2008867859840393, | |
| "learning_rate": 6.6313823163138235e-06, | |
| "loss": 0.0038, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.2714819427148194, | |
| "grad_norm": 1.5367100238800049, | |
| "learning_rate": 6.662515566625156e-06, | |
| "loss": 0.0185, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2727272727272727, | |
| "grad_norm": 0.5055931806564331, | |
| "learning_rate": 6.693648816936489e-06, | |
| "loss": 0.0066, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.273972602739726, | |
| "grad_norm": 0.4430530071258545, | |
| "learning_rate": 6.724782067247821e-06, | |
| "loss": 0.0062, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2752179327521793, | |
| "grad_norm": 2.2975895404815674, | |
| "learning_rate": 6.755915317559154e-06, | |
| "loss": 0.01, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.2764632627646326, | |
| "grad_norm": 0.8265185952186584, | |
| "learning_rate": 6.787048567870486e-06, | |
| "loss": 0.0136, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2777085927770859, | |
| "grad_norm": 168.16004943847656, | |
| "learning_rate": 6.818181818181818e-06, | |
| "loss": 2.9077, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.27895392278953923, | |
| "grad_norm": 0.7623637318611145, | |
| "learning_rate": 6.849315068493151e-06, | |
| "loss": 0.0124, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.28019925280199254, | |
| "grad_norm": 0.5590365529060364, | |
| "learning_rate": 6.8804483188044835e-06, | |
| "loss": 0.0115, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.28144458281444584, | |
| "grad_norm": 0.36643216013908386, | |
| "learning_rate": 6.911581569115816e-06, | |
| "loss": 0.005, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.28268991282689915, | |
| "grad_norm": 0.33054330945014954, | |
| "learning_rate": 6.942714819427149e-06, | |
| "loss": 0.0077, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.28393524283935245, | |
| "grad_norm": 0.34179171919822693, | |
| "learning_rate": 6.973848069738481e-06, | |
| "loss": 0.0077, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2851805728518057, | |
| "grad_norm": 7.439018726348877, | |
| "learning_rate": 7.004981320049814e-06, | |
| "loss": 0.0183, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.286425902864259, | |
| "grad_norm": 0.4672091603279114, | |
| "learning_rate": 7.0361145703611465e-06, | |
| "loss": 0.0088, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.2876712328767123, | |
| "grad_norm": 43.73134994506836, | |
| "learning_rate": 7.067247820672479e-06, | |
| "loss": 0.0645, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.2889165628891656, | |
| "grad_norm": 0.5883788466453552, | |
| "learning_rate": 7.098381070983812e-06, | |
| "loss": 0.0077, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2901618929016189, | |
| "grad_norm": 0.11801683157682419, | |
| "learning_rate": 7.1295143212951425e-06, | |
| "loss": 0.0025, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.29140722291407223, | |
| "grad_norm": 0.4613223671913147, | |
| "learning_rate": 7.160647571606475e-06, | |
| "loss": 0.0061, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.29265255292652553, | |
| "grad_norm": 0.46132174134254456, | |
| "learning_rate": 7.191780821917809e-06, | |
| "loss": 0.0054, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.29389788293897884, | |
| "grad_norm": null, | |
| "learning_rate": 7.191780821917809e-06, | |
| "loss": 0.4395, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.29514321295143214, | |
| "grad_norm": 0.17022739350795746, | |
| "learning_rate": 7.222914072229141e-06, | |
| "loss": 0.0041, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.29638854296388545, | |
| "grad_norm": 0.10204841196537018, | |
| "learning_rate": 7.254047322540474e-06, | |
| "loss": 0.0025, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.29763387297633875, | |
| "grad_norm": 0.21153950691223145, | |
| "learning_rate": 7.285180572851806e-06, | |
| "loss": 0.0037, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.298879202988792, | |
| "grad_norm": 0.15493176877498627, | |
| "learning_rate": 7.316313823163139e-06, | |
| "loss": 0.003, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3001245330012453, | |
| "grad_norm": 0.24285216629505157, | |
| "learning_rate": 7.3474470734744716e-06, | |
| "loss": 0.0049, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.3013698630136986, | |
| "grad_norm": 0.19606204330921173, | |
| "learning_rate": 7.378580323785804e-06, | |
| "loss": 0.0031, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3026151930261519, | |
| "grad_norm": 2.727463483810425, | |
| "learning_rate": 7.409713574097137e-06, | |
| "loss": 0.0078, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.3038605230386052, | |
| "grad_norm": 0.1808951050043106, | |
| "learning_rate": 7.440846824408469e-06, | |
| "loss": 0.0039, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.30510585305105853, | |
| "grad_norm": 0.24642078578472137, | |
| "learning_rate": 7.4719800747198e-06, | |
| "loss": 0.0047, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.30635118306351183, | |
| "grad_norm": 0.10990118980407715, | |
| "learning_rate": 7.503113325031133e-06, | |
| "loss": 0.0021, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.30759651307596514, | |
| "grad_norm": 0.08530181646347046, | |
| "learning_rate": 7.5342465753424655e-06, | |
| "loss": 0.0022, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.30884184308841844, | |
| "grad_norm": 1.499770998954773, | |
| "learning_rate": 7.565379825653798e-06, | |
| "loss": 0.0047, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.31008717310087175, | |
| "grad_norm": 0.08772747963666916, | |
| "learning_rate": 7.596513075965131e-06, | |
| "loss": 0.0026, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.31133250311332505, | |
| "grad_norm": 0.38723257184028625, | |
| "learning_rate": 7.627646326276463e-06, | |
| "loss": 0.0045, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.3125778331257783, | |
| "grad_norm": 0.09018506854772568, | |
| "learning_rate": 7.658779576587797e-06, | |
| "loss": 0.002, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.3138231631382316, | |
| "grad_norm": 3.251638650894165, | |
| "learning_rate": 7.689912826899128e-06, | |
| "loss": 0.0073, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3150684931506849, | |
| "grad_norm": 0.17742273211479187, | |
| "learning_rate": 7.721046077210462e-06, | |
| "loss": 0.0034, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.3163138231631382, | |
| "grad_norm": 4.7799201011657715, | |
| "learning_rate": 7.752179327521794e-06, | |
| "loss": 0.0149, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3175591531755915, | |
| "grad_norm": 0.7822676301002502, | |
| "learning_rate": 7.783312577833127e-06, | |
| "loss": 0.0043, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.31880448318804483, | |
| "grad_norm": 0.07635273039340973, | |
| "learning_rate": 7.814445828144457e-06, | |
| "loss": 0.0019, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.32004981320049813, | |
| "grad_norm": 0.128676638007164, | |
| "learning_rate": 7.84557907845579e-06, | |
| "loss": 0.0031, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.32129514321295144, | |
| "grad_norm": 0.35170984268188477, | |
| "learning_rate": 7.876712328767124e-06, | |
| "loss": 0.0034, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.32254047322540474, | |
| "grad_norm": 0.17562495172023773, | |
| "learning_rate": 7.907845579078456e-06, | |
| "loss": 0.0036, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.32378580323785805, | |
| "grad_norm": 0.4719379246234894, | |
| "learning_rate": 7.93897882938979e-06, | |
| "loss": 0.0052, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.32503113325031135, | |
| "grad_norm": 1.012569546699524, | |
| "learning_rate": 7.970112079701121e-06, | |
| "loss": 0.0034, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.32627646326276466, | |
| "grad_norm": 0.6060551404953003, | |
| "learning_rate": 8.001245330012454e-06, | |
| "loss": 0.0033, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.3275217932752179, | |
| "grad_norm": 0.04582296311855316, | |
| "learning_rate": 8.032378580323786e-06, | |
| "loss": 0.0012, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.3287671232876712, | |
| "grad_norm": 0.109385184943676, | |
| "learning_rate": 8.06351183063512e-06, | |
| "loss": 0.0023, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3300124533001245, | |
| "grad_norm": 0.056446850299835205, | |
| "learning_rate": 8.094645080946451e-06, | |
| "loss": 0.0013, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.3312577833125778, | |
| "grad_norm": 0.10354617983102798, | |
| "learning_rate": 8.125778331257785e-06, | |
| "loss": 0.002, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.33250311332503113, | |
| "grad_norm": 0.14216098189353943, | |
| "learning_rate": 8.156911581569117e-06, | |
| "loss": 0.0029, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.33374844333748444, | |
| "grad_norm": 0.07656246423721313, | |
| "learning_rate": 8.188044831880448e-06, | |
| "loss": 0.0018, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.33499377334993774, | |
| "grad_norm": 0.2349928468465805, | |
| "learning_rate": 8.21917808219178e-06, | |
| "loss": 0.0034, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.33623910336239105, | |
| "grad_norm": 0.1743057817220688, | |
| "learning_rate": 8.250311332503113e-06, | |
| "loss": 0.0041, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.33748443337484435, | |
| "grad_norm": 0.05078033730387688, | |
| "learning_rate": 8.281444582814445e-06, | |
| "loss": 0.0015, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.33872976338729766, | |
| "grad_norm": 0.12597429752349854, | |
| "learning_rate": 8.312577833125779e-06, | |
| "loss": 0.0032, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.33997509339975096, | |
| "grad_norm": 0.09458588808774948, | |
| "learning_rate": 8.343711083437112e-06, | |
| "loss": 0.002, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.3412204234122042, | |
| "grad_norm": 0.20183101296424866, | |
| "learning_rate": 8.374844333748444e-06, | |
| "loss": 0.0043, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.3424657534246575, | |
| "grad_norm": 0.16585314273834229, | |
| "learning_rate": 8.405977584059777e-06, | |
| "loss": 0.0026, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3437110834371108, | |
| "grad_norm": 0.05950070172548294, | |
| "learning_rate": 8.437110834371109e-06, | |
| "loss": 0.0018, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.3449564134495641, | |
| "grad_norm": 0.062412526458501816, | |
| "learning_rate": 8.468244084682442e-06, | |
| "loss": 0.0017, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.34620174346201743, | |
| "grad_norm": 297.8834533691406, | |
| "learning_rate": 8.499377334993774e-06, | |
| "loss": 2.7641, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.34744707347447074, | |
| "grad_norm": 0.18788257241249084, | |
| "learning_rate": 8.530510585305106e-06, | |
| "loss": 0.0031, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.34869240348692404, | |
| "grad_norm": 0.05538473278284073, | |
| "learning_rate": 8.561643835616438e-06, | |
| "loss": 0.0014, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.34993773349937735, | |
| "grad_norm": 0.05929434299468994, | |
| "learning_rate": 8.592777085927771e-06, | |
| "loss": 0.0015, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.35118306351183065, | |
| "grad_norm": 0.15558889508247375, | |
| "learning_rate": 8.623910336239103e-06, | |
| "loss": 0.0032, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.35242839352428396, | |
| "grad_norm": 0.0714510902762413, | |
| "learning_rate": 8.655043586550436e-06, | |
| "loss": 0.002, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.35367372353673726, | |
| "grad_norm": 2.3466129302978516, | |
| "learning_rate": 8.686176836861768e-06, | |
| "loss": 0.0066, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3549190535491905, | |
| "grad_norm": 17.250829696655273, | |
| "learning_rate": 8.717310087173102e-06, | |
| "loss": 0.0224, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.3561643835616438, | |
| "grad_norm": 0.03599457070231438, | |
| "learning_rate": 8.748443337484433e-06, | |
| "loss": 0.0011, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3574097135740971, | |
| "grad_norm": 0.05941268801689148, | |
| "learning_rate": 8.779576587795767e-06, | |
| "loss": 0.0019, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.3586550435865504, | |
| "grad_norm": 1.2639917135238647, | |
| "learning_rate": 8.810709838107099e-06, | |
| "loss": 0.0044, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.35990037359900373, | |
| "grad_norm": 0.04103681072592735, | |
| "learning_rate": 8.841843088418432e-06, | |
| "loss": 0.001, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.36114570361145704, | |
| "grad_norm": 0.03893645480275154, | |
| "learning_rate": 8.872976338729764e-06, | |
| "loss": 0.001, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.36239103362391034, | |
| "grad_norm": 0.038509551435709, | |
| "learning_rate": 8.904109589041095e-06, | |
| "loss": 0.0009, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 0.03188912197947502, | |
| "learning_rate": 8.935242839352429e-06, | |
| "loss": 0.001, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.36488169364881695, | |
| "grad_norm": 0.048545584082603455, | |
| "learning_rate": 8.96637608966376e-06, | |
| "loss": 0.0011, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.36612702366127026, | |
| "grad_norm": 0.0602889247238636, | |
| "learning_rate": 8.997509339975094e-06, | |
| "loss": 0.0015, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.36737235367372356, | |
| "grad_norm": 0.05375710129737854, | |
| "learning_rate": 9.028642590286426e-06, | |
| "loss": 0.0016, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3686176836861768, | |
| "grad_norm": 0.043809376657009125, | |
| "learning_rate": 9.05977584059776e-06, | |
| "loss": 0.0012, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3698630136986301, | |
| "grad_norm": 0.0780409425497055, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 0.0022, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.3711083437110834, | |
| "grad_norm": 0.06276142597198486, | |
| "learning_rate": 9.122042341220424e-06, | |
| "loss": 0.0017, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3723536737235367, | |
| "grad_norm": 0.060071829706430435, | |
| "learning_rate": 9.153175591531756e-06, | |
| "loss": 0.0014, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.37359900373599003, | |
| "grad_norm": 0.032719388604164124, | |
| "learning_rate": 9.18430884184309e-06, | |
| "loss": 0.0007, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.37484433374844334, | |
| "grad_norm": 0.034909844398498535, | |
| "learning_rate": 9.215442092154421e-06, | |
| "loss": 0.001, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.37608966376089664, | |
| "grad_norm": 0.034523140639066696, | |
| "learning_rate": 9.246575342465753e-06, | |
| "loss": 0.0011, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.37733499377334995, | |
| "grad_norm": 0.05015862360596657, | |
| "learning_rate": 9.277708592777087e-06, | |
| "loss": 0.0013, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.37858032378580325, | |
| "grad_norm": 0.05602340027689934, | |
| "learning_rate": 9.308841843088418e-06, | |
| "loss": 0.0016, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.37982565379825656, | |
| "grad_norm": 0.04742440581321716, | |
| "learning_rate": 9.339975093399752e-06, | |
| "loss": 0.0014, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.38107098381070986, | |
| "grad_norm": 0.03035055100917816, | |
| "learning_rate": 9.371108343711084e-06, | |
| "loss": 0.0009, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.3823163138231631, | |
| "grad_norm": 241.25111389160156, | |
| "learning_rate": 9.402241594022417e-06, | |
| "loss": 0.1876, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.3835616438356164, | |
| "grad_norm": 0.03797473758459091, | |
| "learning_rate": 9.433374844333749e-06, | |
| "loss": 0.001, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3848069738480697, | |
| "grad_norm": 0.03934524580836296, | |
| "learning_rate": 9.464508094645082e-06, | |
| "loss": 0.001, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.386052303860523, | |
| "grad_norm": 0.04892684891819954, | |
| "learning_rate": 9.495641344956414e-06, | |
| "loss": 0.0013, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.38729763387297633, | |
| "grad_norm": 0.06903809309005737, | |
| "learning_rate": 9.526774595267747e-06, | |
| "loss": 0.0018, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.38854296388542964, | |
| "grad_norm": 0.17654924094676971, | |
| "learning_rate": 9.557907845579077e-06, | |
| "loss": 0.0018, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.38978829389788294, | |
| "grad_norm": 0.047983210533857346, | |
| "learning_rate": 9.589041095890411e-06, | |
| "loss": 0.001, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.39103362391033625, | |
| "grad_norm": 0.0729343593120575, | |
| "learning_rate": 9.620174346201744e-06, | |
| "loss": 0.0018, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.39227895392278955, | |
| "grad_norm": 0.025607705116271973, | |
| "learning_rate": 9.651307596513076e-06, | |
| "loss": 0.0007, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.39352428393524286, | |
| "grad_norm": 0.0369686633348465, | |
| "learning_rate": 9.68244084682441e-06, | |
| "loss": 0.001, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.39476961394769616, | |
| "grad_norm": 0.03150925785303116, | |
| "learning_rate": 9.713574097135741e-06, | |
| "loss": 0.001, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.3960149439601494, | |
| "grad_norm": 537.4097900390625, | |
| "learning_rate": 9.744707347447075e-06, | |
| "loss": 0.9077, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3972602739726027, | |
| "grad_norm": 0.036139559000730515, | |
| "learning_rate": 9.775840597758406e-06, | |
| "loss": 0.0011, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.398505603985056, | |
| "grad_norm": 0.10030055046081543, | |
| "learning_rate": 9.80697384806974e-06, | |
| "loss": 0.0019, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.39975093399750933, | |
| "grad_norm": 0.20713728666305542, | |
| "learning_rate": 9.838107098381072e-06, | |
| "loss": 0.0013, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.40099626400996263, | |
| "grad_norm": 0.21006031334400177, | |
| "learning_rate": 9.869240348692405e-06, | |
| "loss": 0.0021, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.40224159402241594, | |
| "grad_norm": 409.08544921875, | |
| "learning_rate": 9.900373599003735e-06, | |
| "loss": 1.8641, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.40348692403486924, | |
| "grad_norm": 0.04977629333734512, | |
| "learning_rate": 9.931506849315069e-06, | |
| "loss": 0.0012, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.40473225404732255, | |
| "grad_norm": 0.06899397075176239, | |
| "learning_rate": 9.9626400996264e-06, | |
| "loss": 0.0011, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.40597758405977585, | |
| "grad_norm": 0.3704112470149994, | |
| "learning_rate": 9.993773349937734e-06, | |
| "loss": 0.0014, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.40722291407222916, | |
| "grad_norm": 0.03436332195997238, | |
| "learning_rate": 1.0024906600249066e-05, | |
| "loss": 0.0011, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.40846824408468246, | |
| "grad_norm": 0.03816661238670349, | |
| "learning_rate": 1.0056039850560399e-05, | |
| "loss": 0.0009, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.40971357409713577, | |
| "grad_norm": 0.053675808012485504, | |
| "learning_rate": 1.008717310087173e-05, | |
| "loss": 0.0014, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.410958904109589, | |
| "grad_norm": 0.024651149287819862, | |
| "learning_rate": 1.0118306351183064e-05, | |
| "loss": 0.0007, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4122042341220423, | |
| "grad_norm": 0.03284426033496857, | |
| "learning_rate": 1.0149439601494398e-05, | |
| "loss": 0.001, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.41344956413449563, | |
| "grad_norm": 0.03643254190683365, | |
| "learning_rate": 1.018057285180573e-05, | |
| "loss": 0.0011, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.41469489414694893, | |
| "grad_norm": 0.02989336848258972, | |
| "learning_rate": 1.0211706102117063e-05, | |
| "loss": 0.0008, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.41594022415940224, | |
| "grad_norm": 0.020424343645572662, | |
| "learning_rate": 1.0242839352428395e-05, | |
| "loss": 0.0007, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.41718555417185554, | |
| "grad_norm": 0.03185396268963814, | |
| "learning_rate": 1.0273972602739726e-05, | |
| "loss": 0.0009, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.41843088418430885, | |
| "grad_norm": 0.022784588858485222, | |
| "learning_rate": 1.0305105853051058e-05, | |
| "loss": 0.0006, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.41967621419676215, | |
| "grad_norm": 0.1662231832742691, | |
| "learning_rate": 1.0336239103362392e-05, | |
| "loss": 0.0018, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.42092154420921546, | |
| "grad_norm": 0.05111798271536827, | |
| "learning_rate": 1.0367372353673723e-05, | |
| "loss": 0.0014, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.42216687422166876, | |
| "grad_norm": 0.024023687466979027, | |
| "learning_rate": 1.0398505603985057e-05, | |
| "loss": 0.0007, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.42341220423412207, | |
| "grad_norm": 0.07146386057138443, | |
| "learning_rate": 1.0429638854296388e-05, | |
| "loss": 0.0019, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4246575342465753, | |
| "grad_norm": 0.01847468502819538, | |
| "learning_rate": 1.0460772104607722e-05, | |
| "loss": 0.0006, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.4259028642590286, | |
| "grad_norm": 0.11909367889165878, | |
| "learning_rate": 1.0491905354919054e-05, | |
| "loss": 0.0009, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.42714819427148193, | |
| "grad_norm": 0.07260438799858093, | |
| "learning_rate": 1.0523038605230387e-05, | |
| "loss": 0.002, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.42839352428393523, | |
| "grad_norm": 113.6898193359375, | |
| "learning_rate": 1.0554171855541719e-05, | |
| "loss": 0.0637, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.42963885429638854, | |
| "grad_norm": 0.018576975911855698, | |
| "learning_rate": 1.0585305105853052e-05, | |
| "loss": 0.0006, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.43088418430884184, | |
| "grad_norm": 0.03654215857386589, | |
| "learning_rate": 1.0616438356164384e-05, | |
| "loss": 0.0007, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.43212951432129515, | |
| "grad_norm": 0.025475049391388893, | |
| "learning_rate": 1.0647571606475716e-05, | |
| "loss": 0.0007, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.43337484433374845, | |
| "grad_norm": 0.02617563307285309, | |
| "learning_rate": 1.067870485678705e-05, | |
| "loss": 0.0008, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.43462017434620176, | |
| "grad_norm": 0.07997260987758636, | |
| "learning_rate": 1.0709838107098381e-05, | |
| "loss": 0.0016, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.43586550435865506, | |
| "grad_norm": 0.020727328956127167, | |
| "learning_rate": 1.0740971357409714e-05, | |
| "loss": 0.0007, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.43711083437110837, | |
| "grad_norm": 0.02753385342657566, | |
| "learning_rate": 1.0772104607721046e-05, | |
| "loss": 0.0007, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.4383561643835616, | |
| "grad_norm": 0.04742880165576935, | |
| "learning_rate": 1.080323785803238e-05, | |
| "loss": 0.0009, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.4396014943960149, | |
| "grad_norm": 0.03920525684952736, | |
| "learning_rate": 1.0834371108343711e-05, | |
| "loss": 0.0011, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.44084682440846823, | |
| "grad_norm": 0.04735913872718811, | |
| "learning_rate": 1.0865504358655045e-05, | |
| "loss": 0.0012, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.44209215442092153, | |
| "grad_norm": 0.028404802083969116, | |
| "learning_rate": 1.0896637608966377e-05, | |
| "loss": 0.0009, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.44333748443337484, | |
| "grad_norm": 0.02533857710659504, | |
| "learning_rate": 1.092777085927771e-05, | |
| "loss": 0.0006, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.44458281444582815, | |
| "grad_norm": 0.04108303785324097, | |
| "learning_rate": 1.0958904109589042e-05, | |
| "loss": 0.0013, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.44582814445828145, | |
| "grad_norm": 0.03464365378022194, | |
| "learning_rate": 1.0990037359900373e-05, | |
| "loss": 0.0009, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.44707347447073476, | |
| "grad_norm": 0.030825745314359665, | |
| "learning_rate": 1.1021170610211707e-05, | |
| "loss": 0.0008, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.44831880448318806, | |
| "grad_norm": 0.04480734467506409, | |
| "learning_rate": 1.1052303860523039e-05, | |
| "loss": 0.0012, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.44956413449564137, | |
| "grad_norm": 0.02541348710656166, | |
| "learning_rate": 1.1083437110834372e-05, | |
| "loss": 0.0008, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.45080946450809467, | |
| "grad_norm": 0.02149001508951187, | |
| "learning_rate": 1.1114570361145704e-05, | |
| "loss": 0.0006, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4520547945205479, | |
| "grad_norm": 0.05121343955397606, | |
| "learning_rate": 1.1145703611457037e-05, | |
| "loss": 0.0015, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.4533001245330012, | |
| "grad_norm": 0.022881271317601204, | |
| "learning_rate": 1.1176836861768369e-05, | |
| "loss": 0.0007, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 0.029813582077622414, | |
| "learning_rate": 1.1207970112079703e-05, | |
| "loss": 0.0007, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.45579078455790784, | |
| "grad_norm": 0.0214352048933506, | |
| "learning_rate": 1.1239103362391034e-05, | |
| "loss": 0.0007, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.45703611457036114, | |
| "grad_norm": 0.04457417130470276, | |
| "learning_rate": 1.1270236612702368e-05, | |
| "loss": 0.0008, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.45828144458281445, | |
| "grad_norm": 0.019106173887848854, | |
| "learning_rate": 1.1301369863013698e-05, | |
| "loss": 0.0006, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.45952677459526775, | |
| "grad_norm": 0.022846408188343048, | |
| "learning_rate": 1.1332503113325031e-05, | |
| "loss": 0.0006, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.46077210460772106, | |
| "grad_norm": 0.018946994096040726, | |
| "learning_rate": 1.1363636363636365e-05, | |
| "loss": 0.0006, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.46201743462017436, | |
| "grad_norm": 0.021404925733804703, | |
| "learning_rate": 1.1394769613947696e-05, | |
| "loss": 0.0006, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.46326276463262767, | |
| "grad_norm": 0.01195521280169487, | |
| "learning_rate": 1.142590286425903e-05, | |
| "loss": 0.0004, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.46450809464508097, | |
| "grad_norm": 0.03864084184169769, | |
| "learning_rate": 1.1457036114570362e-05, | |
| "loss": 0.001, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.4657534246575342, | |
| "grad_norm": 0.058303095400333405, | |
| "learning_rate": 1.1488169364881695e-05, | |
| "loss": 0.0012, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4669987546699875, | |
| "grad_norm": 0.013412773609161377, | |
| "learning_rate": 1.1519302615193027e-05, | |
| "loss": 0.0004, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.46824408468244083, | |
| "grad_norm": 0.02416684851050377, | |
| "learning_rate": 1.155043586550436e-05, | |
| "loss": 0.0007, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.46948941469489414, | |
| "grad_norm": 0.016587672755122185, | |
| "learning_rate": 1.1581569115815692e-05, | |
| "loss": 0.0005, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.47073474470734744, | |
| "grad_norm": 0.020129237323999405, | |
| "learning_rate": 1.1612702366127025e-05, | |
| "loss": 0.0006, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.47198007471980075, | |
| "grad_norm": 0.2290887087583542, | |
| "learning_rate": 1.1643835616438355e-05, | |
| "loss": 0.0013, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.47322540473225405, | |
| "grad_norm": 0.0186260174959898, | |
| "learning_rate": 1.1674968866749689e-05, | |
| "loss": 0.0006, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.47447073474470736, | |
| "grad_norm": 0.03915928676724434, | |
| "learning_rate": 1.170610211706102e-05, | |
| "loss": 0.0009, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.47571606475716066, | |
| "grad_norm": 0.024174867197871208, | |
| "learning_rate": 1.1737235367372354e-05, | |
| "loss": 0.0006, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.47696139476961397, | |
| "grad_norm": 0.06258780509233475, | |
| "learning_rate": 1.1768368617683686e-05, | |
| "loss": 0.0012, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.47820672478206727, | |
| "grad_norm": 0.0187270175665617, | |
| "learning_rate": 1.179950186799502e-05, | |
| "loss": 0.0006, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4794520547945205, | |
| "grad_norm": 0.036254920065402985, | |
| "learning_rate": 1.1830635118306351e-05, | |
| "loss": 0.0011, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.4806973848069738, | |
| "grad_norm": 0.04100683704018593, | |
| "learning_rate": 1.1861768368617684e-05, | |
| "loss": 0.0008, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.48194271481942713, | |
| "grad_norm": 0.023180831223726273, | |
| "learning_rate": 1.1892901618929018e-05, | |
| "loss": 0.0007, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.48318804483188044, | |
| "grad_norm": 36.136348724365234, | |
| "learning_rate": 1.192403486924035e-05, | |
| "loss": 4.5358, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.48443337484433374, | |
| "grad_norm": 0.06236216425895691, | |
| "learning_rate": 1.1955168119551683e-05, | |
| "loss": 0.0013, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.48567870485678705, | |
| "grad_norm": 0.11113505810499191, | |
| "learning_rate": 1.1986301369863013e-05, | |
| "loss": 0.0014, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.48692403486924035, | |
| "grad_norm": 0.028809353709220886, | |
| "learning_rate": 1.2017434620174347e-05, | |
| "loss": 0.0006, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.48816936488169366, | |
| "grad_norm": 0.04308629035949707, | |
| "learning_rate": 1.2048567870485678e-05, | |
| "loss": 0.001, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.48941469489414696, | |
| "grad_norm": 0.03488301858305931, | |
| "learning_rate": 1.2079701120797012e-05, | |
| "loss": 0.001, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.49066002490660027, | |
| "grad_norm": 0.03795866668224335, | |
| "learning_rate": 1.2110834371108344e-05, | |
| "loss": 0.0009, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.4919053549190536, | |
| "grad_norm": 179.07867431640625, | |
| "learning_rate": 1.2141967621419677e-05, | |
| "loss": 0.306, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.4931506849315068, | |
| "grad_norm": 0.07366206496953964, | |
| "learning_rate": 1.2173100871731009e-05, | |
| "loss": 0.0016, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.4943960149439601, | |
| "grad_norm": 0.1270761936903, | |
| "learning_rate": 1.2204234122042342e-05, | |
| "loss": 0.0023, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.49564134495641343, | |
| "grad_norm": 0.1619614213705063, | |
| "learning_rate": 1.2235367372353674e-05, | |
| "loss": 0.0025, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.49688667496886674, | |
| "grad_norm": 0.027039946988224983, | |
| "learning_rate": 1.2266500622665007e-05, | |
| "loss": 0.0005, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.49813200498132004, | |
| "grad_norm": 0.012688295915722847, | |
| "learning_rate": 1.2297633872976339e-05, | |
| "loss": 0.0003, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.49937733499377335, | |
| "grad_norm": 0.04193650931119919, | |
| "learning_rate": 1.2328767123287671e-05, | |
| "loss": 0.001, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.5006226650062267, | |
| "grad_norm": 0.2457994669675827, | |
| "learning_rate": 1.2359900373599004e-05, | |
| "loss": 0.0033, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.50186799501868, | |
| "grad_norm": 0.07151038944721222, | |
| "learning_rate": 1.2391033623910336e-05, | |
| "loss": 0.0012, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.5031133250311333, | |
| "grad_norm": 0.03706686571240425, | |
| "learning_rate": 1.242216687422167e-05, | |
| "loss": 0.001, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5043586550435866, | |
| "grad_norm": 0.03082493133842945, | |
| "learning_rate": 1.2453300124533001e-05, | |
| "loss": 0.0008, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.5056039850560399, | |
| "grad_norm": 0.02312391996383667, | |
| "learning_rate": 1.2484433374844335e-05, | |
| "loss": 0.0007, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5068493150684932, | |
| "grad_norm": 43.44374084472656, | |
| "learning_rate": 1.2515566625155666e-05, | |
| "loss": 4.0239, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.5080946450809465, | |
| "grad_norm": 0.04549500346183777, | |
| "learning_rate": 1.2546699875467e-05, | |
| "loss": 0.0011, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5093399750933998, | |
| "grad_norm": 0.44390103220939636, | |
| "learning_rate": 1.2577833125778332e-05, | |
| "loss": 0.0017, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.5105853051058531, | |
| "grad_norm": 0.017668342217803, | |
| "learning_rate": 1.2608966376089665e-05, | |
| "loss": 0.0004, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5118306351183064, | |
| "grad_norm": 0.02797042578458786, | |
| "learning_rate": 1.2640099626400997e-05, | |
| "loss": 0.0005, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.5130759651307597, | |
| "grad_norm": 0.05557764694094658, | |
| "learning_rate": 1.267123287671233e-05, | |
| "loss": 0.0011, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5143212951432129, | |
| "grad_norm": 0.028871331363916397, | |
| "learning_rate": 1.2702366127023662e-05, | |
| "loss": 0.0007, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.5155666251556662, | |
| "grad_norm": 0.04884202778339386, | |
| "learning_rate": 1.2733499377334995e-05, | |
| "loss": 0.001, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5168119551681195, | |
| "grad_norm": 0.014481289312243462, | |
| "learning_rate": 1.2764632627646327e-05, | |
| "loss": 0.0004, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.5180572851805728, | |
| "grad_norm": 0.08000053465366364, | |
| "learning_rate": 1.279576587795766e-05, | |
| "loss": 0.0015, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.5193026151930261, | |
| "grad_norm": 0.036073487251996994, | |
| "learning_rate": 1.2826899128268992e-05, | |
| "loss": 0.0007, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.5205479452054794, | |
| "grad_norm": 0.08941499143838882, | |
| "learning_rate": 1.2858032378580322e-05, | |
| "loss": 0.0015, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5217932752179327, | |
| "grad_norm": 0.06853260844945908, | |
| "learning_rate": 1.2889165628891656e-05, | |
| "loss": 0.0013, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.523038605230386, | |
| "grad_norm": 0.026791630312800407, | |
| "learning_rate": 1.2920298879202988e-05, | |
| "loss": 0.0007, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5242839352428393, | |
| "grad_norm": 0.3121366500854492, | |
| "learning_rate": 1.2951432129514321e-05, | |
| "loss": 0.0039, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.5255292652552926, | |
| "grad_norm": 0.02174542099237442, | |
| "learning_rate": 1.2982565379825653e-05, | |
| "loss": 0.0006, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.526774595267746, | |
| "grad_norm": 0.053185317665338516, | |
| "learning_rate": 1.3013698630136986e-05, | |
| "loss": 0.0011, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.5280199252801993, | |
| "grad_norm": 0.033572856336832047, | |
| "learning_rate": 1.3044831880448318e-05, | |
| "loss": 0.0009, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5292652552926526, | |
| "grad_norm": 0.0287881251424551, | |
| "learning_rate": 1.3075965130759652e-05, | |
| "loss": 0.0008, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.5305105853051059, | |
| "grad_norm": 0.029981469735503197, | |
| "learning_rate": 1.3107098381070983e-05, | |
| "loss": 0.0006, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5317559153175592, | |
| "grad_norm": 0.028788315132260323, | |
| "learning_rate": 1.3138231631382317e-05, | |
| "loss": 0.0005, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.5330012453300125, | |
| "grad_norm": 0.021008843556046486, | |
| "learning_rate": 1.316936488169365e-05, | |
| "loss": 0.0005, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.5342465753424658, | |
| "grad_norm": 0.04118547961115837, | |
| "learning_rate": 1.3200498132004982e-05, | |
| "loss": 0.001, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.5354919053549191, | |
| "grad_norm": 0.012453455477952957, | |
| "learning_rate": 1.3231631382316315e-05, | |
| "loss": 0.0003, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5367372353673724, | |
| "grad_norm": 0.06938812136650085, | |
| "learning_rate": 1.3262764632627647e-05, | |
| "loss": 0.0011, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.5379825653798257, | |
| "grad_norm": 0.017569739371538162, | |
| "learning_rate": 1.329389788293898e-05, | |
| "loss": 0.0005, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.539227895392279, | |
| "grad_norm": 0.026109851896762848, | |
| "learning_rate": 1.3325031133250312e-05, | |
| "loss": 0.0006, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.5404732254047323, | |
| "grad_norm": 0.015702908858656883, | |
| "learning_rate": 1.3356164383561646e-05, | |
| "loss": 0.0004, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5417185554171855, | |
| "grad_norm": 0.025982121005654335, | |
| "learning_rate": 1.3387297633872977e-05, | |
| "loss": 0.0007, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.5429638854296388, | |
| "grad_norm": 0.06682372093200684, | |
| "learning_rate": 1.3418430884184311e-05, | |
| "loss": 0.0013, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5442092154420921, | |
| "grad_norm": 0.016124481335282326, | |
| "learning_rate": 1.3449564134495643e-05, | |
| "loss": 0.0005, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 0.018914785236120224, | |
| "learning_rate": 1.3480697384806976e-05, | |
| "loss": 0.0005, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5466998754669987, | |
| "grad_norm": 0.01492242980748415, | |
| "learning_rate": 1.3511830635118308e-05, | |
| "loss": 0.0004, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.547945205479452, | |
| "grad_norm": 0.06164323166012764, | |
| "learning_rate": 1.3542963885429638e-05, | |
| "loss": 0.0011, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5491905354919053, | |
| "grad_norm": 0.07254376262426376, | |
| "learning_rate": 1.3574097135740971e-05, | |
| "loss": 0.0015, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.5504358655043586, | |
| "grad_norm": 0.09924010187387466, | |
| "learning_rate": 1.3605230386052303e-05, | |
| "loss": 0.0019, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5516811955168119, | |
| "grad_norm": 0.01098677609115839, | |
| "learning_rate": 1.3636363636363637e-05, | |
| "loss": 0.0003, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.5529265255292652, | |
| "grad_norm": 0.030665650963783264, | |
| "learning_rate": 1.3667496886674968e-05, | |
| "loss": 0.001, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5541718555417185, | |
| "grad_norm": 0.04467572271823883, | |
| "learning_rate": 1.3698630136986302e-05, | |
| "loss": 0.001, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.5554171855541719, | |
| "grad_norm": 0.01499516423791647, | |
| "learning_rate": 1.3729763387297633e-05, | |
| "loss": 0.0004, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5566625155666252, | |
| "grad_norm": 0.01595112681388855, | |
| "learning_rate": 1.3760896637608967e-05, | |
| "loss": 0.0005, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.5579078455790785, | |
| "grad_norm": 0.02192739024758339, | |
| "learning_rate": 1.3792029887920299e-05, | |
| "loss": 0.0006, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5591531755915318, | |
| "grad_norm": 0.0317448228597641, | |
| "learning_rate": 1.3823163138231632e-05, | |
| "loss": 0.0006, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.5603985056039851, | |
| "grad_norm": 0.01051297876983881, | |
| "learning_rate": 1.3854296388542964e-05, | |
| "loss": 0.0003, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5616438356164384, | |
| "grad_norm": 0.014249038882553577, | |
| "learning_rate": 1.3885429638854297e-05, | |
| "loss": 0.0004, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.5628891656288917, | |
| "grad_norm": 0.026663757860660553, | |
| "learning_rate": 1.3916562889165629e-05, | |
| "loss": 0.0007, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.564134495641345, | |
| "grad_norm": 0.018503081053495407, | |
| "learning_rate": 1.3947696139476963e-05, | |
| "loss": 0.0005, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.5653798256537983, | |
| "grad_norm": 0.013995744287967682, | |
| "learning_rate": 1.3978829389788294e-05, | |
| "loss": 0.0004, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5666251556662516, | |
| "grad_norm": 0.06841859221458435, | |
| "learning_rate": 1.4009962640099628e-05, | |
| "loss": 0.0012, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.5678704856787049, | |
| "grad_norm": 0.052551478147506714, | |
| "learning_rate": 1.404109589041096e-05, | |
| "loss": 0.0009, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5691158156911582, | |
| "grad_norm": 0.01047549955546856, | |
| "learning_rate": 1.4072229140722293e-05, | |
| "loss": 0.0004, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.5703611457036114, | |
| "grad_norm": 0.01352018117904663, | |
| "learning_rate": 1.4103362391033625e-05, | |
| "loss": 0.0004, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5716064757160647, | |
| "grad_norm": 0.023181084543466568, | |
| "learning_rate": 1.4134495641344958e-05, | |
| "loss": 0.0006, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.572851805728518, | |
| "grad_norm": 0.01287688035517931, | |
| "learning_rate": 1.4165628891656292e-05, | |
| "loss": 0.0004, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5740971357409713, | |
| "grad_norm": 0.013366766273975372, | |
| "learning_rate": 1.4196762141967623e-05, | |
| "loss": 0.0004, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5753424657534246, | |
| "grad_norm": 0.01742659881711006, | |
| "learning_rate": 1.4227895392278957e-05, | |
| "loss": 0.0005, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5765877957658779, | |
| "grad_norm": 0.018992751836776733, | |
| "learning_rate": 1.4259028642590285e-05, | |
| "loss": 0.0004, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.5778331257783312, | |
| "grad_norm": 0.013830466195940971, | |
| "learning_rate": 1.4290161892901619e-05, | |
| "loss": 0.0005, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5790784557907845, | |
| "grad_norm": 0.2647791802883148, | |
| "learning_rate": 1.432129514321295e-05, | |
| "loss": 0.0015, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5803237858032378, | |
| "grad_norm": 0.05277368426322937, | |
| "learning_rate": 1.4352428393524284e-05, | |
| "loss": 0.0014, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5815691158156912, | |
| "grad_norm": 0.04205463454127312, | |
| "learning_rate": 1.4383561643835617e-05, | |
| "loss": 0.0011, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.5828144458281445, | |
| "grad_norm": 0.01518219243735075, | |
| "learning_rate": 1.4414694894146949e-05, | |
| "loss": 0.0004, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5840597758405978, | |
| "grad_norm": 0.011395282112061977, | |
| "learning_rate": 1.4445828144458282e-05, | |
| "loss": 0.0004, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.5853051058530511, | |
| "grad_norm": 0.014821592718362808, | |
| "learning_rate": 1.4476961394769614e-05, | |
| "loss": 0.0005, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5865504358655044, | |
| "grad_norm": 0.01130912359803915, | |
| "learning_rate": 1.4508094645080948e-05, | |
| "loss": 0.0004, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.5877957658779577, | |
| "grad_norm": 0.02256758324801922, | |
| "learning_rate": 1.453922789539228e-05, | |
| "loss": 0.0006, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.589041095890411, | |
| "grad_norm": 0.1458512842655182, | |
| "learning_rate": 1.4570361145703613e-05, | |
| "loss": 0.0014, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5902864259028643, | |
| "grad_norm": 0.07600380480289459, | |
| "learning_rate": 1.4601494396014945e-05, | |
| "loss": 0.0016, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5915317559153176, | |
| "grad_norm": 0.007826216518878937, | |
| "learning_rate": 1.4632627646326278e-05, | |
| "loss": 0.0002, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5927770859277709, | |
| "grad_norm": 0.013695678673684597, | |
| "learning_rate": 1.466376089663761e-05, | |
| "loss": 0.0004, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5940224159402242, | |
| "grad_norm": 0.034744229167699814, | |
| "learning_rate": 1.4694894146948943e-05, | |
| "loss": 0.0009, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.5952677459526775, | |
| "grad_norm": 0.015751633793115616, | |
| "learning_rate": 1.4726027397260275e-05, | |
| "loss": 0.0005, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5965130759651308, | |
| "grad_norm": 0.01636291854083538, | |
| "learning_rate": 1.4757160647571608e-05, | |
| "loss": 0.0004, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.597758405977584, | |
| "grad_norm": 0.019713019952178, | |
| "learning_rate": 1.478829389788294e-05, | |
| "loss": 0.0006, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5990037359900373, | |
| "grad_norm": 0.020456036552786827, | |
| "learning_rate": 1.4819427148194274e-05, | |
| "loss": 0.0005, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.6002490660024906, | |
| "grad_norm": 0.027187447994947433, | |
| "learning_rate": 1.4850560398505605e-05, | |
| "loss": 0.0006, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.6014943960149439, | |
| "grad_norm": 0.024321310222148895, | |
| "learning_rate": 1.4881693648816939e-05, | |
| "loss": 0.0007, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.6027397260273972, | |
| "grad_norm": 0.01486989390105009, | |
| "learning_rate": 1.491282689912827e-05, | |
| "loss": 0.0004, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.6039850560398505, | |
| "grad_norm": 0.022661667317152023, | |
| "learning_rate": 1.49439601494396e-05, | |
| "loss": 0.0007, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.6052303860523038, | |
| "grad_norm": 0.01003281120210886, | |
| "learning_rate": 1.4975093399750934e-05, | |
| "loss": 0.0003, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.6064757160647571, | |
| "grad_norm": 0.01938827708363533, | |
| "learning_rate": 1.5006226650062266e-05, | |
| "loss": 0.0005, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.6077210460772104, | |
| "grad_norm": 0.058401111513376236, | |
| "learning_rate": 1.50373599003736e-05, | |
| "loss": 0.0006, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.6089663760896638, | |
| "grad_norm": 0.008321065455675125, | |
| "learning_rate": 1.5068493150684931e-05, | |
| "loss": 0.0003, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.6102117061021171, | |
| "grad_norm": 0.01695171184837818, | |
| "learning_rate": 1.5099626400996264e-05, | |
| "loss": 0.0005, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6114570361145704, | |
| "grad_norm": 0.008688063360750675, | |
| "learning_rate": 1.5130759651307596e-05, | |
| "loss": 0.0003, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.6127023661270237, | |
| "grad_norm": 0.009470910765230656, | |
| "learning_rate": 1.516189290161893e-05, | |
| "loss": 0.0003, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.613947696139477, | |
| "grad_norm": 0.010343602858483791, | |
| "learning_rate": 1.5193026151930261e-05, | |
| "loss": 0.0003, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.6151930261519303, | |
| "grad_norm": 0.031660452485084534, | |
| "learning_rate": 1.5224159402241595e-05, | |
| "loss": 0.0006, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.6164383561643836, | |
| "grad_norm": 0.02456934005022049, | |
| "learning_rate": 1.5255292652552926e-05, | |
| "loss": 0.0005, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.6176836861768369, | |
| "grad_norm": 0.022074950858950615, | |
| "learning_rate": 1.5286425902864258e-05, | |
| "loss": 0.0006, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.6189290161892902, | |
| "grad_norm": 0.013984983786940575, | |
| "learning_rate": 1.5317559153175593e-05, | |
| "loss": 0.0004, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.6201743462017435, | |
| "grad_norm": 0.02767989970743656, | |
| "learning_rate": 1.5348692403486925e-05, | |
| "loss": 0.0004, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.6214196762141968, | |
| "grad_norm": 0.011965448036789894, | |
| "learning_rate": 1.5379825653798257e-05, | |
| "loss": 0.0003, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.6226650062266501, | |
| "grad_norm": 0.018284225836396217, | |
| "learning_rate": 1.541095890410959e-05, | |
| "loss": 0.0005, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6239103362391034, | |
| "grad_norm": 0.010995174758136272, | |
| "learning_rate": 1.5442092154420924e-05, | |
| "loss": 0.0002, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.6251556662515566, | |
| "grad_norm": 0.008704639971256256, | |
| "learning_rate": 1.5473225404732256e-05, | |
| "loss": 0.0002, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6264009962640099, | |
| "grad_norm": 0.030416160821914673, | |
| "learning_rate": 1.5504358655043587e-05, | |
| "loss": 0.0007, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.6276463262764632, | |
| "grad_norm": 0.02834182232618332, | |
| "learning_rate": 1.5535491905354922e-05, | |
| "loss": 0.0007, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6288916562889165, | |
| "grad_norm": 0.008636824786663055, | |
| "learning_rate": 1.5566625155666254e-05, | |
| "loss": 0.0003, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.6301369863013698, | |
| "grad_norm": 0.037112049758434296, | |
| "learning_rate": 1.5597758405977586e-05, | |
| "loss": 0.0009, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6313823163138231, | |
| "grad_norm": 0.012123404070734978, | |
| "learning_rate": 1.5628891656288914e-05, | |
| "loss": 0.0003, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.6326276463262764, | |
| "grad_norm": 36.184539794921875, | |
| "learning_rate": 1.566002490660025e-05, | |
| "loss": 0.0304, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6338729763387297, | |
| "grad_norm": 0.03620361536741257, | |
| "learning_rate": 1.569115815691158e-05, | |
| "loss": 0.0009, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.635118306351183, | |
| "grad_norm": 0.01849571242928505, | |
| "learning_rate": 1.5722291407222913e-05, | |
| "loss": 0.0005, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6363636363636364, | |
| "grad_norm": 0.010837621986865997, | |
| "learning_rate": 1.5753424657534248e-05, | |
| "loss": 0.0003, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.6376089663760897, | |
| "grad_norm": 0.017697712406516075, | |
| "learning_rate": 1.578455790784558e-05, | |
| "loss": 0.0004, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.638854296388543, | |
| "grad_norm": 0.00896854791790247, | |
| "learning_rate": 1.581569115815691e-05, | |
| "loss": 0.0003, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.6400996264009963, | |
| "grad_norm": 0.009376812726259232, | |
| "learning_rate": 1.5846824408468243e-05, | |
| "loss": 0.0003, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.6413449564134496, | |
| "grad_norm": 0.03261823207139969, | |
| "learning_rate": 1.587795765877958e-05, | |
| "loss": 0.0006, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.6425902864259029, | |
| "grad_norm": 71.34445190429688, | |
| "learning_rate": 1.590909090909091e-05, | |
| "loss": 4.0159, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.6438356164383562, | |
| "grad_norm": 0.02780863456428051, | |
| "learning_rate": 1.5940224159402242e-05, | |
| "loss": 0.0006, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.6450809464508095, | |
| "grad_norm": 0.008818407543003559, | |
| "learning_rate": 1.5971357409713574e-05, | |
| "loss": 0.0003, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.6463262764632628, | |
| "grad_norm": 0.030920347198843956, | |
| "learning_rate": 1.600249066002491e-05, | |
| "loss": 0.0007, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.6475716064757161, | |
| "grad_norm": 0.018262671306729317, | |
| "learning_rate": 1.603362391033624e-05, | |
| "loss": 0.0005, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6488169364881694, | |
| "grad_norm": 0.011576538905501366, | |
| "learning_rate": 1.6064757160647572e-05, | |
| "loss": 0.0004, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.6500622665006227, | |
| "grad_norm": 0.010801947675645351, | |
| "learning_rate": 1.6095890410958904e-05, | |
| "loss": 0.0003, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.651307596513076, | |
| "grad_norm": 0.013210455887019634, | |
| "learning_rate": 1.612702366127024e-05, | |
| "loss": 0.0005, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.6525529265255293, | |
| "grad_norm": 0.014238444156944752, | |
| "learning_rate": 1.615815691158157e-05, | |
| "loss": 0.0004, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6537982565379825, | |
| "grad_norm": 0.007543179206550121, | |
| "learning_rate": 1.6189290161892903e-05, | |
| "loss": 0.0002, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.6550435865504358, | |
| "grad_norm": 0.007191088050603867, | |
| "learning_rate": 1.6220423412204234e-05, | |
| "loss": 0.0002, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6562889165628891, | |
| "grad_norm": 0.011641144752502441, | |
| "learning_rate": 1.625155666251557e-05, | |
| "loss": 0.0003, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.6575342465753424, | |
| "grad_norm": 0.018345683813095093, | |
| "learning_rate": 1.62826899128269e-05, | |
| "loss": 0.0005, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6587795765877957, | |
| "grad_norm": 0.3033308684825897, | |
| "learning_rate": 1.6313823163138233e-05, | |
| "loss": 0.0012, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.660024906600249, | |
| "grad_norm": 0.03083566203713417, | |
| "learning_rate": 1.6344956413449565e-05, | |
| "loss": 0.0007, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6612702366127023, | |
| "grad_norm": 0.011249137111008167, | |
| "learning_rate": 1.6376089663760897e-05, | |
| "loss": 0.0003, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.6625155666251556, | |
| "grad_norm": 0.009096617810428143, | |
| "learning_rate": 1.640722291407223e-05, | |
| "loss": 0.0003, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.663760896637609, | |
| "grad_norm": 0.007661182899028063, | |
| "learning_rate": 1.643835616438356e-05, | |
| "loss": 0.0002, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.6650062266500623, | |
| "grad_norm": 0.03464965149760246, | |
| "learning_rate": 1.6469489414694895e-05, | |
| "loss": 0.0006, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6662515566625156, | |
| "grad_norm": 0.017583874985575676, | |
| "learning_rate": 1.6500622665006227e-05, | |
| "loss": 0.0005, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.6674968866749689, | |
| "grad_norm": 0.012846691533923149, | |
| "learning_rate": 1.653175591531756e-05, | |
| "loss": 0.0003, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6687422166874222, | |
| "grad_norm": 0.008167251013219357, | |
| "learning_rate": 1.656288916562889e-05, | |
| "loss": 0.0002, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.6699875466998755, | |
| "grad_norm": 0.09242931753396988, | |
| "learning_rate": 1.6594022415940226e-05, | |
| "loss": 0.0006, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6712328767123288, | |
| "grad_norm": 0.007621095050126314, | |
| "learning_rate": 1.6625155666251557e-05, | |
| "loss": 0.0003, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.6724782067247821, | |
| "grad_norm": 364.0179138183594, | |
| "learning_rate": 1.665628891656289e-05, | |
| "loss": 2.4925, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6737235367372354, | |
| "grad_norm": 0.029700903221964836, | |
| "learning_rate": 1.6687422166874224e-05, | |
| "loss": 0.0005, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.6749688667496887, | |
| "grad_norm": 0.009756062179803848, | |
| "learning_rate": 1.6718555417185556e-05, | |
| "loss": 0.0003, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.676214196762142, | |
| "grad_norm": 0.02434486895799637, | |
| "learning_rate": 1.6749688667496888e-05, | |
| "loss": 0.0007, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.6774595267745953, | |
| "grad_norm": 0.0061378516256809235, | |
| "learning_rate": 1.678082191780822e-05, | |
| "loss": 0.0002, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6787048567870486, | |
| "grad_norm": 0.007974776439368725, | |
| "learning_rate": 1.6811955168119555e-05, | |
| "loss": 0.0002, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.6799501867995019, | |
| "grad_norm": 0.023721277713775635, | |
| "learning_rate": 1.6843088418430886e-05, | |
| "loss": 0.0007, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6811955168119551, | |
| "grad_norm": 0.06722849607467651, | |
| "learning_rate": 1.6874221668742218e-05, | |
| "loss": 0.0014, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.6824408468244084, | |
| "grad_norm": 0.021218659356236458, | |
| "learning_rate": 1.690535491905355e-05, | |
| "loss": 0.0005, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6836861768368617, | |
| "grad_norm": 0.007651370484381914, | |
| "learning_rate": 1.6936488169364885e-05, | |
| "loss": 0.0003, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.684931506849315, | |
| "grad_norm": 0.023434964939951897, | |
| "learning_rate": 1.6967621419676217e-05, | |
| "loss": 0.0004, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6861768368617683, | |
| "grad_norm": 0.010944285430014133, | |
| "learning_rate": 1.699875466998755e-05, | |
| "loss": 0.0002, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.6874221668742216, | |
| "grad_norm": 0.007479478605091572, | |
| "learning_rate": 1.702988792029888e-05, | |
| "loss": 0.0002, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.688667496886675, | |
| "grad_norm": 0.016678282991051674, | |
| "learning_rate": 1.7061021170610212e-05, | |
| "loss": 0.0004, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.6899128268991283, | |
| "grad_norm": 0.008227194659411907, | |
| "learning_rate": 1.7092154420921544e-05, | |
| "loss": 0.0002, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6911581569115816, | |
| "grad_norm": 0.016022512689232826, | |
| "learning_rate": 1.7123287671232875e-05, | |
| "loss": 0.0004, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.6924034869240349, | |
| "grad_norm": 0.01723802089691162, | |
| "learning_rate": 1.715442092154421e-05, | |
| "loss": 0.0004, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6936488169364882, | |
| "grad_norm": 0.007776948623359203, | |
| "learning_rate": 1.7185554171855542e-05, | |
| "loss": 0.0002, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.6948941469489415, | |
| "grad_norm": 0.061478786170482635, | |
| "learning_rate": 1.7216687422166874e-05, | |
| "loss": 0.0004, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6961394769613948, | |
| "grad_norm": 0.030175473541021347, | |
| "learning_rate": 1.7247820672478206e-05, | |
| "loss": 0.0005, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.6973848069738481, | |
| "grad_norm": 0.03586643561720848, | |
| "learning_rate": 1.727895392278954e-05, | |
| "loss": 0.0009, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6986301369863014, | |
| "grad_norm": 0.01669226959347725, | |
| "learning_rate": 1.7310087173100873e-05, | |
| "loss": 0.0004, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.6998754669987547, | |
| "grad_norm": 0.013228816911578178, | |
| "learning_rate": 1.7341220423412205e-05, | |
| "loss": 0.0003, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.701120797011208, | |
| "grad_norm": 0.16547606885433197, | |
| "learning_rate": 1.7372353673723536e-05, | |
| "loss": 0.0014, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.7023661270236613, | |
| "grad_norm": 0.20769615471363068, | |
| "learning_rate": 1.740348692403487e-05, | |
| "loss": 0.0007, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.7036114570361146, | |
| "grad_norm": null, | |
| "learning_rate": 1.740348692403487e-05, | |
| "loss": 3.7559, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.7048567870485679, | |
| "grad_norm": 0.010459132492542267, | |
| "learning_rate": 1.7434620174346203e-05, | |
| "loss": 0.0003, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.7061021170610212, | |
| "grad_norm": 7.497586727142334, | |
| "learning_rate": 1.7465753424657535e-05, | |
| "loss": 0.008, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.7073474470734745, | |
| "grad_norm": 0.011709270067512989, | |
| "learning_rate": 1.7496886674968867e-05, | |
| "loss": 0.0003, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.7085927770859277, | |
| "grad_norm": 0.024786679074168205, | |
| "learning_rate": 1.7528019925280202e-05, | |
| "loss": 0.0005, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.709838107098381, | |
| "grad_norm": 0.007164615672081709, | |
| "learning_rate": 1.7559153175591534e-05, | |
| "loss": 0.0003, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.7110834371108343, | |
| "grad_norm": 0.006929496768862009, | |
| "learning_rate": 1.7590286425902865e-05, | |
| "loss": 0.0002, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.7123287671232876, | |
| "grad_norm": 0.01036135945469141, | |
| "learning_rate": 1.7621419676214197e-05, | |
| "loss": 0.0003, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.7135740971357409, | |
| "grad_norm": 0.01619466207921505, | |
| "learning_rate": 1.7652552926525532e-05, | |
| "loss": 0.0004, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.7148194271481942, | |
| "grad_norm": 0.007037854287773371, | |
| "learning_rate": 1.7683686176836864e-05, | |
| "loss": 0.0002, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.7160647571606475, | |
| "grad_norm": 0.015169711783528328, | |
| "learning_rate": 1.7714819427148192e-05, | |
| "loss": 0.0004, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.7173100871731009, | |
| "grad_norm": 0.014573472552001476, | |
| "learning_rate": 1.7745952677459527e-05, | |
| "loss": 0.0003, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7185554171855542, | |
| "grad_norm": 0.012262790463864803, | |
| "learning_rate": 1.777708592777086e-05, | |
| "loss": 0.0003, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.7198007471980075, | |
| "grad_norm": 0.011037294752895832, | |
| "learning_rate": 1.780821917808219e-05, | |
| "loss": 0.0003, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.7210460772104608, | |
| "grad_norm": 0.012611133977770805, | |
| "learning_rate": 1.7839352428393523e-05, | |
| "loss": 0.0003, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.7222914072229141, | |
| "grad_norm": 0.13023485243320465, | |
| "learning_rate": 1.7870485678704858e-05, | |
| "loss": 0.0009, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7235367372353674, | |
| "grad_norm": 0.006935072597116232, | |
| "learning_rate": 1.790161892901619e-05, | |
| "loss": 0.0002, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.7247820672478207, | |
| "grad_norm": 0.026650428771972656, | |
| "learning_rate": 1.793275217932752e-05, | |
| "loss": 0.0006, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.726027397260274, | |
| "grad_norm": 0.015044482424855232, | |
| "learning_rate": 1.7963885429638856e-05, | |
| "loss": 0.0004, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.019932331517338753, | |
| "learning_rate": 1.7995018679950188e-05, | |
| "loss": 0.0005, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7285180572851806, | |
| "grad_norm": 0.01698875240981579, | |
| "learning_rate": 1.802615193026152e-05, | |
| "loss": 0.0004, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.7297633872976339, | |
| "grad_norm": 0.4486841857433319, | |
| "learning_rate": 1.805728518057285e-05, | |
| "loss": 0.0005, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.7310087173100872, | |
| "grad_norm": 0.01894947700202465, | |
| "learning_rate": 1.8088418430884187e-05, | |
| "loss": 0.0006, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.7322540473225405, | |
| "grad_norm": 0.006948466412723064, | |
| "learning_rate": 1.811955168119552e-05, | |
| "loss": 0.0002, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.7334993773349938, | |
| "grad_norm": 15.503718376159668, | |
| "learning_rate": 1.815068493150685e-05, | |
| "loss": 0.0137, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.7347447073474471, | |
| "grad_norm": 0.021334033459424973, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 0.0006, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7359900373599004, | |
| "grad_norm": 0.02985548786818981, | |
| "learning_rate": 1.8212951432129517e-05, | |
| "loss": 0.0005, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.7372353673723536, | |
| "grad_norm": 0.007480076979845762, | |
| "learning_rate": 1.824408468244085e-05, | |
| "loss": 0.0002, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7384806973848069, | |
| "grad_norm": 0.006202853284776211, | |
| "learning_rate": 1.827521793275218e-05, | |
| "loss": 0.0002, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.7397260273972602, | |
| "grad_norm": 0.020105713978409767, | |
| "learning_rate": 1.8306351183063512e-05, | |
| "loss": 0.0005, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7409713574097135, | |
| "grad_norm": 0.01176950428634882, | |
| "learning_rate": 1.8337484433374848e-05, | |
| "loss": 0.0003, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.7422166874221668, | |
| "grad_norm": 0.02436145208775997, | |
| "learning_rate": 1.836861768368618e-05, | |
| "loss": 0.0005, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7434620174346201, | |
| "grad_norm": 0.015877658501267433, | |
| "learning_rate": 1.839975093399751e-05, | |
| "loss": 0.0004, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.7447073474470735, | |
| "grad_norm": 0.0258621908724308, | |
| "learning_rate": 1.8430884184308843e-05, | |
| "loss": 0.0006, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.7459526774595268, | |
| "grad_norm": 0.0054780724458396435, | |
| "learning_rate": 1.8462017434620175e-05, | |
| "loss": 0.0002, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.7471980074719801, | |
| "grad_norm": 0.01809469237923622, | |
| "learning_rate": 1.8493150684931506e-05, | |
| "loss": 0.0004, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7484433374844334, | |
| "grad_norm": 0.012986347079277039, | |
| "learning_rate": 1.8524283935242838e-05, | |
| "loss": 0.0003, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.7496886674968867, | |
| "grad_norm": 0.004867818206548691, | |
| "learning_rate": 1.8555417185554173e-05, | |
| "loss": 0.0001, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.75093399750934, | |
| "grad_norm": 0.005523454863578081, | |
| "learning_rate": 1.8586550435865505e-05, | |
| "loss": 0.0002, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.7521793275217933, | |
| "grad_norm": 0.009668633341789246, | |
| "learning_rate": 1.8617683686176837e-05, | |
| "loss": 0.0003, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.7534246575342466, | |
| "grad_norm": 0.0070527163334190845, | |
| "learning_rate": 1.864881693648817e-05, | |
| "loss": 0.0002, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.7546699875466999, | |
| "grad_norm": 0.006774348672479391, | |
| "learning_rate": 1.8679950186799504e-05, | |
| "loss": 0.0002, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7559153175591532, | |
| "grad_norm": 0.007995886728167534, | |
| "learning_rate": 1.8711083437110835e-05, | |
| "loss": 0.0002, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.7571606475716065, | |
| "grad_norm": 30.348756790161133, | |
| "learning_rate": 1.8742216687422167e-05, | |
| "loss": 4.172, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7584059775840598, | |
| "grad_norm": 0.01787879690527916, | |
| "learning_rate": 1.87733499377335e-05, | |
| "loss": 0.0004, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.7596513075965131, | |
| "grad_norm": 0.06024169921875, | |
| "learning_rate": 1.8804483188044834e-05, | |
| "loss": 0.0011, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7608966376089664, | |
| "grad_norm": 0.06412393599748611, | |
| "learning_rate": 1.8835616438356166e-05, | |
| "loss": 0.0014, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.7621419676214197, | |
| "grad_norm": 0.01381937600672245, | |
| "learning_rate": 1.8866749688667497e-05, | |
| "loss": 0.0005, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.763387297633873, | |
| "grad_norm": 0.01991051435470581, | |
| "learning_rate": 1.889788293897883e-05, | |
| "loss": 0.0003, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.7646326276463262, | |
| "grad_norm": 0.14104107022285461, | |
| "learning_rate": 1.8929016189290164e-05, | |
| "loss": 0.0026, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.7658779576587795, | |
| "grad_norm": 0.0066263917833566666, | |
| "learning_rate": 1.8960149439601496e-05, | |
| "loss": 0.0002, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.7671232876712328, | |
| "grad_norm": 0.006442869547754526, | |
| "learning_rate": 1.8991282689912828e-05, | |
| "loss": 0.0002, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7683686176836861, | |
| "grad_norm": 0.20366807281970978, | |
| "learning_rate": 1.9022415940224163e-05, | |
| "loss": 0.0028, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.7696139476961394, | |
| "grad_norm": 0.16002459824085236, | |
| "learning_rate": 1.9053549190535495e-05, | |
| "loss": 0.0023, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.7708592777085927, | |
| "grad_norm": 0.007126240525394678, | |
| "learning_rate": 1.9084682440846827e-05, | |
| "loss": 0.0002, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.772104607721046, | |
| "grad_norm": 0.22348296642303467, | |
| "learning_rate": 1.9115815691158155e-05, | |
| "loss": 0.0034, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7733499377334994, | |
| "grad_norm": 0.01117734331637621, | |
| "learning_rate": 1.914694894146949e-05, | |
| "loss": 0.0003, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.7745952677459527, | |
| "grad_norm": 0.017832182347774506, | |
| "learning_rate": 1.9178082191780822e-05, | |
| "loss": 0.0004, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.775840597758406, | |
| "grad_norm": 0.10084803402423859, | |
| "learning_rate": 1.9209215442092154e-05, | |
| "loss": 0.002, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.7770859277708593, | |
| "grad_norm": 0.0404939204454422, | |
| "learning_rate": 1.924034869240349e-05, | |
| "loss": 0.0009, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7783312577833126, | |
| "grad_norm": 0.006709231995046139, | |
| "learning_rate": 1.927148194271482e-05, | |
| "loss": 0.0002, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.7795765877957659, | |
| "grad_norm": 0.006246612407267094, | |
| "learning_rate": 1.9302615193026152e-05, | |
| "loss": 0.0002, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7808219178082192, | |
| "grad_norm": 0.007551430258899927, | |
| "learning_rate": 1.9333748443337484e-05, | |
| "loss": 0.0002, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.7820672478206725, | |
| "grad_norm": 0.010194691829383373, | |
| "learning_rate": 1.936488169364882e-05, | |
| "loss": 0.0002, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.7833125778331258, | |
| "grad_norm": 0.007259845733642578, | |
| "learning_rate": 1.939601494396015e-05, | |
| "loss": 0.0002, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.7845579078455791, | |
| "grad_norm": 0.6343588829040527, | |
| "learning_rate": 1.9427148194271483e-05, | |
| "loss": 0.0014, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7858032378580324, | |
| "grad_norm": 0.004895548801869154, | |
| "learning_rate": 1.9458281444582814e-05, | |
| "loss": 0.0001, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.7870485678704857, | |
| "grad_norm": 0.023873023688793182, | |
| "learning_rate": 1.948941469489415e-05, | |
| "loss": 0.0006, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.788293897882939, | |
| "grad_norm": 0.06282692402601242, | |
| "learning_rate": 1.952054794520548e-05, | |
| "loss": 0.0014, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.7895392278953923, | |
| "grad_norm": 0.01570272073149681, | |
| "learning_rate": 1.9551681195516813e-05, | |
| "loss": 0.0005, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7907845579078456, | |
| "grad_norm": 0.004377361387014389, | |
| "learning_rate": 1.9582814445828145e-05, | |
| "loss": 0.0001, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.7920298879202988, | |
| "grad_norm": 0.005370027385652065, | |
| "learning_rate": 1.961394769613948e-05, | |
| "loss": 0.0001, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.7932752179327521, | |
| "grad_norm": 0.016998134553432465, | |
| "learning_rate": 1.964508094645081e-05, | |
| "loss": 0.0003, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.7945205479452054, | |
| "grad_norm": 0.02801138535141945, | |
| "learning_rate": 1.9676214196762143e-05, | |
| "loss": 0.0007, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7957658779576587, | |
| "grad_norm": 0.007101301569491625, | |
| "learning_rate": 1.9707347447073475e-05, | |
| "loss": 0.0002, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.797011207970112, | |
| "grad_norm": 0.007805291563272476, | |
| "learning_rate": 1.973848069738481e-05, | |
| "loss": 0.0002, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7982565379825654, | |
| "grad_norm": 0.01866893284022808, | |
| "learning_rate": 1.9769613947696142e-05, | |
| "loss": 0.0004, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.7995018679950187, | |
| "grad_norm": 0.008472064509987831, | |
| "learning_rate": 1.980074719800747e-05, | |
| "loss": 0.0002, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.800747198007472, | |
| "grad_norm": 0.011058184318244457, | |
| "learning_rate": 1.9831880448318805e-05, | |
| "loss": 0.0004, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.8019925280199253, | |
| "grad_norm": 0.01657005585730076, | |
| "learning_rate": 1.9863013698630137e-05, | |
| "loss": 0.0004, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.8032378580323786, | |
| "grad_norm": 0.007903863675892353, | |
| "learning_rate": 1.989414694894147e-05, | |
| "loss": 0.0002, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.8044831880448319, | |
| "grad_norm": 0.008648911491036415, | |
| "learning_rate": 1.99252801992528e-05, | |
| "loss": 0.0003, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.8057285180572852, | |
| "grad_norm": 0.005954551976174116, | |
| "learning_rate": 1.9956413449564136e-05, | |
| "loss": 0.0001, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.8069738480697385, | |
| "grad_norm": 0.012240339070558548, | |
| "learning_rate": 1.9987546699875468e-05, | |
| "loss": 0.0004, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.8082191780821918, | |
| "grad_norm": 0.012209310196340084, | |
| "learning_rate": 2.00186799501868e-05, | |
| "loss": 0.0004, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.8094645080946451, | |
| "grad_norm": 0.013876602053642273, | |
| "learning_rate": 2.004981320049813e-05, | |
| "loss": 0.0004, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8107098381070984, | |
| "grad_norm": 0.006682861130684614, | |
| "learning_rate": 2.0080946450809466e-05, | |
| "loss": 0.0002, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.8119551681195517, | |
| "grad_norm": 0.01869480311870575, | |
| "learning_rate": 2.0112079701120798e-05, | |
| "loss": 0.0004, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.813200498132005, | |
| "grad_norm": 0.006386366207152605, | |
| "learning_rate": 2.014321295143213e-05, | |
| "loss": 0.0002, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.8144458281444583, | |
| "grad_norm": 0.031244048848748207, | |
| "learning_rate": 2.017434620174346e-05, | |
| "loss": 0.0007, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.8156911581569116, | |
| "grad_norm": 0.005839107092469931, | |
| "learning_rate": 2.0205479452054797e-05, | |
| "loss": 0.0002, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.8169364881693649, | |
| "grad_norm": 0.012466920539736748, | |
| "learning_rate": 2.023661270236613e-05, | |
| "loss": 0.0003, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.8181818181818182, | |
| "grad_norm": 0.011677310802042484, | |
| "learning_rate": 2.026774595267746e-05, | |
| "loss": 0.0003, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.8194271481942715, | |
| "grad_norm": 325.08514404296875, | |
| "learning_rate": 2.0298879202988795e-05, | |
| "loss": 0.185, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.8206724782067247, | |
| "grad_norm": 0.00978070218116045, | |
| "learning_rate": 2.0330012453300127e-05, | |
| "loss": 0.0003, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.821917808219178, | |
| "grad_norm": 0.009361130185425282, | |
| "learning_rate": 2.036114570361146e-05, | |
| "loss": 0.0003, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8231631382316313, | |
| "grad_norm": 0.007570465561002493, | |
| "learning_rate": 2.039227895392279e-05, | |
| "loss": 0.0003, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.8244084682440846, | |
| "grad_norm": 0.00575603824108839, | |
| "learning_rate": 2.0423412204234126e-05, | |
| "loss": 0.0002, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.825653798256538, | |
| "grad_norm": 0.014008327387273312, | |
| "learning_rate": 2.0454545454545457e-05, | |
| "loss": 0.0004, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.8268991282689913, | |
| "grad_norm": 0.00547524681314826, | |
| "learning_rate": 2.048567870485679e-05, | |
| "loss": 0.0001, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8281444582814446, | |
| "grad_norm": 0.026367267593741417, | |
| "learning_rate": 2.051681195516812e-05, | |
| "loss": 0.0005, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.8293897882938979, | |
| "grad_norm": 0.0041604661382734776, | |
| "learning_rate": 2.0547945205479453e-05, | |
| "loss": 0.0001, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.8306351183063512, | |
| "grad_norm": 0.01260537002235651, | |
| "learning_rate": 2.0579078455790784e-05, | |
| "loss": 0.0004, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.8318804483188045, | |
| "grad_norm": 0.005095213185995817, | |
| "learning_rate": 2.0610211706102116e-05, | |
| "loss": 0.0002, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.8331257783312578, | |
| "grad_norm": 0.004534134641289711, | |
| "learning_rate": 2.064134495641345e-05, | |
| "loss": 0.0001, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.8343711083437111, | |
| "grad_norm": 0.015001599676907063, | |
| "learning_rate": 2.0672478206724783e-05, | |
| "loss": 0.0004, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8356164383561644, | |
| "grad_norm": 0.005808024201542139, | |
| "learning_rate": 2.0703611457036115e-05, | |
| "loss": 0.0002, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.8368617683686177, | |
| "grad_norm": 0.008496883325278759, | |
| "learning_rate": 2.0734744707347447e-05, | |
| "loss": 0.0003, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.838107098381071, | |
| "grad_norm": 410.8919677734375, | |
| "learning_rate": 2.076587795765878e-05, | |
| "loss": 1.7746, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.8393524283935243, | |
| "grad_norm": 0.15478110313415527, | |
| "learning_rate": 2.0797011207970113e-05, | |
| "loss": 0.0008, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.8405977584059776, | |
| "grad_norm": 0.017121130600571632, | |
| "learning_rate": 2.0828144458281445e-05, | |
| "loss": 0.0004, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.8418430884184309, | |
| "grad_norm": 0.01048367191106081, | |
| "learning_rate": 2.0859277708592777e-05, | |
| "loss": 0.0003, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8430884184308842, | |
| "grad_norm": 0.013435076922178268, | |
| "learning_rate": 2.0890410958904112e-05, | |
| "loss": 0.0004, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.8443337484433375, | |
| "grad_norm": 0.0057032410986721516, | |
| "learning_rate": 2.0921544209215444e-05, | |
| "loss": 0.0002, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.8455790784557908, | |
| "grad_norm": 0.05629182606935501, | |
| "learning_rate": 2.0952677459526776e-05, | |
| "loss": 0.0005, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.8468244084682441, | |
| "grad_norm": 0.8133471608161926, | |
| "learning_rate": 2.0983810709838107e-05, | |
| "loss": 0.0012, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8480697384806973, | |
| "grad_norm": 0.011576468124985695, | |
| "learning_rate": 2.1014943960149442e-05, | |
| "loss": 0.0003, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.8493150684931506, | |
| "grad_norm": 0.079744853079319, | |
| "learning_rate": 2.1046077210460774e-05, | |
| "loss": 0.0006, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.8505603985056039, | |
| "grad_norm": 0.019048074260354042, | |
| "learning_rate": 2.1077210460772106e-05, | |
| "loss": 0.0004, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.8518057285180572, | |
| "grad_norm": 0.004764070268720388, | |
| "learning_rate": 2.1108343711083438e-05, | |
| "loss": 0.0001, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8530510585305106, | |
| "grad_norm": 0.022517533972859383, | |
| "learning_rate": 2.1139476961394773e-05, | |
| "loss": 0.0003, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.8542963885429639, | |
| "grad_norm": 0.17990639805793762, | |
| "learning_rate": 2.1170610211706105e-05, | |
| "loss": 0.0007, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.8555417185554172, | |
| "grad_norm": 0.0133855314925313, | |
| "learning_rate": 2.1201743462017433e-05, | |
| "loss": 0.0004, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.8567870485678705, | |
| "grad_norm": 0.01034181471914053, | |
| "learning_rate": 2.1232876712328768e-05, | |
| "loss": 0.0003, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.8580323785803238, | |
| "grad_norm": 0.09839920699596405, | |
| "learning_rate": 2.12640099626401e-05, | |
| "loss": 0.0007, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.8592777085927771, | |
| "grad_norm": 0.28286799788475037, | |
| "learning_rate": 2.129514321295143e-05, | |
| "loss": 0.0009, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8605230386052304, | |
| "grad_norm": 0.004863832611590624, | |
| "learning_rate": 2.1326276463262763e-05, | |
| "loss": 0.0001, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.8617683686176837, | |
| "grad_norm": 0.007945407181978226, | |
| "learning_rate": 2.13574097135741e-05, | |
| "loss": 0.0002, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.863013698630137, | |
| "grad_norm": 0.17650844156742096, | |
| "learning_rate": 2.138854296388543e-05, | |
| "loss": 0.0006, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.8642590286425903, | |
| "grad_norm": 36.761592864990234, | |
| "learning_rate": 2.1419676214196762e-05, | |
| "loss": 4.8048, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8655043586550436, | |
| "grad_norm": 43.7182731628418, | |
| "learning_rate": 2.1450809464508094e-05, | |
| "loss": 4.1331, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.8667496886674969, | |
| "grad_norm": 0.031437598168849945, | |
| "learning_rate": 2.148194271481943e-05, | |
| "loss": 0.0005, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.8679950186799502, | |
| "grad_norm": 0.17908449470996857, | |
| "learning_rate": 2.151307596513076e-05, | |
| "loss": 0.0018, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.8692403486924035, | |
| "grad_norm": 43.03351974487305, | |
| "learning_rate": 2.1544209215442092e-05, | |
| "loss": 0.9142, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.8704856787048568, | |
| "grad_norm": 0.07657460123300552, | |
| "learning_rate": 2.1575342465753427e-05, | |
| "loss": 0.0007, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.8717310087173101, | |
| "grad_norm": 43.546669006347656, | |
| "learning_rate": 2.160647571606476e-05, | |
| "loss": 1.2326, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8729763387297634, | |
| "grad_norm": 0.15518978238105774, | |
| "learning_rate": 2.163760896637609e-05, | |
| "loss": 0.0013, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.8742216687422167, | |
| "grad_norm": 20.484352111816406, | |
| "learning_rate": 2.1668742216687423e-05, | |
| "loss": 0.4034, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.8754669987546699, | |
| "grad_norm": 8.134427070617676, | |
| "learning_rate": 2.1699875466998758e-05, | |
| "loss": 0.1308, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.8767123287671232, | |
| "grad_norm": 31.111207962036133, | |
| "learning_rate": 2.173100871731009e-05, | |
| "loss": 1.3048, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8779576587795765, | |
| "grad_norm": 1.6822067499160767, | |
| "learning_rate": 2.176214196762142e-05, | |
| "loss": 0.0337, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.8792029887920298, | |
| "grad_norm": 0.016219645738601685, | |
| "learning_rate": 2.1793275217932753e-05, | |
| "loss": 0.0002, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8804483188044832, | |
| "grad_norm": 0.9385362267494202, | |
| "learning_rate": 2.1824408468244088e-05, | |
| "loss": 0.0118, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.8816936488169365, | |
| "grad_norm": 59.062347412109375, | |
| "learning_rate": 2.185554171855542e-05, | |
| "loss": 1.5594, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.8829389788293898, | |
| "grad_norm": 0.8278292417526245, | |
| "learning_rate": 2.188667496886675e-05, | |
| "loss": 0.0164, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.8841843088418431, | |
| "grad_norm": 0.1193016767501831, | |
| "learning_rate": 2.1917808219178083e-05, | |
| "loss": 0.0026, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8854296388542964, | |
| "grad_norm": 0.06685473769903183, | |
| "learning_rate": 2.1948941469489415e-05, | |
| "loss": 0.0007, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.8866749688667497, | |
| "grad_norm": 0.2482631653547287, | |
| "learning_rate": 2.1980074719800747e-05, | |
| "loss": 0.0044, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.887920298879203, | |
| "grad_norm": 0.09288740158081055, | |
| "learning_rate": 2.201120797011208e-05, | |
| "loss": 0.001, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.8891656288916563, | |
| "grad_norm": 0.07905003428459167, | |
| "learning_rate": 2.2042341220423414e-05, | |
| "loss": 0.001, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.8904109589041096, | |
| "grad_norm": 0.03586210682988167, | |
| "learning_rate": 2.2073474470734746e-05, | |
| "loss": 0.0007, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.8916562889165629, | |
| "grad_norm": 0.029501890763640404, | |
| "learning_rate": 2.2104607721046077e-05, | |
| "loss": 0.0005, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8929016189290162, | |
| "grad_norm": 1.9498989582061768, | |
| "learning_rate": 2.213574097135741e-05, | |
| "loss": 0.0056, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.8941469489414695, | |
| "grad_norm": 0.011584372259676456, | |
| "learning_rate": 2.2166874221668744e-05, | |
| "loss": 0.0002, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8953922789539228, | |
| "grad_norm": 0.052831344306468964, | |
| "learning_rate": 2.2198007471980076e-05, | |
| "loss": 0.0007, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.8966376089663761, | |
| "grad_norm": 152.57171630859375, | |
| "learning_rate": 2.2229140722291408e-05, | |
| "loss": 0.5103, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8978829389788294, | |
| "grad_norm": 0.03796133026480675, | |
| "learning_rate": 2.226027397260274e-05, | |
| "loss": 0.0008, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.8991282689912827, | |
| "grad_norm": 9.698473930358887, | |
| "learning_rate": 2.2291407222914075e-05, | |
| "loss": 0.0168, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.900373599003736, | |
| "grad_norm": 0.014799389988183975, | |
| "learning_rate": 2.2322540473225406e-05, | |
| "loss": 0.0003, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.9016189290161893, | |
| "grad_norm": 0.015290978364646435, | |
| "learning_rate": 2.2353673723536738e-05, | |
| "loss": 0.0004, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.9028642590286425, | |
| "grad_norm": 0.0121547756716609, | |
| "learning_rate": 2.238480697384807e-05, | |
| "loss": 0.0004, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.9041095890410958, | |
| "grad_norm": 0.043171875178813934, | |
| "learning_rate": 2.2415940224159405e-05, | |
| "loss": 0.001, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.9053549190535491, | |
| "grad_norm": 0.02570340782403946, | |
| "learning_rate": 2.2447073474470737e-05, | |
| "loss": 0.0004, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.9066002490660025, | |
| "grad_norm": 0.4008868634700775, | |
| "learning_rate": 2.247820672478207e-05, | |
| "loss": 0.0015, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.9078455790784558, | |
| "grad_norm": 0.012521167285740376, | |
| "learning_rate": 2.2509339975093404e-05, | |
| "loss": 0.0003, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.039595190435647964, | |
| "learning_rate": 2.2540473225404735e-05, | |
| "loss": 0.0008, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.9103362391033624, | |
| "grad_norm": 0.0371573381125927, | |
| "learning_rate": 2.2571606475716064e-05, | |
| "loss": 0.0007, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.9115815691158157, | |
| "grad_norm": 0.0111406734213233, | |
| "learning_rate": 2.2602739726027396e-05, | |
| "loss": 0.0003, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.912826899128269, | |
| "grad_norm": 34.578346252441406, | |
| "learning_rate": 2.263387297633873e-05, | |
| "loss": 4.4143, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.9140722291407223, | |
| "grad_norm": 0.006715845782309771, | |
| "learning_rate": 2.2665006226650062e-05, | |
| "loss": 0.0002, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.9153175591531756, | |
| "grad_norm": 0.014482389204204082, | |
| "learning_rate": 2.2696139476961394e-05, | |
| "loss": 0.0004, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.9165628891656289, | |
| "grad_norm": 0.0057504503056406975, | |
| "learning_rate": 2.272727272727273e-05, | |
| "loss": 0.0001, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.9178082191780822, | |
| "grad_norm": 0.04472869634628296, | |
| "learning_rate": 2.275840597758406e-05, | |
| "loss": 0.001, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.9190535491905355, | |
| "grad_norm": 0.05841754376888275, | |
| "learning_rate": 2.2789539227895393e-05, | |
| "loss": 0.001, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.9202988792029888, | |
| "grad_norm": 0.009739454835653305, | |
| "learning_rate": 2.2820672478206725e-05, | |
| "loss": 0.0002, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.9215442092154421, | |
| "grad_norm": 0.011922300793230534, | |
| "learning_rate": 2.285180572851806e-05, | |
| "loss": 0.0004, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9227895392278954, | |
| "grad_norm": 0.05216851085424423, | |
| "learning_rate": 2.288293897882939e-05, | |
| "loss": 0.001, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.9240348692403487, | |
| "grad_norm": 0.007307402323931456, | |
| "learning_rate": 2.2914072229140723e-05, | |
| "loss": 0.0002, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.925280199252802, | |
| "grad_norm": 0.04301249235868454, | |
| "learning_rate": 2.2945205479452055e-05, | |
| "loss": 0.0005, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.9265255292652553, | |
| "grad_norm": 0.013793856836855412, | |
| "learning_rate": 2.297633872976339e-05, | |
| "loss": 0.0003, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9277708592777086, | |
| "grad_norm": 0.1124817505478859, | |
| "learning_rate": 2.3007471980074722e-05, | |
| "loss": 0.0022, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.9290161892901619, | |
| "grad_norm": 0.005083655938506126, | |
| "learning_rate": 2.3038605230386054e-05, | |
| "loss": 0.0001, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.9302615193026152, | |
| "grad_norm": 0.005723627284169197, | |
| "learning_rate": 2.3069738480697385e-05, | |
| "loss": 0.0001, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.9315068493150684, | |
| "grad_norm": 0.08036380261182785, | |
| "learning_rate": 2.310087173100872e-05, | |
| "loss": 0.0014, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9327521793275217, | |
| "grad_norm": 0.007362319156527519, | |
| "learning_rate": 2.3132004981320052e-05, | |
| "loss": 0.0002, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.933997509339975, | |
| "grad_norm": 1.5796531438827515, | |
| "learning_rate": 2.3163138231631384e-05, | |
| "loss": 0.0147, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9352428393524284, | |
| "grad_norm": 0.038087982684373856, | |
| "learning_rate": 2.3194271481942716e-05, | |
| "loss": 0.0008, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.9364881693648817, | |
| "grad_norm": 0.005102880764752626, | |
| "learning_rate": 2.322540473225405e-05, | |
| "loss": 0.0001, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.937733499377335, | |
| "grad_norm": 306.6837158203125, | |
| "learning_rate": 2.3256537982565383e-05, | |
| "loss": 3.1504, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.9389788293897883, | |
| "grad_norm": 0.006043303292244673, | |
| "learning_rate": 2.328767123287671e-05, | |
| "loss": 0.0001, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9402241594022416, | |
| "grad_norm": 0.027712326496839523, | |
| "learning_rate": 2.3318804483188046e-05, | |
| "loss": 0.0008, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.9414694894146949, | |
| "grad_norm": 0.015633290633559227, | |
| "learning_rate": 2.3349937733499378e-05, | |
| "loss": 0.0004, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9427148194271482, | |
| "grad_norm": 0.007909745909273624, | |
| "learning_rate": 2.338107098381071e-05, | |
| "loss": 0.0002, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.9439601494396015, | |
| "grad_norm": 0.018452487885951996, | |
| "learning_rate": 2.341220423412204e-05, | |
| "loss": 0.0004, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.9452054794520548, | |
| "grad_norm": 0.010309605859220028, | |
| "learning_rate": 2.3443337484433376e-05, | |
| "loss": 0.0002, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.9464508094645081, | |
| "grad_norm": 0.005897897761315107, | |
| "learning_rate": 2.3474470734744708e-05, | |
| "loss": 0.0001, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9476961394769614, | |
| "grad_norm": 0.024718550965189934, | |
| "learning_rate": 2.350560398505604e-05, | |
| "loss": 0.0007, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.9489414694894147, | |
| "grad_norm": 0.014151460491120815, | |
| "learning_rate": 2.3536737235367372e-05, | |
| "loss": 0.0004, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.950186799501868, | |
| "grad_norm": 0.05046864598989487, | |
| "learning_rate": 2.3567870485678707e-05, | |
| "loss": 0.0005, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.9514321295143213, | |
| "grad_norm": 0.05455144867300987, | |
| "learning_rate": 2.359900373599004e-05, | |
| "loss": 0.0006, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9526774595267746, | |
| "grad_norm": 0.02435392327606678, | |
| "learning_rate": 2.363013698630137e-05, | |
| "loss": 0.0003, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.9539227895392279, | |
| "grad_norm": 0.025639377534389496, | |
| "learning_rate": 2.3661270236612702e-05, | |
| "loss": 0.0005, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.9551681195516812, | |
| "grad_norm": 0.015089256688952446, | |
| "learning_rate": 2.3692403486924037e-05, | |
| "loss": 0.0004, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.9564134495641345, | |
| "grad_norm": 0.032805927097797394, | |
| "learning_rate": 2.372353673723537e-05, | |
| "loss": 0.0006, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.9576587795765878, | |
| "grad_norm": 0.015525261871516705, | |
| "learning_rate": 2.37546699875467e-05, | |
| "loss": 0.0004, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.958904109589041, | |
| "grad_norm": 0.008337048813700676, | |
| "learning_rate": 2.3785803237858036e-05, | |
| "loss": 0.0002, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.9601494396014943, | |
| "grad_norm": 0.037120576947927475, | |
| "learning_rate": 2.3816936488169368e-05, | |
| "loss": 0.0004, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.9613947696139477, | |
| "grad_norm": 0.01175164058804512, | |
| "learning_rate": 2.38480697384807e-05, | |
| "loss": 0.0003, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.962640099626401, | |
| "grad_norm": 0.010447794571518898, | |
| "learning_rate": 2.387920298879203e-05, | |
| "loss": 0.0003, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.9638854296388543, | |
| "grad_norm": 0.010614910162985325, | |
| "learning_rate": 2.3910336239103366e-05, | |
| "loss": 0.0001, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.9651307596513076, | |
| "grad_norm": 0.07238447666168213, | |
| "learning_rate": 2.3941469489414698e-05, | |
| "loss": 0.0007, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.9663760896637609, | |
| "grad_norm": 0.03060179576277733, | |
| "learning_rate": 2.3972602739726026e-05, | |
| "loss": 0.0007, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.9676214196762142, | |
| "grad_norm": 0.08607795089483261, | |
| "learning_rate": 2.400373599003736e-05, | |
| "loss": 0.0004, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.9688667496886675, | |
| "grad_norm": 0.030211659148335457, | |
| "learning_rate": 2.4034869240348693e-05, | |
| "loss": 0.0003, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.9701120797011208, | |
| "grad_norm": 0.006784611847251654, | |
| "learning_rate": 2.4066002490660025e-05, | |
| "loss": 0.0002, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.9713574097135741, | |
| "grad_norm": 0.011817213147878647, | |
| "learning_rate": 2.4097135740971357e-05, | |
| "loss": 0.0003, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9726027397260274, | |
| "grad_norm": 0.029583904892206192, | |
| "learning_rate": 2.4128268991282692e-05, | |
| "loss": 0.0004, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.9738480697384807, | |
| "grad_norm": 0.007558898068964481, | |
| "learning_rate": 2.4159402241594024e-05, | |
| "loss": 0.0003, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.975093399750934, | |
| "grad_norm": 481.3611755371094, | |
| "learning_rate": 2.4190535491905355e-05, | |
| "loss": 2.5255, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.9763387297633873, | |
| "grad_norm": 127.75431060791016, | |
| "learning_rate": 2.4221668742216687e-05, | |
| "loss": 0.0841, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.9775840597758406, | |
| "grad_norm": 0.01205628365278244, | |
| "learning_rate": 2.4252801992528022e-05, | |
| "loss": 0.0004, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.9788293897882939, | |
| "grad_norm": 411.4049377441406, | |
| "learning_rate": 2.4283935242839354e-05, | |
| "loss": 1.7384, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.9800747198007472, | |
| "grad_norm": 1.6122777462005615, | |
| "learning_rate": 2.4315068493150686e-05, | |
| "loss": 0.0018, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.9813200498132005, | |
| "grad_norm": 0.013621006160974503, | |
| "learning_rate": 2.4346201743462018e-05, | |
| "loss": 0.0004, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.9825653798256538, | |
| "grad_norm": 0.0152182187885046, | |
| "learning_rate": 2.4377334993773353e-05, | |
| "loss": 0.0003, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.9838107098381071, | |
| "grad_norm": 241.25070190429688, | |
| "learning_rate": 2.4408468244084684e-05, | |
| "loss": 0.1739, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.9850560398505604, | |
| "grad_norm": 0.009512806311249733, | |
| "learning_rate": 2.4439601494396016e-05, | |
| "loss": 0.0003, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.9863013698630136, | |
| "grad_norm": 12.394267082214355, | |
| "learning_rate": 2.4470734744707348e-05, | |
| "loss": 0.0218, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.987546699875467, | |
| "grad_norm": 0.008201587945222855, | |
| "learning_rate": 2.4501867995018683e-05, | |
| "loss": 0.0002, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.9887920298879203, | |
| "grad_norm": 0.049125440418720245, | |
| "learning_rate": 2.4533001245330015e-05, | |
| "loss": 0.0006, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9900373599003736, | |
| "grad_norm": 0.0920347198843956, | |
| "learning_rate": 2.4564134495641347e-05, | |
| "loss": 0.001, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.9912826899128269, | |
| "grad_norm": 35.2567253112793, | |
| "learning_rate": 2.4595267745952678e-05, | |
| "loss": 0.0267, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9925280199252802, | |
| "grad_norm": 0.01363935973495245, | |
| "learning_rate": 2.4626400996264013e-05, | |
| "loss": 0.0003, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.9937733499377335, | |
| "grad_norm": 0.009647058323025703, | |
| "learning_rate": 2.4657534246575342e-05, | |
| "loss": 0.0003, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9950186799501868, | |
| "grad_norm": 0.005581174045801163, | |
| "learning_rate": 2.4688667496886674e-05, | |
| "loss": 0.0002, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.9962640099626401, | |
| "grad_norm": 0.006403461564332247, | |
| "learning_rate": 2.471980074719801e-05, | |
| "loss": 0.0002, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9975093399750934, | |
| "grad_norm": 0.018721066415309906, | |
| "learning_rate": 2.475093399750934e-05, | |
| "loss": 0.0005, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.9987546699875467, | |
| "grad_norm": 0.0068865250796079636, | |
| "learning_rate": 2.4782067247820672e-05, | |
| "loss": 0.0002, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 148.17623901367188, | |
| "learning_rate": 2.4813200498132004e-05, | |
| "loss": 0.2457, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9760765550239234, | |
| "eval_f1_macro": 0.9768339768339769, | |
| "eval_f1_micro": 0.9760765550239234, | |
| "eval_f1_weighted": 0.9760457655194498, | |
| "eval_loss": 0.244761124253273, | |
| "eval_precision_macro": 0.978448275862069, | |
| "eval_precision_micro": 0.9760765550239234, | |
| "eval_precision_weighted": 0.978138920970137, | |
| "eval_recall_macro": 0.9772727272727273, | |
| "eval_recall_micro": 0.9760765550239234, | |
| "eval_recall_weighted": 0.9760765550239234, | |
| "eval_runtime": 29.9929, | |
| "eval_samples_per_second": 6.968, | |
| "eval_steps_per_second": 0.467, | |
| "step": 803 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 16060, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.01 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.394707013520589e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |