| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9850187265917603, |
| "eval_steps": 500, |
| "global_step": 132, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0149812734082397, |
| "grad_norm": 5.084571368060712, |
| "learning_rate": 9.998583973465647e-06, |
| "loss": 0.6568, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0299625468164794, |
| "grad_norm": 2.5916731058563154, |
| "learning_rate": 9.994336695915041e-06, |
| "loss": 0.5868, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0449438202247191, |
| "grad_norm": 2.91627215452919, |
| "learning_rate": 9.987260573051268e-06, |
| "loss": 0.5514, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0599250936329588, |
| "grad_norm": 2.275898613414116, |
| "learning_rate": 9.977359612865424e-06, |
| "loss": 0.5441, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0749063670411985, |
| "grad_norm": 1.6237079225920172, |
| "learning_rate": 9.964639423366442e-06, |
| "loss": 0.4732, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0898876404494382, |
| "grad_norm": 1.239385870767508, |
| "learning_rate": 9.949107209404664e-06, |
| "loss": 0.4375, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.10486891385767791, |
| "grad_norm": 1.0302163579537231, |
| "learning_rate": 9.930771768590934e-06, |
| "loss": 0.4391, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.1198501872659176, |
| "grad_norm": 1.1409516576697065, |
| "learning_rate": 9.909643486313533e-06, |
| "loss": 0.4258, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.1348314606741573, |
| "grad_norm": 0.94379582312002, |
| "learning_rate": 9.885734329855798e-06, |
| "loss": 0.4126, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.149812734082397, |
| "grad_norm": 0.7876867969848453, |
| "learning_rate": 9.859057841617709e-06, |
| "loss": 0.4005, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1647940074906367, |
| "grad_norm": 0.7681173237114753, |
| "learning_rate": 9.829629131445342e-06, |
| "loss": 0.4036, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.1797752808988764, |
| "grad_norm": 0.6968631524976365, |
| "learning_rate": 9.797464868072489e-06, |
| "loss": 0.4003, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.1947565543071161, |
| "grad_norm": 0.6697811026019894, |
| "learning_rate": 9.762583269679304e-06, |
| "loss": 0.382, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.20973782771535582, |
| "grad_norm": 0.7034056314034299, |
| "learning_rate": 9.725004093573343e-06, |
| "loss": 0.3939, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.2247191011235955, |
| "grad_norm": 0.644792322528016, |
| "learning_rate": 9.68474862499881e-06, |
| "loss": 0.3815, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.2397003745318352, |
| "grad_norm": 0.6540449125702598, |
| "learning_rate": 9.641839665080363e-06, |
| "loss": 0.4052, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.2546816479400749, |
| "grad_norm": 0.5850979560902584, |
| "learning_rate": 9.596301517908329e-06, |
| "loss": 0.3677, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.2696629213483146, |
| "grad_norm": 0.5989452826148334, |
| "learning_rate": 9.548159976772593e-06, |
| "loss": 0.3507, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.2846441947565543, |
| "grad_norm": 0.6541103491169268, |
| "learning_rate": 9.497442309553017e-06, |
| "loss": 0.3946, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.299625468164794, |
| "grad_norm": 0.6264121596538782, |
| "learning_rate": 9.444177243274619e-06, |
| "loss": 0.3938, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3146067415730337, |
| "grad_norm": 0.5442036904494095, |
| "learning_rate": 9.388394947836278e-06, |
| "loss": 0.3617, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.3295880149812734, |
| "grad_norm": 0.5226396388761955, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.3569, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.3445692883895131, |
| "grad_norm": 0.572134019195198, |
| "learning_rate": 9.269406460105742e-06, |
| "loss": 0.3472, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.3595505617977528, |
| "grad_norm": 0.6001488940660931, |
| "learning_rate": 9.206267664155906e-06, |
| "loss": 0.3676, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.37453183520599254, |
| "grad_norm": 0.49694447854102536, |
| "learning_rate": 9.140746393556853e-06, |
| "loss": 0.3471, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.3895131086142322, |
| "grad_norm": 0.5550862526263433, |
| "learning_rate": 9.07287976025168e-06, |
| "loss": 0.3615, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.4044943820224719, |
| "grad_norm": 0.5164806611991527, |
| "learning_rate": 9.002706204621802e-06, |
| "loss": 0.3544, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.41947565543071164, |
| "grad_norm": 0.5722090559507117, |
| "learning_rate": 8.930265473713939e-06, |
| "loss": 0.3732, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.4344569288389513, |
| "grad_norm": 0.5281465270716825, |
| "learning_rate": 8.85559859872694e-06, |
| "loss": 0.3529, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.449438202247191, |
| "grad_norm": 0.5478825064447109, |
| "learning_rate": 8.778747871771293e-06, |
| "loss": 0.3496, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.46441947565543074, |
| "grad_norm": 0.4841594545626075, |
| "learning_rate": 8.69975682191442e-06, |
| "loss": 0.3416, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.4794007490636704, |
| "grad_norm": 0.5457688505090527, |
| "learning_rate": 8.61867019052535e-06, |
| "loss": 0.3486, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.4943820224719101, |
| "grad_norm": 0.5314177647379028, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 0.3446, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.5093632958801498, |
| "grad_norm": 0.5403044715003406, |
| "learning_rate": 8.450395057410561e-06, |
| "loss": 0.3516, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.5243445692883895, |
| "grad_norm": 0.5421794672241224, |
| "learning_rate": 8.363301868506264e-06, |
| "loss": 0.3677, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.5393258426966292, |
| "grad_norm": 0.5223559269222365, |
| "learning_rate": 8.274303669726427e-06, |
| "loss": 0.3233, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.5543071161048689, |
| "grad_norm": 0.5239472612001447, |
| "learning_rate": 8.183450870595443e-06, |
| "loss": 0.3648, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.5692883895131086, |
| "grad_norm": 0.513928373743982, |
| "learning_rate": 8.090794931103026e-06, |
| "loss": 0.3463, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.5842696629213483, |
| "grad_norm": 0.5862122505448358, |
| "learning_rate": 7.996388332556735e-06, |
| "loss": 0.354, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.599250936329588, |
| "grad_norm": 0.511373942485004, |
| "learning_rate": 7.900284547855992e-06, |
| "loss": 0.3083, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6142322097378277, |
| "grad_norm": 0.5055474388205636, |
| "learning_rate": 7.80253801120447e-06, |
| "loss": 0.3353, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.6292134831460674, |
| "grad_norm": 0.5127108176142576, |
| "learning_rate": 7.703204087277989e-06, |
| "loss": 0.3233, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.6441947565543071, |
| "grad_norm": 0.4768902770487289, |
| "learning_rate": 7.602339039865362e-06, |
| "loss": 0.3421, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.6591760299625468, |
| "grad_norm": 0.5106783335089775, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.3307, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.6741573033707865, |
| "grad_norm": 0.5677504435953342, |
| "learning_rate": 7.396244933600285e-06, |
| "loss": 0.3516, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.6891385767790262, |
| "grad_norm": 0.5210369843248303, |
| "learning_rate": 7.291132608637053e-06, |
| "loss": 0.3289, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.704119850187266, |
| "grad_norm": 0.5111361256226582, |
| "learning_rate": 7.1847225618467975e-06, |
| "loss": 0.3598, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.7191011235955056, |
| "grad_norm": 0.4982722152685452, |
| "learning_rate": 7.0770750650094335e-06, |
| "loss": 0.3385, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.7340823970037453, |
| "grad_norm": 0.5850783917597748, |
| "learning_rate": 6.968251090809708e-06, |
| "loss": 0.3386, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.7490636704119851, |
| "grad_norm": 0.48217289709222916, |
| "learning_rate": 6.858312278301638e-06, |
| "loss": 0.3331, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7640449438202247, |
| "grad_norm": 0.4968268533018931, |
| "learning_rate": 6.747320897995493e-06, |
| "loss": 0.3402, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.7790262172284644, |
| "grad_norm": 0.48120817793832665, |
| "learning_rate": 6.635339816587109e-06, |
| "loss": 0.3387, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.7940074906367042, |
| "grad_norm": 0.5123955773541512, |
| "learning_rate": 6.522432461349536e-06, |
| "loss": 0.339, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.8089887640449438, |
| "grad_norm": 0.515935218451783, |
| "learning_rate": 6.408662784207149e-06, |
| "loss": 0.3542, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.8239700374531835, |
| "grad_norm": 0.4981909870306645, |
| "learning_rate": 6.294095225512604e-06, |
| "loss": 0.3496, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.8389513108614233, |
| "grad_norm": 0.5129966763370457, |
| "learning_rate": 6.178794677547138e-06, |
| "loss": 0.3503, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.8539325842696629, |
| "grad_norm": 0.4999507047625763, |
| "learning_rate": 6.062826447764883e-06, |
| "loss": 0.3396, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.8689138576779026, |
| "grad_norm": 0.45085096719089746, |
| "learning_rate": 5.946256221802052e-06, |
| "loss": 0.332, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.8838951310861424, |
| "grad_norm": 0.5415726389508178, |
| "learning_rate": 5.829150026271871e-06, |
| "loss": 0.3557, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.898876404494382, |
| "grad_norm": 0.49690077423330675, |
| "learning_rate": 5.711574191366427e-06, |
| "loss": 0.3338, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.9138576779026217, |
| "grad_norm": 0.48976318907654964, |
| "learning_rate": 5.593595313286526e-06, |
| "loss": 0.3411, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.9288389513108615, |
| "grad_norm": 0.459487617252603, |
| "learning_rate": 5.475280216520913e-06, |
| "loss": 0.3339, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.9438202247191011, |
| "grad_norm": 0.4501510964518993, |
| "learning_rate": 5.356695915996162e-06, |
| "loss": 0.3273, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.9588014981273408, |
| "grad_norm": 0.47937244215139657, |
| "learning_rate": 5.237909579118713e-06, |
| "loss": 0.3315, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.9737827715355806, |
| "grad_norm": 0.48887804569954546, |
| "learning_rate": 5.118988487730537e-06, |
| "loss": 0.3452, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.9887640449438202, |
| "grad_norm": 0.4668723453608968, |
| "learning_rate": 5e-06, |
| "loss": 0.336, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.0112359550561798, |
| "grad_norm": 1.0170564244441662, |
| "learning_rate": 4.881011512269464e-06, |
| "loss": 0.6052, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.0262172284644195, |
| "grad_norm": 0.48395499394358177, |
| "learning_rate": 4.762090420881289e-06, |
| "loss": 0.2861, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.0411985018726593, |
| "grad_norm": 0.5116230912653792, |
| "learning_rate": 4.643304084003839e-06, |
| "loss": 0.3288, |
| "step": 69 |
| }, |
| { |
| "epoch": 1.0561797752808988, |
| "grad_norm": 0.5306839321240365, |
| "learning_rate": 4.524719783479088e-06, |
| "loss": 0.3019, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.0711610486891385, |
| "grad_norm": 0.5089522936482375, |
| "learning_rate": 4.4064046867134755e-06, |
| "loss": 0.2964, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.0861423220973783, |
| "grad_norm": 0.46474040406185857, |
| "learning_rate": 4.2884258086335755e-06, |
| "loss": 0.2723, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.101123595505618, |
| "grad_norm": 0.513397817955286, |
| "learning_rate": 4.1708499737281305e-06, |
| "loss": 0.3013, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.1161048689138577, |
| "grad_norm": 0.5155099415658073, |
| "learning_rate": 4.053743778197951e-06, |
| "loss": 0.2931, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.1310861423220975, |
| "grad_norm": 0.5545415459286378, |
| "learning_rate": 3.937173552235117e-06, |
| "loss": 0.3019, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.146067415730337, |
| "grad_norm": 0.5051575656323628, |
| "learning_rate": 3.821205322452863e-06, |
| "loss": 0.2875, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.1610486891385767, |
| "grad_norm": 0.529669570940759, |
| "learning_rate": 3.705904774487396e-06, |
| "loss": 0.3136, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.1760299625468165, |
| "grad_norm": 0.516652042867861, |
| "learning_rate": 3.5913372157928515e-06, |
| "loss": 0.2865, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.1910112359550562, |
| "grad_norm": 0.5160903225067296, |
| "learning_rate": 3.477567538650466e-06, |
| "loss": 0.2998, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.205992509363296, |
| "grad_norm": 0.49355283122625104, |
| "learning_rate": 3.3646601834128924e-06, |
| "loss": 0.2785, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.2209737827715357, |
| "grad_norm": 0.5259456191410616, |
| "learning_rate": 3.252679102004509e-06, |
| "loss": 0.3001, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.2359550561797752, |
| "grad_norm": 0.49982720147660564, |
| "learning_rate": 3.141687721698363e-06, |
| "loss": 0.2747, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.250936329588015, |
| "grad_norm": 0.4953841635791301, |
| "learning_rate": 3.0317489091902936e-06, |
| "loss": 0.2743, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.2659176029962547, |
| "grad_norm": 0.49853623320143337, |
| "learning_rate": 2.9229249349905686e-06, |
| "loss": 0.2956, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.2808988764044944, |
| "grad_norm": 0.4812793355263138, |
| "learning_rate": 2.8152774381532033e-06, |
| "loss": 0.28, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.2958801498127341, |
| "grad_norm": 0.46484538692798055, |
| "learning_rate": 2.708867391362948e-06, |
| "loss": 0.2689, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.3108614232209739, |
| "grad_norm": 0.5163036151171484, |
| "learning_rate": 2.603755066399718e-06, |
| "loss": 0.3012, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.3258426966292136, |
| "grad_norm": 0.5032344023183358, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.2838, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.3408239700374531, |
| "grad_norm": 0.49223530725238673, |
| "learning_rate": 2.3976609601346395e-06, |
| "loss": 0.3001, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.3558052434456929, |
| "grad_norm": 0.48497886789963784, |
| "learning_rate": 2.296795912722014e-06, |
| "loss": 0.2882, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.3707865168539326, |
| "grad_norm": 0.5145435737396187, |
| "learning_rate": 2.1974619887955294e-06, |
| "loss": 0.2862, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.3857677902621723, |
| "grad_norm": 0.4674755119848922, |
| "learning_rate": 2.09971545214401e-06, |
| "loss": 0.2835, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.4007490636704119, |
| "grad_norm": 0.45195647037068015, |
| "learning_rate": 2.0036116674432653e-06, |
| "loss": 0.2802, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.4157303370786516, |
| "grad_norm": 0.47464107585204235, |
| "learning_rate": 1.9092050688969736e-06, |
| "loss": 0.2993, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.4307116104868913, |
| "grad_norm": 0.4700827875283776, |
| "learning_rate": 1.8165491294045596e-06, |
| "loss": 0.3153, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.445692883895131, |
| "grad_norm": 0.4949362978211587, |
| "learning_rate": 1.7256963302735752e-06, |
| "loss": 0.2816, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.4606741573033708, |
| "grad_norm": 0.48429429007341784, |
| "learning_rate": 1.6366981314937374e-06, |
| "loss": 0.2973, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.4756554307116105, |
| "grad_norm": 0.5161818624558605, |
| "learning_rate": 1.549604942589441e-06, |
| "loss": 0.3117, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.4906367041198503, |
| "grad_norm": 0.4465694401602429, |
| "learning_rate": 1.4644660940672628e-06, |
| "loss": 0.2457, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.50561797752809, |
| "grad_norm": 0.5060293606542552, |
| "learning_rate": 1.3813298094746491e-06, |
| "loss": 0.289, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.5205992509363297, |
| "grad_norm": 0.4515822734596748, |
| "learning_rate": 1.3002431780855817e-06, |
| "loss": 0.2801, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.5355805243445693, |
| "grad_norm": 0.47945146527879845, |
| "learning_rate": 1.2212521282287093e-06, |
| "loss": 0.3053, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.550561797752809, |
| "grad_norm": 0.46345395570552683, |
| "learning_rate": 1.144401401273062e-06, |
| "loss": 0.2983, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.5655430711610487, |
| "grad_norm": 0.45715293441983024, |
| "learning_rate": 1.0697345262860638e-06, |
| "loss": 0.2888, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.5805243445692883, |
| "grad_norm": 0.4616261918302568, |
| "learning_rate": 9.972937953781985e-07, |
| "loss": 0.291, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.595505617977528, |
| "grad_norm": 0.4692638415051811, |
| "learning_rate": 9.271202397483214e-07, |
| "loss": 0.3084, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.6104868913857677, |
| "grad_norm": 0.4443693865513048, |
| "learning_rate": 8.592536064431467e-07, |
| "loss": 0.2666, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.6254681647940075, |
| "grad_norm": 0.45345475123000406, |
| "learning_rate": 7.937323358440935e-07, |
| "loss": 0.2716, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.6404494382022472, |
| "grad_norm": 0.4701810003973807, |
| "learning_rate": 7.305935398942598e-07, |
| "loss": 0.3019, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.655430711610487, |
| "grad_norm": 0.4850037347675527, |
| "learning_rate": 6.698729810778065e-07, |
| "loss": 0.2883, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.6704119850187267, |
| "grad_norm": 0.45474907303732376, |
| "learning_rate": 6.116050521637218e-07, |
| "loss": 0.2779, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.6853932584269664, |
| "grad_norm": 0.4378690287299302, |
| "learning_rate": 5.558227567253832e-07, |
| "loss": 0.2675, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.7003745318352061, |
| "grad_norm": 0.4515066464327336, |
| "learning_rate": 5.025576904469842e-07, |
| "loss": 0.2542, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.7153558052434457, |
| "grad_norm": 0.4970973999578922, |
| "learning_rate": 4.5184002322740784e-07, |
| "loss": 0.3218, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.7303370786516854, |
| "grad_norm": 0.48236620387261825, |
| "learning_rate": 4.036984820916723e-07, |
| "loss": 0.2909, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.7453183520599251, |
| "grad_norm": 0.473863223130393, |
| "learning_rate": 3.581603349196372e-07, |
| "loss": 0.2912, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.7602996254681647, |
| "grad_norm": 0.4258929259586013, |
| "learning_rate": 3.1525137500119207e-07, |
| "loss": 0.2839, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.7752808988764044, |
| "grad_norm": 0.44903741968138483, |
| "learning_rate": 2.7499590642665773e-07, |
| "loss": 0.2813, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.7902621722846441, |
| "grad_norm": 0.44910436098903456, |
| "learning_rate": 2.3741673032069757e-07, |
| "loss": 0.2623, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.8052434456928839, |
| "grad_norm": 0.44692478559629295, |
| "learning_rate": 2.0253513192751374e-07, |
| "loss": 0.2692, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.8202247191011236, |
| "grad_norm": 0.4513597628185662, |
| "learning_rate": 1.7037086855465902e-07, |
| "loss": 0.2813, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.8352059925093633, |
| "grad_norm": 0.4830816269750559, |
| "learning_rate": 1.4094215838229176e-07, |
| "loss": 0.2878, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.850187265917603, |
| "grad_norm": 0.4588243087471401, |
| "learning_rate": 1.1426567014420297e-07, |
| "loss": 0.2869, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.8651685393258428, |
| "grad_norm": 0.4478517149312853, |
| "learning_rate": 9.035651368646647e-08, |
| "loss": 0.2735, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.8801498127340825, |
| "grad_norm": 0.5035730109336932, |
| "learning_rate": 6.922823140906754e-08, |
| "loss": 0.306, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.895131086142322, |
| "grad_norm": 0.45493024460418946, |
| "learning_rate": 5.089279059533658e-08, |
| "loss": 0.2688, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.9101123595505618, |
| "grad_norm": 0.4540132381090704, |
| "learning_rate": 3.536057663355852e-08, |
| "loss": 0.2549, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.9250936329588015, |
| "grad_norm": 0.45729402388738727, |
| "learning_rate": 2.264038713457706e-08, |
| "loss": 0.3035, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.940074906367041, |
| "grad_norm": 0.433225409384696, |
| "learning_rate": 1.2739426948732426e-08, |
| "loss": 0.2798, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.9550561797752808, |
| "grad_norm": 0.46008798535725426, |
| "learning_rate": 5.6633040849601865e-09, |
| "loss": 0.2843, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.9700374531835205, |
| "grad_norm": 0.47719390376038995, |
| "learning_rate": 1.4160265343549084e-09, |
| "loss": 0.3398, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.9850187265917603, |
| "grad_norm": 0.4646929748227334, |
| "learning_rate": 0.0, |
| "loss": 0.2828, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.9850187265917603, |
| "step": 132, |
| "total_flos": 37082064814080.0, |
| "train_loss": 0.33343393603960675, |
| "train_runtime": 1526.7332, |
| "train_samples_per_second": 6.981, |
| "train_steps_per_second": 0.086 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 132, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 37082064814080.0, |
| "train_batch_size": 5, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|