| { | |
| "best_global_step": 1188, | |
| "best_metric": 0.9063876651982378, | |
| "best_model_checkpoint": "./albert_multilabel_large\\checkpoint-1188", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 1188, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0025252525252525255, | |
| "grad_norm": 11.870144844055176, | |
| "learning_rate": 1.9983164983164986e-05, | |
| "loss": 0.758, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005050505050505051, | |
| "grad_norm": 3.960209369659424, | |
| "learning_rate": 1.9966329966329967e-05, | |
| "loss": 0.6626, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.007575757575757576, | |
| "grad_norm": 4.163188457489014, | |
| "learning_rate": 1.994949494949495e-05, | |
| "loss": 0.6126, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.010101010101010102, | |
| "grad_norm": 3.630805253982544, | |
| "learning_rate": 1.9932659932659936e-05, | |
| "loss": 0.6105, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.012626262626262626, | |
| "grad_norm": 3.7358059883117676, | |
| "learning_rate": 1.9915824915824917e-05, | |
| "loss": 0.5486, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.015151515151515152, | |
| "grad_norm": 4.360195636749268, | |
| "learning_rate": 1.98989898989899e-05, | |
| "loss": 0.4834, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.017676767676767676, | |
| "grad_norm": 4.5092549324035645, | |
| "learning_rate": 1.9882154882154885e-05, | |
| "loss": 0.4723, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.020202020202020204, | |
| "grad_norm": 3.9408679008483887, | |
| "learning_rate": 1.9865319865319866e-05, | |
| "loss": 0.4853, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.022727272727272728, | |
| "grad_norm": 253.27503967285156, | |
| "learning_rate": 1.984848484848485e-05, | |
| "loss": 0.5783, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.025252525252525252, | |
| "grad_norm": 17.573854446411133, | |
| "learning_rate": 1.9831649831649832e-05, | |
| "loss": 0.3991, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.027777777777777776, | |
| "grad_norm": 4.111778259277344, | |
| "learning_rate": 1.9814814814814816e-05, | |
| "loss": 0.4054, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.030303030303030304, | |
| "grad_norm": 2.9756879806518555, | |
| "learning_rate": 1.97979797979798e-05, | |
| "loss": 0.4636, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03282828282828283, | |
| "grad_norm": 1.9542008638381958, | |
| "learning_rate": 1.978114478114478e-05, | |
| "loss": 0.3732, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.03535353535353535, | |
| "grad_norm": 2.1436798572540283, | |
| "learning_rate": 1.9764309764309766e-05, | |
| "loss": 0.3341, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.03787878787878788, | |
| "grad_norm": 2.5457680225372314, | |
| "learning_rate": 1.9747474747474747e-05, | |
| "loss": 0.358, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.04040404040404041, | |
| "grad_norm": 2.3681640625, | |
| "learning_rate": 1.973063973063973e-05, | |
| "loss": 0.3813, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04292929292929293, | |
| "grad_norm": 3.3765199184417725, | |
| "learning_rate": 1.9713804713804716e-05, | |
| "loss": 0.3288, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.045454545454545456, | |
| "grad_norm": 2.5906496047973633, | |
| "learning_rate": 1.96969696969697e-05, | |
| "loss": 0.3025, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.047979797979797977, | |
| "grad_norm": 1.6513965129852295, | |
| "learning_rate": 1.968013468013468e-05, | |
| "loss": 0.287, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.050505050505050504, | |
| "grad_norm": 2.1033501625061035, | |
| "learning_rate": 1.9663299663299665e-05, | |
| "loss": 0.4552, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05303030303030303, | |
| "grad_norm": 2.6947014331817627, | |
| "learning_rate": 1.964646464646465e-05, | |
| "loss": 0.3561, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": 1.4776068925857544, | |
| "learning_rate": 1.962962962962963e-05, | |
| "loss": 0.3564, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.05808080808080808, | |
| "grad_norm": 1.8511464595794678, | |
| "learning_rate": 1.9612794612794615e-05, | |
| "loss": 0.3851, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.06060606060606061, | |
| "grad_norm": 1.9145028591156006, | |
| "learning_rate": 1.9595959595959596e-05, | |
| "loss": 0.3591, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06313131313131314, | |
| "grad_norm": 3.7978272438049316, | |
| "learning_rate": 1.957912457912458e-05, | |
| "loss": 0.2914, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06565656565656566, | |
| "grad_norm": 1.9927159547805786, | |
| "learning_rate": 1.9562289562289565e-05, | |
| "loss": 0.276, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.06818181818181818, | |
| "grad_norm": 1.7165324687957764, | |
| "learning_rate": 1.9545454545454546e-05, | |
| "loss": 0.3855, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.0707070707070707, | |
| "grad_norm": 3.547311544418335, | |
| "learning_rate": 1.952861952861953e-05, | |
| "loss": 0.3146, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07323232323232323, | |
| "grad_norm": 2.205611228942871, | |
| "learning_rate": 1.951178451178451e-05, | |
| "loss": 0.2515, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.07575757575757576, | |
| "grad_norm": 131.6199951171875, | |
| "learning_rate": 1.9494949494949496e-05, | |
| "loss": 0.4546, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07828282828282829, | |
| "grad_norm": 330.8481140136719, | |
| "learning_rate": 1.947811447811448e-05, | |
| "loss": 0.425, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.08080808080808081, | |
| "grad_norm": 24.042455673217773, | |
| "learning_rate": 1.9461279461279464e-05, | |
| "loss": 0.3878, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.08333333333333333, | |
| "grad_norm": 9.382911682128906, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 0.3594, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.08585858585858586, | |
| "grad_norm": 2.793823003768921, | |
| "learning_rate": 1.942760942760943e-05, | |
| "loss": 0.3946, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.08838383838383838, | |
| "grad_norm": 2.098381280899048, | |
| "learning_rate": 1.9410774410774414e-05, | |
| "loss": 0.2873, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.09090909090909091, | |
| "grad_norm": 3.7400736808776855, | |
| "learning_rate": 1.9393939393939395e-05, | |
| "loss": 0.1903, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.09343434343434344, | |
| "grad_norm": 2.984248638153076, | |
| "learning_rate": 1.937710437710438e-05, | |
| "loss": 0.3758, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.09595959595959595, | |
| "grad_norm": 2.084982395172119, | |
| "learning_rate": 1.936026936026936e-05, | |
| "loss": 0.2711, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.09848484848484848, | |
| "grad_norm": 2.7394371032714844, | |
| "learning_rate": 1.9343434343434345e-05, | |
| "loss": 0.3333, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.10101010101010101, | |
| "grad_norm": 3.4980244636535645, | |
| "learning_rate": 1.932659932659933e-05, | |
| "loss": 0.253, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.10353535353535354, | |
| "grad_norm": 3.121978521347046, | |
| "learning_rate": 1.930976430976431e-05, | |
| "loss": 0.2335, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.10606060606060606, | |
| "grad_norm": 2.696462631225586, | |
| "learning_rate": 1.9292929292929295e-05, | |
| "loss": 0.3397, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.10858585858585859, | |
| "grad_norm": 3.063912868499756, | |
| "learning_rate": 1.9276094276094276e-05, | |
| "loss": 0.3242, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 8.048778533935547, | |
| "learning_rate": 1.925925925925926e-05, | |
| "loss": 0.3208, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.11363636363636363, | |
| "grad_norm": 10.508525848388672, | |
| "learning_rate": 1.9242424242424244e-05, | |
| "loss": 0.3138, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.11616161616161616, | |
| "grad_norm": 2.972494125366211, | |
| "learning_rate": 1.922558922558923e-05, | |
| "loss": 0.2749, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.11868686868686869, | |
| "grad_norm": 2.6326518058776855, | |
| "learning_rate": 1.920875420875421e-05, | |
| "loss": 0.2419, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.12121212121212122, | |
| "grad_norm": 3.0405683517456055, | |
| "learning_rate": 1.9191919191919194e-05, | |
| "loss": 0.3422, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.12373737373737374, | |
| "grad_norm": 5.278780460357666, | |
| "learning_rate": 1.917508417508418e-05, | |
| "loss": 0.2458, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.12626262626262627, | |
| "grad_norm": 4.309386730194092, | |
| "learning_rate": 1.915824915824916e-05, | |
| "loss": 0.2252, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12878787878787878, | |
| "grad_norm": 2.3794400691986084, | |
| "learning_rate": 1.9141414141414144e-05, | |
| "loss": 0.1916, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.13131313131313133, | |
| "grad_norm": 3.2079036235809326, | |
| "learning_rate": 1.9124579124579125e-05, | |
| "loss": 0.2763, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.13383838383838384, | |
| "grad_norm": 6.404500961303711, | |
| "learning_rate": 1.910774410774411e-05, | |
| "loss": 0.3073, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.13636363636363635, | |
| "grad_norm": 3.2996926307678223, | |
| "learning_rate": 1.9090909090909094e-05, | |
| "loss": 0.2442, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.1388888888888889, | |
| "grad_norm": 3.197521924972534, | |
| "learning_rate": 1.9074074074074075e-05, | |
| "loss": 0.2451, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.1414141414141414, | |
| "grad_norm": 2.8418166637420654, | |
| "learning_rate": 1.905723905723906e-05, | |
| "loss": 0.2383, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.14393939393939395, | |
| "grad_norm": 2.393613338470459, | |
| "learning_rate": 1.904040404040404e-05, | |
| "loss": 0.1834, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.14646464646464646, | |
| "grad_norm": 2.1811683177948, | |
| "learning_rate": 1.9023569023569024e-05, | |
| "loss": 0.2564, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.14898989898989898, | |
| "grad_norm": 2.4366374015808105, | |
| "learning_rate": 1.900673400673401e-05, | |
| "loss": 0.2541, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.15151515151515152, | |
| "grad_norm": 5.706679344177246, | |
| "learning_rate": 1.8989898989898993e-05, | |
| "loss": 0.3285, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.15404040404040403, | |
| "grad_norm": 1.8341200351715088, | |
| "learning_rate": 1.8973063973063974e-05, | |
| "loss": 0.2692, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.15656565656565657, | |
| "grad_norm": 3.0101611614227295, | |
| "learning_rate": 1.895622895622896e-05, | |
| "loss": 0.2135, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1590909090909091, | |
| "grad_norm": 3.3140006065368652, | |
| "learning_rate": 1.8939393939393943e-05, | |
| "loss": 0.2027, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.16161616161616163, | |
| "grad_norm": 2.118210554122925, | |
| "learning_rate": 1.8922558922558924e-05, | |
| "loss": 0.2213, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.16414141414141414, | |
| "grad_norm": 3.6016147136688232, | |
| "learning_rate": 1.8905723905723908e-05, | |
| "loss": 0.1682, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 7.3510823249816895, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 0.369, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.1691919191919192, | |
| "grad_norm": 3.6129775047302246, | |
| "learning_rate": 1.8872053872053873e-05, | |
| "loss": 0.2257, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1717171717171717, | |
| "grad_norm": 4.521103858947754, | |
| "learning_rate": 1.8855218855218858e-05, | |
| "loss": 0.1425, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.17424242424242425, | |
| "grad_norm": 1.941278100013733, | |
| "learning_rate": 1.883838383838384e-05, | |
| "loss": 0.2962, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.17676767676767677, | |
| "grad_norm": 4.856161594390869, | |
| "learning_rate": 1.8821548821548823e-05, | |
| "loss": 0.2071, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.17929292929292928, | |
| "grad_norm": 4.528213024139404, | |
| "learning_rate": 1.8804713804713804e-05, | |
| "loss": 0.2222, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 6.646481037139893, | |
| "learning_rate": 1.8787878787878792e-05, | |
| "loss": 0.2942, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.18434343434343434, | |
| "grad_norm": 2.1316299438476562, | |
| "learning_rate": 1.8771043771043773e-05, | |
| "loss": 0.2697, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.18686868686868688, | |
| "grad_norm": 3.7682583332061768, | |
| "learning_rate": 1.8754208754208757e-05, | |
| "loss": 0.2575, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1893939393939394, | |
| "grad_norm": 2.1818718910217285, | |
| "learning_rate": 1.873737373737374e-05, | |
| "loss": 0.1755, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.1919191919191919, | |
| "grad_norm": 5.337326526641846, | |
| "learning_rate": 1.8720538720538723e-05, | |
| "loss": 0.2238, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.19444444444444445, | |
| "grad_norm": 5.185172080993652, | |
| "learning_rate": 1.8703703703703707e-05, | |
| "loss": 0.14, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.19696969696969696, | |
| "grad_norm": 5.610733509063721, | |
| "learning_rate": 1.8686868686868688e-05, | |
| "loss": 0.2109, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1994949494949495, | |
| "grad_norm": 3.34989333152771, | |
| "learning_rate": 1.8670033670033672e-05, | |
| "loss": 0.2358, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.20202020202020202, | |
| "grad_norm": 4.732699394226074, | |
| "learning_rate": 1.8653198653198653e-05, | |
| "loss": 0.2582, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.20454545454545456, | |
| "grad_norm": 3.595618963241577, | |
| "learning_rate": 1.8636363636363638e-05, | |
| "loss": 0.2499, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.20707070707070707, | |
| "grad_norm": 4.39829158782959, | |
| "learning_rate": 1.8619528619528622e-05, | |
| "loss": 0.1776, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.20959595959595959, | |
| "grad_norm": 5.79127836227417, | |
| "learning_rate": 1.8602693602693603e-05, | |
| "loss": 0.1329, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.21212121212121213, | |
| "grad_norm": 3.827282428741455, | |
| "learning_rate": 1.8585858585858588e-05, | |
| "loss": 0.2073, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.21464646464646464, | |
| "grad_norm": 6.159754753112793, | |
| "learning_rate": 1.856902356902357e-05, | |
| "loss": 0.1658, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.21717171717171718, | |
| "grad_norm": 9.290190696716309, | |
| "learning_rate": 1.8552188552188556e-05, | |
| "loss": 0.286, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2196969696969697, | |
| "grad_norm": 5.264730930328369, | |
| "learning_rate": 1.8535353535353537e-05, | |
| "loss": 0.2504, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 3.915583848953247, | |
| "learning_rate": 1.851851851851852e-05, | |
| "loss": 0.2535, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.22474747474747475, | |
| "grad_norm": 3.885434627532959, | |
| "learning_rate": 1.8501683501683503e-05, | |
| "loss": 0.1451, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.22727272727272727, | |
| "grad_norm": 3.5729010105133057, | |
| "learning_rate": 1.8484848484848487e-05, | |
| "loss": 0.1989, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2297979797979798, | |
| "grad_norm": 2.3339507579803467, | |
| "learning_rate": 1.846801346801347e-05, | |
| "loss": 0.3191, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.23232323232323232, | |
| "grad_norm": 3.946099281311035, | |
| "learning_rate": 1.8451178451178452e-05, | |
| "loss": 0.2271, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.23484848484848486, | |
| "grad_norm": 5.328370571136475, | |
| "learning_rate": 1.8434343434343437e-05, | |
| "loss": 0.3326, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.23737373737373738, | |
| "grad_norm": 4.987793445587158, | |
| "learning_rate": 1.8417508417508418e-05, | |
| "loss": 0.2377, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2398989898989899, | |
| "grad_norm": 3.6775288581848145, | |
| "learning_rate": 1.8400673400673402e-05, | |
| "loss": 0.2323, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.24242424242424243, | |
| "grad_norm": 3.444467782974243, | |
| "learning_rate": 1.8383838383838387e-05, | |
| "loss": 0.2712, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.24494949494949494, | |
| "grad_norm": 7.329760551452637, | |
| "learning_rate": 1.8367003367003367e-05, | |
| "loss": 0.3223, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2474747474747475, | |
| "grad_norm": 3.329362154006958, | |
| "learning_rate": 1.8350168350168352e-05, | |
| "loss": 0.1859, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 2.950449228286743, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 0.2032, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.25252525252525254, | |
| "grad_norm": 3.4235892295837402, | |
| "learning_rate": 1.831649831649832e-05, | |
| "loss": 0.1982, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.255050505050505, | |
| "grad_norm": 4.13006067276001, | |
| "learning_rate": 1.82996632996633e-05, | |
| "loss": 0.1787, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.25757575757575757, | |
| "grad_norm": 2.0153565406799316, | |
| "learning_rate": 1.8282828282828286e-05, | |
| "loss": 0.1408, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2601010101010101, | |
| "grad_norm": 3.2294890880584717, | |
| "learning_rate": 1.8265993265993267e-05, | |
| "loss": 0.2113, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.26262626262626265, | |
| "grad_norm": 3.2181968688964844, | |
| "learning_rate": 1.824915824915825e-05, | |
| "loss": 0.1296, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.26515151515151514, | |
| "grad_norm": 1.6924734115600586, | |
| "learning_rate": 1.8232323232323236e-05, | |
| "loss": 0.1773, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2676767676767677, | |
| "grad_norm": 5.491613864898682, | |
| "learning_rate": 1.8215488215488217e-05, | |
| "loss": 0.1511, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2702020202020202, | |
| "grad_norm": 4.4867143630981445, | |
| "learning_rate": 1.81986531986532e-05, | |
| "loss": 0.1978, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.2727272727272727, | |
| "grad_norm": 1.801491379737854, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 0.2535, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.27525252525252525, | |
| "grad_norm": 2.2414021492004395, | |
| "learning_rate": 1.8164983164983166e-05, | |
| "loss": 0.2129, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 1.8164544105529785, | |
| "learning_rate": 1.814814814814815e-05, | |
| "loss": 0.1744, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2803030303030303, | |
| "grad_norm": 1.4675378799438477, | |
| "learning_rate": 1.8131313131313132e-05, | |
| "loss": 0.2502, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.2828282828282828, | |
| "grad_norm": 2.9425742626190186, | |
| "learning_rate": 1.8114478114478116e-05, | |
| "loss": 0.1312, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.28535353535353536, | |
| "grad_norm": 2.8444998264312744, | |
| "learning_rate": 1.8097643097643097e-05, | |
| "loss": 0.233, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.2878787878787879, | |
| "grad_norm": 1.8977577686309814, | |
| "learning_rate": 1.8080808080808085e-05, | |
| "loss": 0.0896, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2904040404040404, | |
| "grad_norm": 4.595700740814209, | |
| "learning_rate": 1.8063973063973066e-05, | |
| "loss": 0.3304, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.29292929292929293, | |
| "grad_norm": 2.2750136852264404, | |
| "learning_rate": 1.804713804713805e-05, | |
| "loss": 0.209, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.29545454545454547, | |
| "grad_norm": 2.0217509269714355, | |
| "learning_rate": 1.803030303030303e-05, | |
| "loss": 0.2202, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.29797979797979796, | |
| "grad_norm": 2.943140745162964, | |
| "learning_rate": 1.8013468013468016e-05, | |
| "loss": 0.2903, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.3005050505050505, | |
| "grad_norm": 2.4190146923065186, | |
| "learning_rate": 1.7996632996633e-05, | |
| "loss": 0.1384, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.30303030303030304, | |
| "grad_norm": 3.664355993270874, | |
| "learning_rate": 1.797979797979798e-05, | |
| "loss": 0.1866, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3055555555555556, | |
| "grad_norm": 3.616316795349121, | |
| "learning_rate": 1.7962962962962965e-05, | |
| "loss": 0.2016, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.30808080808080807, | |
| "grad_norm": 6.439982891082764, | |
| "learning_rate": 1.7946127946127946e-05, | |
| "loss": 0.2699, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.3106060606060606, | |
| "grad_norm": 3.2625112533569336, | |
| "learning_rate": 1.792929292929293e-05, | |
| "loss": 0.242, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.31313131313131315, | |
| "grad_norm": 4.760579586029053, | |
| "learning_rate": 1.7912457912457915e-05, | |
| "loss": 0.1812, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.31565656565656564, | |
| "grad_norm": 5.375882625579834, | |
| "learning_rate": 1.7895622895622896e-05, | |
| "loss": 0.0892, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.3181818181818182, | |
| "grad_norm": 1.5627996921539307, | |
| "learning_rate": 1.787878787878788e-05, | |
| "loss": 0.1608, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.3207070707070707, | |
| "grad_norm": 2.0782926082611084, | |
| "learning_rate": 1.786195286195286e-05, | |
| "loss": 0.1384, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.32323232323232326, | |
| "grad_norm": 3.5221481323242188, | |
| "learning_rate": 1.7845117845117846e-05, | |
| "loss": 0.2595, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.32575757575757575, | |
| "grad_norm": 1.7717233896255493, | |
| "learning_rate": 1.782828282828283e-05, | |
| "loss": 0.1401, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.3282828282828283, | |
| "grad_norm": 3.81760311126709, | |
| "learning_rate": 1.781144781144781e-05, | |
| "loss": 0.1153, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.33080808080808083, | |
| "grad_norm": 4.479602813720703, | |
| "learning_rate": 1.7794612794612796e-05, | |
| "loss": 0.2117, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 1.1932178735733032, | |
| "learning_rate": 1.7777777777777777e-05, | |
| "loss": 0.1669, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.33585858585858586, | |
| "grad_norm": 3.330796003341675, | |
| "learning_rate": 1.7760942760942764e-05, | |
| "loss": 0.152, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3383838383838384, | |
| "grad_norm": 3.5781233310699463, | |
| "learning_rate": 1.7744107744107745e-05, | |
| "loss": 0.269, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.3409090909090909, | |
| "grad_norm": 2.489184617996216, | |
| "learning_rate": 1.772727272727273e-05, | |
| "loss": 0.1631, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3434343434343434, | |
| "grad_norm": 5.023707389831543, | |
| "learning_rate": 1.771043771043771e-05, | |
| "loss": 0.1623, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.34595959595959597, | |
| "grad_norm": 1.8095295429229736, | |
| "learning_rate": 1.7693602693602695e-05, | |
| "loss": 0.1941, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.3484848484848485, | |
| "grad_norm": 5.773559093475342, | |
| "learning_rate": 1.767676767676768e-05, | |
| "loss": 0.2198, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.351010101010101, | |
| "grad_norm": 2.3348917961120605, | |
| "learning_rate": 1.765993265993266e-05, | |
| "loss": 0.0749, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.35353535353535354, | |
| "grad_norm": 4.8729023933410645, | |
| "learning_rate": 1.7643097643097645e-05, | |
| "loss": 0.2523, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3560606060606061, | |
| "grad_norm": 2.1227433681488037, | |
| "learning_rate": 1.7626262626262626e-05, | |
| "loss": 0.1616, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.35858585858585856, | |
| "grad_norm": 4.208232402801514, | |
| "learning_rate": 1.760942760942761e-05, | |
| "loss": 0.1206, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3611111111111111, | |
| "grad_norm": 2.2808191776275635, | |
| "learning_rate": 1.7592592592592595e-05, | |
| "loss": 0.1413, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 2.797044515609741, | |
| "learning_rate": 1.7575757575757576e-05, | |
| "loss": 0.1553, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3661616161616162, | |
| "grad_norm": 2.0235748291015625, | |
| "learning_rate": 1.755892255892256e-05, | |
| "loss": 0.1748, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3686868686868687, | |
| "grad_norm": 1.668614149093628, | |
| "learning_rate": 1.754208754208754e-05, | |
| "loss": 0.1397, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3712121212121212, | |
| "grad_norm": 2.048588991165161, | |
| "learning_rate": 1.752525252525253e-05, | |
| "loss": 0.1636, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.37373737373737376, | |
| "grad_norm": 3.2544357776641846, | |
| "learning_rate": 1.750841750841751e-05, | |
| "loss": 0.1906, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.37626262626262624, | |
| "grad_norm": 2.5983431339263916, | |
| "learning_rate": 1.7491582491582494e-05, | |
| "loss": 0.1833, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.3787878787878788, | |
| "grad_norm": 3.579721689224243, | |
| "learning_rate": 1.7474747474747475e-05, | |
| "loss": 0.2445, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3813131313131313, | |
| "grad_norm": 3.889470338821411, | |
| "learning_rate": 1.745791245791246e-05, | |
| "loss": 0.2457, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.3838383838383838, | |
| "grad_norm": 1.612406611442566, | |
| "learning_rate": 1.7441077441077444e-05, | |
| "loss": 0.1468, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.38636363636363635, | |
| "grad_norm": 3.572401285171509, | |
| "learning_rate": 1.7424242424242425e-05, | |
| "loss": 0.1659, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.3888888888888889, | |
| "grad_norm": 2.7137911319732666, | |
| "learning_rate": 1.740740740740741e-05, | |
| "loss": 0.2389, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.39141414141414144, | |
| "grad_norm": 2.293943166732788, | |
| "learning_rate": 1.739057239057239e-05, | |
| "loss": 0.2188, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3939393939393939, | |
| "grad_norm": 5.641902923583984, | |
| "learning_rate": 1.7373737373737375e-05, | |
| "loss": 0.1843, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.39646464646464646, | |
| "grad_norm": 4.039111137390137, | |
| "learning_rate": 1.735690235690236e-05, | |
| "loss": 0.2627, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.398989898989899, | |
| "grad_norm": 2.942754030227661, | |
| "learning_rate": 1.734006734006734e-05, | |
| "loss": 0.2299, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4015151515151515, | |
| "grad_norm": 3.7655181884765625, | |
| "learning_rate": 1.7323232323232324e-05, | |
| "loss": 0.2167, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.40404040404040403, | |
| "grad_norm": 2.5062334537506104, | |
| "learning_rate": 1.7306397306397305e-05, | |
| "loss": 0.164, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4065656565656566, | |
| "grad_norm": 2.07106614112854, | |
| "learning_rate": 1.7289562289562293e-05, | |
| "loss": 0.1956, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.4090909090909091, | |
| "grad_norm": 6.250090599060059, | |
| "learning_rate": 1.7272727272727274e-05, | |
| "loss": 0.1399, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.4116161616161616, | |
| "grad_norm": 2.017141819000244, | |
| "learning_rate": 1.725589225589226e-05, | |
| "loss": 0.2036, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.41414141414141414, | |
| "grad_norm": 3.3339602947235107, | |
| "learning_rate": 1.723905723905724e-05, | |
| "loss": 0.1596, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 3.8334908485412598, | |
| "learning_rate": 1.7222222222222224e-05, | |
| "loss": 0.1267, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.41919191919191917, | |
| "grad_norm": 2.0751090049743652, | |
| "learning_rate": 1.7205387205387208e-05, | |
| "loss": 0.2058, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.4217171717171717, | |
| "grad_norm": 3.0513789653778076, | |
| "learning_rate": 1.718855218855219e-05, | |
| "loss": 0.091, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.42424242424242425, | |
| "grad_norm": 5.1696648597717285, | |
| "learning_rate": 1.7171717171717173e-05, | |
| "loss": 0.2313, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.42676767676767674, | |
| "grad_norm": 3.9072530269622803, | |
| "learning_rate": 1.7154882154882154e-05, | |
| "loss": 0.2648, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.4292929292929293, | |
| "grad_norm": 4.278628349304199, | |
| "learning_rate": 1.713804713804714e-05, | |
| "loss": 0.1539, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4318181818181818, | |
| "grad_norm": 1.6870406866073608, | |
| "learning_rate": 1.7121212121212123e-05, | |
| "loss": 0.1714, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.43434343434343436, | |
| "grad_norm": 1.6782217025756836, | |
| "learning_rate": 1.7104377104377104e-05, | |
| "loss": 0.1269, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.43686868686868685, | |
| "grad_norm": 5.854135513305664, | |
| "learning_rate": 1.708754208754209e-05, | |
| "loss": 0.1568, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.4393939393939394, | |
| "grad_norm": 3.947122097015381, | |
| "learning_rate": 1.707070707070707e-05, | |
| "loss": 0.2051, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.44191919191919193, | |
| "grad_norm": 2.085911273956299, | |
| "learning_rate": 1.7053872053872057e-05, | |
| "loss": 0.1101, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 4.145143985748291, | |
| "learning_rate": 1.7037037037037038e-05, | |
| "loss": 0.1773, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.44696969696969696, | |
| "grad_norm": 4.920554161071777, | |
| "learning_rate": 1.7020202020202023e-05, | |
| "loss": 0.2379, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.4494949494949495, | |
| "grad_norm": 2.8730502128601074, | |
| "learning_rate": 1.7003367003367004e-05, | |
| "loss": 0.1019, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.45202020202020204, | |
| "grad_norm": 1.0413464307785034, | |
| "learning_rate": 1.6986531986531988e-05, | |
| "loss": 0.1205, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.45454545454545453, | |
| "grad_norm": 2.591437816619873, | |
| "learning_rate": 1.6969696969696972e-05, | |
| "loss": 0.1173, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.45707070707070707, | |
| "grad_norm": 4.737552165985107, | |
| "learning_rate": 1.6952861952861953e-05, | |
| "loss": 0.4692, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.4595959595959596, | |
| "grad_norm": 5.872066974639893, | |
| "learning_rate": 1.6936026936026938e-05, | |
| "loss": 0.178, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4621212121212121, | |
| "grad_norm": 4.527502536773682, | |
| "learning_rate": 1.691919191919192e-05, | |
| "loss": 0.2855, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.46464646464646464, | |
| "grad_norm": 3.166898488998413, | |
| "learning_rate": 1.6902356902356903e-05, | |
| "loss": 0.201, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4671717171717172, | |
| "grad_norm": 8.388322830200195, | |
| "learning_rate": 1.6885521885521888e-05, | |
| "loss": 0.2455, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.4696969696969697, | |
| "grad_norm": 3.2028310298919678, | |
| "learning_rate": 1.686868686868687e-05, | |
| "loss": 0.2577, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.4722222222222222, | |
| "grad_norm": 3.2072689533233643, | |
| "learning_rate": 1.6851851851851853e-05, | |
| "loss": 0.1123, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.47474747474747475, | |
| "grad_norm": 2.532289743423462, | |
| "learning_rate": 1.6835016835016837e-05, | |
| "loss": 0.2389, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4772727272727273, | |
| "grad_norm": 3.049967050552368, | |
| "learning_rate": 1.681818181818182e-05, | |
| "loss": 0.1156, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.4797979797979798, | |
| "grad_norm": 2.940448760986328, | |
| "learning_rate": 1.6801346801346803e-05, | |
| "loss": 0.149, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4823232323232323, | |
| "grad_norm": 2.2545042037963867, | |
| "learning_rate": 1.6784511784511787e-05, | |
| "loss": 0.1751, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.48484848484848486, | |
| "grad_norm": 2.66123628616333, | |
| "learning_rate": 1.6767676767676768e-05, | |
| "loss": 0.1685, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.48737373737373735, | |
| "grad_norm": 2.0476951599121094, | |
| "learning_rate": 1.6750841750841752e-05, | |
| "loss": 0.1705, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.4898989898989899, | |
| "grad_norm": 2.9459142684936523, | |
| "learning_rate": 1.6734006734006737e-05, | |
| "loss": 0.1873, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.49242424242424243, | |
| "grad_norm": 3.9844117164611816, | |
| "learning_rate": 1.6717171717171718e-05, | |
| "loss": 0.1531, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.494949494949495, | |
| "grad_norm": 6.765873908996582, | |
| "learning_rate": 1.6700336700336702e-05, | |
| "loss": 0.164, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.49747474747474746, | |
| "grad_norm": 2.809617757797241, | |
| "learning_rate": 1.6683501683501683e-05, | |
| "loss": 0.144, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 6.575211524963379, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.2556, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5025252525252525, | |
| "grad_norm": 4.31246280670166, | |
| "learning_rate": 1.6649831649831652e-05, | |
| "loss": 0.1998, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5050505050505051, | |
| "grad_norm": 3.3406026363372803, | |
| "learning_rate": 1.6632996632996633e-05, | |
| "loss": 0.1341, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5075757575757576, | |
| "grad_norm": 2.613698720932007, | |
| "learning_rate": 1.6616161616161617e-05, | |
| "loss": 0.1245, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.51010101010101, | |
| "grad_norm": 4.394161224365234, | |
| "learning_rate": 1.65993265993266e-05, | |
| "loss": 0.1683, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5126262626262627, | |
| "grad_norm": 1.0652117729187012, | |
| "learning_rate": 1.6582491582491586e-05, | |
| "loss": 0.1133, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5151515151515151, | |
| "grad_norm": 2.15743350982666, | |
| "learning_rate": 1.6565656565656567e-05, | |
| "loss": 0.1525, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5176767676767676, | |
| "grad_norm": 1.6530711650848389, | |
| "learning_rate": 1.654882154882155e-05, | |
| "loss": 0.1417, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5202020202020202, | |
| "grad_norm": 6.711721897125244, | |
| "learning_rate": 1.6531986531986532e-05, | |
| "loss": 0.2334, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5227272727272727, | |
| "grad_norm": 1.627074122428894, | |
| "learning_rate": 1.6515151515151517e-05, | |
| "loss": 0.1847, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5252525252525253, | |
| "grad_norm": 1.3665039539337158, | |
| "learning_rate": 1.64983164983165e-05, | |
| "loss": 0.0733, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5277777777777778, | |
| "grad_norm": 1.800305724143982, | |
| "learning_rate": 1.6481481481481482e-05, | |
| "loss": 0.0821, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5303030303030303, | |
| "grad_norm": 2.238971710205078, | |
| "learning_rate": 1.6464646464646466e-05, | |
| "loss": 0.162, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5328282828282829, | |
| "grad_norm": 3.941727638244629, | |
| "learning_rate": 1.6447811447811447e-05, | |
| "loss": 0.171, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5353535353535354, | |
| "grad_norm": 2.0416862964630127, | |
| "learning_rate": 1.6430976430976432e-05, | |
| "loss": 0.121, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5378787878787878, | |
| "grad_norm": 2.75635027885437, | |
| "learning_rate": 1.6414141414141416e-05, | |
| "loss": 0.1973, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5404040404040404, | |
| "grad_norm": 5.226922512054443, | |
| "learning_rate": 1.6397306397306397e-05, | |
| "loss": 0.111, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5429292929292929, | |
| "grad_norm": 6.741361618041992, | |
| "learning_rate": 1.638047138047138e-05, | |
| "loss": 0.0877, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 2.957056999206543, | |
| "learning_rate": 1.6363636363636366e-05, | |
| "loss": 0.0627, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.547979797979798, | |
| "grad_norm": 3.5542659759521484, | |
| "learning_rate": 1.634680134680135e-05, | |
| "loss": 0.2518, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5505050505050505, | |
| "grad_norm": 8.325895309448242, | |
| "learning_rate": 1.632996632996633e-05, | |
| "loss": 0.1792, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.553030303030303, | |
| "grad_norm": 4.116200923919678, | |
| "learning_rate": 1.6313131313131316e-05, | |
| "loss": 0.0821, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 2.4049417972564697, | |
| "learning_rate": 1.6296296296296297e-05, | |
| "loss": 0.1445, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5580808080808081, | |
| "grad_norm": 2.702348470687866, | |
| "learning_rate": 1.627946127946128e-05, | |
| "loss": 0.2077, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.5606060606060606, | |
| "grad_norm": 4.276516437530518, | |
| "learning_rate": 1.6262626262626265e-05, | |
| "loss": 0.189, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5631313131313131, | |
| "grad_norm": 2.212054491043091, | |
| "learning_rate": 1.6245791245791246e-05, | |
| "loss": 0.2111, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.5656565656565656, | |
| "grad_norm": 2.9544410705566406, | |
| "learning_rate": 1.622895622895623e-05, | |
| "loss": 0.1649, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5681818181818182, | |
| "grad_norm": 3.0044991970062256, | |
| "learning_rate": 1.6212121212121212e-05, | |
| "loss": 0.1728, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.5707070707070707, | |
| "grad_norm": 3.5259811878204346, | |
| "learning_rate": 1.6195286195286196e-05, | |
| "loss": 0.1621, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5732323232323232, | |
| "grad_norm": 3.774447441101074, | |
| "learning_rate": 1.617845117845118e-05, | |
| "loss": 0.2288, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.5757575757575758, | |
| "grad_norm": 2.975698232650757, | |
| "learning_rate": 1.616161616161616e-05, | |
| "loss": 0.1691, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5782828282828283, | |
| "grad_norm": 4.2801713943481445, | |
| "learning_rate": 1.6144781144781146e-05, | |
| "loss": 0.1332, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.5808080808080808, | |
| "grad_norm": 4.899673938751221, | |
| "learning_rate": 1.612794612794613e-05, | |
| "loss": 0.1472, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5833333333333334, | |
| "grad_norm": 5.345510482788086, | |
| "learning_rate": 1.6111111111111115e-05, | |
| "loss": 0.2117, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.5858585858585859, | |
| "grad_norm": 3.8797693252563477, | |
| "learning_rate": 1.6094276094276096e-05, | |
| "loss": 0.2047, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5883838383838383, | |
| "grad_norm": 6.221108913421631, | |
| "learning_rate": 1.607744107744108e-05, | |
| "loss": 0.1221, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.5909090909090909, | |
| "grad_norm": 3.437472343444824, | |
| "learning_rate": 1.606060606060606e-05, | |
| "loss": 0.1748, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5934343434343434, | |
| "grad_norm": 6.737703323364258, | |
| "learning_rate": 1.6043771043771045e-05, | |
| "loss": 0.1095, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5959595959595959, | |
| "grad_norm": 1.2895629405975342, | |
| "learning_rate": 1.602693602693603e-05, | |
| "loss": 0.0798, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5984848484848485, | |
| "grad_norm": 3.281799554824829, | |
| "learning_rate": 1.601010101010101e-05, | |
| "loss": 0.2223, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.601010101010101, | |
| "grad_norm": 3.6054065227508545, | |
| "learning_rate": 1.5993265993265995e-05, | |
| "loss": 0.1802, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6035353535353535, | |
| "grad_norm": 2.032210350036621, | |
| "learning_rate": 1.597643097643098e-05, | |
| "loss": 0.1613, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6060606060606061, | |
| "grad_norm": 3.515641212463379, | |
| "learning_rate": 1.595959595959596e-05, | |
| "loss": 0.1694, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6085858585858586, | |
| "grad_norm": 3.1133809089660645, | |
| "learning_rate": 1.5942760942760945e-05, | |
| "loss": 0.1114, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6111111111111112, | |
| "grad_norm": 3.401221752166748, | |
| "learning_rate": 1.5925925925925926e-05, | |
| "loss": 0.1692, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6136363636363636, | |
| "grad_norm": 1.9235018491744995, | |
| "learning_rate": 1.590909090909091e-05, | |
| "loss": 0.2513, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6161616161616161, | |
| "grad_norm": 2.6812822818756104, | |
| "learning_rate": 1.5892255892255895e-05, | |
| "loss": 0.1857, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6186868686868687, | |
| "grad_norm": 3.470087766647339, | |
| "learning_rate": 1.5875420875420876e-05, | |
| "loss": 0.1377, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6212121212121212, | |
| "grad_norm": 2.309100866317749, | |
| "learning_rate": 1.585858585858586e-05, | |
| "loss": 0.22, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6237373737373737, | |
| "grad_norm": 5.392738342285156, | |
| "learning_rate": 1.584175084175084e-05, | |
| "loss": 0.1767, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6262626262626263, | |
| "grad_norm": 3.751511573791504, | |
| "learning_rate": 1.5824915824915825e-05, | |
| "loss": 0.1504, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6287878787878788, | |
| "grad_norm": 1.9343714714050293, | |
| "learning_rate": 1.580808080808081e-05, | |
| "loss": 0.2363, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6313131313131313, | |
| "grad_norm": 3.65728759765625, | |
| "learning_rate": 1.5791245791245794e-05, | |
| "loss": 0.3138, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6338383838383839, | |
| "grad_norm": 4.637652397155762, | |
| "learning_rate": 1.5774410774410775e-05, | |
| "loss": 0.1518, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6363636363636364, | |
| "grad_norm": 2.6128430366516113, | |
| "learning_rate": 1.575757575757576e-05, | |
| "loss": 0.137, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6388888888888888, | |
| "grad_norm": 2.5993456840515137, | |
| "learning_rate": 1.5740740740740744e-05, | |
| "loss": 0.2126, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6414141414141414, | |
| "grad_norm": 2.630402088165283, | |
| "learning_rate": 1.5723905723905725e-05, | |
| "loss": 0.1712, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6439393939393939, | |
| "grad_norm": 3.6941192150115967, | |
| "learning_rate": 1.570707070707071e-05, | |
| "loss": 0.251, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.6464646464646465, | |
| "grad_norm": 3.1765594482421875, | |
| "learning_rate": 1.569023569023569e-05, | |
| "loss": 0.2738, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.648989898989899, | |
| "grad_norm": 5.44793701171875, | |
| "learning_rate": 1.5673400673400674e-05, | |
| "loss": 0.1806, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.6515151515151515, | |
| "grad_norm": 2.671917676925659, | |
| "learning_rate": 1.565656565656566e-05, | |
| "loss": 0.2302, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6540404040404041, | |
| "grad_norm": 3.816720485687256, | |
| "learning_rate": 1.563973063973064e-05, | |
| "loss": 0.2166, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6565656565656566, | |
| "grad_norm": 4.604842662811279, | |
| "learning_rate": 1.5622895622895624e-05, | |
| "loss": 0.1936, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6590909090909091, | |
| "grad_norm": 3.8842062950134277, | |
| "learning_rate": 1.5606060606060605e-05, | |
| "loss": 0.1321, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.6616161616161617, | |
| "grad_norm": 4.19383430480957, | |
| "learning_rate": 1.558922558922559e-05, | |
| "loss": 0.2093, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6641414141414141, | |
| "grad_norm": 6.01501989364624, | |
| "learning_rate": 1.5572390572390574e-05, | |
| "loss": 0.281, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 3.173448324203491, | |
| "learning_rate": 1.555555555555556e-05, | |
| "loss": 0.2289, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6691919191919192, | |
| "grad_norm": 3.035527229309082, | |
| "learning_rate": 1.553872053872054e-05, | |
| "loss": 0.2046, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.6717171717171717, | |
| "grad_norm": 4.2569684982299805, | |
| "learning_rate": 1.5521885521885524e-05, | |
| "loss": 0.3241, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6742424242424242, | |
| "grad_norm": 4.195226669311523, | |
| "learning_rate": 1.5505050505050508e-05, | |
| "loss": 0.1396, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.6767676767676768, | |
| "grad_norm": 1.8019922971725464, | |
| "learning_rate": 1.548821548821549e-05, | |
| "loss": 0.1341, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6792929292929293, | |
| "grad_norm": 2.006047248840332, | |
| "learning_rate": 1.5471380471380473e-05, | |
| "loss": 0.2256, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 2.592977523803711, | |
| "learning_rate": 1.5454545454545454e-05, | |
| "loss": 0.1794, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6843434343434344, | |
| "grad_norm": 1.8588799238204956, | |
| "learning_rate": 1.543771043771044e-05, | |
| "loss": 0.1279, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.6868686868686869, | |
| "grad_norm": 2.6189968585968018, | |
| "learning_rate": 1.5420875420875423e-05, | |
| "loss": 0.0616, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6893939393939394, | |
| "grad_norm": 1.683362603187561, | |
| "learning_rate": 1.5404040404040404e-05, | |
| "loss": 0.0476, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.6919191919191919, | |
| "grad_norm": 2.88405179977417, | |
| "learning_rate": 1.538720538720539e-05, | |
| "loss": 0.1016, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6944444444444444, | |
| "grad_norm": 1.6002604961395264, | |
| "learning_rate": 1.537037037037037e-05, | |
| "loss": 0.1343, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.696969696969697, | |
| "grad_norm": 1.0753880739212036, | |
| "learning_rate": 1.5353535353535354e-05, | |
| "loss": 0.1, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6994949494949495, | |
| "grad_norm": 3.1269478797912598, | |
| "learning_rate": 1.5336700336700338e-05, | |
| "loss": 0.136, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.702020202020202, | |
| "grad_norm": 2.584567070007324, | |
| "learning_rate": 1.5319865319865323e-05, | |
| "loss": 0.2408, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.7045454545454546, | |
| "grad_norm": 3.7829692363739014, | |
| "learning_rate": 1.5303030303030304e-05, | |
| "loss": 0.1797, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7070707070707071, | |
| "grad_norm": 1.9160706996917725, | |
| "learning_rate": 1.5286195286195288e-05, | |
| "loss": 0.1379, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7095959595959596, | |
| "grad_norm": 1.7192413806915283, | |
| "learning_rate": 1.5269360269360272e-05, | |
| "loss": 0.0992, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.7121212121212122, | |
| "grad_norm": 1.8255947828292847, | |
| "learning_rate": 1.5252525252525255e-05, | |
| "loss": 0.1683, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7146464646464646, | |
| "grad_norm": 1.4913876056671143, | |
| "learning_rate": 1.5235690235690238e-05, | |
| "loss": 0.1559, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7171717171717171, | |
| "grad_norm": 1.4210553169250488, | |
| "learning_rate": 1.521885521885522e-05, | |
| "loss": 0.0947, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7196969696969697, | |
| "grad_norm": 2.0691561698913574, | |
| "learning_rate": 1.5202020202020203e-05, | |
| "loss": 0.1111, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7222222222222222, | |
| "grad_norm": 1.425347089767456, | |
| "learning_rate": 1.5185185185185187e-05, | |
| "loss": 0.1305, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7247474747474747, | |
| "grad_norm": 2.620968818664551, | |
| "learning_rate": 1.516835016835017e-05, | |
| "loss": 0.1324, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 1.2153469324111938, | |
| "learning_rate": 1.5151515151515153e-05, | |
| "loss": 0.1875, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7297979797979798, | |
| "grad_norm": 2.5912091732025146, | |
| "learning_rate": 1.5134680134680136e-05, | |
| "loss": 0.1125, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7323232323232324, | |
| "grad_norm": 3.2707126140594482, | |
| "learning_rate": 1.5117845117845118e-05, | |
| "loss": 0.175, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7348484848484849, | |
| "grad_norm": 2.4020352363586426, | |
| "learning_rate": 1.5101010101010103e-05, | |
| "loss": 0.1203, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.7373737373737373, | |
| "grad_norm": 4.660423278808594, | |
| "learning_rate": 1.5084175084175085e-05, | |
| "loss": 0.2577, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.73989898989899, | |
| "grad_norm": 5.82301139831543, | |
| "learning_rate": 1.5067340067340068e-05, | |
| "loss": 0.1374, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.7424242424242424, | |
| "grad_norm": 1.974256992340088, | |
| "learning_rate": 1.505050505050505e-05, | |
| "loss": 0.1145, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.7449494949494949, | |
| "grad_norm": 2.0848591327667236, | |
| "learning_rate": 1.5033670033670035e-05, | |
| "loss": 0.1168, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.7474747474747475, | |
| "grad_norm": 2.9144437313079834, | |
| "learning_rate": 1.5016835016835018e-05, | |
| "loss": 0.2312, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 4.225992202758789, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.1782, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.7525252525252525, | |
| "grad_norm": 4.229215145111084, | |
| "learning_rate": 1.4983164983164985e-05, | |
| "loss": 0.1512, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7550505050505051, | |
| "grad_norm": 2.8152060508728027, | |
| "learning_rate": 1.4966329966329967e-05, | |
| "loss": 0.1511, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.7575757575757576, | |
| "grad_norm": 3.588789224624634, | |
| "learning_rate": 1.4949494949494952e-05, | |
| "loss": 0.2879, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.76010101010101, | |
| "grad_norm": 3.0448029041290283, | |
| "learning_rate": 1.4932659932659934e-05, | |
| "loss": 0.1764, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.7626262626262627, | |
| "grad_norm": 1.7650105953216553, | |
| "learning_rate": 1.4915824915824917e-05, | |
| "loss": 0.1692, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.7651515151515151, | |
| "grad_norm": 1.2958582639694214, | |
| "learning_rate": 1.48989898989899e-05, | |
| "loss": 0.1425, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.7676767676767676, | |
| "grad_norm": 2.6900827884674072, | |
| "learning_rate": 1.4882154882154884e-05, | |
| "loss": 0.2965, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.7702020202020202, | |
| "grad_norm": 5.048685550689697, | |
| "learning_rate": 1.4865319865319867e-05, | |
| "loss": 0.2404, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.7727272727272727, | |
| "grad_norm": 3.7027716636657715, | |
| "learning_rate": 1.484848484848485e-05, | |
| "loss": 0.148, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.7752525252525253, | |
| "grad_norm": 4.220457553863525, | |
| "learning_rate": 1.4831649831649832e-05, | |
| "loss": 0.1583, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.7777777777777778, | |
| "grad_norm": 3.0033810138702393, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 0.2244, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.7803030303030303, | |
| "grad_norm": 4.2939043045043945, | |
| "learning_rate": 1.47979797979798e-05, | |
| "loss": 0.1701, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.7828282828282829, | |
| "grad_norm": 2.8431057929992676, | |
| "learning_rate": 1.4781144781144782e-05, | |
| "loss": 0.1284, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7853535353535354, | |
| "grad_norm": 1.8190436363220215, | |
| "learning_rate": 1.4764309764309765e-05, | |
| "loss": 0.22, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.7878787878787878, | |
| "grad_norm": 4.867546558380127, | |
| "learning_rate": 1.4747474747474747e-05, | |
| "loss": 0.308, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.7904040404040404, | |
| "grad_norm": 2.632307767868042, | |
| "learning_rate": 1.473063973063973e-05, | |
| "loss": 0.1137, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.7929292929292929, | |
| "grad_norm": 5.3593339920043945, | |
| "learning_rate": 1.4713804713804716e-05, | |
| "loss": 0.1462, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7954545454545454, | |
| "grad_norm": 1.6120672225952148, | |
| "learning_rate": 1.4696969696969699e-05, | |
| "loss": 0.1291, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.797979797979798, | |
| "grad_norm": 2.3134396076202393, | |
| "learning_rate": 1.4680134680134681e-05, | |
| "loss": 0.2119, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8005050505050505, | |
| "grad_norm": 3.2344558238983154, | |
| "learning_rate": 1.4663299663299664e-05, | |
| "loss": 0.1588, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.803030303030303, | |
| "grad_norm": 3.4733057022094727, | |
| "learning_rate": 1.4646464646464649e-05, | |
| "loss": 0.1466, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.8055555555555556, | |
| "grad_norm": 3.2476141452789307, | |
| "learning_rate": 1.4629629629629631e-05, | |
| "loss": 0.2466, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8080808080808081, | |
| "grad_norm": 5.198851108551025, | |
| "learning_rate": 1.4612794612794614e-05, | |
| "loss": 0.2239, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8106060606060606, | |
| "grad_norm": 2.9820196628570557, | |
| "learning_rate": 1.4595959595959597e-05, | |
| "loss": 0.2467, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.8131313131313131, | |
| "grad_norm": 3.2972326278686523, | |
| "learning_rate": 1.457912457912458e-05, | |
| "loss": 0.1642, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8156565656565656, | |
| "grad_norm": 2.3504161834716797, | |
| "learning_rate": 1.4562289562289564e-05, | |
| "loss": 0.0984, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.8181818181818182, | |
| "grad_norm": 4.491511821746826, | |
| "learning_rate": 1.4545454545454546e-05, | |
| "loss": 0.2576, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8207070707070707, | |
| "grad_norm": 3.1529364585876465, | |
| "learning_rate": 1.4528619528619529e-05, | |
| "loss": 0.1666, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8232323232323232, | |
| "grad_norm": 3.350400447845459, | |
| "learning_rate": 1.4511784511784512e-05, | |
| "loss": 0.2227, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8257575757575758, | |
| "grad_norm": 9.460111618041992, | |
| "learning_rate": 1.4494949494949494e-05, | |
| "loss": 0.2407, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8282828282828283, | |
| "grad_norm": 2.648740768432617, | |
| "learning_rate": 1.447811447811448e-05, | |
| "loss": 0.2017, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.8308080808080808, | |
| "grad_norm": 2.6164615154266357, | |
| "learning_rate": 1.4461279461279463e-05, | |
| "loss": 0.1981, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 8.09026050567627, | |
| "learning_rate": 1.4444444444444446e-05, | |
| "loss": 0.1033, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8358585858585859, | |
| "grad_norm": 2.6296372413635254, | |
| "learning_rate": 1.4427609427609428e-05, | |
| "loss": 0.1782, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.8383838383838383, | |
| "grad_norm": 2.2566778659820557, | |
| "learning_rate": 1.4410774410774413e-05, | |
| "loss": 0.1623, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8409090909090909, | |
| "grad_norm": 2.2079505920410156, | |
| "learning_rate": 1.4393939393939396e-05, | |
| "loss": 0.1723, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.8434343434343434, | |
| "grad_norm": 1.1799554824829102, | |
| "learning_rate": 1.4377104377104378e-05, | |
| "loss": 0.1547, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.8459595959595959, | |
| "grad_norm": 1.7930541038513184, | |
| "learning_rate": 1.4360269360269361e-05, | |
| "loss": 0.0783, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.8484848484848485, | |
| "grad_norm": 1.4967056512832642, | |
| "learning_rate": 1.4343434343434344e-05, | |
| "loss": 0.1916, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.851010101010101, | |
| "grad_norm": 4.922051906585693, | |
| "learning_rate": 1.4326599326599328e-05, | |
| "loss": 0.1304, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.8535353535353535, | |
| "grad_norm": 2.2897162437438965, | |
| "learning_rate": 1.430976430976431e-05, | |
| "loss": 0.2447, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.8560606060606061, | |
| "grad_norm": 2.769693613052368, | |
| "learning_rate": 1.4292929292929293e-05, | |
| "loss": 0.1017, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.8585858585858586, | |
| "grad_norm": 1.7574080228805542, | |
| "learning_rate": 1.4276094276094276e-05, | |
| "loss": 0.0973, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8611111111111112, | |
| "grad_norm": 1.2174127101898193, | |
| "learning_rate": 1.4259259259259259e-05, | |
| "loss": 0.1903, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.8636363636363636, | |
| "grad_norm": 3.2463648319244385, | |
| "learning_rate": 1.4242424242424245e-05, | |
| "loss": 0.0818, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.8661616161616161, | |
| "grad_norm": 6.192782402038574, | |
| "learning_rate": 1.4225589225589227e-05, | |
| "loss": 0.2601, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.8686868686868687, | |
| "grad_norm": 2.965963125228882, | |
| "learning_rate": 1.420875420875421e-05, | |
| "loss": 0.1399, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.8712121212121212, | |
| "grad_norm": 2.0515079498291016, | |
| "learning_rate": 1.4191919191919193e-05, | |
| "loss": 0.2507, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.8737373737373737, | |
| "grad_norm": 2.2152068614959717, | |
| "learning_rate": 1.4175084175084177e-05, | |
| "loss": 0.107, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.8762626262626263, | |
| "grad_norm": 1.5435770750045776, | |
| "learning_rate": 1.415824915824916e-05, | |
| "loss": 0.1083, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.8787878787878788, | |
| "grad_norm": 9.01554012298584, | |
| "learning_rate": 1.4141414141414143e-05, | |
| "loss": 0.1817, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.8813131313131313, | |
| "grad_norm": 4.514248847961426, | |
| "learning_rate": 1.4124579124579125e-05, | |
| "loss": 0.111, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.8838383838383839, | |
| "grad_norm": 2.0948216915130615, | |
| "learning_rate": 1.4107744107744108e-05, | |
| "loss": 0.141, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8863636363636364, | |
| "grad_norm": 1.3202215433120728, | |
| "learning_rate": 1.4090909090909092e-05, | |
| "loss": 0.122, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 2.4289798736572266, | |
| "learning_rate": 1.4074074074074075e-05, | |
| "loss": 0.1194, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.8914141414141414, | |
| "grad_norm": 1.9062124490737915, | |
| "learning_rate": 1.4057239057239058e-05, | |
| "loss": 0.118, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.8939393939393939, | |
| "grad_norm": 4.942960739135742, | |
| "learning_rate": 1.404040404040404e-05, | |
| "loss": 0.1745, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.8964646464646465, | |
| "grad_norm": 1.1973398923873901, | |
| "learning_rate": 1.4023569023569023e-05, | |
| "loss": 0.0998, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.898989898989899, | |
| "grad_norm": 2.537156343460083, | |
| "learning_rate": 1.4006734006734009e-05, | |
| "loss": 0.2053, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9015151515151515, | |
| "grad_norm": 1.6075160503387451, | |
| "learning_rate": 1.3989898989898992e-05, | |
| "loss": 0.1967, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9040404040404041, | |
| "grad_norm": 1.8782991170883179, | |
| "learning_rate": 1.3973063973063974e-05, | |
| "loss": 0.1067, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9065656565656566, | |
| "grad_norm": 1.8922234773635864, | |
| "learning_rate": 1.3956228956228957e-05, | |
| "loss": 0.1048, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 2.411635637283325, | |
| "learning_rate": 1.3939393939393942e-05, | |
| "loss": 0.1557, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9116161616161617, | |
| "grad_norm": 2.8136637210845947, | |
| "learning_rate": 1.3922558922558924e-05, | |
| "loss": 0.207, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9141414141414141, | |
| "grad_norm": 3.0127274990081787, | |
| "learning_rate": 1.3905723905723907e-05, | |
| "loss": 0.1463, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9166666666666666, | |
| "grad_norm": 5.223660469055176, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 0.1901, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.9191919191919192, | |
| "grad_norm": 2.7952096462249756, | |
| "learning_rate": 1.3872053872053872e-05, | |
| "loss": 0.1744, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9217171717171717, | |
| "grad_norm": 4.329316139221191, | |
| "learning_rate": 1.3855218855218857e-05, | |
| "loss": 0.1503, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.9242424242424242, | |
| "grad_norm": 1.9337060451507568, | |
| "learning_rate": 1.383838383838384e-05, | |
| "loss": 0.1652, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.9267676767676768, | |
| "grad_norm": 6.0468645095825195, | |
| "learning_rate": 1.3821548821548822e-05, | |
| "loss": 0.21, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.9292929292929293, | |
| "grad_norm": 6.893640041351318, | |
| "learning_rate": 1.3804713804713805e-05, | |
| "loss": 0.0772, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9318181818181818, | |
| "grad_norm": 11.513550758361816, | |
| "learning_rate": 1.378787878787879e-05, | |
| "loss": 0.1589, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9343434343434344, | |
| "grad_norm": 3.4360713958740234, | |
| "learning_rate": 1.3771043771043773e-05, | |
| "loss": 0.1376, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9368686868686869, | |
| "grad_norm": 1.2209364175796509, | |
| "learning_rate": 1.3754208754208756e-05, | |
| "loss": 0.1248, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.9393939393939394, | |
| "grad_norm": 4.991886615753174, | |
| "learning_rate": 1.3737373737373739e-05, | |
| "loss": 0.1584, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.9419191919191919, | |
| "grad_norm": 5.338135242462158, | |
| "learning_rate": 1.3720538720538721e-05, | |
| "loss": 0.1409, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.9444444444444444, | |
| "grad_norm": 1.5582698583602905, | |
| "learning_rate": 1.3703703703703706e-05, | |
| "loss": 0.0927, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.946969696969697, | |
| "grad_norm": 2.0467121601104736, | |
| "learning_rate": 1.3686868686868689e-05, | |
| "loss": 0.1042, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.9494949494949495, | |
| "grad_norm": 3.5191733837127686, | |
| "learning_rate": 1.3670033670033671e-05, | |
| "loss": 0.1543, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.952020202020202, | |
| "grad_norm": 0.8805956244468689, | |
| "learning_rate": 1.3653198653198654e-05, | |
| "loss": 0.0977, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.9545454545454546, | |
| "grad_norm": 1.0059103965759277, | |
| "learning_rate": 1.3636363636363637e-05, | |
| "loss": 0.111, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.9570707070707071, | |
| "grad_norm": 2.59409761428833, | |
| "learning_rate": 1.3619528619528621e-05, | |
| "loss": 0.0839, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.9595959595959596, | |
| "grad_norm": 3.308858633041382, | |
| "learning_rate": 1.3602693602693604e-05, | |
| "loss": 0.1505, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9621212121212122, | |
| "grad_norm": 7.8376970291137695, | |
| "learning_rate": 1.3585858585858586e-05, | |
| "loss": 0.1694, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.9646464646464646, | |
| "grad_norm": 2.214016914367676, | |
| "learning_rate": 1.3569023569023569e-05, | |
| "loss": 0.2274, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.9671717171717171, | |
| "grad_norm": 2.3379106521606445, | |
| "learning_rate": 1.3552188552188555e-05, | |
| "loss": 0.1562, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.9696969696969697, | |
| "grad_norm": 4.5499043464660645, | |
| "learning_rate": 1.3535353535353538e-05, | |
| "loss": 0.1899, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.9722222222222222, | |
| "grad_norm": 5.938162803649902, | |
| "learning_rate": 1.351851851851852e-05, | |
| "loss": 0.1627, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.9747474747474747, | |
| "grad_norm": 2.1362643241882324, | |
| "learning_rate": 1.3501683501683503e-05, | |
| "loss": 0.1437, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.9772727272727273, | |
| "grad_norm": 5.690845012664795, | |
| "learning_rate": 1.3484848484848486e-05, | |
| "loss": 0.1572, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.9797979797979798, | |
| "grad_norm": 1.170046329498291, | |
| "learning_rate": 1.346801346801347e-05, | |
| "loss": 0.0697, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.9823232323232324, | |
| "grad_norm": 2.7204504013061523, | |
| "learning_rate": 1.3451178451178453e-05, | |
| "loss": 0.1125, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.9848484848484849, | |
| "grad_norm": 2.044360637664795, | |
| "learning_rate": 1.3434343434343436e-05, | |
| "loss": 0.0664, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.9873737373737373, | |
| "grad_norm": 2.956345558166504, | |
| "learning_rate": 1.3417508417508418e-05, | |
| "loss": 0.1254, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.98989898989899, | |
| "grad_norm": 3.5149104595184326, | |
| "learning_rate": 1.3400673400673401e-05, | |
| "loss": 0.243, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.9924242424242424, | |
| "grad_norm": 3.848884344100952, | |
| "learning_rate": 1.3383838383838385e-05, | |
| "loss": 0.2502, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.9949494949494949, | |
| "grad_norm": 5.738306522369385, | |
| "learning_rate": 1.3367003367003368e-05, | |
| "loss": 0.1173, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.9974747474747475, | |
| "grad_norm": 3.4760327339172363, | |
| "learning_rate": 1.335016835016835e-05, | |
| "loss": 0.251, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.0074808597564697, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.2484, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7363636363636363, | |
| "eval_f1": 0.8896302474284127, | |
| "eval_loss": 0.1552901417016983, | |
| "eval_runtime": 38.4517, | |
| "eval_samples_per_second": 22.886, | |
| "eval_steps_per_second": 0.494, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.0025252525252526, | |
| "grad_norm": 2.1957874298095703, | |
| "learning_rate": 1.331649831649832e-05, | |
| "loss": 0.2159, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.005050505050505, | |
| "grad_norm": 1.4837441444396973, | |
| "learning_rate": 1.3299663299663302e-05, | |
| "loss": 0.1777, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.0075757575757576, | |
| "grad_norm": 1.9972238540649414, | |
| "learning_rate": 1.3282828282828285e-05, | |
| "loss": 0.1156, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.0101010101010102, | |
| "grad_norm": 2.300161361694336, | |
| "learning_rate": 1.3265993265993267e-05, | |
| "loss": 0.1664, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0126262626262625, | |
| "grad_norm": 5.714986801147461, | |
| "learning_rate": 1.324915824915825e-05, | |
| "loss": 0.1888, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.0151515151515151, | |
| "grad_norm": 2.443563222885132, | |
| "learning_rate": 1.3232323232323234e-05, | |
| "loss": 0.1548, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.0176767676767677, | |
| "grad_norm": 4.469695091247559, | |
| "learning_rate": 1.3215488215488217e-05, | |
| "loss": 0.197, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.02020202020202, | |
| "grad_norm": 2.7210092544555664, | |
| "learning_rate": 1.31986531986532e-05, | |
| "loss": 0.1858, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.0227272727272727, | |
| "grad_norm": 2.2780253887176514, | |
| "learning_rate": 1.3181818181818183e-05, | |
| "loss": 0.1188, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.0252525252525253, | |
| "grad_norm": 4.452651500701904, | |
| "learning_rate": 1.3164983164983165e-05, | |
| "loss": 0.1813, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.0277777777777777, | |
| "grad_norm": 3.2592673301696777, | |
| "learning_rate": 1.314814814814815e-05, | |
| "loss": 0.1642, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.0303030303030303, | |
| "grad_norm": 2.8365068435668945, | |
| "learning_rate": 1.3131313131313132e-05, | |
| "loss": 0.1951, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.0328282828282829, | |
| "grad_norm": 1.204214334487915, | |
| "learning_rate": 1.3114478114478115e-05, | |
| "loss": 0.0943, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.0353535353535352, | |
| "grad_norm": 3.9835519790649414, | |
| "learning_rate": 1.3097643097643098e-05, | |
| "loss": 0.1914, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.0378787878787878, | |
| "grad_norm": 2.162397861480713, | |
| "learning_rate": 1.3080808080808084e-05, | |
| "loss": 0.1681, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.0404040404040404, | |
| "grad_norm": 5.173393726348877, | |
| "learning_rate": 1.3063973063973066e-05, | |
| "loss": 0.1114, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.0429292929292928, | |
| "grad_norm": 3.75376558303833, | |
| "learning_rate": 1.3047138047138049e-05, | |
| "loss": 0.2572, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.0454545454545454, | |
| "grad_norm": 2.164644479751587, | |
| "learning_rate": 1.3030303030303032e-05, | |
| "loss": 0.0901, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.047979797979798, | |
| "grad_norm": 1.5438755750656128, | |
| "learning_rate": 1.3013468013468014e-05, | |
| "loss": 0.1127, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.0505050505050506, | |
| "grad_norm": 1.1772854328155518, | |
| "learning_rate": 1.2996632996632999e-05, | |
| "loss": 0.1177, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.053030303030303, | |
| "grad_norm": 1.5766481161117554, | |
| "learning_rate": 1.2979797979797981e-05, | |
| "loss": 0.186, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.0555555555555556, | |
| "grad_norm": 3.8098015785217285, | |
| "learning_rate": 1.2962962962962964e-05, | |
| "loss": 0.2378, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.0580808080808082, | |
| "grad_norm": 2.2948317527770996, | |
| "learning_rate": 1.2946127946127947e-05, | |
| "loss": 0.2139, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.0606060606060606, | |
| "grad_norm": 3.4112160205841064, | |
| "learning_rate": 1.2929292929292931e-05, | |
| "loss": 0.1936, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.0631313131313131, | |
| "grad_norm": 6.034069061279297, | |
| "learning_rate": 1.2912457912457914e-05, | |
| "loss": 0.0954, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.0656565656565657, | |
| "grad_norm": 1.4731574058532715, | |
| "learning_rate": 1.2895622895622897e-05, | |
| "loss": 0.1835, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.0681818181818181, | |
| "grad_norm": 2.9212472438812256, | |
| "learning_rate": 1.287878787878788e-05, | |
| "loss": 0.1626, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.0707070707070707, | |
| "grad_norm": 2.7289297580718994, | |
| "learning_rate": 1.2861952861952862e-05, | |
| "loss": 0.0934, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.0732323232323233, | |
| "grad_norm": 2.0637686252593994, | |
| "learning_rate": 1.2845117845117846e-05, | |
| "loss": 0.1157, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.0757575757575757, | |
| "grad_norm": 5.231685638427734, | |
| "learning_rate": 1.2828282828282829e-05, | |
| "loss": 0.1704, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.0782828282828283, | |
| "grad_norm": 2.5837466716766357, | |
| "learning_rate": 1.2811447811447812e-05, | |
| "loss": 0.1756, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.0808080808080809, | |
| "grad_norm": 1.4013397693634033, | |
| "learning_rate": 1.2794612794612794e-05, | |
| "loss": 0.0978, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.0833333333333333, | |
| "grad_norm": 2.0431172847747803, | |
| "learning_rate": 1.2777777777777777e-05, | |
| "loss": 0.1109, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.0858585858585859, | |
| "grad_norm": 1.8190507888793945, | |
| "learning_rate": 1.2760942760942763e-05, | |
| "loss": 0.1234, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.0883838383838385, | |
| "grad_norm": 2.79791259765625, | |
| "learning_rate": 1.2744107744107746e-05, | |
| "loss": 0.1882, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.0909090909090908, | |
| "grad_norm": 1.9243866205215454, | |
| "learning_rate": 1.2727272727272728e-05, | |
| "loss": 0.2005, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.0934343434343434, | |
| "grad_norm": 1.8950653076171875, | |
| "learning_rate": 1.2710437710437711e-05, | |
| "loss": 0.1346, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.095959595959596, | |
| "grad_norm": 3.081726551055908, | |
| "learning_rate": 1.2693602693602696e-05, | |
| "loss": 0.1366, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.0984848484848484, | |
| "grad_norm": 1.9953432083129883, | |
| "learning_rate": 1.2676767676767678e-05, | |
| "loss": 0.1675, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.101010101010101, | |
| "grad_norm": 1.969563603401184, | |
| "learning_rate": 1.2659932659932661e-05, | |
| "loss": 0.0675, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.1035353535353536, | |
| "grad_norm": 2.647690773010254, | |
| "learning_rate": 1.2643097643097644e-05, | |
| "loss": 0.1804, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.106060606060606, | |
| "grad_norm": 1.8048343658447266, | |
| "learning_rate": 1.2626262626262626e-05, | |
| "loss": 0.1463, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.1085858585858586, | |
| "grad_norm": 3.305330514907837, | |
| "learning_rate": 1.260942760942761e-05, | |
| "loss": 0.1339, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 1.416746973991394, | |
| "learning_rate": 1.2592592592592593e-05, | |
| "loss": 0.1269, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.1136363636363635, | |
| "grad_norm": 5.987617015838623, | |
| "learning_rate": 1.2575757575757576e-05, | |
| "loss": 0.1437, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.1161616161616161, | |
| "grad_norm": 2.646730422973633, | |
| "learning_rate": 1.2558922558922559e-05, | |
| "loss": 0.1424, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.1186868686868687, | |
| "grad_norm": 12.504968643188477, | |
| "learning_rate": 1.2542087542087541e-05, | |
| "loss": 0.2163, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.121212121212121, | |
| "grad_norm": 3.575183153152466, | |
| "learning_rate": 1.2525252525252527e-05, | |
| "loss": 0.2721, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.1237373737373737, | |
| "grad_norm": 2.08457088470459, | |
| "learning_rate": 1.250841750841751e-05, | |
| "loss": 0.0727, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.1262626262626263, | |
| "grad_norm": 3.8432371616363525, | |
| "learning_rate": 1.2491582491582493e-05, | |
| "loss": 0.1455, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.128787878787879, | |
| "grad_norm": 1.7455401420593262, | |
| "learning_rate": 1.2474747474747475e-05, | |
| "loss": 0.1424, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.1313131313131313, | |
| "grad_norm": 2.4613749980926514, | |
| "learning_rate": 1.245791245791246e-05, | |
| "loss": 0.2304, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.1338383838383839, | |
| "grad_norm": 0.8536304831504822, | |
| "learning_rate": 1.2441077441077443e-05, | |
| "loss": 0.0967, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.1363636363636362, | |
| "grad_norm": 1.8662675619125366, | |
| "learning_rate": 1.2424242424242425e-05, | |
| "loss": 0.1165, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.1388888888888888, | |
| "grad_norm": 6.983151435852051, | |
| "learning_rate": 1.2407407407407408e-05, | |
| "loss": 0.1065, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.1414141414141414, | |
| "grad_norm": 3.3465776443481445, | |
| "learning_rate": 1.239057239057239e-05, | |
| "loss": 0.1295, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.143939393939394, | |
| "grad_norm": 3.347223997116089, | |
| "learning_rate": 1.2373737373737375e-05, | |
| "loss": 0.0947, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.1464646464646464, | |
| "grad_norm": 2.8548214435577393, | |
| "learning_rate": 1.2356902356902358e-05, | |
| "loss": 0.1031, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.148989898989899, | |
| "grad_norm": 5.722323417663574, | |
| "learning_rate": 1.234006734006734e-05, | |
| "loss": 0.2435, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.1515151515151516, | |
| "grad_norm": 4.030499458312988, | |
| "learning_rate": 1.2323232323232323e-05, | |
| "loss": 0.2088, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.154040404040404, | |
| "grad_norm": 1.1742424964904785, | |
| "learning_rate": 1.2306397306397306e-05, | |
| "loss": 0.149, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.1565656565656566, | |
| "grad_norm": 3.6498565673828125, | |
| "learning_rate": 1.2289562289562292e-05, | |
| "loss": 0.1704, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.1590909090909092, | |
| "grad_norm": 7.278556823730469, | |
| "learning_rate": 1.2272727272727274e-05, | |
| "loss": 0.2622, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.1616161616161615, | |
| "grad_norm": 5.127131462097168, | |
| "learning_rate": 1.2255892255892257e-05, | |
| "loss": 0.1456, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.1641414141414141, | |
| "grad_norm": 5.602865219116211, | |
| "learning_rate": 1.223905723905724e-05, | |
| "loss": 0.2437, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.1666666666666667, | |
| "grad_norm": 2.088778257369995, | |
| "learning_rate": 1.2222222222222224e-05, | |
| "loss": 0.1368, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.1691919191919191, | |
| "grad_norm": 3.3843162059783936, | |
| "learning_rate": 1.2205387205387207e-05, | |
| "loss": 0.1656, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.1717171717171717, | |
| "grad_norm": 6.630326271057129, | |
| "learning_rate": 1.218855218855219e-05, | |
| "loss": 0.1077, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.1742424242424243, | |
| "grad_norm": 1.9552891254425049, | |
| "learning_rate": 1.2171717171717172e-05, | |
| "loss": 0.069, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.1767676767676767, | |
| "grad_norm": 2.806879997253418, | |
| "learning_rate": 1.2154882154882155e-05, | |
| "loss": 0.1524, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.1792929292929293, | |
| "grad_norm": 5.727405071258545, | |
| "learning_rate": 1.213804713804714e-05, | |
| "loss": 0.2153, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.1818181818181819, | |
| "grad_norm": 2.179191827774048, | |
| "learning_rate": 1.2121212121212122e-05, | |
| "loss": 0.0931, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.1843434343434343, | |
| "grad_norm": 4.29899263381958, | |
| "learning_rate": 1.2104377104377105e-05, | |
| "loss": 0.3174, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.1868686868686869, | |
| "grad_norm": 3.667917490005493, | |
| "learning_rate": 1.2087542087542087e-05, | |
| "loss": 0.1926, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.1893939393939394, | |
| "grad_norm": 2.626540422439575, | |
| "learning_rate": 1.207070707070707e-05, | |
| "loss": 0.1145, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.1919191919191918, | |
| "grad_norm": 2.2297780513763428, | |
| "learning_rate": 1.2053872053872056e-05, | |
| "loss": 0.1552, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.1944444444444444, | |
| "grad_norm": 5.271230220794678, | |
| "learning_rate": 1.2037037037037039e-05, | |
| "loss": 0.0832, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.196969696969697, | |
| "grad_norm": 3.2105331420898438, | |
| "learning_rate": 1.2020202020202021e-05, | |
| "loss": 0.1523, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.1994949494949494, | |
| "grad_norm": 3.91426157951355, | |
| "learning_rate": 1.2003367003367004e-05, | |
| "loss": 0.0819, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.202020202020202, | |
| "grad_norm": 3.227917194366455, | |
| "learning_rate": 1.1986531986531988e-05, | |
| "loss": 0.2343, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.2045454545454546, | |
| "grad_norm": 3.9899652004241943, | |
| "learning_rate": 1.1969696969696971e-05, | |
| "loss": 0.1466, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.2070707070707072, | |
| "grad_norm": 1.5494883060455322, | |
| "learning_rate": 1.1952861952861954e-05, | |
| "loss": 0.1265, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.2095959595959596, | |
| "grad_norm": 3.4442131519317627, | |
| "learning_rate": 1.1936026936026937e-05, | |
| "loss": 0.1355, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.2121212121212122, | |
| "grad_norm": 2.505126714706421, | |
| "learning_rate": 1.191919191919192e-05, | |
| "loss": 0.1529, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.2146464646464645, | |
| "grad_norm": 3.982832431793213, | |
| "learning_rate": 1.1902356902356904e-05, | |
| "loss": 0.1428, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.2171717171717171, | |
| "grad_norm": 2.3964552879333496, | |
| "learning_rate": 1.1885521885521886e-05, | |
| "loss": 0.1062, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.2196969696969697, | |
| "grad_norm": 11.200109481811523, | |
| "learning_rate": 1.1868686868686869e-05, | |
| "loss": 0.1621, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.2222222222222223, | |
| "grad_norm": 3.8272476196289062, | |
| "learning_rate": 1.1851851851851852e-05, | |
| "loss": 0.1814, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.2247474747474747, | |
| "grad_norm": 2.3707261085510254, | |
| "learning_rate": 1.1835016835016838e-05, | |
| "loss": 0.1253, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.2272727272727273, | |
| "grad_norm": 6.070870399475098, | |
| "learning_rate": 1.181818181818182e-05, | |
| "loss": 0.1005, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.22979797979798, | |
| "grad_norm": 2.2206804752349854, | |
| "learning_rate": 1.1801346801346803e-05, | |
| "loss": 0.0647, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.2323232323232323, | |
| "grad_norm": 1.8281488418579102, | |
| "learning_rate": 1.1784511784511786e-05, | |
| "loss": 0.0642, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.2348484848484849, | |
| "grad_norm": 2.690546751022339, | |
| "learning_rate": 1.1767676767676768e-05, | |
| "loss": 0.1824, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.2373737373737375, | |
| "grad_norm": 8.035049438476562, | |
| "learning_rate": 1.1750841750841753e-05, | |
| "loss": 0.2047, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.2398989898989898, | |
| "grad_norm": 2.475928783416748, | |
| "learning_rate": 1.1734006734006735e-05, | |
| "loss": 0.0882, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.2424242424242424, | |
| "grad_norm": 2.2812142372131348, | |
| "learning_rate": 1.1717171717171718e-05, | |
| "loss": 0.1556, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.244949494949495, | |
| "grad_norm": 1.1551276445388794, | |
| "learning_rate": 1.17003367003367e-05, | |
| "loss": 0.0905, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.2474747474747474, | |
| "grad_norm": 1.8045101165771484, | |
| "learning_rate": 1.1683501683501684e-05, | |
| "loss": 0.1399, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 2.3668212890625, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 0.1231, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.2525252525252526, | |
| "grad_norm": 4.466938018798828, | |
| "learning_rate": 1.164983164983165e-05, | |
| "loss": 0.1478, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.255050505050505, | |
| "grad_norm": 6.409605026245117, | |
| "learning_rate": 1.1632996632996633e-05, | |
| "loss": 0.0892, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.2575757575757576, | |
| "grad_norm": 3.112314224243164, | |
| "learning_rate": 1.1616161616161616e-05, | |
| "loss": 0.2378, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.2601010101010102, | |
| "grad_norm": 2.3682100772857666, | |
| "learning_rate": 1.1599326599326602e-05, | |
| "loss": 0.0708, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.2626262626262625, | |
| "grad_norm": 1.4351693391799927, | |
| "learning_rate": 1.1582491582491585e-05, | |
| "loss": 0.1386, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.2651515151515151, | |
| "grad_norm": 1.9613323211669922, | |
| "learning_rate": 1.1565656565656567e-05, | |
| "loss": 0.0956, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.2676767676767677, | |
| "grad_norm": 3.0842106342315674, | |
| "learning_rate": 1.154882154882155e-05, | |
| "loss": 0.1891, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.2702020202020203, | |
| "grad_norm": 1.4221595525741577, | |
| "learning_rate": 1.1531986531986533e-05, | |
| "loss": 0.1238, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.2727272727272727, | |
| "grad_norm": 2.890872001647949, | |
| "learning_rate": 1.1515151515151517e-05, | |
| "loss": 0.1804, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.2752525252525253, | |
| "grad_norm": 2.4430460929870605, | |
| "learning_rate": 1.14983164983165e-05, | |
| "loss": 0.1566, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.2777777777777777, | |
| "grad_norm": 4.546942710876465, | |
| "learning_rate": 1.1481481481481482e-05, | |
| "loss": 0.088, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.2803030303030303, | |
| "grad_norm": 3.6536505222320557, | |
| "learning_rate": 1.1464646464646465e-05, | |
| "loss": 0.2054, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.2828282828282829, | |
| "grad_norm": 1.6071276664733887, | |
| "learning_rate": 1.1447811447811448e-05, | |
| "loss": 0.1044, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.2853535353535355, | |
| "grad_norm": 3.828359365463257, | |
| "learning_rate": 1.1430976430976432e-05, | |
| "loss": 0.1279, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.2878787878787878, | |
| "grad_norm": 3.433269500732422, | |
| "learning_rate": 1.1414141414141415e-05, | |
| "loss": 0.1197, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.2904040404040404, | |
| "grad_norm": 1.6743693351745605, | |
| "learning_rate": 1.1397306397306398e-05, | |
| "loss": 0.1057, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.2929292929292928, | |
| "grad_norm": 5.027686595916748, | |
| "learning_rate": 1.138047138047138e-05, | |
| "loss": 0.1509, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.2954545454545454, | |
| "grad_norm": 1.6235765218734741, | |
| "learning_rate": 1.1363636363636366e-05, | |
| "loss": 0.1147, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.297979797979798, | |
| "grad_norm": 2.351604700088501, | |
| "learning_rate": 1.1346801346801349e-05, | |
| "loss": 0.0844, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.3005050505050506, | |
| "grad_norm": 4.877485275268555, | |
| "learning_rate": 1.1329966329966332e-05, | |
| "loss": 0.1426, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.303030303030303, | |
| "grad_norm": 1.512285828590393, | |
| "learning_rate": 1.1313131313131314e-05, | |
| "loss": 0.1225, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.3055555555555556, | |
| "grad_norm": 2.247408628463745, | |
| "learning_rate": 1.1296296296296297e-05, | |
| "loss": 0.105, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.308080808080808, | |
| "grad_norm": 1.3952003717422485, | |
| "learning_rate": 1.1279461279461281e-05, | |
| "loss": 0.0738, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.3106060606060606, | |
| "grad_norm": 1.185707688331604, | |
| "learning_rate": 1.1262626262626264e-05, | |
| "loss": 0.0644, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.3131313131313131, | |
| "grad_norm": 1.2581713199615479, | |
| "learning_rate": 1.1245791245791247e-05, | |
| "loss": 0.1065, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.3156565656565657, | |
| "grad_norm": 5.530824661254883, | |
| "learning_rate": 1.122895622895623e-05, | |
| "loss": 0.1829, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.3181818181818181, | |
| "grad_norm": 2.5609781742095947, | |
| "learning_rate": 1.1212121212121212e-05, | |
| "loss": 0.2349, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.3207070707070707, | |
| "grad_norm": 2.8253445625305176, | |
| "learning_rate": 1.1195286195286197e-05, | |
| "loss": 0.1403, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.3232323232323233, | |
| "grad_norm": 4.705146312713623, | |
| "learning_rate": 1.117845117845118e-05, | |
| "loss": 0.2046, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.3257575757575757, | |
| "grad_norm": 4.86195182800293, | |
| "learning_rate": 1.1161616161616162e-05, | |
| "loss": 0.1632, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.3282828282828283, | |
| "grad_norm": 2.6909475326538086, | |
| "learning_rate": 1.1144781144781145e-05, | |
| "loss": 0.0647, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.3308080808080809, | |
| "grad_norm": 1.6233677864074707, | |
| "learning_rate": 1.112794612794613e-05, | |
| "loss": 0.1891, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 1.8622492551803589, | |
| "learning_rate": 1.1111111111111113e-05, | |
| "loss": 0.1507, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.3358585858585859, | |
| "grad_norm": 3.173917770385742, | |
| "learning_rate": 1.1094276094276096e-05, | |
| "loss": 0.1618, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.3383838383838385, | |
| "grad_norm": 5.947041034698486, | |
| "learning_rate": 1.1077441077441079e-05, | |
| "loss": 0.1692, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.3409090909090908, | |
| "grad_norm": 2.8621153831481934, | |
| "learning_rate": 1.1060606060606061e-05, | |
| "loss": 0.218, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.3434343434343434, | |
| "grad_norm": 3.8391976356506348, | |
| "learning_rate": 1.1043771043771046e-05, | |
| "loss": 0.2624, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.345959595959596, | |
| "grad_norm": 4.155307769775391, | |
| "learning_rate": 1.1026936026936028e-05, | |
| "loss": 0.1923, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.3484848484848486, | |
| "grad_norm": 2.796172618865967, | |
| "learning_rate": 1.1010101010101011e-05, | |
| "loss": 0.1886, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.351010101010101, | |
| "grad_norm": 3.59019136428833, | |
| "learning_rate": 1.0993265993265994e-05, | |
| "loss": 0.1854, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.3535353535353536, | |
| "grad_norm": 2.077014684677124, | |
| "learning_rate": 1.0976430976430978e-05, | |
| "loss": 0.0919, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.356060606060606, | |
| "grad_norm": 2.869927167892456, | |
| "learning_rate": 1.0959595959595961e-05, | |
| "loss": 0.1828, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.3585858585858586, | |
| "grad_norm": 2.4379348754882812, | |
| "learning_rate": 1.0942760942760944e-05, | |
| "loss": 0.1257, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.3611111111111112, | |
| "grad_norm": 2.5572493076324463, | |
| "learning_rate": 1.0925925925925926e-05, | |
| "loss": 0.169, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 7.126609802246094, | |
| "learning_rate": 1.0909090909090909e-05, | |
| "loss": 0.1178, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.3661616161616161, | |
| "grad_norm": 3.068500280380249, | |
| "learning_rate": 1.0892255892255893e-05, | |
| "loss": 0.1507, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.3686868686868687, | |
| "grad_norm": 3.512056350708008, | |
| "learning_rate": 1.0875420875420876e-05, | |
| "loss": 0.1816, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.371212121212121, | |
| "grad_norm": 4.917716979980469, | |
| "learning_rate": 1.0858585858585859e-05, | |
| "loss": 0.1301, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.3737373737373737, | |
| "grad_norm": 2.207784414291382, | |
| "learning_rate": 1.0841750841750841e-05, | |
| "loss": 0.1251, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.3762626262626263, | |
| "grad_norm": 4.091345310211182, | |
| "learning_rate": 1.0824915824915824e-05, | |
| "loss": 0.2324, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.378787878787879, | |
| "grad_norm": 3.5930373668670654, | |
| "learning_rate": 1.080808080808081e-05, | |
| "loss": 0.2327, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.3813131313131313, | |
| "grad_norm": 0.9397197365760803, | |
| "learning_rate": 1.0791245791245793e-05, | |
| "loss": 0.0909, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.3838383838383839, | |
| "grad_norm": 1.6392264366149902, | |
| "learning_rate": 1.0774410774410775e-05, | |
| "loss": 0.065, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.3863636363636362, | |
| "grad_norm": 3.9621989727020264, | |
| "learning_rate": 1.0757575757575758e-05, | |
| "loss": 0.0888, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.3888888888888888, | |
| "grad_norm": 1.8630791902542114, | |
| "learning_rate": 1.0740740740740742e-05, | |
| "loss": 0.0705, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.3914141414141414, | |
| "grad_norm": 2.29435133934021, | |
| "learning_rate": 1.0723905723905725e-05, | |
| "loss": 0.1626, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.393939393939394, | |
| "grad_norm": 3.3439769744873047, | |
| "learning_rate": 1.0707070707070708e-05, | |
| "loss": 0.1741, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.3964646464646464, | |
| "grad_norm": 3.894381523132324, | |
| "learning_rate": 1.069023569023569e-05, | |
| "loss": 0.2404, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.398989898989899, | |
| "grad_norm": 2.4891560077667236, | |
| "learning_rate": 1.0673400673400673e-05, | |
| "loss": 0.1854, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.4015151515151514, | |
| "grad_norm": 2.0606627464294434, | |
| "learning_rate": 1.0656565656565658e-05, | |
| "loss": 0.1896, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.404040404040404, | |
| "grad_norm": 1.3142637014389038, | |
| "learning_rate": 1.063973063973064e-05, | |
| "loss": 0.0976, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.4065656565656566, | |
| "grad_norm": 1.7551708221435547, | |
| "learning_rate": 1.0622895622895623e-05, | |
| "loss": 0.1013, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.4090909090909092, | |
| "grad_norm": 2.389742612838745, | |
| "learning_rate": 1.0606060606060606e-05, | |
| "loss": 0.0802, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.4116161616161615, | |
| "grad_norm": 5.079484462738037, | |
| "learning_rate": 1.0589225589225588e-05, | |
| "loss": 0.1066, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.4141414141414141, | |
| "grad_norm": 1.7105693817138672, | |
| "learning_rate": 1.0572390572390574e-05, | |
| "loss": 0.0917, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.4166666666666667, | |
| "grad_norm": 2.481248617172241, | |
| "learning_rate": 1.0555555555555557e-05, | |
| "loss": 0.0901, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.4191919191919191, | |
| "grad_norm": 4.0751495361328125, | |
| "learning_rate": 1.053872053872054e-05, | |
| "loss": 0.1493, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.4217171717171717, | |
| "grad_norm": 2.6854546070098877, | |
| "learning_rate": 1.0521885521885522e-05, | |
| "loss": 0.1751, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.4242424242424243, | |
| "grad_norm": 7.801976203918457, | |
| "learning_rate": 1.0505050505050507e-05, | |
| "loss": 0.09, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.4267676767676767, | |
| "grad_norm": 1.9461811780929565, | |
| "learning_rate": 1.048821548821549e-05, | |
| "loss": 0.0539, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.4292929292929293, | |
| "grad_norm": 1.0220575332641602, | |
| "learning_rate": 1.0471380471380472e-05, | |
| "loss": 0.0629, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.4318181818181819, | |
| "grad_norm": 3.8231167793273926, | |
| "learning_rate": 1.0454545454545455e-05, | |
| "loss": 0.0949, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.4343434343434343, | |
| "grad_norm": 4.782219886779785, | |
| "learning_rate": 1.0437710437710438e-05, | |
| "loss": 0.2014, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.4368686868686869, | |
| "grad_norm": 1.7311866283416748, | |
| "learning_rate": 1.0420875420875422e-05, | |
| "loss": 0.1586, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.4393939393939394, | |
| "grad_norm": 1.6415760517120361, | |
| "learning_rate": 1.0404040404040405e-05, | |
| "loss": 0.0832, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.441919191919192, | |
| "grad_norm": 3.1272056102752686, | |
| "learning_rate": 1.0387205387205387e-05, | |
| "loss": 0.1086, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.4444444444444444, | |
| "grad_norm": 1.3914761543273926, | |
| "learning_rate": 1.037037037037037e-05, | |
| "loss": 0.0353, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.446969696969697, | |
| "grad_norm": 2.641190528869629, | |
| "learning_rate": 1.0353535353535353e-05, | |
| "loss": 0.1669, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.4494949494949494, | |
| "grad_norm": 1.974168300628662, | |
| "learning_rate": 1.0336700336700339e-05, | |
| "loss": 0.1607, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.452020202020202, | |
| "grad_norm": 1.5384374856948853, | |
| "learning_rate": 1.0319865319865321e-05, | |
| "loss": 0.066, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.4545454545454546, | |
| "grad_norm": 2.555971145629883, | |
| "learning_rate": 1.0303030303030304e-05, | |
| "loss": 0.1178, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.4570707070707072, | |
| "grad_norm": 3.460545301437378, | |
| "learning_rate": 1.0286195286195287e-05, | |
| "loss": 0.163, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.4595959595959596, | |
| "grad_norm": 2.9746346473693848, | |
| "learning_rate": 1.0269360269360271e-05, | |
| "loss": 0.2179, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.4621212121212122, | |
| "grad_norm": 1.8450326919555664, | |
| "learning_rate": 1.0252525252525254e-05, | |
| "loss": 0.0707, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.4646464646464645, | |
| "grad_norm": 1.596994400024414, | |
| "learning_rate": 1.0235690235690236e-05, | |
| "loss": 0.0712, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.4671717171717171, | |
| "grad_norm": 2.0924813747406006, | |
| "learning_rate": 1.021885521885522e-05, | |
| "loss": 0.1328, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.4696969696969697, | |
| "grad_norm": 2.734872579574585, | |
| "learning_rate": 1.0202020202020202e-05, | |
| "loss": 0.2434, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.4722222222222223, | |
| "grad_norm": 2.7146146297454834, | |
| "learning_rate": 1.0185185185185186e-05, | |
| "loss": 0.1953, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.4747474747474747, | |
| "grad_norm": 2.9375946521759033, | |
| "learning_rate": 1.0168350168350169e-05, | |
| "loss": 0.1656, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.4772727272727273, | |
| "grad_norm": 2.132500648498535, | |
| "learning_rate": 1.0151515151515152e-05, | |
| "loss": 0.112, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.4797979797979797, | |
| "grad_norm": 2.179478883743286, | |
| "learning_rate": 1.0134680134680134e-05, | |
| "loss": 0.0973, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.4823232323232323, | |
| "grad_norm": 3.4565017223358154, | |
| "learning_rate": 1.0117845117845117e-05, | |
| "loss": 0.1256, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.4848484848484849, | |
| "grad_norm": 1.9032288789749146, | |
| "learning_rate": 1.0101010101010103e-05, | |
| "loss": 0.0915, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.4873737373737375, | |
| "grad_norm": 8.383233070373535, | |
| "learning_rate": 1.0084175084175086e-05, | |
| "loss": 0.1538, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.4898989898989898, | |
| "grad_norm": 4.910621166229248, | |
| "learning_rate": 1.0067340067340068e-05, | |
| "loss": 0.1799, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.4924242424242424, | |
| "grad_norm": 2.6224441528320312, | |
| "learning_rate": 1.0050505050505051e-05, | |
| "loss": 0.155, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.494949494949495, | |
| "grad_norm": 3.4021310806274414, | |
| "learning_rate": 1.0033670033670035e-05, | |
| "loss": 0.1196, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.4974747474747474, | |
| "grad_norm": 2.7120611667633057, | |
| "learning_rate": 1.0016835016835018e-05, | |
| "loss": 0.08, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.137710452079773, | |
| "learning_rate": 1e-05, | |
| "loss": 0.078, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.5025252525252526, | |
| "grad_norm": 2.9225590229034424, | |
| "learning_rate": 9.983164983164983e-06, | |
| "loss": 0.2341, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.5050505050505052, | |
| "grad_norm": 1.6335861682891846, | |
| "learning_rate": 9.966329966329968e-06, | |
| "loss": 0.1192, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.5075757575757576, | |
| "grad_norm": 2.710495948791504, | |
| "learning_rate": 9.94949494949495e-06, | |
| "loss": 0.1237, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.51010101010101, | |
| "grad_norm": 2.903191328048706, | |
| "learning_rate": 9.932659932659933e-06, | |
| "loss": 0.0784, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.5126262626262625, | |
| "grad_norm": 3.359354019165039, | |
| "learning_rate": 9.915824915824916e-06, | |
| "loss": 0.2288, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.5151515151515151, | |
| "grad_norm": 2.92893648147583, | |
| "learning_rate": 9.8989898989899e-06, | |
| "loss": 0.1936, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.5176767676767677, | |
| "grad_norm": 3.7757456302642822, | |
| "learning_rate": 9.882154882154883e-06, | |
| "loss": 0.2431, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.5202020202020203, | |
| "grad_norm": 2.7293543815612793, | |
| "learning_rate": 9.865319865319866e-06, | |
| "loss": 0.1475, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.5227272727272727, | |
| "grad_norm": 4.0022873878479, | |
| "learning_rate": 9.84848484848485e-06, | |
| "loss": 0.1055, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.5252525252525253, | |
| "grad_norm": 4.107253074645996, | |
| "learning_rate": 9.831649831649833e-06, | |
| "loss": 0.1306, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.5277777777777777, | |
| "grad_norm": 2.5653955936431885, | |
| "learning_rate": 9.814814814814815e-06, | |
| "loss": 0.1317, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.5303030303030303, | |
| "grad_norm": 2.9474546909332275, | |
| "learning_rate": 9.797979797979798e-06, | |
| "loss": 0.1486, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.5328282828282829, | |
| "grad_norm": 1.209354043006897, | |
| "learning_rate": 9.781144781144782e-06, | |
| "loss": 0.1019, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.5353535353535355, | |
| "grad_norm": 2.3573384284973145, | |
| "learning_rate": 9.764309764309765e-06, | |
| "loss": 0.0792, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.5378787878787878, | |
| "grad_norm": 2.1612727642059326, | |
| "learning_rate": 9.747474747474748e-06, | |
| "loss": 0.1402, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.5404040404040404, | |
| "grad_norm": 1.7895665168762207, | |
| "learning_rate": 9.730639730639732e-06, | |
| "loss": 0.118, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.5429292929292928, | |
| "grad_norm": 1.4610426425933838, | |
| "learning_rate": 9.713804713804715e-06, | |
| "loss": 0.1428, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.5454545454545454, | |
| "grad_norm": 2.2483487129211426, | |
| "learning_rate": 9.696969696969698e-06, | |
| "loss": 0.1732, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.547979797979798, | |
| "grad_norm": 5.811710834503174, | |
| "learning_rate": 9.68013468013468e-06, | |
| "loss": 0.1112, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.5505050505050506, | |
| "grad_norm": 6.1415815353393555, | |
| "learning_rate": 9.663299663299665e-06, | |
| "loss": 0.138, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.553030303030303, | |
| "grad_norm": 1.204952597618103, | |
| "learning_rate": 9.646464646464647e-06, | |
| "loss": 0.0998, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.5555555555555556, | |
| "grad_norm": 2.5513834953308105, | |
| "learning_rate": 9.62962962962963e-06, | |
| "loss": 0.0971, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.558080808080808, | |
| "grad_norm": 4.2005181312561035, | |
| "learning_rate": 9.612794612794614e-06, | |
| "loss": 0.1096, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.5606060606060606, | |
| "grad_norm": 2.5134921073913574, | |
| "learning_rate": 9.595959595959597e-06, | |
| "loss": 0.1817, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.5631313131313131, | |
| "grad_norm": 3.6018764972686768, | |
| "learning_rate": 9.57912457912458e-06, | |
| "loss": 0.0849, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.5656565656565657, | |
| "grad_norm": 1.6318095922470093, | |
| "learning_rate": 9.562289562289562e-06, | |
| "loss": 0.0661, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.5681818181818183, | |
| "grad_norm": 3.3563179969787598, | |
| "learning_rate": 9.545454545454547e-06, | |
| "loss": 0.1141, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.5707070707070707, | |
| "grad_norm": 2.074086904525757, | |
| "learning_rate": 9.52861952861953e-06, | |
| "loss": 0.1207, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.573232323232323, | |
| "grad_norm": 2.5464348793029785, | |
| "learning_rate": 9.511784511784512e-06, | |
| "loss": 0.1951, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.5757575757575757, | |
| "grad_norm": 5.284518718719482, | |
| "learning_rate": 9.494949494949497e-06, | |
| "loss": 0.1868, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.5782828282828283, | |
| "grad_norm": 2.5765862464904785, | |
| "learning_rate": 9.47811447811448e-06, | |
| "loss": 0.187, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.5808080808080809, | |
| "grad_norm": 4.491573333740234, | |
| "learning_rate": 9.461279461279462e-06, | |
| "loss": 0.1033, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.5833333333333335, | |
| "grad_norm": 4.794037818908691, | |
| "learning_rate": 9.444444444444445e-06, | |
| "loss": 0.1638, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.5858585858585859, | |
| "grad_norm": 1.3392722606658936, | |
| "learning_rate": 9.427609427609429e-06, | |
| "loss": 0.0673, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.5883838383838382, | |
| "grad_norm": 2.59481143951416, | |
| "learning_rate": 9.410774410774412e-06, | |
| "loss": 0.1506, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.5909090909090908, | |
| "grad_norm": 1.926398754119873, | |
| "learning_rate": 9.393939393939396e-06, | |
| "loss": 0.0817, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.5934343434343434, | |
| "grad_norm": 3.796034812927246, | |
| "learning_rate": 9.377104377104379e-06, | |
| "loss": 0.1526, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.595959595959596, | |
| "grad_norm": 3.06642484664917, | |
| "learning_rate": 9.360269360269361e-06, | |
| "loss": 0.158, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.5984848484848486, | |
| "grad_norm": 2.3332364559173584, | |
| "learning_rate": 9.343434343434344e-06, | |
| "loss": 0.1412, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.601010101010101, | |
| "grad_norm": 13.372260093688965, | |
| "learning_rate": 9.326599326599327e-06, | |
| "loss": 0.0737, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.6035353535353534, | |
| "grad_norm": 2.744684934616089, | |
| "learning_rate": 9.309764309764311e-06, | |
| "loss": 0.1845, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.606060606060606, | |
| "grad_norm": 4.262907981872559, | |
| "learning_rate": 9.292929292929294e-06, | |
| "loss": 0.2397, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.6085858585858586, | |
| "grad_norm": 2.6066222190856934, | |
| "learning_rate": 9.276094276094278e-06, | |
| "loss": 0.0889, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.6111111111111112, | |
| "grad_norm": 5.02886962890625, | |
| "learning_rate": 9.25925925925926e-06, | |
| "loss": 0.3094, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.6136363636363638, | |
| "grad_norm": 1.2655010223388672, | |
| "learning_rate": 9.242424242424244e-06, | |
| "loss": 0.1043, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.6161616161616161, | |
| "grad_norm": 2.1592676639556885, | |
| "learning_rate": 9.225589225589226e-06, | |
| "loss": 0.1541, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.6186868686868687, | |
| "grad_norm": 1.4674041271209717, | |
| "learning_rate": 9.208754208754209e-06, | |
| "loss": 0.0803, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.621212121212121, | |
| "grad_norm": 1.3324946165084839, | |
| "learning_rate": 9.191919191919193e-06, | |
| "loss": 0.1697, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.6237373737373737, | |
| "grad_norm": 4.259162902832031, | |
| "learning_rate": 9.175084175084176e-06, | |
| "loss": 0.1512, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.6262626262626263, | |
| "grad_norm": 1.390676498413086, | |
| "learning_rate": 9.15824915824916e-06, | |
| "loss": 0.0868, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.628787878787879, | |
| "grad_norm": 2.026618242263794, | |
| "learning_rate": 9.141414141414143e-06, | |
| "loss": 0.0679, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.6313131313131313, | |
| "grad_norm": 3.238002061843872, | |
| "learning_rate": 9.124579124579126e-06, | |
| "loss": 0.1706, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.6338383838383839, | |
| "grad_norm": 1.8931351900100708, | |
| "learning_rate": 9.107744107744108e-06, | |
| "loss": 0.0645, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.6363636363636362, | |
| "grad_norm": 1.5486174821853638, | |
| "learning_rate": 9.090909090909091e-06, | |
| "loss": 0.139, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.6388888888888888, | |
| "grad_norm": 1.988709807395935, | |
| "learning_rate": 9.074074074074075e-06, | |
| "loss": 0.2034, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.6414141414141414, | |
| "grad_norm": 2.529951572418213, | |
| "learning_rate": 9.057239057239058e-06, | |
| "loss": 0.0846, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.643939393939394, | |
| "grad_norm": 6.469368934631348, | |
| "learning_rate": 9.040404040404042e-06, | |
| "loss": 0.1614, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.6464646464646466, | |
| "grad_norm": 1.5296707153320312, | |
| "learning_rate": 9.023569023569025e-06, | |
| "loss": 0.074, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.648989898989899, | |
| "grad_norm": 3.4863650798797607, | |
| "learning_rate": 9.006734006734008e-06, | |
| "loss": 0.1207, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.6515151515151514, | |
| "grad_norm": 4.34932804107666, | |
| "learning_rate": 8.98989898989899e-06, | |
| "loss": 0.1209, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.654040404040404, | |
| "grad_norm": 2.05281400680542, | |
| "learning_rate": 8.973063973063973e-06, | |
| "loss": 0.085, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.6565656565656566, | |
| "grad_norm": 5.7974677085876465, | |
| "learning_rate": 8.956228956228958e-06, | |
| "loss": 0.1432, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.6590909090909092, | |
| "grad_norm": 13.796086311340332, | |
| "learning_rate": 8.93939393939394e-06, | |
| "loss": 0.1743, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.6616161616161618, | |
| "grad_norm": 1.2835731506347656, | |
| "learning_rate": 8.922558922558923e-06, | |
| "loss": 0.1032, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.6641414141414141, | |
| "grad_norm": 1.330572247505188, | |
| "learning_rate": 8.905723905723906e-06, | |
| "loss": 0.1194, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 2.2639822959899902, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 0.173, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.6691919191919191, | |
| "grad_norm": 2.2905423641204834, | |
| "learning_rate": 8.872053872053873e-06, | |
| "loss": 0.1052, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.6717171717171717, | |
| "grad_norm": 6.86669397354126, | |
| "learning_rate": 8.855218855218855e-06, | |
| "loss": 0.1658, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.6742424242424243, | |
| "grad_norm": 1.9337157011032104, | |
| "learning_rate": 8.83838383838384e-06, | |
| "loss": 0.139, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.676767676767677, | |
| "grad_norm": 1.348889708518982, | |
| "learning_rate": 8.821548821548822e-06, | |
| "loss": 0.1243, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.6792929292929293, | |
| "grad_norm": 1.4817837476730347, | |
| "learning_rate": 8.804713804713805e-06, | |
| "loss": 0.0633, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.6818181818181817, | |
| "grad_norm": 3.970458507537842, | |
| "learning_rate": 8.787878787878788e-06, | |
| "loss": 0.1427, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.6843434343434343, | |
| "grad_norm": 6.352334976196289, | |
| "learning_rate": 8.77104377104377e-06, | |
| "loss": 0.1437, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.6868686868686869, | |
| "grad_norm": 0.6994425654411316, | |
| "learning_rate": 8.754208754208755e-06, | |
| "loss": 0.0398, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.6893939393939394, | |
| "grad_norm": 4.77330207824707, | |
| "learning_rate": 8.737373737373738e-06, | |
| "loss": 0.1319, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.691919191919192, | |
| "grad_norm": 3.855506420135498, | |
| "learning_rate": 8.720538720538722e-06, | |
| "loss": 0.1467, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.6944444444444444, | |
| "grad_norm": 4.957710266113281, | |
| "learning_rate": 8.703703703703705e-06, | |
| "loss": 0.1414, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.696969696969697, | |
| "grad_norm": 2.640568971633911, | |
| "learning_rate": 8.686868686868687e-06, | |
| "loss": 0.2187, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.6994949494949494, | |
| "grad_norm": 3.6980481147766113, | |
| "learning_rate": 8.67003367003367e-06, | |
| "loss": 0.1197, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.702020202020202, | |
| "grad_norm": 3.419555187225342, | |
| "learning_rate": 8.653198653198653e-06, | |
| "loss": 0.164, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.7045454545454546, | |
| "grad_norm": 3.6955320835113525, | |
| "learning_rate": 8.636363636363637e-06, | |
| "loss": 0.1821, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.7070707070707072, | |
| "grad_norm": 1.2104640007019043, | |
| "learning_rate": 8.61952861952862e-06, | |
| "loss": 0.0747, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.7095959595959596, | |
| "grad_norm": 3.7086238861083984, | |
| "learning_rate": 8.602693602693604e-06, | |
| "loss": 0.1402, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.7121212121212122, | |
| "grad_norm": 1.6543469429016113, | |
| "learning_rate": 8.585858585858587e-06, | |
| "loss": 0.0869, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.7146464646464645, | |
| "grad_norm": 4.50585412979126, | |
| "learning_rate": 8.56902356902357e-06, | |
| "loss": 0.0926, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.7171717171717171, | |
| "grad_norm": 2.2351365089416504, | |
| "learning_rate": 8.552188552188552e-06, | |
| "loss": 0.0886, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.7196969696969697, | |
| "grad_norm": 1.8379594087600708, | |
| "learning_rate": 8.535353535353535e-06, | |
| "loss": 0.0671, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.7222222222222223, | |
| "grad_norm": 2.2375223636627197, | |
| "learning_rate": 8.518518518518519e-06, | |
| "loss": 0.1455, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.7247474747474747, | |
| "grad_norm": 1.758262038230896, | |
| "learning_rate": 8.501683501683502e-06, | |
| "loss": 0.067, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.7272727272727273, | |
| "grad_norm": 1.4083460569381714, | |
| "learning_rate": 8.484848484848486e-06, | |
| "loss": 0.0492, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.7297979797979797, | |
| "grad_norm": 2.864366292953491, | |
| "learning_rate": 8.468013468013469e-06, | |
| "loss": 0.1483, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.7323232323232323, | |
| "grad_norm": 1.695508360862732, | |
| "learning_rate": 8.451178451178452e-06, | |
| "loss": 0.0559, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.7348484848484849, | |
| "grad_norm": 5.666776180267334, | |
| "learning_rate": 8.434343434343434e-06, | |
| "loss": 0.1655, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.7373737373737375, | |
| "grad_norm": 4.942101001739502, | |
| "learning_rate": 8.417508417508419e-06, | |
| "loss": 0.0525, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.73989898989899, | |
| "grad_norm": 2.151745557785034, | |
| "learning_rate": 8.400673400673401e-06, | |
| "loss": 0.137, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.7424242424242424, | |
| "grad_norm": 2.4058070182800293, | |
| "learning_rate": 8.383838383838384e-06, | |
| "loss": 0.0805, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.7449494949494948, | |
| "grad_norm": 4.35892915725708, | |
| "learning_rate": 8.367003367003368e-06, | |
| "loss": 0.0764, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.7474747474747474, | |
| "grad_norm": 1.3333408832550049, | |
| "learning_rate": 8.350168350168351e-06, | |
| "loss": 0.0576, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 4.402344703674316, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.1821, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.7525252525252526, | |
| "grad_norm": 4.358558654785156, | |
| "learning_rate": 8.316498316498316e-06, | |
| "loss": 0.1767, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.7550505050505052, | |
| "grad_norm": 2.602311372756958, | |
| "learning_rate": 8.2996632996633e-06, | |
| "loss": 0.1474, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.7575757575757576, | |
| "grad_norm": 3.5266802310943604, | |
| "learning_rate": 8.282828282828283e-06, | |
| "loss": 0.1917, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.76010101010101, | |
| "grad_norm": 5.978867053985596, | |
| "learning_rate": 8.265993265993266e-06, | |
| "loss": 0.1884, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.7626262626262625, | |
| "grad_norm": 2.8455355167388916, | |
| "learning_rate": 8.24915824915825e-06, | |
| "loss": 0.1302, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.7651515151515151, | |
| "grad_norm": 4.014955520629883, | |
| "learning_rate": 8.232323232323233e-06, | |
| "loss": 0.1731, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.7676767676767677, | |
| "grad_norm": 4.700746536254883, | |
| "learning_rate": 8.215488215488216e-06, | |
| "loss": 0.1765, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.7702020202020203, | |
| "grad_norm": 3.462686061859131, | |
| "learning_rate": 8.198653198653199e-06, | |
| "loss": 0.0926, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.7727272727272727, | |
| "grad_norm": 1.5547245740890503, | |
| "learning_rate": 8.181818181818183e-06, | |
| "loss": 0.0325, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.7752525252525253, | |
| "grad_norm": 2.274096727371216, | |
| "learning_rate": 8.164983164983166e-06, | |
| "loss": 0.0642, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 2.7937772274017334, | |
| "learning_rate": 8.148148148148148e-06, | |
| "loss": 0.1084, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.7803030303030303, | |
| "grad_norm": 1.720742106437683, | |
| "learning_rate": 8.131313131313133e-06, | |
| "loss": 0.101, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.7828282828282829, | |
| "grad_norm": 4.517067909240723, | |
| "learning_rate": 8.114478114478115e-06, | |
| "loss": 0.1059, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.7853535353535355, | |
| "grad_norm": 2.7258083820343018, | |
| "learning_rate": 8.097643097643098e-06, | |
| "loss": 0.1329, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.7878787878787878, | |
| "grad_norm": 2.474179983139038, | |
| "learning_rate": 8.08080808080808e-06, | |
| "loss": 0.1007, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.7904040404040404, | |
| "grad_norm": 2.3355281352996826, | |
| "learning_rate": 8.063973063973065e-06, | |
| "loss": 0.1863, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.7929292929292928, | |
| "grad_norm": 3.959667444229126, | |
| "learning_rate": 8.047138047138048e-06, | |
| "loss": 0.0882, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.7954545454545454, | |
| "grad_norm": 5.953159809112549, | |
| "learning_rate": 8.03030303030303e-06, | |
| "loss": 0.1024, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.797979797979798, | |
| "grad_norm": 3.069732427597046, | |
| "learning_rate": 8.013468013468015e-06, | |
| "loss": 0.084, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.8005050505050506, | |
| "grad_norm": 3.06427001953125, | |
| "learning_rate": 7.996632996632998e-06, | |
| "loss": 0.2176, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.803030303030303, | |
| "grad_norm": 5.320972442626953, | |
| "learning_rate": 7.97979797979798e-06, | |
| "loss": 0.1877, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.8055555555555556, | |
| "grad_norm": 3.8155035972595215, | |
| "learning_rate": 7.962962962962963e-06, | |
| "loss": 0.14, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.808080808080808, | |
| "grad_norm": 2.791696310043335, | |
| "learning_rate": 7.946127946127947e-06, | |
| "loss": 0.0694, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.8106060606060606, | |
| "grad_norm": 1.7592320442199707, | |
| "learning_rate": 7.92929292929293e-06, | |
| "loss": 0.0426, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.8131313131313131, | |
| "grad_norm": 8.306157112121582, | |
| "learning_rate": 7.912457912457913e-06, | |
| "loss": 0.1455, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.8156565656565657, | |
| "grad_norm": 3.3673255443573, | |
| "learning_rate": 7.895622895622897e-06, | |
| "loss": 0.1412, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.8181818181818183, | |
| "grad_norm": 3.755908966064453, | |
| "learning_rate": 7.87878787878788e-06, | |
| "loss": 0.1096, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.8207070707070707, | |
| "grad_norm": 1.6641695499420166, | |
| "learning_rate": 7.861952861952862e-06, | |
| "loss": 0.1231, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.823232323232323, | |
| "grad_norm": 3.577352285385132, | |
| "learning_rate": 7.845117845117845e-06, | |
| "loss": 0.07, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.8257575757575757, | |
| "grad_norm": 3.3195016384124756, | |
| "learning_rate": 7.82828282828283e-06, | |
| "loss": 0.2131, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.8282828282828283, | |
| "grad_norm": 2.113675594329834, | |
| "learning_rate": 7.811447811447812e-06, | |
| "loss": 0.077, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.8308080808080809, | |
| "grad_norm": 2.248725414276123, | |
| "learning_rate": 7.794612794612795e-06, | |
| "loss": 0.1106, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.8333333333333335, | |
| "grad_norm": 3.8289642333984375, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 0.0919, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.8358585858585859, | |
| "grad_norm": 2.4651291370391846, | |
| "learning_rate": 7.760942760942762e-06, | |
| "loss": 0.0724, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.8383838383838382, | |
| "grad_norm": 4.7950358390808105, | |
| "learning_rate": 7.744107744107745e-06, | |
| "loss": 0.1148, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.8409090909090908, | |
| "grad_norm": 8.350399017333984, | |
| "learning_rate": 7.727272727272727e-06, | |
| "loss": 0.1526, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.8434343434343434, | |
| "grad_norm": 2.8314502239227295, | |
| "learning_rate": 7.710437710437712e-06, | |
| "loss": 0.1417, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.845959595959596, | |
| "grad_norm": 3.023043155670166, | |
| "learning_rate": 7.693602693602694e-06, | |
| "loss": 0.1971, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.8484848484848486, | |
| "grad_norm": 1.6119197607040405, | |
| "learning_rate": 7.676767676767677e-06, | |
| "loss": 0.0754, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.851010101010101, | |
| "grad_norm": 5.730337142944336, | |
| "learning_rate": 7.659932659932661e-06, | |
| "loss": 0.0786, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.8535353535353534, | |
| "grad_norm": 0.6242827773094177, | |
| "learning_rate": 7.643097643097644e-06, | |
| "loss": 0.0237, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.856060606060606, | |
| "grad_norm": 3.5328094959259033, | |
| "learning_rate": 7.6262626262626275e-06, | |
| "loss": 0.1308, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.8585858585858586, | |
| "grad_norm": 2.5661208629608154, | |
| "learning_rate": 7.60942760942761e-06, | |
| "loss": 0.1202, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.8611111111111112, | |
| "grad_norm": 1.5449377298355103, | |
| "learning_rate": 7.592592592592594e-06, | |
| "loss": 0.0886, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.8636363636363638, | |
| "grad_norm": 4.09519100189209, | |
| "learning_rate": 7.5757575757575764e-06, | |
| "loss": 0.1398, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.8661616161616161, | |
| "grad_norm": 3.5463318824768066, | |
| "learning_rate": 7.558922558922559e-06, | |
| "loss": 0.1023, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.8686868686868687, | |
| "grad_norm": 2.5558698177337646, | |
| "learning_rate": 7.542087542087543e-06, | |
| "loss": 0.1335, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.871212121212121, | |
| "grad_norm": 1.5937213897705078, | |
| "learning_rate": 7.525252525252525e-06, | |
| "loss": 0.0928, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.8737373737373737, | |
| "grad_norm": 2.4672536849975586, | |
| "learning_rate": 7.508417508417509e-06, | |
| "loss": 0.2052, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.8762626262626263, | |
| "grad_norm": 1.365451693534851, | |
| "learning_rate": 7.491582491582492e-06, | |
| "loss": 0.1414, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.878787878787879, | |
| "grad_norm": 0.8678475618362427, | |
| "learning_rate": 7.474747474747476e-06, | |
| "loss": 0.0786, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.8813131313131313, | |
| "grad_norm": 3.8532655239105225, | |
| "learning_rate": 7.457912457912459e-06, | |
| "loss": 0.2117, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.8838383838383839, | |
| "grad_norm": 5.75984525680542, | |
| "learning_rate": 7.441077441077442e-06, | |
| "loss": 0.1238, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.8863636363636362, | |
| "grad_norm": 1.1473771333694458, | |
| "learning_rate": 7.424242424242425e-06, | |
| "loss": 0.0895, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.8888888888888888, | |
| "grad_norm": 1.526085376739502, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.1088, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.8914141414141414, | |
| "grad_norm": 4.124934673309326, | |
| "learning_rate": 7.390572390572391e-06, | |
| "loss": 0.0826, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.893939393939394, | |
| "grad_norm": 6.274197101593018, | |
| "learning_rate": 7.373737373737374e-06, | |
| "loss": 0.1513, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.8964646464646466, | |
| "grad_norm": 1.4224315881729126, | |
| "learning_rate": 7.356902356902358e-06, | |
| "loss": 0.1091, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.898989898989899, | |
| "grad_norm": 4.506265640258789, | |
| "learning_rate": 7.340067340067341e-06, | |
| "loss": 0.1017, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.9015151515151514, | |
| "grad_norm": 1.0609605312347412, | |
| "learning_rate": 7.323232323232324e-06, | |
| "loss": 0.0597, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.904040404040404, | |
| "grad_norm": 3.9881186485290527, | |
| "learning_rate": 7.306397306397307e-06, | |
| "loss": 0.1244, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.9065656565656566, | |
| "grad_norm": 1.8625434637069702, | |
| "learning_rate": 7.28956228956229e-06, | |
| "loss": 0.147, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.9090909090909092, | |
| "grad_norm": 8.011527061462402, | |
| "learning_rate": 7.272727272727273e-06, | |
| "loss": 0.0823, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.9116161616161618, | |
| "grad_norm": 2.0574049949645996, | |
| "learning_rate": 7.255892255892256e-06, | |
| "loss": 0.0667, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.9141414141414141, | |
| "grad_norm": 1.5154629945755005, | |
| "learning_rate": 7.23905723905724e-06, | |
| "loss": 0.1717, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.9166666666666665, | |
| "grad_norm": 2.105567455291748, | |
| "learning_rate": 7.222222222222223e-06, | |
| "loss": 0.1676, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.9191919191919191, | |
| "grad_norm": 1.6874696016311646, | |
| "learning_rate": 7.2053872053872064e-06, | |
| "loss": 0.1089, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.9217171717171717, | |
| "grad_norm": 2.980811357498169, | |
| "learning_rate": 7.188552188552189e-06, | |
| "loss": 0.1806, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.9242424242424243, | |
| "grad_norm": 2.0981791019439697, | |
| "learning_rate": 7.171717171717172e-06, | |
| "loss": 0.0859, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.926767676767677, | |
| "grad_norm": 1.835482120513916, | |
| "learning_rate": 7.154882154882155e-06, | |
| "loss": 0.1364, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.9292929292929293, | |
| "grad_norm": 4.000125885009766, | |
| "learning_rate": 7.138047138047138e-06, | |
| "loss": 0.1354, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.9318181818181817, | |
| "grad_norm": 4.924983978271484, | |
| "learning_rate": 7.121212121212122e-06, | |
| "loss": 0.1154, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.9343434343434343, | |
| "grad_norm": 1.5840011835098267, | |
| "learning_rate": 7.104377104377105e-06, | |
| "loss": 0.1016, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.9368686868686869, | |
| "grad_norm": 1.5436311960220337, | |
| "learning_rate": 7.087542087542089e-06, | |
| "loss": 0.1168, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.9393939393939394, | |
| "grad_norm": 2.4922754764556885, | |
| "learning_rate": 7.070707070707071e-06, | |
| "loss": 0.1187, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.941919191919192, | |
| "grad_norm": 3.206899881362915, | |
| "learning_rate": 7.053872053872054e-06, | |
| "loss": 0.1184, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.9444444444444444, | |
| "grad_norm": 4.3798828125, | |
| "learning_rate": 7.0370370370370375e-06, | |
| "loss": 0.1997, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.946969696969697, | |
| "grad_norm": 1.3223721981048584, | |
| "learning_rate": 7.02020202020202e-06, | |
| "loss": 0.073, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.9494949494949494, | |
| "grad_norm": 2.0767436027526855, | |
| "learning_rate": 7.0033670033670045e-06, | |
| "loss": 0.1251, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.952020202020202, | |
| "grad_norm": 1.8936235904693604, | |
| "learning_rate": 6.986531986531987e-06, | |
| "loss": 0.0956, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.9545454545454546, | |
| "grad_norm": 6.86482048034668, | |
| "learning_rate": 6.969696969696971e-06, | |
| "loss": 0.1269, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.9570707070707072, | |
| "grad_norm": 2.885071039199829, | |
| "learning_rate": 6.9528619528619534e-06, | |
| "loss": 0.0974, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.9595959595959596, | |
| "grad_norm": 4.58144474029541, | |
| "learning_rate": 6.936026936026936e-06, | |
| "loss": 0.3284, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.9621212121212122, | |
| "grad_norm": 4.064563274383545, | |
| "learning_rate": 6.91919191919192e-06, | |
| "loss": 0.1659, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.9646464646464645, | |
| "grad_norm": 1.5637133121490479, | |
| "learning_rate": 6.902356902356902e-06, | |
| "loss": 0.1369, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.9671717171717171, | |
| "grad_norm": 2.932281494140625, | |
| "learning_rate": 6.885521885521887e-06, | |
| "loss": 0.0865, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.9696969696969697, | |
| "grad_norm": 1.1261810064315796, | |
| "learning_rate": 6.868686868686869e-06, | |
| "loss": 0.1245, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.9722222222222223, | |
| "grad_norm": 3.991880178451538, | |
| "learning_rate": 6.851851851851853e-06, | |
| "loss": 0.188, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.9747474747474747, | |
| "grad_norm": 1.7972675561904907, | |
| "learning_rate": 6.835016835016836e-06, | |
| "loss": 0.1832, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.9772727272727273, | |
| "grad_norm": 2.0975348949432373, | |
| "learning_rate": 6.818181818181818e-06, | |
| "loss": 0.0416, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.9797979797979797, | |
| "grad_norm": 2.6938462257385254, | |
| "learning_rate": 6.801346801346802e-06, | |
| "loss": 0.1471, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.9823232323232323, | |
| "grad_norm": 2.680722951889038, | |
| "learning_rate": 6.7845117845117845e-06, | |
| "loss": 0.1255, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.9848484848484849, | |
| "grad_norm": 4.923444747924805, | |
| "learning_rate": 6.767676767676769e-06, | |
| "loss": 0.1087, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.9873737373737375, | |
| "grad_norm": 3.3977975845336914, | |
| "learning_rate": 6.7508417508417515e-06, | |
| "loss": 0.1198, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.98989898989899, | |
| "grad_norm": 2.9619626998901367, | |
| "learning_rate": 6.734006734006735e-06, | |
| "loss": 0.104, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.9924242424242424, | |
| "grad_norm": 1.3148123025894165, | |
| "learning_rate": 6.717171717171718e-06, | |
| "loss": 0.0854, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.9949494949494948, | |
| "grad_norm": 1.7584114074707031, | |
| "learning_rate": 6.7003367003367004e-06, | |
| "loss": 0.136, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.9974747474747474, | |
| "grad_norm": 8.245304107666016, | |
| "learning_rate": 6.683501683501684e-06, | |
| "loss": 0.1525, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.205091118812561, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.0617, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.759090909090909, | |
| "eval_f1": 0.8949265317438315, | |
| "eval_loss": 0.13888753950595856, | |
| "eval_runtime": 43.47, | |
| "eval_samples_per_second": 20.244, | |
| "eval_steps_per_second": 0.437, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.0025252525252526, | |
| "grad_norm": 2.01649808883667, | |
| "learning_rate": 6.649831649831651e-06, | |
| "loss": 0.1073, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 2.005050505050505, | |
| "grad_norm": 6.579789161682129, | |
| "learning_rate": 6.632996632996634e-06, | |
| "loss": 0.1704, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.007575757575758, | |
| "grad_norm": 2.323598623275757, | |
| "learning_rate": 6.616161616161617e-06, | |
| "loss": 0.1983, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 2.01010101010101, | |
| "grad_norm": 2.126936674118042, | |
| "learning_rate": 6.5993265993266e-06, | |
| "loss": 0.1026, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.0126262626262625, | |
| "grad_norm": 1.035873293876648, | |
| "learning_rate": 6.582491582491583e-06, | |
| "loss": 0.0488, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 2.015151515151515, | |
| "grad_norm": 2.2837603092193604, | |
| "learning_rate": 6.565656565656566e-06, | |
| "loss": 0.1894, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.0176767676767677, | |
| "grad_norm": 7.866192817687988, | |
| "learning_rate": 6.548821548821549e-06, | |
| "loss": 0.2146, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 2.0202020202020203, | |
| "grad_norm": 4.450189590454102, | |
| "learning_rate": 6.531986531986533e-06, | |
| "loss": 0.0731, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.022727272727273, | |
| "grad_norm": 2.2905592918395996, | |
| "learning_rate": 6.515151515151516e-06, | |
| "loss": 0.0736, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 2.025252525252525, | |
| "grad_norm": 1.7175313234329224, | |
| "learning_rate": 6.498316498316499e-06, | |
| "loss": 0.1525, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 2.0277777777777777, | |
| "grad_norm": 3.22578763961792, | |
| "learning_rate": 6.481481481481482e-06, | |
| "loss": 0.1093, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 2.0303030303030303, | |
| "grad_norm": 1.8242607116699219, | |
| "learning_rate": 6.464646464646466e-06, | |
| "loss": 0.1138, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 2.032828282828283, | |
| "grad_norm": 2.7062501907348633, | |
| "learning_rate": 6.447811447811448e-06, | |
| "loss": 0.0932, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 2.0353535353535355, | |
| "grad_norm": 1.2171615362167358, | |
| "learning_rate": 6.430976430976431e-06, | |
| "loss": 0.0692, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 2.037878787878788, | |
| "grad_norm": 5.950473308563232, | |
| "learning_rate": 6.4141414141414145e-06, | |
| "loss": 0.264, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 2.04040404040404, | |
| "grad_norm": 4.191005706787109, | |
| "learning_rate": 6.397306397306397e-06, | |
| "loss": 0.0666, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 2.042929292929293, | |
| "grad_norm": 3.99367618560791, | |
| "learning_rate": 6.3804713804713816e-06, | |
| "loss": 0.1528, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 2.0454545454545454, | |
| "grad_norm": 0.7054336667060852, | |
| "learning_rate": 6.363636363636364e-06, | |
| "loss": 0.0477, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.047979797979798, | |
| "grad_norm": 3.71244478225708, | |
| "learning_rate": 6.346801346801348e-06, | |
| "loss": 0.1287, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 2.0505050505050506, | |
| "grad_norm": 3.171588897705078, | |
| "learning_rate": 6.3299663299663304e-06, | |
| "loss": 0.087, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 2.053030303030303, | |
| "grad_norm": 1.4060291051864624, | |
| "learning_rate": 6.313131313131313e-06, | |
| "loss": 0.1075, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 2.0555555555555554, | |
| "grad_norm": 2.073291540145874, | |
| "learning_rate": 6.296296296296297e-06, | |
| "loss": 0.0653, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 2.058080808080808, | |
| "grad_norm": 6.517178058624268, | |
| "learning_rate": 6.279461279461279e-06, | |
| "loss": 0.1234, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 2.0606060606060606, | |
| "grad_norm": 11.045914649963379, | |
| "learning_rate": 6.262626262626264e-06, | |
| "loss": 0.1273, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.063131313131313, | |
| "grad_norm": 1.7747228145599365, | |
| "learning_rate": 6.245791245791246e-06, | |
| "loss": 0.1618, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 2.0656565656565657, | |
| "grad_norm": 1.5213820934295654, | |
| "learning_rate": 6.22895622895623e-06, | |
| "loss": 0.106, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 2.0681818181818183, | |
| "grad_norm": 1.4155036211013794, | |
| "learning_rate": 6.212121212121213e-06, | |
| "loss": 0.0759, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 2.0707070707070705, | |
| "grad_norm": 1.0913715362548828, | |
| "learning_rate": 6.195286195286195e-06, | |
| "loss": 0.0908, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.073232323232323, | |
| "grad_norm": 4.059940814971924, | |
| "learning_rate": 6.178451178451179e-06, | |
| "loss": 0.1544, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 2.0757575757575757, | |
| "grad_norm": 1.2122453451156616, | |
| "learning_rate": 6.1616161616161615e-06, | |
| "loss": 0.0959, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 2.0782828282828283, | |
| "grad_norm": 2.069533109664917, | |
| "learning_rate": 6.144781144781146e-06, | |
| "loss": 0.0488, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 2.080808080808081, | |
| "grad_norm": 1.685937523841858, | |
| "learning_rate": 6.1279461279461286e-06, | |
| "loss": 0.1315, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 2.0833333333333335, | |
| "grad_norm": 3.1984479427337646, | |
| "learning_rate": 6.111111111111112e-06, | |
| "loss": 0.117, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 2.0858585858585856, | |
| "grad_norm": 3.422079086303711, | |
| "learning_rate": 6.094276094276095e-06, | |
| "loss": 0.1104, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.0883838383838382, | |
| "grad_norm": 1.3577680587768555, | |
| "learning_rate": 6.0774410774410774e-06, | |
| "loss": 0.0583, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 2.090909090909091, | |
| "grad_norm": 2.0477261543273926, | |
| "learning_rate": 6.060606060606061e-06, | |
| "loss": 0.2046, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.0934343434343434, | |
| "grad_norm": 2.3478550910949707, | |
| "learning_rate": 6.043771043771044e-06, | |
| "loss": 0.1482, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 2.095959595959596, | |
| "grad_norm": 1.0065677165985107, | |
| "learning_rate": 6.026936026936028e-06, | |
| "loss": 0.0322, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.0984848484848486, | |
| "grad_norm": 2.0075066089630127, | |
| "learning_rate": 6.010101010101011e-06, | |
| "loss": 0.1149, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 2.101010101010101, | |
| "grad_norm": 2.6007728576660156, | |
| "learning_rate": 5.993265993265994e-06, | |
| "loss": 0.1527, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.1035353535353534, | |
| "grad_norm": 2.199341058731079, | |
| "learning_rate": 5.976430976430977e-06, | |
| "loss": 0.0776, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 2.106060606060606, | |
| "grad_norm": 2.4440650939941406, | |
| "learning_rate": 5.95959595959596e-06, | |
| "loss": 0.0902, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.1085858585858586, | |
| "grad_norm": 1.7312313318252563, | |
| "learning_rate": 5.942760942760943e-06, | |
| "loss": 0.0723, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 2.111111111111111, | |
| "grad_norm": 2.232499122619629, | |
| "learning_rate": 5.925925925925926e-06, | |
| "loss": 0.1162, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.1136363636363638, | |
| "grad_norm": 2.4596776962280273, | |
| "learning_rate": 5.90909090909091e-06, | |
| "loss": 0.1808, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 2.1161616161616164, | |
| "grad_norm": 4.917704105377197, | |
| "learning_rate": 5.892255892255893e-06, | |
| "loss": 0.1169, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.1186868686868685, | |
| "grad_norm": 3.716489553451538, | |
| "learning_rate": 5.875420875420876e-06, | |
| "loss": 0.0809, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 2.121212121212121, | |
| "grad_norm": 4.413392066955566, | |
| "learning_rate": 5.858585858585859e-06, | |
| "loss": 0.1834, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.1237373737373737, | |
| "grad_norm": 1.872174859046936, | |
| "learning_rate": 5.841750841750842e-06, | |
| "loss": 0.1462, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 2.1262626262626263, | |
| "grad_norm": 3.7974910736083984, | |
| "learning_rate": 5.824915824915825e-06, | |
| "loss": 0.0939, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.128787878787879, | |
| "grad_norm": 1.4759098291397095, | |
| "learning_rate": 5.808080808080808e-06, | |
| "loss": 0.0713, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 2.1313131313131315, | |
| "grad_norm": 2.160318613052368, | |
| "learning_rate": 5.791245791245792e-06, | |
| "loss": 0.2184, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.1338383838383836, | |
| "grad_norm": 2.485347270965576, | |
| "learning_rate": 5.774410774410775e-06, | |
| "loss": 0.1732, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 2.1363636363636362, | |
| "grad_norm": 0.8993260264396667, | |
| "learning_rate": 5.7575757575757586e-06, | |
| "loss": 0.0909, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.138888888888889, | |
| "grad_norm": 1.436485767364502, | |
| "learning_rate": 5.740740740740741e-06, | |
| "loss": 0.0394, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 2.1414141414141414, | |
| "grad_norm": 0.9625018835067749, | |
| "learning_rate": 5.723905723905724e-06, | |
| "loss": 0.0382, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.143939393939394, | |
| "grad_norm": 1.4799765348434448, | |
| "learning_rate": 5.7070707070707075e-06, | |
| "loss": 0.094, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 2.1464646464646466, | |
| "grad_norm": 3.625958204269409, | |
| "learning_rate": 5.69023569023569e-06, | |
| "loss": 0.1394, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.148989898989899, | |
| "grad_norm": 1.3515892028808594, | |
| "learning_rate": 5.6734006734006745e-06, | |
| "loss": 0.1068, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 2.1515151515151514, | |
| "grad_norm": 1.9746239185333252, | |
| "learning_rate": 5.656565656565657e-06, | |
| "loss": 0.1404, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.154040404040404, | |
| "grad_norm": 3.5076723098754883, | |
| "learning_rate": 5.639730639730641e-06, | |
| "loss": 0.1236, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 2.1565656565656566, | |
| "grad_norm": 1.3625231981277466, | |
| "learning_rate": 5.622895622895623e-06, | |
| "loss": 0.0698, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.159090909090909, | |
| "grad_norm": 2.441847324371338, | |
| "learning_rate": 5.606060606060606e-06, | |
| "loss": 0.1029, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 2.1616161616161618, | |
| "grad_norm": 3.1259806156158447, | |
| "learning_rate": 5.58922558922559e-06, | |
| "loss": 0.105, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.1641414141414144, | |
| "grad_norm": 5.127650260925293, | |
| "learning_rate": 5.572390572390572e-06, | |
| "loss": 0.1202, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 2.1666666666666665, | |
| "grad_norm": 1.3531067371368408, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.0812, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.169191919191919, | |
| "grad_norm": 5.6110920906066895, | |
| "learning_rate": 5.538720538720539e-06, | |
| "loss": 0.0898, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 2.1717171717171717, | |
| "grad_norm": 2.4415769577026367, | |
| "learning_rate": 5.521885521885523e-06, | |
| "loss": 0.231, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.1742424242424243, | |
| "grad_norm": 3.1470277309417725, | |
| "learning_rate": 5.5050505050505056e-06, | |
| "loss": 0.0609, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 2.176767676767677, | |
| "grad_norm": 2.625209093093872, | |
| "learning_rate": 5.488215488215489e-06, | |
| "loss": 0.1126, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.179292929292929, | |
| "grad_norm": 9.551560401916504, | |
| "learning_rate": 5.471380471380472e-06, | |
| "loss": 0.0788, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 2.1818181818181817, | |
| "grad_norm": 2.088391065597534, | |
| "learning_rate": 5.4545454545454545e-06, | |
| "loss": 0.1863, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.1843434343434343, | |
| "grad_norm": 2.9452109336853027, | |
| "learning_rate": 5.437710437710438e-06, | |
| "loss": 0.1449, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 2.186868686868687, | |
| "grad_norm": 2.6503803730010986, | |
| "learning_rate": 5.420875420875421e-06, | |
| "loss": 0.1128, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.1893939393939394, | |
| "grad_norm": 6.2185587882995605, | |
| "learning_rate": 5.404040404040405e-06, | |
| "loss": 0.2125, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 2.191919191919192, | |
| "grad_norm": 1.5772247314453125, | |
| "learning_rate": 5.387205387205388e-06, | |
| "loss": 0.117, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.1944444444444446, | |
| "grad_norm": 4.648830413818359, | |
| "learning_rate": 5.370370370370371e-06, | |
| "loss": 0.1646, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 2.196969696969697, | |
| "grad_norm": 2.4655864238739014, | |
| "learning_rate": 5.353535353535354e-06, | |
| "loss": 0.0718, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.1994949494949494, | |
| "grad_norm": 1.3793933391571045, | |
| "learning_rate": 5.336700336700337e-06, | |
| "loss": 0.1087, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 2.202020202020202, | |
| "grad_norm": 2.5595717430114746, | |
| "learning_rate": 5.31986531986532e-06, | |
| "loss": 0.1177, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.2045454545454546, | |
| "grad_norm": 4.922736167907715, | |
| "learning_rate": 5.303030303030303e-06, | |
| "loss": 0.0976, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 2.207070707070707, | |
| "grad_norm": 2.5227010250091553, | |
| "learning_rate": 5.286195286195287e-06, | |
| "loss": 0.1744, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.20959595959596, | |
| "grad_norm": 1.9036935567855835, | |
| "learning_rate": 5.26936026936027e-06, | |
| "loss": 0.1184, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 2.212121212121212, | |
| "grad_norm": 1.5138955116271973, | |
| "learning_rate": 5.252525252525253e-06, | |
| "loss": 0.1052, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.2146464646464645, | |
| "grad_norm": 2.0152668952941895, | |
| "learning_rate": 5.235690235690236e-06, | |
| "loss": 0.0952, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 2.217171717171717, | |
| "grad_norm": 13.834627151489258, | |
| "learning_rate": 5.218855218855219e-06, | |
| "loss": 0.0788, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.2196969696969697, | |
| "grad_norm": 2.163512945175171, | |
| "learning_rate": 5.202020202020202e-06, | |
| "loss": 0.1584, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 1.2292289733886719, | |
| "learning_rate": 5.185185185185185e-06, | |
| "loss": 0.1036, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.224747474747475, | |
| "grad_norm": 2.1541199684143066, | |
| "learning_rate": 5.168350168350169e-06, | |
| "loss": 0.0994, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 2.227272727272727, | |
| "grad_norm": 2.9435672760009766, | |
| "learning_rate": 5.151515151515152e-06, | |
| "loss": 0.1071, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.2297979797979797, | |
| "grad_norm": 4.930500507354736, | |
| "learning_rate": 5.1346801346801356e-06, | |
| "loss": 0.2478, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 2.2323232323232323, | |
| "grad_norm": 13.543425559997559, | |
| "learning_rate": 5.117845117845118e-06, | |
| "loss": 0.0954, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.234848484848485, | |
| "grad_norm": 1.8627355098724365, | |
| "learning_rate": 5.101010101010101e-06, | |
| "loss": 0.2159, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 2.2373737373737375, | |
| "grad_norm": 1.9947534799575806, | |
| "learning_rate": 5.0841750841750845e-06, | |
| "loss": 0.0787, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.23989898989899, | |
| "grad_norm": 5.217324733734131, | |
| "learning_rate": 5.067340067340067e-06, | |
| "loss": 0.1191, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 2.242424242424242, | |
| "grad_norm": 1.540475845336914, | |
| "learning_rate": 5.0505050505050515e-06, | |
| "loss": 0.0773, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.244949494949495, | |
| "grad_norm": 4.879143714904785, | |
| "learning_rate": 5.033670033670034e-06, | |
| "loss": 0.1236, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 2.2474747474747474, | |
| "grad_norm": 4.0901641845703125, | |
| "learning_rate": 5.016835016835018e-06, | |
| "loss": 0.0619, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 1.8532190322875977, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0767, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 2.2525252525252526, | |
| "grad_norm": 3.4842894077301025, | |
| "learning_rate": 4.983164983164984e-06, | |
| "loss": 0.0984, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.255050505050505, | |
| "grad_norm": 1.4197821617126465, | |
| "learning_rate": 4.966329966329967e-06, | |
| "loss": 0.0441, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 2.257575757575758, | |
| "grad_norm": 1.3725179433822632, | |
| "learning_rate": 4.94949494949495e-06, | |
| "loss": 0.0636, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.26010101010101, | |
| "grad_norm": 3.0550286769866943, | |
| "learning_rate": 4.932659932659933e-06, | |
| "loss": 0.1222, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 2.2626262626262625, | |
| "grad_norm": 1.3511768579483032, | |
| "learning_rate": 4.915824915824916e-06, | |
| "loss": 0.102, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.265151515151515, | |
| "grad_norm": 2.8341774940490723, | |
| "learning_rate": 4.898989898989899e-06, | |
| "loss": 0.1176, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 2.2676767676767677, | |
| "grad_norm": 5.220274925231934, | |
| "learning_rate": 4.8821548821548826e-06, | |
| "loss": 0.1828, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.2702020202020203, | |
| "grad_norm": 2.0751826763153076, | |
| "learning_rate": 4.865319865319866e-06, | |
| "loss": 0.0472, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 2.2727272727272725, | |
| "grad_norm": 1.0210275650024414, | |
| "learning_rate": 4.848484848484849e-06, | |
| "loss": 0.1155, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.275252525252525, | |
| "grad_norm": 2.244605541229248, | |
| "learning_rate": 4.831649831649832e-06, | |
| "loss": 0.1298, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 2.2777777777777777, | |
| "grad_norm": 1.2191749811172485, | |
| "learning_rate": 4.814814814814815e-06, | |
| "loss": 0.0553, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.2803030303030303, | |
| "grad_norm": 2.009685516357422, | |
| "learning_rate": 4.7979797979797985e-06, | |
| "loss": 0.2061, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 2.282828282828283, | |
| "grad_norm": 2.537893056869507, | |
| "learning_rate": 4.781144781144781e-06, | |
| "loss": 0.1638, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.2853535353535355, | |
| "grad_norm": 1.8385186195373535, | |
| "learning_rate": 4.764309764309765e-06, | |
| "loss": 0.1457, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 2.287878787878788, | |
| "grad_norm": 3.0959956645965576, | |
| "learning_rate": 4.747474747474748e-06, | |
| "loss": 0.0624, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.29040404040404, | |
| "grad_norm": 1.0412582159042358, | |
| "learning_rate": 4.730639730639731e-06, | |
| "loss": 0.0605, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 2.292929292929293, | |
| "grad_norm": 1.1493240594863892, | |
| "learning_rate": 4.7138047138047145e-06, | |
| "loss": 0.0818, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.2954545454545454, | |
| "grad_norm": 1.573701024055481, | |
| "learning_rate": 4.696969696969698e-06, | |
| "loss": 0.1485, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 2.297979797979798, | |
| "grad_norm": 3.5485622882843018, | |
| "learning_rate": 4.680134680134681e-06, | |
| "loss": 0.0746, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.3005050505050506, | |
| "grad_norm": 2.589240550994873, | |
| "learning_rate": 4.663299663299663e-06, | |
| "loss": 0.0669, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 2.303030303030303, | |
| "grad_norm": 3.300288677215576, | |
| "learning_rate": 4.646464646464647e-06, | |
| "loss": 0.1589, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.3055555555555554, | |
| "grad_norm": 2.2439637184143066, | |
| "learning_rate": 4.62962962962963e-06, | |
| "loss": 0.0927, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 2.308080808080808, | |
| "grad_norm": 3.438167095184326, | |
| "learning_rate": 4.612794612794613e-06, | |
| "loss": 0.1405, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.3106060606060606, | |
| "grad_norm": 1.1554774045944214, | |
| "learning_rate": 4.595959595959597e-06, | |
| "loss": 0.1113, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 2.313131313131313, | |
| "grad_norm": 2.269124984741211, | |
| "learning_rate": 4.57912457912458e-06, | |
| "loss": 0.1054, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.3156565656565657, | |
| "grad_norm": 3.707484722137451, | |
| "learning_rate": 4.562289562289563e-06, | |
| "loss": 0.1573, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 2.3181818181818183, | |
| "grad_norm": 3.806281089782715, | |
| "learning_rate": 4.5454545454545455e-06, | |
| "loss": 0.1247, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.320707070707071, | |
| "grad_norm": 5.063516616821289, | |
| "learning_rate": 4.528619528619529e-06, | |
| "loss": 0.077, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 2.323232323232323, | |
| "grad_norm": 1.84391450881958, | |
| "learning_rate": 4.5117845117845126e-06, | |
| "loss": 0.13, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.3257575757575757, | |
| "grad_norm": 2.5902676582336426, | |
| "learning_rate": 4.494949494949495e-06, | |
| "loss": 0.1043, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 2.3282828282828283, | |
| "grad_norm": 1.1772695779800415, | |
| "learning_rate": 4.478114478114479e-06, | |
| "loss": 0.0875, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.330808080808081, | |
| "grad_norm": 1.865903377532959, | |
| "learning_rate": 4.4612794612794615e-06, | |
| "loss": 0.1552, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 1.9699102640151978, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 0.0433, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.3358585858585856, | |
| "grad_norm": 3.4536280632019043, | |
| "learning_rate": 4.427609427609428e-06, | |
| "loss": 0.1309, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 2.3383838383838382, | |
| "grad_norm": 9.139911651611328, | |
| "learning_rate": 4.410774410774411e-06, | |
| "loss": 0.1629, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.340909090909091, | |
| "grad_norm": 2.665511131286621, | |
| "learning_rate": 4.393939393939394e-06, | |
| "loss": 0.1347, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 2.3434343434343434, | |
| "grad_norm": 1.851479172706604, | |
| "learning_rate": 4.377104377104377e-06, | |
| "loss": 0.0453, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.345959595959596, | |
| "grad_norm": 4.813875675201416, | |
| "learning_rate": 4.360269360269361e-06, | |
| "loss": 0.1395, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 2.3484848484848486, | |
| "grad_norm": 1.4313777685165405, | |
| "learning_rate": 4.343434343434344e-06, | |
| "loss": 0.1065, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.351010101010101, | |
| "grad_norm": 3.5636346340179443, | |
| "learning_rate": 4.326599326599326e-06, | |
| "loss": 0.2765, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 2.3535353535353534, | |
| "grad_norm": 2.2551841735839844, | |
| "learning_rate": 4.30976430976431e-06, | |
| "loss": 0.0752, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.356060606060606, | |
| "grad_norm": 2.989997625350952, | |
| "learning_rate": 4.292929292929293e-06, | |
| "loss": 0.0626, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 2.3585858585858586, | |
| "grad_norm": 2.648948907852173, | |
| "learning_rate": 4.276094276094276e-06, | |
| "loss": 0.1131, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.361111111111111, | |
| "grad_norm": 4.4058685302734375, | |
| "learning_rate": 4.2592592592592596e-06, | |
| "loss": 0.1696, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 2.3636363636363638, | |
| "grad_norm": 2.665522575378418, | |
| "learning_rate": 4.242424242424243e-06, | |
| "loss": 0.1484, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.3661616161616164, | |
| "grad_norm": 0.8671731352806091, | |
| "learning_rate": 4.225589225589226e-06, | |
| "loss": 0.0346, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 2.3686868686868685, | |
| "grad_norm": 5.202394962310791, | |
| "learning_rate": 4.208754208754209e-06, | |
| "loss": 0.1108, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.371212121212121, | |
| "grad_norm": 1.2443658113479614, | |
| "learning_rate": 4.191919191919192e-06, | |
| "loss": 0.0727, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 2.3737373737373737, | |
| "grad_norm": 2.493161678314209, | |
| "learning_rate": 4.1750841750841755e-06, | |
| "loss": 0.1396, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.3762626262626263, | |
| "grad_norm": 1.5535367727279663, | |
| "learning_rate": 4.158249158249158e-06, | |
| "loss": 0.1128, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 2.378787878787879, | |
| "grad_norm": 1.4870634078979492, | |
| "learning_rate": 4.141414141414142e-06, | |
| "loss": 0.1251, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.3813131313131315, | |
| "grad_norm": 1.0928040742874146, | |
| "learning_rate": 4.124579124579125e-06, | |
| "loss": 0.1148, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 2.3838383838383836, | |
| "grad_norm": 1.3592982292175293, | |
| "learning_rate": 4.107744107744108e-06, | |
| "loss": 0.1567, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.3863636363636362, | |
| "grad_norm": 3.2275450229644775, | |
| "learning_rate": 4.0909090909090915e-06, | |
| "loss": 0.1898, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 2.388888888888889, | |
| "grad_norm": 5.524433135986328, | |
| "learning_rate": 4.074074074074074e-06, | |
| "loss": 0.1526, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.3914141414141414, | |
| "grad_norm": 2.3239119052886963, | |
| "learning_rate": 4.057239057239058e-06, | |
| "loss": 0.1879, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 2.393939393939394, | |
| "grad_norm": 2.8176567554473877, | |
| "learning_rate": 4.04040404040404e-06, | |
| "loss": 0.0453, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.3964646464646466, | |
| "grad_norm": 4.552126884460449, | |
| "learning_rate": 4.023569023569024e-06, | |
| "loss": 0.1098, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 2.398989898989899, | |
| "grad_norm": 3.1059579849243164, | |
| "learning_rate": 4.0067340067340074e-06, | |
| "loss": 0.1238, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.4015151515151514, | |
| "grad_norm": 2.0037975311279297, | |
| "learning_rate": 3.98989898989899e-06, | |
| "loss": 0.1101, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 2.404040404040404, | |
| "grad_norm": 1.432120442390442, | |
| "learning_rate": 3.973063973063974e-06, | |
| "loss": 0.1475, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.4065656565656566, | |
| "grad_norm": 4.496235370635986, | |
| "learning_rate": 3.956228956228956e-06, | |
| "loss": 0.1285, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 2.409090909090909, | |
| "grad_norm": 2.675267457962036, | |
| "learning_rate": 3.93939393939394e-06, | |
| "loss": 0.1076, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.4116161616161618, | |
| "grad_norm": 1.4617221355438232, | |
| "learning_rate": 3.9225589225589225e-06, | |
| "loss": 0.0742, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 2.4141414141414144, | |
| "grad_norm": 2.676470994949341, | |
| "learning_rate": 3.905723905723906e-06, | |
| "loss": 0.1042, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.4166666666666665, | |
| "grad_norm": 3.1182193756103516, | |
| "learning_rate": 3.88888888888889e-06, | |
| "loss": 0.0782, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 2.419191919191919, | |
| "grad_norm": 1.4750274419784546, | |
| "learning_rate": 3.872053872053872e-06, | |
| "loss": 0.0824, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.4217171717171717, | |
| "grad_norm": 5.715966701507568, | |
| "learning_rate": 3.855218855218856e-06, | |
| "loss": 0.1555, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 2.4242424242424243, | |
| "grad_norm": 1.0495116710662842, | |
| "learning_rate": 3.8383838383838385e-06, | |
| "loss": 0.1218, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.426767676767677, | |
| "grad_norm": 3.0049309730529785, | |
| "learning_rate": 3.821548821548822e-06, | |
| "loss": 0.1283, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 2.429292929292929, | |
| "grad_norm": 1.6869391202926636, | |
| "learning_rate": 3.804713804713805e-06, | |
| "loss": 0.0797, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.4318181818181817, | |
| "grad_norm": 2.2413532733917236, | |
| "learning_rate": 3.7878787878787882e-06, | |
| "loss": 0.1478, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 2.4343434343434343, | |
| "grad_norm": 2.301522731781006, | |
| "learning_rate": 3.7710437710437713e-06, | |
| "loss": 0.1209, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.436868686868687, | |
| "grad_norm": 3.226301431655884, | |
| "learning_rate": 3.7542087542087544e-06, | |
| "loss": 0.1697, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 2.4393939393939394, | |
| "grad_norm": 2.822960376739502, | |
| "learning_rate": 3.737373737373738e-06, | |
| "loss": 0.0748, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.441919191919192, | |
| "grad_norm": 8.013906478881836, | |
| "learning_rate": 3.720538720538721e-06, | |
| "loss": 0.1067, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 1.2187291383743286, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.1054, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.446969696969697, | |
| "grad_norm": 1.9397814273834229, | |
| "learning_rate": 3.686868686868687e-06, | |
| "loss": 0.0697, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 2.4494949494949494, | |
| "grad_norm": 2.722252130508423, | |
| "learning_rate": 3.6700336700336704e-06, | |
| "loss": 0.1208, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.452020202020202, | |
| "grad_norm": 1.2536653280258179, | |
| "learning_rate": 3.6531986531986535e-06, | |
| "loss": 0.0446, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 2.4545454545454546, | |
| "grad_norm": 2.2456796169281006, | |
| "learning_rate": 3.6363636363636366e-06, | |
| "loss": 0.1854, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.457070707070707, | |
| "grad_norm": 3.275261163711548, | |
| "learning_rate": 3.61952861952862e-06, | |
| "loss": 0.1852, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 2.45959595959596, | |
| "grad_norm": 1.8232449293136597, | |
| "learning_rate": 3.6026936026936032e-06, | |
| "loss": 0.1223, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.462121212121212, | |
| "grad_norm": 1.9537675380706787, | |
| "learning_rate": 3.585858585858586e-06, | |
| "loss": 0.0819, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 2.4646464646464645, | |
| "grad_norm": 2.161625862121582, | |
| "learning_rate": 3.569023569023569e-06, | |
| "loss": 0.2049, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.467171717171717, | |
| "grad_norm": 2.769174575805664, | |
| "learning_rate": 3.5521885521885525e-06, | |
| "loss": 0.0915, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 2.4696969696969697, | |
| "grad_norm": 3.9444172382354736, | |
| "learning_rate": 3.5353535353535356e-06, | |
| "loss": 0.114, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.4722222222222223, | |
| "grad_norm": 1.980569839477539, | |
| "learning_rate": 3.5185185185185187e-06, | |
| "loss": 0.0776, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 2.474747474747475, | |
| "grad_norm": 2.1277084350585938, | |
| "learning_rate": 3.5016835016835023e-06, | |
| "loss": 0.1238, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.4772727272727275, | |
| "grad_norm": 2.6043457984924316, | |
| "learning_rate": 3.4848484848484854e-06, | |
| "loss": 0.1507, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 2.4797979797979797, | |
| "grad_norm": 1.3472402095794678, | |
| "learning_rate": 3.468013468013468e-06, | |
| "loss": 0.1353, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.4823232323232323, | |
| "grad_norm": 4.820988655090332, | |
| "learning_rate": 3.451178451178451e-06, | |
| "loss": 0.1156, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 2.484848484848485, | |
| "grad_norm": 3.138719320297241, | |
| "learning_rate": 3.4343434343434347e-06, | |
| "loss": 0.1345, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.4873737373737375, | |
| "grad_norm": 1.767815113067627, | |
| "learning_rate": 3.417508417508418e-06, | |
| "loss": 0.0567, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 2.48989898989899, | |
| "grad_norm": 1.7450860738754272, | |
| "learning_rate": 3.400673400673401e-06, | |
| "loss": 0.1188, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.492424242424242, | |
| "grad_norm": 1.7766708135604858, | |
| "learning_rate": 3.3838383838383844e-06, | |
| "loss": 0.1241, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 2.494949494949495, | |
| "grad_norm": 4.628079414367676, | |
| "learning_rate": 3.3670033670033675e-06, | |
| "loss": 0.1263, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.4974747474747474, | |
| "grad_norm": 1.5541713237762451, | |
| "learning_rate": 3.3501683501683502e-06, | |
| "loss": 0.0608, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 4.456207752227783, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.1484, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.5025252525252526, | |
| "grad_norm": 3.9640469551086426, | |
| "learning_rate": 3.316498316498317e-06, | |
| "loss": 0.1574, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 2.505050505050505, | |
| "grad_norm": 1.5159541368484497, | |
| "learning_rate": 3.2996632996633e-06, | |
| "loss": 0.0687, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.507575757575758, | |
| "grad_norm": 2.402961254119873, | |
| "learning_rate": 3.282828282828283e-06, | |
| "loss": 0.1176, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 2.51010101010101, | |
| "grad_norm": 1.6217000484466553, | |
| "learning_rate": 3.2659932659932666e-06, | |
| "loss": 0.1544, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.5126262626262625, | |
| "grad_norm": 3.1921989917755127, | |
| "learning_rate": 3.2491582491582497e-06, | |
| "loss": 0.1447, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 2.515151515151515, | |
| "grad_norm": 1.179274082183838, | |
| "learning_rate": 3.232323232323233e-06, | |
| "loss": 0.0994, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.5176767676767677, | |
| "grad_norm": 3.9791829586029053, | |
| "learning_rate": 3.2154882154882155e-06, | |
| "loss": 0.1909, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 2.5202020202020203, | |
| "grad_norm": 2.757751941680908, | |
| "learning_rate": 3.1986531986531986e-06, | |
| "loss": 0.105, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.5227272727272725, | |
| "grad_norm": 0.8614385724067688, | |
| "learning_rate": 3.181818181818182e-06, | |
| "loss": 0.0791, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 2.525252525252525, | |
| "grad_norm": 0.6211748123168945, | |
| "learning_rate": 3.1649831649831652e-06, | |
| "loss": 0.0379, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.5277777777777777, | |
| "grad_norm": 2.238368272781372, | |
| "learning_rate": 3.1481481481481483e-06, | |
| "loss": 0.1195, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 2.5303030303030303, | |
| "grad_norm": 2.4499704837799072, | |
| "learning_rate": 3.131313131313132e-06, | |
| "loss": 0.1324, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.532828282828283, | |
| "grad_norm": 3.4274697303771973, | |
| "learning_rate": 3.114478114478115e-06, | |
| "loss": 0.1922, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 2.5353535353535355, | |
| "grad_norm": 2.302090883255005, | |
| "learning_rate": 3.0976430976430976e-06, | |
| "loss": 0.1323, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.537878787878788, | |
| "grad_norm": 3.9652259349823, | |
| "learning_rate": 3.0808080808080807e-06, | |
| "loss": 0.1251, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 2.5404040404040407, | |
| "grad_norm": 6.590030670166016, | |
| "learning_rate": 3.0639730639730643e-06, | |
| "loss": 0.0688, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.542929292929293, | |
| "grad_norm": 0.5998873114585876, | |
| "learning_rate": 3.0471380471380474e-06, | |
| "loss": 0.0546, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 2.5454545454545454, | |
| "grad_norm": 4.4240899085998535, | |
| "learning_rate": 3.0303030303030305e-06, | |
| "loss": 0.1345, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.547979797979798, | |
| "grad_norm": 2.6441352367401123, | |
| "learning_rate": 3.013468013468014e-06, | |
| "loss": 0.0666, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 2.5505050505050506, | |
| "grad_norm": 1.1558561325073242, | |
| "learning_rate": 2.996632996632997e-06, | |
| "loss": 0.0784, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.5530303030303028, | |
| "grad_norm": 1.2861305475234985, | |
| "learning_rate": 2.97979797979798e-06, | |
| "loss": 0.0839, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 2.5555555555555554, | |
| "grad_norm": 2.3291330337524414, | |
| "learning_rate": 2.962962962962963e-06, | |
| "loss": 0.0824, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.558080808080808, | |
| "grad_norm": 1.6665867567062378, | |
| "learning_rate": 2.9461279461279464e-06, | |
| "loss": 0.1121, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 2.5606060606060606, | |
| "grad_norm": 1.4039171934127808, | |
| "learning_rate": 2.9292929292929295e-06, | |
| "loss": 0.0941, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.563131313131313, | |
| "grad_norm": 1.706173062324524, | |
| "learning_rate": 2.9124579124579126e-06, | |
| "loss": 0.1561, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 2.5656565656565657, | |
| "grad_norm": 1.4657055139541626, | |
| "learning_rate": 2.895622895622896e-06, | |
| "loss": 0.0968, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.5681818181818183, | |
| "grad_norm": 2.3425521850585938, | |
| "learning_rate": 2.8787878787878793e-06, | |
| "loss": 0.0576, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 2.570707070707071, | |
| "grad_norm": 1.266230821609497, | |
| "learning_rate": 2.861952861952862e-06, | |
| "loss": 0.0754, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.573232323232323, | |
| "grad_norm": 2.496561288833618, | |
| "learning_rate": 2.845117845117845e-06, | |
| "loss": 0.0982, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 2.5757575757575757, | |
| "grad_norm": 2.888542890548706, | |
| "learning_rate": 2.8282828282828286e-06, | |
| "loss": 0.0865, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.5782828282828283, | |
| "grad_norm": 1.9701051712036133, | |
| "learning_rate": 2.8114478114478117e-06, | |
| "loss": 0.0496, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 2.580808080808081, | |
| "grad_norm": 5.326476573944092, | |
| "learning_rate": 2.794612794612795e-06, | |
| "loss": 0.1212, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.5833333333333335, | |
| "grad_norm": 3.695080041885376, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": 0.0967, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 2.5858585858585856, | |
| "grad_norm": 2.2361230850219727, | |
| "learning_rate": 2.7609427609427614e-06, | |
| "loss": 0.0793, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.5883838383838382, | |
| "grad_norm": 1.3065497875213623, | |
| "learning_rate": 2.7441077441077445e-06, | |
| "loss": 0.0676, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 2.590909090909091, | |
| "grad_norm": 2.1756739616394043, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": 0.1675, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.5934343434343434, | |
| "grad_norm": 2.2035090923309326, | |
| "learning_rate": 2.7104377104377103e-06, | |
| "loss": 0.1765, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 2.595959595959596, | |
| "grad_norm": 1.7042522430419922, | |
| "learning_rate": 2.693602693602694e-06, | |
| "loss": 0.1223, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.5984848484848486, | |
| "grad_norm": 1.2529280185699463, | |
| "learning_rate": 2.676767676767677e-06, | |
| "loss": 0.0723, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 2.601010101010101, | |
| "grad_norm": 1.5967926979064941, | |
| "learning_rate": 2.65993265993266e-06, | |
| "loss": 0.1243, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.6035353535353534, | |
| "grad_norm": 1.8551892042160034, | |
| "learning_rate": 2.6430976430976436e-06, | |
| "loss": 0.0677, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 2.606060606060606, | |
| "grad_norm": 0.9810446500778198, | |
| "learning_rate": 2.6262626262626267e-06, | |
| "loss": 0.0399, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.6085858585858586, | |
| "grad_norm": 4.027339935302734, | |
| "learning_rate": 2.6094276094276094e-06, | |
| "loss": 0.1253, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 2.611111111111111, | |
| "grad_norm": 1.6822688579559326, | |
| "learning_rate": 2.5925925925925925e-06, | |
| "loss": 0.1235, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.6136363636363638, | |
| "grad_norm": 2.5733704566955566, | |
| "learning_rate": 2.575757575757576e-06, | |
| "loss": 0.094, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 2.616161616161616, | |
| "grad_norm": 2.587446689605713, | |
| "learning_rate": 2.558922558922559e-06, | |
| "loss": 0.0614, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.6186868686868685, | |
| "grad_norm": 3.116171360015869, | |
| "learning_rate": 2.5420875420875422e-06, | |
| "loss": 0.063, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 2.621212121212121, | |
| "grad_norm": 4.079165458679199, | |
| "learning_rate": 2.5252525252525258e-06, | |
| "loss": 0.1302, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.6237373737373737, | |
| "grad_norm": 3.22881817817688, | |
| "learning_rate": 2.508417508417509e-06, | |
| "loss": 0.1311, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 2.6262626262626263, | |
| "grad_norm": 2.3561739921569824, | |
| "learning_rate": 2.491582491582492e-06, | |
| "loss": 0.1138, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.628787878787879, | |
| "grad_norm": 1.6347684860229492, | |
| "learning_rate": 2.474747474747475e-06, | |
| "loss": 0.1246, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 2.6313131313131315, | |
| "grad_norm": 2.9931626319885254, | |
| "learning_rate": 2.457912457912458e-06, | |
| "loss": 0.1445, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.633838383838384, | |
| "grad_norm": 0.5848364233970642, | |
| "learning_rate": 2.4410774410774413e-06, | |
| "loss": 0.0661, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 2.6363636363636362, | |
| "grad_norm": 4.181141376495361, | |
| "learning_rate": 2.4242424242424244e-06, | |
| "loss": 0.1234, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.638888888888889, | |
| "grad_norm": 5.948246002197266, | |
| "learning_rate": 2.4074074074074075e-06, | |
| "loss": 0.134, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 2.6414141414141414, | |
| "grad_norm": 1.8077932596206665, | |
| "learning_rate": 2.3905723905723906e-06, | |
| "loss": 0.1052, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.643939393939394, | |
| "grad_norm": 4.848948955535889, | |
| "learning_rate": 2.373737373737374e-06, | |
| "loss": 0.1963, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 2.6464646464646466, | |
| "grad_norm": 2.3405141830444336, | |
| "learning_rate": 2.3569023569023572e-06, | |
| "loss": 0.24, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.648989898989899, | |
| "grad_norm": 3.162492036819458, | |
| "learning_rate": 2.3400673400673403e-06, | |
| "loss": 0.0911, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 2.6515151515151514, | |
| "grad_norm": 4.6703619956970215, | |
| "learning_rate": 2.3232323232323234e-06, | |
| "loss": 0.0713, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.654040404040404, | |
| "grad_norm": 1.252194881439209, | |
| "learning_rate": 2.3063973063973065e-06, | |
| "loss": 0.0678, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 2.6565656565656566, | |
| "grad_norm": 1.4940955638885498, | |
| "learning_rate": 2.28956228956229e-06, | |
| "loss": 0.0321, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.659090909090909, | |
| "grad_norm": 2.759089469909668, | |
| "learning_rate": 2.2727272727272728e-06, | |
| "loss": 0.0759, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 2.6616161616161618, | |
| "grad_norm": 4.008279800415039, | |
| "learning_rate": 2.2558922558922563e-06, | |
| "loss": 0.1421, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.6641414141414144, | |
| "grad_norm": 2.280316114425659, | |
| "learning_rate": 2.2390572390572394e-06, | |
| "loss": 0.0971, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 1.5876095294952393, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.0945, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.669191919191919, | |
| "grad_norm": 2.7003700733184814, | |
| "learning_rate": 2.2053872053872056e-06, | |
| "loss": 0.1862, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 2.6717171717171717, | |
| "grad_norm": 2.837354898452759, | |
| "learning_rate": 2.1885521885521887e-06, | |
| "loss": 0.0816, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.6742424242424243, | |
| "grad_norm": 1.9325331449508667, | |
| "learning_rate": 2.171717171717172e-06, | |
| "loss": 0.09, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 2.676767676767677, | |
| "grad_norm": 1.9655112028121948, | |
| "learning_rate": 2.154882154882155e-06, | |
| "loss": 0.189, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.679292929292929, | |
| "grad_norm": 0.8985033631324768, | |
| "learning_rate": 2.138047138047138e-06, | |
| "loss": 0.0415, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 2.6818181818181817, | |
| "grad_norm": 2.287306785583496, | |
| "learning_rate": 2.1212121212121216e-06, | |
| "loss": 0.2154, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.6843434343434343, | |
| "grad_norm": 2.1749632358551025, | |
| "learning_rate": 2.1043771043771047e-06, | |
| "loss": 0.1085, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 2.686868686868687, | |
| "grad_norm": 3.1133999824523926, | |
| "learning_rate": 2.0875420875420878e-06, | |
| "loss": 0.109, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.6893939393939394, | |
| "grad_norm": 1.5289435386657715, | |
| "learning_rate": 2.070707070707071e-06, | |
| "loss": 0.0488, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 2.691919191919192, | |
| "grad_norm": 2.7709944248199463, | |
| "learning_rate": 2.053872053872054e-06, | |
| "loss": 0.1032, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.6944444444444446, | |
| "grad_norm": 3.149768114089966, | |
| "learning_rate": 2.037037037037037e-06, | |
| "loss": 0.0486, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 2.6969696969696972, | |
| "grad_norm": 3.0890722274780273, | |
| "learning_rate": 2.02020202020202e-06, | |
| "loss": 0.1869, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.6994949494949494, | |
| "grad_norm": 4.697057247161865, | |
| "learning_rate": 2.0033670033670037e-06, | |
| "loss": 0.2966, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 2.702020202020202, | |
| "grad_norm": 3.644277334213257, | |
| "learning_rate": 1.986531986531987e-06, | |
| "loss": 0.0869, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.7045454545454546, | |
| "grad_norm": 1.996146559715271, | |
| "learning_rate": 1.96969696969697e-06, | |
| "loss": 0.1297, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 2.707070707070707, | |
| "grad_norm": 1.3258694410324097, | |
| "learning_rate": 1.952861952861953e-06, | |
| "loss": 0.0937, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.7095959595959593, | |
| "grad_norm": 2.5805246829986572, | |
| "learning_rate": 1.936026936026936e-06, | |
| "loss": 0.107, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 2.712121212121212, | |
| "grad_norm": 1.8007394075393677, | |
| "learning_rate": 1.9191919191919192e-06, | |
| "loss": 0.0987, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.7146464646464645, | |
| "grad_norm": 2.052168369293213, | |
| "learning_rate": 1.9023569023569026e-06, | |
| "loss": 0.0753, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 2.717171717171717, | |
| "grad_norm": 1.795806646347046, | |
| "learning_rate": 1.8855218855218857e-06, | |
| "loss": 0.0898, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.7196969696969697, | |
| "grad_norm": 2.1112513542175293, | |
| "learning_rate": 1.868686868686869e-06, | |
| "loss": 0.0948, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 2.7222222222222223, | |
| "grad_norm": 1.7274150848388672, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 0.0699, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.724747474747475, | |
| "grad_norm": 3.7306082248687744, | |
| "learning_rate": 1.8350168350168352e-06, | |
| "loss": 0.138, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 2.7272727272727275, | |
| "grad_norm": 1.8672465085983276, | |
| "learning_rate": 1.8181818181818183e-06, | |
| "loss": 0.0541, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.7297979797979797, | |
| "grad_norm": 2.303978443145752, | |
| "learning_rate": 1.8013468013468016e-06, | |
| "loss": 0.1123, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 2.7323232323232323, | |
| "grad_norm": 1.74871027469635, | |
| "learning_rate": 1.7845117845117845e-06, | |
| "loss": 0.1266, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.734848484848485, | |
| "grad_norm": 3.29699969291687, | |
| "learning_rate": 1.7676767676767678e-06, | |
| "loss": 0.1918, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 2.7373737373737375, | |
| "grad_norm": 2.935121774673462, | |
| "learning_rate": 1.7508417508417511e-06, | |
| "loss": 0.115, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.73989898989899, | |
| "grad_norm": 4.8938140869140625, | |
| "learning_rate": 1.734006734006734e-06, | |
| "loss": 0.1566, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 2.742424242424242, | |
| "grad_norm": 3.4594430923461914, | |
| "learning_rate": 1.7171717171717173e-06, | |
| "loss": 0.0905, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.744949494949495, | |
| "grad_norm": 2.121217966079712, | |
| "learning_rate": 1.7003367003367005e-06, | |
| "loss": 0.1123, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 2.7474747474747474, | |
| "grad_norm": 2.414285182952881, | |
| "learning_rate": 1.6835016835016838e-06, | |
| "loss": 0.1652, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 3.6288323402404785, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.0463, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 2.7525252525252526, | |
| "grad_norm": 1.9368160963058472, | |
| "learning_rate": 1.64983164983165e-06, | |
| "loss": 0.0567, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.755050505050505, | |
| "grad_norm": 1.847935438156128, | |
| "learning_rate": 1.6329966329966333e-06, | |
| "loss": 0.0958, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 2.757575757575758, | |
| "grad_norm": 1.821707010269165, | |
| "learning_rate": 1.6161616161616164e-06, | |
| "loss": 0.0851, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.76010101010101, | |
| "grad_norm": 3.361027240753174, | |
| "learning_rate": 1.5993265993265993e-06, | |
| "loss": 0.0712, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 2.7626262626262625, | |
| "grad_norm": 1.8871111869812012, | |
| "learning_rate": 1.5824915824915826e-06, | |
| "loss": 0.0637, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.765151515151515, | |
| "grad_norm": 3.3805835247039795, | |
| "learning_rate": 1.565656565656566e-06, | |
| "loss": 0.166, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 2.7676767676767677, | |
| "grad_norm": 1.451699137687683, | |
| "learning_rate": 1.5488215488215488e-06, | |
| "loss": 0.1075, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.7702020202020203, | |
| "grad_norm": 1.6252110004425049, | |
| "learning_rate": 1.5319865319865321e-06, | |
| "loss": 0.0511, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 2.7727272727272725, | |
| "grad_norm": 1.8269497156143188, | |
| "learning_rate": 1.5151515151515152e-06, | |
| "loss": 0.0603, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.775252525252525, | |
| "grad_norm": 1.9480081796646118, | |
| "learning_rate": 1.4983164983164986e-06, | |
| "loss": 0.1297, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 1.0791457891464233, | |
| "learning_rate": 1.4814814814814815e-06, | |
| "loss": 0.127, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.7803030303030303, | |
| "grad_norm": 1.6918015480041504, | |
| "learning_rate": 1.4646464646464648e-06, | |
| "loss": 0.1229, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 2.782828282828283, | |
| "grad_norm": 1.6666957139968872, | |
| "learning_rate": 1.447811447811448e-06, | |
| "loss": 0.1041, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.7853535353535355, | |
| "grad_norm": 1.4526945352554321, | |
| "learning_rate": 1.430976430976431e-06, | |
| "loss": 0.1327, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 2.787878787878788, | |
| "grad_norm": 4.764105319976807, | |
| "learning_rate": 1.4141414141414143e-06, | |
| "loss": 0.1007, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.7904040404040407, | |
| "grad_norm": 1.458585262298584, | |
| "learning_rate": 1.3973063973063974e-06, | |
| "loss": 0.0867, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 2.792929292929293, | |
| "grad_norm": 1.1463141441345215, | |
| "learning_rate": 1.3804713804713807e-06, | |
| "loss": 0.0722, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.7954545454545454, | |
| "grad_norm": 2.6391751766204834, | |
| "learning_rate": 1.3636363636363636e-06, | |
| "loss": 0.0808, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 2.797979797979798, | |
| "grad_norm": 2.5230796337127686, | |
| "learning_rate": 1.346801346801347e-06, | |
| "loss": 0.1696, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.8005050505050506, | |
| "grad_norm": 2.990051507949829, | |
| "learning_rate": 1.32996632996633e-06, | |
| "loss": 0.1824, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 2.8030303030303028, | |
| "grad_norm": 5.150264739990234, | |
| "learning_rate": 1.3131313131313134e-06, | |
| "loss": 0.08, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.8055555555555554, | |
| "grad_norm": 2.4451775550842285, | |
| "learning_rate": 1.2962962962962962e-06, | |
| "loss": 0.0639, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 2.808080808080808, | |
| "grad_norm": 8.441463470458984, | |
| "learning_rate": 1.2794612794612796e-06, | |
| "loss": 0.071, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.8106060606060606, | |
| "grad_norm": 7.7809882164001465, | |
| "learning_rate": 1.2626262626262629e-06, | |
| "loss": 0.0897, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 2.813131313131313, | |
| "grad_norm": 8.197009086608887, | |
| "learning_rate": 1.245791245791246e-06, | |
| "loss": 0.1037, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.8156565656565657, | |
| "grad_norm": 2.672224283218384, | |
| "learning_rate": 1.228956228956229e-06, | |
| "loss": 0.0338, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 2.8181818181818183, | |
| "grad_norm": 2.55483078956604, | |
| "learning_rate": 1.2121212121212122e-06, | |
| "loss": 0.0677, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.820707070707071, | |
| "grad_norm": 7.761810779571533, | |
| "learning_rate": 1.1952861952861953e-06, | |
| "loss": 0.0817, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 2.823232323232323, | |
| "grad_norm": 2.313318967819214, | |
| "learning_rate": 1.1784511784511786e-06, | |
| "loss": 0.1253, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.8257575757575757, | |
| "grad_norm": 0.8076485991477966, | |
| "learning_rate": 1.1616161616161617e-06, | |
| "loss": 0.0363, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 2.8282828282828283, | |
| "grad_norm": 2.6288771629333496, | |
| "learning_rate": 1.144781144781145e-06, | |
| "loss": 0.1451, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.830808080808081, | |
| "grad_norm": 1.7148422002792358, | |
| "learning_rate": 1.1279461279461281e-06, | |
| "loss": 0.1067, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 2.8333333333333335, | |
| "grad_norm": 1.2999204397201538, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 0.0544, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.8358585858585856, | |
| "grad_norm": 2.9060170650482178, | |
| "learning_rate": 1.0942760942760944e-06, | |
| "loss": 0.1528, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 2.8383838383838382, | |
| "grad_norm": 2.594888210296631, | |
| "learning_rate": 1.0774410774410775e-06, | |
| "loss": 0.104, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.840909090909091, | |
| "grad_norm": 7.884887218475342, | |
| "learning_rate": 1.0606060606060608e-06, | |
| "loss": 0.1301, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 2.8434343434343434, | |
| "grad_norm": 1.9427886009216309, | |
| "learning_rate": 1.0437710437710439e-06, | |
| "loss": 0.2201, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.845959595959596, | |
| "grad_norm": 6.63613748550415, | |
| "learning_rate": 1.026936026936027e-06, | |
| "loss": 0.1505, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 2.8484848484848486, | |
| "grad_norm": 2.172806739807129, | |
| "learning_rate": 1.01010101010101e-06, | |
| "loss": 0.1074, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.851010101010101, | |
| "grad_norm": 2.2825562953948975, | |
| "learning_rate": 9.932659932659934e-07, | |
| "loss": 0.1252, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 2.8535353535353534, | |
| "grad_norm": 1.1408872604370117, | |
| "learning_rate": 9.764309764309765e-07, | |
| "loss": 0.1207, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.856060606060606, | |
| "grad_norm": 2.4947509765625, | |
| "learning_rate": 9.595959595959596e-07, | |
| "loss": 0.0963, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 2.8585858585858586, | |
| "grad_norm": 7.295626640319824, | |
| "learning_rate": 9.427609427609428e-07, | |
| "loss": 0.1011, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.861111111111111, | |
| "grad_norm": 9.468647956848145, | |
| "learning_rate": 9.259259259259259e-07, | |
| "loss": 0.0915, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 2.8636363636363638, | |
| "grad_norm": 1.7602087259292603, | |
| "learning_rate": 9.090909090909091e-07, | |
| "loss": 0.0556, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.866161616161616, | |
| "grad_norm": 1.6855865716934204, | |
| "learning_rate": 8.922558922558923e-07, | |
| "loss": 0.0916, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 2.8686868686868685, | |
| "grad_norm": 3.8684542179107666, | |
| "learning_rate": 8.754208754208756e-07, | |
| "loss": 0.0927, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.871212121212121, | |
| "grad_norm": 1.5681943893432617, | |
| "learning_rate": 8.585858585858587e-07, | |
| "loss": 0.0907, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 2.8737373737373737, | |
| "grad_norm": 2.357790470123291, | |
| "learning_rate": 8.417508417508419e-07, | |
| "loss": 0.0963, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.8762626262626263, | |
| "grad_norm": 2.0638039112091064, | |
| "learning_rate": 8.24915824915825e-07, | |
| "loss": 0.1217, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 2.878787878787879, | |
| "grad_norm": 7.039210319519043, | |
| "learning_rate": 8.080808080808082e-07, | |
| "loss": 0.1581, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.8813131313131315, | |
| "grad_norm": 2.2965760231018066, | |
| "learning_rate": 7.912457912457913e-07, | |
| "loss": 0.1482, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 2.883838383838384, | |
| "grad_norm": 1.5618226528167725, | |
| "learning_rate": 7.744107744107744e-07, | |
| "loss": 0.1567, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.8863636363636362, | |
| "grad_norm": 1.2720274925231934, | |
| "learning_rate": 7.575757575757576e-07, | |
| "loss": 0.1048, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 1.6947522163391113, | |
| "learning_rate": 7.407407407407407e-07, | |
| "loss": 0.0891, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.8914141414141414, | |
| "grad_norm": 3.2767159938812256, | |
| "learning_rate": 7.23905723905724e-07, | |
| "loss": 0.2284, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 2.893939393939394, | |
| "grad_norm": 12.075784683227539, | |
| "learning_rate": 7.070707070707071e-07, | |
| "loss": 0.1004, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.8964646464646466, | |
| "grad_norm": 1.556806206703186, | |
| "learning_rate": 6.902356902356904e-07, | |
| "loss": 0.1137, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 2.898989898989899, | |
| "grad_norm": 3.214446783065796, | |
| "learning_rate": 6.734006734006735e-07, | |
| "loss": 0.1453, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.9015151515151514, | |
| "grad_norm": 2.274674892425537, | |
| "learning_rate": 6.565656565656567e-07, | |
| "loss": 0.1567, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 2.904040404040404, | |
| "grad_norm": 4.8869781494140625, | |
| "learning_rate": 6.397306397306398e-07, | |
| "loss": 0.2099, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.9065656565656566, | |
| "grad_norm": 4.9651923179626465, | |
| "learning_rate": 6.22895622895623e-07, | |
| "loss": 0.1808, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 2.909090909090909, | |
| "grad_norm": 4.156426906585693, | |
| "learning_rate": 6.060606060606061e-07, | |
| "loss": 0.0797, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.9116161616161618, | |
| "grad_norm": 2.8879013061523438, | |
| "learning_rate": 5.892255892255893e-07, | |
| "loss": 0.1232, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 2.9141414141414144, | |
| "grad_norm": 1.8005517721176147, | |
| "learning_rate": 5.723905723905725e-07, | |
| "loss": 0.112, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.9166666666666665, | |
| "grad_norm": 3.8166842460632324, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 0.0776, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 2.919191919191919, | |
| "grad_norm": 4.17734432220459, | |
| "learning_rate": 5.387205387205387e-07, | |
| "loss": 0.2496, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.9217171717171717, | |
| "grad_norm": 2.027888536453247, | |
| "learning_rate": 5.218855218855219e-07, | |
| "loss": 0.1184, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 2.9242424242424243, | |
| "grad_norm": 0.865708589553833, | |
| "learning_rate": 5.05050505050505e-07, | |
| "loss": 0.0604, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.926767676767677, | |
| "grad_norm": 1.5890415906906128, | |
| "learning_rate": 4.882154882154883e-07, | |
| "loss": 0.1305, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 2.929292929292929, | |
| "grad_norm": 1.054485559463501, | |
| "learning_rate": 4.713804713804714e-07, | |
| "loss": 0.077, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.9318181818181817, | |
| "grad_norm": 1.1664531230926514, | |
| "learning_rate": 4.5454545454545457e-07, | |
| "loss": 0.065, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 2.9343434343434343, | |
| "grad_norm": 1.196090579032898, | |
| "learning_rate": 4.377104377104378e-07, | |
| "loss": 0.0854, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.936868686868687, | |
| "grad_norm": 1.983268141746521, | |
| "learning_rate": 4.2087542087542094e-07, | |
| "loss": 0.1021, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 2.9393939393939394, | |
| "grad_norm": 5.308765888214111, | |
| "learning_rate": 4.040404040404041e-07, | |
| "loss": 0.1535, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.941919191919192, | |
| "grad_norm": 3.1391713619232178, | |
| "learning_rate": 3.872053872053872e-07, | |
| "loss": 0.1295, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 2.9444444444444446, | |
| "grad_norm": 1.9112738370895386, | |
| "learning_rate": 3.7037037037037036e-07, | |
| "loss": 0.1338, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.9469696969696972, | |
| "grad_norm": 1.7345768213272095, | |
| "learning_rate": 3.535353535353536e-07, | |
| "loss": 0.1048, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 2.9494949494949494, | |
| "grad_norm": 1.8400707244873047, | |
| "learning_rate": 3.3670033670033673e-07, | |
| "loss": 0.1345, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.952020202020202, | |
| "grad_norm": 5.5112152099609375, | |
| "learning_rate": 3.198653198653199e-07, | |
| "loss": 0.0901, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 2.9545454545454546, | |
| "grad_norm": 1.7662899494171143, | |
| "learning_rate": 3.0303030303030305e-07, | |
| "loss": 0.1025, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.957070707070707, | |
| "grad_norm": 5.364653587341309, | |
| "learning_rate": 2.8619528619528626e-07, | |
| "loss": 0.098, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 2.9595959595959593, | |
| "grad_norm": 1.2001750469207764, | |
| "learning_rate": 2.6936026936026936e-07, | |
| "loss": 0.049, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.962121212121212, | |
| "grad_norm": 2.842573642730713, | |
| "learning_rate": 2.525252525252525e-07, | |
| "loss": 0.0885, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 2.9646464646464645, | |
| "grad_norm": 1.9140822887420654, | |
| "learning_rate": 2.356902356902357e-07, | |
| "loss": 0.1336, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.967171717171717, | |
| "grad_norm": 1.2715041637420654, | |
| "learning_rate": 2.188552188552189e-07, | |
| "loss": 0.044, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 2.9696969696969697, | |
| "grad_norm": 1.805606722831726, | |
| "learning_rate": 2.0202020202020205e-07, | |
| "loss": 0.1139, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.9722222222222223, | |
| "grad_norm": 0.7524275183677673, | |
| "learning_rate": 1.8518518518518518e-07, | |
| "loss": 0.038, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 2.974747474747475, | |
| "grad_norm": 1.4970057010650635, | |
| "learning_rate": 1.6835016835016837e-07, | |
| "loss": 0.1246, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.9772727272727275, | |
| "grad_norm": 2.653041124343872, | |
| "learning_rate": 1.5151515151515152e-07, | |
| "loss": 0.1941, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 2.9797979797979797, | |
| "grad_norm": 2.8758771419525146, | |
| "learning_rate": 1.3468013468013468e-07, | |
| "loss": 0.1387, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.9823232323232323, | |
| "grad_norm": 4.085249423980713, | |
| "learning_rate": 1.1784511784511785e-07, | |
| "loss": 0.0822, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 2.984848484848485, | |
| "grad_norm": 2.2607507705688477, | |
| "learning_rate": 1.0101010101010103e-07, | |
| "loss": 0.1064, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.9873737373737375, | |
| "grad_norm": 2.853379726409912, | |
| "learning_rate": 8.417508417508418e-08, | |
| "loss": 0.0615, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 2.98989898989899, | |
| "grad_norm": 3.8462393283843994, | |
| "learning_rate": 6.734006734006734e-08, | |
| "loss": 0.1311, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.992424242424242, | |
| "grad_norm": 4.459750652313232, | |
| "learning_rate": 5.050505050505051e-08, | |
| "loss": 0.2523, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 2.994949494949495, | |
| "grad_norm": 2.9024791717529297, | |
| "learning_rate": 3.367003367003367e-08, | |
| "loss": 0.0775, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.9974747474747474, | |
| "grad_norm": 2.9558804035186768, | |
| "learning_rate": 1.6835016835016835e-08, | |
| "loss": 0.1257, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.027782678604126, | |
| "learning_rate": 0.0, | |
| "loss": 0.1071, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.775, | |
| "eval_f1": 0.9063876651982378, | |
| "eval_loss": 0.13235561549663544, | |
| "eval_runtime": 45.6825, | |
| "eval_samples_per_second": 19.263, | |
| "eval_steps_per_second": 0.416, | |
| "step": 1188 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1188, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 251588081479680.0, | |
| "train_batch_size": 20, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |