{
  "best_global_step": 3000,
  "best_metric": 1.1457551717758179,
  "best_model_checkpoint": "/workspace/woodcode_2/checkpoint-3000",
  "epoch": 0.6942837306179125,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0023142791020597086,
      "grad_norm": 1.0788178443908691,
      "learning_rate": 6.923076923076923e-06,
      "loss": 2.405,
      "step": 10
    },
    {
      "epoch": 0.004628558204119417,
      "grad_norm": 0.4521988034248352,
      "learning_rate": 1.4615384615384617e-05,
      "loss": 2.2017,
      "step": 20
    },
    {
      "epoch": 0.006942837306179125,
      "grad_norm": 0.41120657324790955,
      "learning_rate": 2.230769230769231e-05,
      "loss": 1.9363,
      "step": 30
    },
    {
      "epoch": 0.009257116408238834,
      "grad_norm": 0.28640317916870117,
      "learning_rate": 3e-05,
      "loss": 1.748,
      "step": 40
    },
    {
      "epoch": 0.011571395510298541,
      "grad_norm": 0.26409590244293213,
      "learning_rate": 3.769230769230769e-05,
      "loss": 1.6238,
      "step": 50
    },
    {
      "epoch": 0.01388567461235825,
      "grad_norm": 0.26786214113235474,
      "learning_rate": 4.538461538461539e-05,
      "loss": 1.5538,
      "step": 60
    },
    {
      "epoch": 0.01619995371441796,
      "grad_norm": 0.2931516766548157,
      "learning_rate": 5.3076923076923076e-05,
      "loss": 1.4984,
      "step": 70
    },
    {
      "epoch": 0.01851423281647767,
      "grad_norm": 0.29799872636795044,
      "learning_rate": 6.0769230769230765e-05,
      "loss": 1.4658,
      "step": 80
    },
    {
      "epoch": 0.020828511918537376,
      "grad_norm": 0.29269853234291077,
      "learning_rate": 6.846153846153847e-05,
      "loss": 1.4265,
      "step": 90
    },
    {
      "epoch": 0.023142791020597082,
      "grad_norm": 0.3255228102207184,
      "learning_rate": 7.615384615384616e-05,
      "loss": 1.4194,
      "step": 100
    },
    {
      "epoch": 0.025457070122656793,
      "grad_norm": 0.3054940104484558,
      "learning_rate": 8.384615384615386e-05,
      "loss": 1.3852,
      "step": 110
    },
    {
      "epoch": 0.0277713492247165,
      "grad_norm": 0.2838137149810791,
      "learning_rate": 9.153846153846155e-05,
      "loss": 1.3885,
      "step": 120
    },
    {
      "epoch": 0.03008562832677621,
      "grad_norm": 0.2860707938671112,
      "learning_rate": 9.923076923076923e-05,
      "loss": 1.3693,
      "step": 130
    },
    {
      "epoch": 0.03239990742883592,
      "grad_norm": 0.2725190818309784,
      "learning_rate": 9.999886214268966e-05,
      "loss": 1.3606,
      "step": 140
    },
    {
      "epoch": 0.03471418653089563,
      "grad_norm": 0.2650243937969208,
      "learning_rate": 9.999492887526629e-05,
      "loss": 1.3414,
      "step": 150
    },
    {
      "epoch": 0.03702846563295534,
      "grad_norm": 0.2538904845714569,
      "learning_rate": 9.998818637106816e-05,
      "loss": 1.3495,
      "step": 160
    },
    {
      "epoch": 0.03934274473501504,
      "grad_norm": 0.25812944769859314,
      "learning_rate": 9.99786350089595e-05,
      "loss": 1.3419,
      "step": 170
    },
    {
      "epoch": 0.04165702383707475,
      "grad_norm": 0.23412390053272247,
      "learning_rate": 9.996627532563551e-05,
      "loss": 1.3314,
      "step": 180
    },
    {
      "epoch": 0.04397130293913446,
      "grad_norm": 0.256795197725296,
      "learning_rate": 9.995110801559215e-05,
      "loss": 1.3326,
      "step": 190
    },
    {
      "epoch": 0.046285582041194165,
      "grad_norm": 0.24870288372039795,
      "learning_rate": 9.993313393108719e-05,
      "loss": 1.328,
      "step": 200
    },
    {
      "epoch": 0.048599861143253875,
      "grad_norm": 0.2265051007270813,
      "learning_rate": 9.991235408209221e-05,
      "loss": 1.3271,
      "step": 210
    },
    {
      "epoch": 0.050914140245313586,
      "grad_norm": 0.23642291128635406,
      "learning_rate": 9.988876963623597e-05,
      "loss": 1.3268,
      "step": 220
    },
    {
      "epoch": 0.053228419347373296,
      "grad_norm": 0.2279822677373886,
      "learning_rate": 9.986238191873874e-05,
      "loss": 1.3058,
      "step": 230
    },
    {
      "epoch": 0.055542698449433,
      "grad_norm": 0.23465260863304138,
      "learning_rate": 9.983319241233782e-05,
      "loss": 1.3057,
      "step": 240
    },
    {
      "epoch": 0.05785697755149271,
      "grad_norm": 0.22599244117736816,
      "learning_rate": 9.980120275720424e-05,
      "loss": 1.313,
      "step": 250
    },
    {
      "epoch": 0.06017125665355242,
      "grad_norm": 0.23743176460266113,
      "learning_rate": 9.976641475085067e-05,
      "loss": 1.3004,
      "step": 260
    },
    {
      "epoch": 0.06248553575561213,
      "grad_norm": 0.22628776729106903,
      "learning_rate": 9.972883034803025e-05,
      "loss": 1.3059,
      "step": 270
    },
    {
      "epoch": 0.06479981485767183,
      "grad_norm": 0.24011753499507904,
      "learning_rate": 9.968845166062692e-05,
      "loss": 1.2905,
      "step": 280
    },
    {
      "epoch": 0.06711409395973154,
      "grad_norm": 0.231357604265213,
      "learning_rate": 9.96452809575367e-05,
      "loss": 1.2971,
      "step": 290
    },
    {
      "epoch": 0.06942837306179125,
      "grad_norm": 0.23093485832214355,
      "learning_rate": 9.959932066454008e-05,
      "loss": 1.2977,
      "step": 300
    },
    {
      "epoch": 0.07174265216385096,
      "grad_norm": 0.22505411505699158,
      "learning_rate": 9.955057336416597e-05,
      "loss": 1.2746,
      "step": 310
    },
    {
      "epoch": 0.07405693126591067,
      "grad_norm": 0.2227935642004013,
      "learning_rate": 9.949904179554632e-05,
      "loss": 1.273,
      "step": 320
    },
    {
      "epoch": 0.07637121036797037,
      "grad_norm": 0.23704655468463898,
      "learning_rate": 9.944472885426235e-05,
      "loss": 1.2909,
      "step": 330
    },
    {
      "epoch": 0.07868548947003008,
      "grad_norm": 0.2225590944290161,
      "learning_rate": 9.938763759218185e-05,
      "loss": 1.2846,
      "step": 340
    },
    {
      "epoch": 0.08099976857208979,
      "grad_norm": 0.24287723004817963,
      "learning_rate": 9.932777121728763e-05,
      "loss": 1.2989,
      "step": 350
    },
    {
      "epoch": 0.0833140476741495,
      "grad_norm": 0.22559230029582977,
      "learning_rate": 9.926513309349732e-05,
      "loss": 1.2803,
      "step": 360
    },
    {
      "epoch": 0.08562832677620921,
      "grad_norm": 0.22119103372097015,
      "learning_rate": 9.919972674047429e-05,
      "loss": 1.269,
      "step": 370
    },
    {
      "epoch": 0.08794260587826892,
      "grad_norm": 0.2336379736661911,
      "learning_rate": 9.913155583342994e-05,
      "loss": 1.2775,
      "step": 380
    },
    {
      "epoch": 0.09025688498032863,
      "grad_norm": 0.2086753100156784,
      "learning_rate": 9.906062420291715e-05,
      "loss": 1.2868,
      "step": 390
    },
    {
      "epoch": 0.09257116408238833,
      "grad_norm": 0.24568480253219604,
      "learning_rate": 9.898693583461507e-05,
      "loss": 1.2746,
      "step": 400
    },
    {
      "epoch": 0.09488544318444804,
      "grad_norm": 0.22321555018424988,
      "learning_rate": 9.891049486910511e-05,
      "loss": 1.2682,
      "step": 410
    },
    {
      "epoch": 0.09719972228650775,
      "grad_norm": 0.22601205110549927,
      "learning_rate": 9.883130560163837e-05,
      "loss": 1.27,
      "step": 420
    },
    {
      "epoch": 0.09951400138856746,
      "grad_norm": 0.20481973886489868,
      "learning_rate": 9.874937248189415e-05,
      "loss": 1.275,
      "step": 430
    },
    {
      "epoch": 0.10182828049062717,
      "grad_norm": 0.2164992243051529,
      "learning_rate": 9.866470011373008e-05,
      "loss": 1.2661,
      "step": 440
    },
    {
      "epoch": 0.10414255959268688,
      "grad_norm": 0.20576460659503937,
      "learning_rate": 9.857729325492329e-05,
      "loss": 1.2626,
      "step": 450
    },
    {
      "epoch": 0.10645683869474659,
      "grad_norm": 0.22202594578266144,
      "learning_rate": 9.848715681690317e-05,
      "loss": 1.2488,
      "step": 460
    },
    {
      "epoch": 0.10877111779680629,
      "grad_norm": 0.20930485427379608,
      "learning_rate": 9.839429586447533e-05,
      "loss": 1.2623,
      "step": 470
    },
    {
      "epoch": 0.111085396898866,
      "grad_norm": 0.23361071944236755,
      "learning_rate": 9.829871561553702e-05,
      "loss": 1.2546,
      "step": 480
    },
    {
      "epoch": 0.11339967600092571,
      "grad_norm": 0.211343452334404,
      "learning_rate": 9.820042144078397e-05,
      "loss": 1.2538,
      "step": 490
    },
    {
      "epoch": 0.11571395510298542,
      "grad_norm": 0.20587043464183807,
      "learning_rate": 9.809941886340854e-05,
      "loss": 1.2719,
      "step": 500
    },
    {
      "epoch": 0.11571395510298542,
      "eval_loss": 1.2470530271530151,
      "eval_runtime": 23.9969,
      "eval_samples_per_second": 16.002,
      "eval_steps_per_second": 0.5,
      "step": 500
    },
    {
      "epoch": 0.11802823420504513,
      "grad_norm": 0.20054572820663452,
      "learning_rate": 9.799571355878947e-05,
      "loss": 1.2563,
      "step": 510
    },
    {
      "epoch": 0.12034251330710484,
      "grad_norm": 0.21044516563415527,
      "learning_rate": 9.788931135417287e-05,
      "loss": 1.2517,
      "step": 520
    },
    {
      "epoch": 0.12265679240916455,
      "grad_norm": 0.21391618251800537,
      "learning_rate": 9.778021822834485e-05,
      "loss": 1.2491,
      "step": 530
    },
    {
      "epoch": 0.12497107151122426,
      "grad_norm": 0.2132970243692398,
      "learning_rate": 9.766844031129552e-05,
      "loss": 1.2472,
      "step": 540
    },
    {
      "epoch": 0.12728535061328397,
      "grad_norm": 0.2169645130634308,
      "learning_rate": 9.755398388387462e-05,
      "loss": 1.2596,
      "step": 550
    },
    {
      "epoch": 0.12959962971534367,
      "grad_norm": 0.20134727656841278,
      "learning_rate": 9.743685537743856e-05,
      "loss": 1.257,
      "step": 560
    },
    {
      "epoch": 0.1319139088174034,
      "grad_norm": 0.21121706068515778,
      "learning_rate": 9.731706137348898e-05,
      "loss": 1.2616,
      "step": 570
    },
    {
      "epoch": 0.1342281879194631,
      "grad_norm": 0.21253220736980438,
      "learning_rate": 9.7194608603303e-05,
      "loss": 1.2355,
      "step": 580
    },
    {
      "epoch": 0.13654246702152278,
      "grad_norm": 0.22279760241508484,
      "learning_rate": 9.706950394755501e-05,
      "loss": 1.256,
      "step": 590
    },
    {
      "epoch": 0.1388567461235825,
      "grad_norm": 0.191938579082489,
      "learning_rate": 9.694175443592993e-05,
      "loss": 1.2408,
      "step": 600
    },
    {
      "epoch": 0.1411710252256422,
      "grad_norm": 0.2211560308933258,
      "learning_rate": 9.681136724672835e-05,
      "loss": 1.2563,
      "step": 610
    },
    {
      "epoch": 0.14348530432770193,
      "grad_norm": 0.2078508883714676,
      "learning_rate": 9.667834970646307e-05,
      "loss": 1.2323,
      "step": 620
    },
    {
      "epoch": 0.14579958342976163,
      "grad_norm": 0.21866025030612946,
      "learning_rate": 9.65427092894475e-05,
      "loss": 1.261,
      "step": 630
    },
    {
      "epoch": 0.14811386253182135,
      "grad_norm": 0.20903170108795166,
      "learning_rate": 9.640445361737556e-05,
      "loss": 1.2476,
      "step": 640
    },
    {
      "epoch": 0.15042814163388105,
      "grad_norm": 0.20698365569114685,
      "learning_rate": 9.626359045889355e-05,
      "loss": 1.2354,
      "step": 650
    },
    {
      "epoch": 0.15274242073594074,
      "grad_norm": 0.21057769656181335,
      "learning_rate": 9.612012772916353e-05,
      "loss": 1.2527,
      "step": 660
    },
    {
      "epoch": 0.15505669983800047,
      "grad_norm": 0.2073555439710617,
      "learning_rate": 9.597407348941865e-05,
      "loss": 1.2338,
      "step": 670
    },
    {
      "epoch": 0.15737097894006016,
      "grad_norm": 0.20362691581249237,
      "learning_rate": 9.582543594651005e-05,
      "loss": 1.2548,
      "step": 680
    },
    {
      "epoch": 0.1596852580421199,
      "grad_norm": 0.18878686428070068,
      "learning_rate": 9.56742234524459e-05,
      "loss": 1.2399,
      "step": 690
    },
    {
      "epoch": 0.16199953714417958,
      "grad_norm": 0.21003399789333344,
      "learning_rate": 9.552044450392189e-05,
      "loss": 1.2366,
      "step": 700
    },
    {
      "epoch": 0.1643138162462393,
      "grad_norm": 0.21605387330055237,
      "learning_rate": 9.536410774184396e-05,
      "loss": 1.2419,
      "step": 710
    },
    {
      "epoch": 0.166628095348299,
      "grad_norm": 0.21591876447200775,
      "learning_rate": 9.520522195084274e-05,
      "loss": 1.2412,
      "step": 720
    },
    {
      "epoch": 0.1689423744503587,
      "grad_norm": 0.2058115005493164,
      "learning_rate": 9.504379605877979e-05,
      "loss": 1.233,
      "step": 730
    },
    {
      "epoch": 0.17125665355241843,
      "grad_norm": 0.224104642868042,
      "learning_rate": 9.487983913624615e-05,
      "loss": 1.2272,
      "step": 740
    },
    {
      "epoch": 0.17357093265447812,
      "grad_norm": 0.20306575298309326,
      "learning_rate": 9.471336039605255e-05,
      "loss": 1.2278,
      "step": 750
    },
    {
      "epoch": 0.17588521175653785,
      "grad_norm": 0.1998828798532486,
      "learning_rate": 9.454436919271169e-05,
      "loss": 1.2344,
      "step": 760
    },
    {
      "epoch": 0.17819949085859754,
      "grad_norm": 0.1913456916809082,
      "learning_rate": 9.437287502191274e-05,
      "loss": 1.2376,
      "step": 770
    },
    {
      "epoch": 0.18051376996065727,
      "grad_norm": 0.20067718625068665,
      "learning_rate": 9.419888751998767e-05,
      "loss": 1.2586,
      "step": 780
    },
    {
      "epoch": 0.18282804906271696,
      "grad_norm": 0.1913948804140091,
      "learning_rate": 9.402241646336977e-05,
      "loss": 1.2414,
      "step": 790
    },
    {
      "epoch": 0.18514232816477666,
      "grad_norm": 0.20469442009925842,
      "learning_rate": 9.38434717680444e-05,
      "loss": 1.2395,
      "step": 800
    },
    {
      "epoch": 0.18745660726683638,
      "grad_norm": 0.20488658547401428,
      "learning_rate": 9.366206348899177e-05,
      "loss": 1.2259,
      "step": 810
    },
    {
      "epoch": 0.18977088636889608,
      "grad_norm": 0.21545757353305817,
      "learning_rate": 9.347820181962185e-05,
      "loss": 1.2267,
      "step": 820
    },
    {
      "epoch": 0.1920851654709558,
      "grad_norm": 0.20461086928844452,
      "learning_rate": 9.329189709120174e-05,
      "loss": 1.2482,
      "step": 830
    },
    {
      "epoch": 0.1943994445730155,
      "grad_norm": 0.21476367115974426,
      "learning_rate": 9.310315977227509e-05,
      "loss": 1.2321,
      "step": 840
    },
    {
      "epoch": 0.19671372367507522,
      "grad_norm": 0.20833474397659302,
      "learning_rate": 9.291200046807382e-05,
      "loss": 1.22,
      "step": 850
    },
    {
      "epoch": 0.19902800277713492,
      "grad_norm": 0.19986377656459808,
      "learning_rate": 9.27184299199223e-05,
      "loss": 1.2423,
      "step": 860
    },
    {
      "epoch": 0.20134228187919462,
      "grad_norm": 0.22088144719600677,
      "learning_rate": 9.252245900463373e-05,
      "loss": 1.232,
      "step": 870
    },
    {
      "epoch": 0.20365656098125434,
      "grad_norm": 0.20136182010173798,
      "learning_rate": 9.2324098733899e-05,
      "loss": 1.2229,
      "step": 880
    },
    {
      "epoch": 0.20597084008331404,
      "grad_norm": 0.1938410848379135,
      "learning_rate": 9.212336025366788e-05,
      "loss": 1.2227,
      "step": 890
    },
    {
      "epoch": 0.20828511918537376,
      "grad_norm": 0.202079638838768,
      "learning_rate": 9.19202548435228e-05,
      "loss": 1.2197,
      "step": 900
    },
    {
      "epoch": 0.21059939828743346,
      "grad_norm": 0.20484083890914917,
      "learning_rate": 9.1714793916045e-05,
      "loss": 1.2089,
      "step": 910
    },
    {
      "epoch": 0.21291367738949318,
      "grad_norm": 0.21105819940567017,
      "learning_rate": 9.150698901617327e-05,
      "loss": 1.2315,
      "step": 920
    },
    {
      "epoch": 0.21522795649155288,
      "grad_norm": 0.19875198602676392,
      "learning_rate": 9.129685182055519e-05,
      "loss": 1.2233,
      "step": 930
    },
    {
      "epoch": 0.21754223559361258,
      "grad_norm": 0.201791912317276,
      "learning_rate": 9.10843941368911e-05,
      "loss": 1.2324,
      "step": 940
    },
    {
      "epoch": 0.2198565146956723,
      "grad_norm": 0.20584046840667725,
      "learning_rate": 9.086962790327056e-05,
      "loss": 1.2167,
      "step": 950
    },
    {
      "epoch": 0.222170793797732,
      "grad_norm": 0.19981129467487335,
      "learning_rate": 9.065256518750154e-05,
      "loss": 1.2178,
      "step": 960
    },
    {
      "epoch": 0.22448507289979172,
      "grad_norm": 0.19994951784610748,
      "learning_rate": 9.043321818643233e-05,
      "loss": 1.2158,
      "step": 970
    },
    {
      "epoch": 0.22679935200185142,
      "grad_norm": 0.2023075968027115,
      "learning_rate": 9.021159922526623e-05,
      "loss": 1.2353,
      "step": 980
    },
    {
      "epoch": 0.22911363110391114,
      "grad_norm": 0.1981421411037445,
      "learning_rate": 8.998772075686896e-05,
      "loss": 1.2396,
      "step": 990
    },
    {
      "epoch": 0.23142791020597084,
      "grad_norm": 0.2052128165960312,
      "learning_rate": 8.976159536106894e-05,
      "loss": 1.2137,
      "step": 1000
    },
    {
      "epoch": 0.23142791020597084,
      "eval_loss": 1.208183765411377,
      "eval_runtime": 21.6303,
      "eval_samples_per_second": 17.753,
      "eval_steps_per_second": 0.555,
      "step": 1000
    },
    {
      "epoch": 0.23374218930803053,
      "grad_norm": 0.20763066411018372,
      "learning_rate": 8.953323574395037e-05,
      "loss": 1.2247,
      "step": 1010
    },
    {
      "epoch": 0.23605646841009026,
      "grad_norm": 0.19439862668514252,
      "learning_rate": 8.930265473713938e-05,
      "loss": 1.2239,
      "step": 1020
    },
    {
      "epoch": 0.23837074751214996,
      "grad_norm": 0.188704714179039,
      "learning_rate": 8.90698652970829e-05,
      "loss": 1.2331,
      "step": 1030
    },
    {
      "epoch": 0.24068502661420968,
      "grad_norm": 0.2066233903169632,
      "learning_rate": 8.883488050432074e-05,
      "loss": 1.2178,
      "step": 1040
    },
    {
      "epoch": 0.24299930571626938,
      "grad_norm": 0.20683979988098145,
      "learning_rate": 8.859771356275046e-05,
      "loss": 1.2222,
      "step": 1050
    },
    {
      "epoch": 0.2453135848183291,
      "grad_norm": 0.21290378272533417,
      "learning_rate": 8.835837779888557e-05,
      "loss": 1.2162,
      "step": 1060
    },
    {
      "epoch": 0.2476278639203888,
      "grad_norm": 0.19746707379817963,
      "learning_rate": 8.811688666110662e-05,
      "loss": 1.2239,
      "step": 1070
    },
    {
      "epoch": 0.24994214302244852,
      "grad_norm": 0.19365474581718445,
      "learning_rate": 8.787325371890558e-05,
      "loss": 1.2187,
      "step": 1080
    },
    {
      "epoch": 0.2522564221245082,
      "grad_norm": 0.20299233496189117,
      "learning_rate": 8.76274926621233e-05,
      "loss": 1.2075,
      "step": 1090
    },
    {
      "epoch": 0.25457070122656794,
      "grad_norm": 0.20049187541007996,
      "learning_rate": 8.737961730018034e-05,
      "loss": 1.2114,
      "step": 1100
    },
    {
      "epoch": 0.2568849803286276,
      "grad_norm": 0.19873455166816711,
      "learning_rate": 8.712964156130099e-05,
      "loss": 1.2247,
      "step": 1110
    },
    {
      "epoch": 0.25919925943068733,
      "grad_norm": 0.1992412507534027,
      "learning_rate": 8.687757949173063e-05,
      "loss": 1.2164,
      "step": 1120
    },
    {
      "epoch": 0.26151353853274706,
      "grad_norm": 0.2137262374162674,
      "learning_rate": 8.662344525494644e-05,
      "loss": 1.2083,
      "step": 1130
    },
    {
      "epoch": 0.2638278176348068,
      "grad_norm": 0.20104128122329712,
      "learning_rate": 8.636725313086162e-05,
      "loss": 1.2125,
      "step": 1140
    },
    {
      "epoch": 0.26614209673686645,
      "grad_norm": 0.2062898725271225,
      "learning_rate": 8.610901751502292e-05,
      "loss": 1.235,
      "step": 1150
    },
    {
      "epoch": 0.2684563758389262,
      "grad_norm": 0.20116354525089264,
      "learning_rate": 8.584875291780178e-05,
      "loss": 1.217,
      "step": 1160
    },
    {
      "epoch": 0.2707706549409859,
      "grad_norm": 0.20894746482372284,
      "learning_rate": 8.558647396357901e-05,
      "loss": 1.2173,
      "step": 1170
    },
    {
      "epoch": 0.27308493404304557,
      "grad_norm": 0.19359129667282104,
      "learning_rate": 8.532219538992301e-05,
      "loss": 1.2082,
      "step": 1180
    },
    {
      "epoch": 0.2753992131451053,
      "grad_norm": 0.1946392059326172,
      "learning_rate": 8.505593204676162e-05,
      "loss": 1.2161,
      "step": 1190
    },
    {
      "epoch": 0.277713492247165,
      "grad_norm": 0.2131495177745819,
      "learning_rate": 8.478769889554781e-05,
      "loss": 1.2046,
      "step": 1200
    },
    {
      "epoch": 0.28002777134922474,
      "grad_norm": 0.21192453801631927,
      "learning_rate": 8.451751100841887e-05,
      "loss": 1.2174,
      "step": 1210
    },
    {
      "epoch": 0.2823420504512844,
      "grad_norm": 0.1986854523420334,
      "learning_rate": 8.424538356734957e-05,
      "loss": 1.2124,
      "step": 1220
    },
    {
      "epoch": 0.28465632955334413,
      "grad_norm": 0.19923637807369232,
      "learning_rate": 8.397133186329903e-05,
      "loss": 1.2168,
      "step": 1230
    },
    {
      "epoch": 0.28697060865540386,
      "grad_norm": 0.19468043744564056,
      "learning_rate": 8.36953712953516e-05,
      "loss": 1.2067,
      "step": 1240
    },
    {
      "epoch": 0.2892848877574635,
      "grad_norm": 0.19150374829769135,
      "learning_rate": 8.34175173698515e-05,
      "loss": 1.2118,
      "step": 1250
    },
    {
      "epoch": 0.29159916685952325,
      "grad_norm": 0.19914792478084564,
      "learning_rate": 8.31377856995315e-05,
      "loss": 1.2018,
      "step": 1260
    },
    {
      "epoch": 0.293913445961583,
      "grad_norm": 0.19311580061912537,
      "learning_rate": 8.285619200263567e-05,
      "loss": 1.2001,
      "step": 1270
    },
    {
      "epoch": 0.2962277250636427,
      "grad_norm": 0.20415401458740234,
      "learning_rate": 8.257275210203622e-05,
      "loss": 1.2156,
      "step": 1280
    },
    {
      "epoch": 0.29854200416570237,
      "grad_norm": 0.1939728707075119,
      "learning_rate": 8.228748192434428e-05,
      "loss": 1.2035,
      "step": 1290
    },
    {
      "epoch": 0.3008562832677621,
      "grad_norm": 0.1993534117937088,
      "learning_rate": 8.200039749901511e-05,
      "loss": 1.1971,
      "step": 1300
    },
    {
      "epoch": 0.3031705623698218,
      "grad_norm": 0.19424191117286682,
      "learning_rate": 8.171151495744727e-05,
      "loss": 1.1923,
      "step": 1310
    },
    {
      "epoch": 0.3054848414718815,
      "grad_norm": 0.19882912933826447,
      "learning_rate": 8.142085053207629e-05,
      "loss": 1.1998,
      "step": 1320
    },
    {
      "epoch": 0.3077991205739412,
      "grad_norm": 0.1941244751214981,
      "learning_rate": 8.112842055546252e-05,
      "loss": 1.2152,
      "step": 1330
    },
    {
      "epoch": 0.31011339967600093,
      "grad_norm": 0.20408713817596436,
      "learning_rate": 8.083424145937339e-05,
      "loss": 1.2202,
      "step": 1340
    },
    {
      "epoch": 0.31242767877806066,
      "grad_norm": 0.19065722823143005,
      "learning_rate": 8.053832977386015e-05,
      "loss": 1.2123,
      "step": 1350
    },
    {
      "epoch": 0.3147419578801203,
      "grad_norm": 0.20365293323993683,
      "learning_rate": 8.024070212632892e-05,
      "loss": 1.1972,
      "step": 1360
    },
    {
      "epoch": 0.31705623698218005,
      "grad_norm": 0.20200444757938385,
      "learning_rate": 7.994137524060656e-05,
      "loss": 1.202,
      "step": 1370
    },
    {
      "epoch": 0.3193705160842398,
      "grad_norm": 0.19926463067531586,
      "learning_rate": 7.964036593600084e-05,
      "loss": 1.1989,
      "step": 1380
    },
    {
      "epoch": 0.32168479518629944,
      "grad_norm": 0.19380785524845123,
      "learning_rate": 7.933769112635534e-05,
      "loss": 1.203,
      "step": 1390
    },
    {
      "epoch": 0.32399907428835917,
      "grad_norm": 0.19268542528152466,
      "learning_rate": 7.903336781909911e-05,
      "loss": 1.2019,
      "step": 1400
    },
    {
      "epoch": 0.3263133533904189,
      "grad_norm": 0.20773714780807495,
      "learning_rate": 7.872741311429103e-05,
      "loss": 1.1995,
      "step": 1410
    },
    {
      "epoch": 0.3286276324924786,
      "grad_norm": 0.19505122303962708,
      "learning_rate": 7.841984420365888e-05,
      "loss": 1.2028,
      "step": 1420
    },
    {
      "epoch": 0.3309419115945383,
      "grad_norm": 0.19330574572086334,
      "learning_rate": 7.811067836963337e-05,
      "loss": 1.2002,
      "step": 1430
    },
    {
      "epoch": 0.333256190696598,
      "grad_norm": 0.21044421195983887,
      "learning_rate": 7.779993298437704e-05,
      "loss": 1.1985,
      "step": 1440
    },
    {
      "epoch": 0.33557046979865773,
      "grad_norm": 0.20081642270088196,
      "learning_rate": 7.74876255088081e-05,
      "loss": 1.2131,
      "step": 1450
    },
    {
      "epoch": 0.3378847489007174,
      "grad_norm": 0.1973022222518921,
      "learning_rate": 7.71737734916193e-05,
      "loss": 1.1997,
      "step": 1460
    },
    {
      "epoch": 0.3401990280027771,
      "grad_norm": 0.19213716685771942,
      "learning_rate": 7.685839456829183e-05,
      "loss": 1.201,
      "step": 1470
    },
    {
      "epoch": 0.34251330710483685,
      "grad_norm": 0.19389280676841736,
      "learning_rate": 7.65415064601044e-05,
      "loss": 1.2078,
      "step": 1480
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 0.20220617949962616,
      "learning_rate": 7.622312697313754e-05,
      "loss": 1.2013,
      "step": 1490
    },
    {
      "epoch": 0.34714186530895624,
      "grad_norm": 0.2051166296005249,
      "learning_rate": 7.59032739972729e-05,
      "loss": 1.2183,
      "step": 1500
    },
    {
      "epoch": 0.34714186530895624,
      "eval_loss": 1.1873364448547363,
      "eval_runtime": 21.6444,
      "eval_samples_per_second": 17.741,
      "eval_steps_per_second": 0.554,
      "step": 1500
    },
    {
      "epoch": 0.34945614441101597,
      "grad_norm": 0.19153615832328796,
      "learning_rate": 7.558196550518818e-05,
      "loss": 1.1948,
      "step": 1510
    },
    {
      "epoch": 0.3517704235130757,
      "grad_norm": 0.1992039531469345,
      "learning_rate": 7.525921955134713e-05,
      "loss": 1.1868,
      "step": 1520
    },
    {
      "epoch": 0.35408470261513536,
      "grad_norm": 0.20605571568012238,
      "learning_rate": 7.493505427098517e-05,
      "loss": 1.199,
      "step": 1530
    },
    {
      "epoch": 0.3563989817171951,
      "grad_norm": 0.17926311492919922,
      "learning_rate": 7.460948787909017e-05,
      "loss": 1.194,
      "step": 1540
    },
    {
      "epoch": 0.3587132608192548,
      "grad_norm": 0.20658712089061737,
      "learning_rate": 7.428253866937918e-05,
      "loss": 1.2012,
      "step": 1550
    },
    {
      "epoch": 0.36102753992131453,
      "grad_norm": 0.21082770824432373,
      "learning_rate": 7.395422501327036e-05,
      "loss": 1.2004,
      "step": 1560
    },
    {
      "epoch": 0.3633418190233742,
      "grad_norm": 0.20247185230255127,
      "learning_rate": 7.362456535885066e-05,
      "loss": 1.1878,
      "step": 1570
    },
    {
      "epoch": 0.3656560981254339,
      "grad_norm": 0.20155729353427887,
      "learning_rate": 7.329357822983929e-05,
      "loss": 1.1796,
      "step": 1580
    },
    {
      "epoch": 0.36797037722749365,
      "grad_norm": 0.1960991472005844,
      "learning_rate": 7.296128222454686e-05,
      "loss": 1.2043,
      "step": 1590
    },
    {
      "epoch": 0.3702846563295533,
      "grad_norm": 0.19188149273395538,
      "learning_rate": 7.262769601483024e-05,
      "loss": 1.2037,
      "step": 1600
    },
    {
      "epoch": 0.37259893543161304,
      "grad_norm": 0.2052951157093048,
      "learning_rate": 7.229283834504351e-05,
      "loss": 1.1985,
      "step": 1610
    },
    {
      "epoch": 0.37491321453367277,
      "grad_norm": 0.18684880435466766,
      "learning_rate": 7.195672803098463e-05,
      "loss": 1.2023,
      "step": 1620
    },
    {
      "epoch": 0.3772274936357325,
      "grad_norm": 0.20104870200157166,
      "learning_rate": 7.161938395883815e-05,
      "loss": 1.1892,
      "step": 1630
    },
    {
      "epoch": 0.37954177273779216,
      "grad_norm": 0.19793595373630524,
      "learning_rate": 7.128082508411406e-05,
      "loss": 1.1992,
      "step": 1640
    },
    {
      "epoch": 0.3818560518398519,
      "grad_norm": 0.20280171930789948,
      "learning_rate": 7.094107043058264e-05,
      "loss": 1.2076,
      "step": 1650
    },
    {
      "epoch": 0.3841703309419116,
      "grad_norm": 0.20379236340522766,
      "learning_rate": 7.060013908920548e-05,
      "loss": 1.1987,
      "step": 1660
    },
    {
      "epoch": 0.3864846100439713,
      "grad_norm": 0.19275911152362823,
      "learning_rate": 7.025805021706276e-05,
      "loss": 1.1983,
      "step": 1670
    },
    {
      "epoch": 0.388798889146031,
      "grad_norm": 0.20220735669136047,
      "learning_rate": 6.991482303627685e-05,
      "loss": 1.1992,
      "step": 1680
    },
    {
      "epoch": 0.3911131682480907,
      "grad_norm": 0.2047668844461441,
      "learning_rate": 6.957047683293215e-05,
      "loss": 1.2086,
      "step": 1690
    },
    {
      "epoch": 0.39342744735015045,
      "grad_norm": 0.19045311212539673,
      "learning_rate": 6.922503095599142e-05,
      "loss": 1.1926,
      "step": 1700
    },
    {
      "epoch": 0.3957417264522101,
      "grad_norm": 0.2014586180448532,
      "learning_rate": 6.887850481620858e-05,
      "loss": 1.1973,
      "step": 1710
    },
    {
      "epoch": 0.39805600555426984,
      "grad_norm": 0.18599851429462433,
      "learning_rate": 6.853091788503802e-05,
      "loss": 1.1956,
      "step": 1720
    },
    {
      "epoch": 0.40037028465632957,
      "grad_norm": 0.2029285877943039,
      "learning_rate": 6.818228969354037e-05,
      "loss": 1.2114,
      "step": 1730
    },
    {
      "epoch": 0.40268456375838924,
      "grad_norm": 0.19286784529685974,
      "learning_rate": 6.783263983128519e-05,
      "loss": 1.1761,
      "step": 1740
    },
    {
      "epoch": 0.40499884286044896,
      "grad_norm": 0.19630247354507446,
      "learning_rate": 6.748198794525016e-05,
      "loss": 1.188,
      "step": 1750
    },
    {
      "epoch": 0.4073131219625087,
      "grad_norm": 0.19817174971103668,
      "learning_rate": 6.71303537387171e-05,
      "loss": 1.1885,
      "step": 1760
    },
    {
      "epoch": 0.4096274010645684,
      "grad_norm": 0.19006091356277466,
      "learning_rate": 6.677775697016484e-05,
      "loss": 1.1915,
      "step": 1770
    },
    {
      "epoch": 0.4119416801666281,
      "grad_norm": 0.1849374771118164,
      "learning_rate": 6.642421745215901e-05,
      "loss": 1.1853,
      "step": 1780
    },
    {
      "epoch": 0.4142559592686878,
      "grad_norm": 0.2032414823770523,
      "learning_rate": 6.606975505023873e-05,
      "loss": 1.197,
      "step": 1790
    },
    {
      "epoch": 0.4165702383707475,
      "grad_norm": 0.1908976286649704,
      "learning_rate": 6.571438968180035e-05,
      "loss": 1.1937,
      "step": 1800
    },
    {
      "epoch": 0.4188845174728072,
      "grad_norm": 0.19852004945278168,
      "learning_rate": 6.535814131497833e-05,
      "loss": 1.1837,
      "step": 1810
    },
    {
      "epoch": 0.4211987965748669,
      "grad_norm": 0.19003674387931824,
      "learning_rate": 6.50010299675232e-05,
      "loss": 1.1959,
      "step": 1820
    },
    {
      "epoch": 0.42351307567692664,
      "grad_norm": 0.2054755687713623,
      "learning_rate": 6.46430757056767e-05,
      "loss": 1.1943,
      "step": 1830
    },
    {
      "epoch": 0.42582735477898637,
      "grad_norm": 0.19895458221435547,
      "learning_rate": 6.428429864304432e-05,
      "loss": 1.1871,
      "step": 1840
    },
    {
      "epoch": 0.42814163388104604,
      "grad_norm": 0.19693517684936523,
      "learning_rate": 6.39247189394651e-05,
      "loss": 1.185,
      "step": 1850
    },
    {
      "epoch": 0.43045591298310576,
      "grad_norm": 0.19280746579170227,
      "learning_rate": 6.356435679987882e-05,
      "loss": 1.1817,
      "step": 1860
    },
    {
      "epoch": 0.4327701920851655,
      "grad_norm": 0.19104084372520447,
      "learning_rate": 6.320323247319064e-05,
      "loss": 1.186,
      "step": 1870
    },
    {
      "epoch": 0.43508447118722515,
      "grad_norm": 0.19598130881786346,
      "learning_rate": 6.28413662511334e-05,
      "loss": 1.1946,
      "step": 1880
    },
    {
      "epoch": 0.4373987502892849,
      "grad_norm": 0.2072417438030243,
      "learning_rate": 6.247877846712734e-05,
      "loss": 1.1921,
      "step": 1890
    },
    {
      "epoch": 0.4397130293913446,
      "grad_norm": 0.19743064045906067,
      "learning_rate": 6.211548949513756e-05,
      "loss": 1.1825,
      "step": 1900
    },
    {
      "epoch": 0.4420273084934043,
      "grad_norm": 0.19049686193466187,
      "learning_rate": 6.175151974852923e-05,
      "loss": 1.1893,
      "step": 1910
    },
    {
      "epoch": 0.444341587595464,
      "grad_norm": 0.18704815208911896,
      "learning_rate": 6.138688967892055e-05,
      "loss": 1.1851,
      "step": 1920
    },
    {
      "epoch": 0.4466558666975237,
      "grad_norm": 0.2007189691066742,
      "learning_rate": 6.102161977503358e-05,
      "loss": 1.1791,
      "step": 1930
    },
    {
      "epoch": 0.44897014579958344,
      "grad_norm": 0.19694744050502777,
      "learning_rate": 6.065573056154289e-05,
      "loss": 1.1797,
      "step": 1940
    },
    {
      "epoch": 0.4512844249016431,
      "grad_norm": 0.1945074051618576,
      "learning_rate": 6.028924259792235e-05,
      "loss": 1.1842,
      "step": 1950
    },
    {
      "epoch": 0.45359870400370284,
      "grad_norm": 0.19543199241161346,
      "learning_rate": 5.9922176477289874e-05,
      "loss": 1.1897,
      "step": 1960
    },
    {
      "epoch": 0.45591298310576256,
      "grad_norm": 0.20255213975906372,
      "learning_rate": 5.9554552825250264e-05,
      "loss": 1.1912,
      "step": 1970
    },
    {
      "epoch": 0.4582272622078223,
      "grad_norm": 0.19501863420009613,
      "learning_rate": 5.918639229873624e-05,
      "loss": 1.1821,
      "step": 1980
    },
    {
      "epoch": 0.46054154130988195,
      "grad_norm": 0.19646863639354706,
      "learning_rate": 5.881771558484774e-05,
      "loss": 1.1756,
      "step": 1990
    },
    {
      "epoch": 0.4628558204119417,
      "grad_norm": 0.2014242708683014,
      "learning_rate": 5.844854339968952e-05,
      "loss": 1.1853,
      "step": 2000
    },
    {
      "epoch": 0.4628558204119417,
      "eval_loss": 1.1698839664459229,
      "eval_runtime": 21.5892,
      "eval_samples_per_second": 17.787,
      "eval_steps_per_second": 0.556,
      "step": 2000
    },
    {
      "epoch": 0.4651700995140014,
      "grad_norm": 0.19205763936042786,
      "learning_rate": 5.8078896487207015e-05,
      "loss": 1.1883,
      "step": 2010
    },
    {
      "epoch": 0.46748437861606107,
      "grad_norm": 0.19423869252204895,
      "learning_rate": 5.770879561802087e-05,
      "loss": 1.1777,
      "step": 2020
    },
    {
      "epoch": 0.4697986577181208,
      "grad_norm": 0.19925445318222046,
      "learning_rate": 5.7338261588259726e-05,
      "loss": 1.1843,
      "step": 2030
    },
    {
      "epoch": 0.4721129368201805,
      "grad_norm": 0.18574309349060059,
      "learning_rate": 5.696731521839167e-05,
      "loss": 1.1763,
      "step": 2040
    },
    {
      "epoch": 0.47442721592224024,
      "grad_norm": 0.19058012962341309,
      "learning_rate": 5.6595977352054407e-05,
      "loss": 1.1797,
      "step": 2050
    },
    {
      "epoch": 0.4767414950242999,
      "grad_norm": 0.1849735677242279,
      "learning_rate": 5.6224268854883996e-05,
      "loss": 1.1808,
      "step": 2060
    },
    {
      "epoch": 0.47905577412635963,
      "grad_norm": 0.1923811137676239,
      "learning_rate": 5.585221061334236e-05,
      "loss": 1.1744,
      "step": 2070
    },
    {
      "epoch": 0.48137005322841936,
      "grad_norm": 0.1943860650062561,
      "learning_rate": 5.547982353354376e-05,
      "loss": 1.1833,
      "step": 2080
    },
    {
      "epoch": 0.4836843323304791,
      "grad_norm": 0.20127460360527039,
      "learning_rate": 5.510712854008001e-05,
      "loss": 1.1798,
      "step": 2090
    },
    {
      "epoch": 0.48599861143253875,
      "grad_norm": 0.18425202369689941,
      "learning_rate": 5.473414657484468e-05,
      "loss": 1.1969,
      "step": 2100
    },
    {
      "epoch": 0.4883128905345985,
      "grad_norm": 0.19612173736095428,
      "learning_rate": 5.436089859585648e-05,
      "loss": 1.1707,
      "step": 2110
    },
    {
      "epoch": 0.4906271696366582,
      "grad_norm": 0.18944087624549866,
      "learning_rate": 5.3987405576081505e-05,
      "loss": 1.1822,
      "step": 2120
    },
    {
      "epoch": 0.49294144873871787,
      "grad_norm": 0.19573846459388733,
      "learning_rate": 5.361368850225479e-05,
      "loss": 1.1831,
      "step": 2130
    },
    {
      "epoch": 0.4952557278407776,
      "grad_norm": 0.18912994861602783,
      "learning_rate": 5.32397683737011e-05,
      "loss": 1.1859,
      "step": 2140
    },
    {
      "epoch": 0.4975700069428373,
      "grad_norm": 0.19357813894748688,
      "learning_rate": 5.286566620115493e-05,
      "loss": 1.1701,
      "step": 2150
    },
    {
      "epoch": 0.49988428604489704,
      "grad_norm": 0.18788059055805206,
      "learning_rate": 5.249140300557985e-05,
      "loss": 1.1764,
      "step": 2160
    },
    {
      "epoch": 0.5021985651469567,
      "grad_norm": 0.19492246210575104,
      "learning_rate": 5.211699981698747e-05,
      "loss": 1.1898,
      "step": 2170
    },
    {
      "epoch": 0.5045128442490164,
      "grad_norm": 0.21048106253147125,
      "learning_rate": 5.17424776732556e-05,
      "loss": 1.1768,
      "step": 2180
    },
    {
      "epoch": 0.5068271233510762,
      "grad_norm": 0.1978602409362793,
      "learning_rate": 5.1367857618946194e-05,
      "loss": 1.1791,
      "step": 2190
    },
    {
      "epoch": 0.5091414024531359,
      "grad_norm": 0.19546453654766083,
      "learning_rate": 5.09931607041229e-05,
      "loss": 1.1821,
      "step": 2200
    },
    {
      "epoch": 0.5114556815551956,
      "grad_norm": 0.19739992916584015,
      "learning_rate": 5.0618407983168146e-05,
      "loss": 1.1754,
      "step": 2210
    },
    {
      "epoch": 0.5137699606572552,
      "grad_norm": 0.19072087109088898,
      "learning_rate": 5.0243620513600145e-05,
      "loss": 1.1826,
      "step": 2220
    },
    {
      "epoch": 0.5160842397593149,
      "grad_norm": 0.1789073944091797,
      "learning_rate": 4.9868819354889625e-05,
      "loss": 1.1731,
      "step": 2230
    },
    {
      "epoch": 0.5183985188613747,
      "grad_norm": 0.19865523278713226,
      "learning_rate": 4.9494025567276544e-05,
      "loss": 1.1796,
      "step": 2240
    },
    {
      "epoch": 0.5207127979634344,
      "grad_norm": 0.1872965544462204,
      "learning_rate": 4.9119260210586695e-05,
      "loss": 1.176,
      "step": 2250
    },
    {
      "epoch": 0.5230270770654941,
      "grad_norm": 0.1958765685558319,
      "learning_rate": 4.874454434304824e-05,
      "loss": 1.1712,
      "step": 2260
    },
    {
      "epoch": 0.5253413561675538,
      "grad_norm": 0.19132095575332642,
      "learning_rate": 4.8369899020108626e-05,
      "loss": 1.1786,
      "step": 2270
    },
    {
      "epoch": 0.5276556352696136,
      "grad_norm": 0.19474317133426666,
      "learning_rate": 4.7995345293251284e-05,
      "loss": 1.1869,
      "step": 2280
    },
    {
      "epoch": 0.5299699143716732,
      "grad_norm": 0.19309870898723602,
      "learning_rate": 4.762090420881289e-05,
      "loss": 1.1802,
      "step": 2290
    },
    {
      "epoch": 0.5322841934737329,
      "grad_norm": 0.2047063410282135,
      "learning_rate": 4.7246596806800636e-05,
      "loss": 1.1689,
      "step": 2300
    },
    {
      "epoch": 0.5345984725757926,
      "grad_norm": 0.19408148527145386,
      "learning_rate": 4.687244411971009e-05,
      "loss": 1.1715,
      "step": 2310
    },
    {
      "epoch": 0.5369127516778524,
      "grad_norm": 0.21102771162986755,
      "learning_rate": 4.649846717134327e-05,
      "loss": 1.1868,
      "step": 2320
    },
    {
      "epoch": 0.5392270307799121,
      "grad_norm": 0.20618434250354767,
      "learning_rate": 4.612468697562741e-05,
      "loss": 1.1688,
      "step": 2330
    },
    {
      "epoch": 0.5415413098819718,
      "grad_norm": 0.19679012894630432,
      "learning_rate": 4.575112453543408e-05,
      "loss": 1.1758,
      "step": 2340
    },
    {
      "epoch": 0.5438555889840315,
      "grad_norm": 0.20675049722194672,
      "learning_rate": 4.537780084139913e-05,
      "loss": 1.1605,
      "step": 2350
    },
    {
      "epoch": 0.5461698680860911,
      "grad_norm": 0.18863654136657715,
      "learning_rate": 4.500473687074309e-05,
      "loss": 1.1742,
      "step": 2360
    },
    {
      "epoch": 0.5484841471881509,
      "grad_norm": 0.19098520278930664,
      "learning_rate": 4.463195358609258e-05,
      "loss": 1.1652,
      "step": 2370
    },
    {
      "epoch": 0.5507984262902106,
      "grad_norm": 0.19034633040428162,
      "learning_rate": 4.4259471934302324e-05,
      "loss": 1.1716,
      "step": 2380
    },
    {
      "epoch": 0.5531127053922703,
      "grad_norm": 0.19565701484680176,
      "learning_rate": 4.388731284527816e-05,
      "loss": 1.1503,
      "step": 2390
    },
    {
      "epoch": 0.55542698449433,
      "grad_norm": 0.19067049026489258,
      "learning_rate": 4.351549723080097e-05,
      "loss": 1.1772,
      "step": 2400
    },
    {
      "epoch": 0.5577412635963898,
      "grad_norm": 0.19726891815662384,
      "learning_rate": 4.3144045983351735e-05,
      "loss": 1.187,
      "step": 2410
    },
    {
      "epoch": 0.5600555426984495,
      "grad_norm": 0.19251461327075958,
      "learning_rate": 4.277297997493737e-05,
      "loss": 1.1734,
      "step": 2420
    },
    {
      "epoch": 0.5623698218005091,
      "grad_norm": 0.19542944431304932,
      "learning_rate": 4.2402320055918154e-05,
      "loss": 1.1717,
      "step": 2430
    },
    {
      "epoch": 0.5646841009025688,
      "grad_norm": 0.19372211396694183,
      "learning_rate": 4.203208705383594e-05,
      "loss": 1.1859,
      "step": 2440
    },
    {
      "epoch": 0.5669983800046285,
      "grad_norm": 0.19102297723293304,
      "learning_rate": 4.1662301772243996e-05,
      "loss": 1.1609,
      "step": 2450
    },
    {
      "epoch": 0.5693126591066883,
      "grad_norm": 0.1865842044353485,
      "learning_rate": 4.129298498953792e-05,
      "loss": 1.1898,
      "step": 2460
    },
    {
      "epoch": 0.571626938208748,
      "grad_norm": 0.19476434588432312,
      "learning_rate": 4.0924157457788226e-05,
      "loss": 1.1726,
      "step": 2470
    },
    {
      "epoch": 0.5739412173108077,
      "grad_norm": 0.19208824634552002,
      "learning_rate": 4.055583990157416e-05,
      "loss": 1.1777,
      "step": 2480
    },
    {
      "epoch": 0.5762554964128674,
      "grad_norm": 0.1908976435661316,
      "learning_rate": 4.01880530168192e-05,
      "loss": 1.1668,
      "step": 2490
    },
    {
      "epoch": 0.578569775514927,
      "grad_norm": 0.19089365005493164,
      "learning_rate": 3.982081746962826e-05,
      "loss": 1.1794,
      "step": 2500
    },
    {
      "epoch": 0.578569775514927,
      "eval_loss": 1.1556445360183716,
      "eval_runtime": 21.5393,
      "eval_samples_per_second": 17.828,
      "eval_steps_per_second": 0.557,
      "step": 2500
    },
    {
      "epoch": 0.5808840546169868,
      "grad_norm": 0.21013890206813812,
      "learning_rate": 3.94541538951262e-05,
      "loss": 1.157,
      "step": 2510
    },
    {
      "epoch": 0.5831983337190465,
      "grad_norm": 0.20332397520542145,
      "learning_rate": 3.908808289629865e-05,
      "loss": 1.1709,
      "step": 2520
    },
    {
      "epoch": 0.5855126128211062,
      "grad_norm": 0.19428518414497375,
      "learning_rate": 3.8722625042834025e-05,
      "loss": 1.1783,
      "step": 2530
    },
    {
      "epoch": 0.587826891923166,
      "grad_norm": 0.19970852136611938,
      "learning_rate": 3.835780086996794e-05,
      "loss": 1.1687,
      "step": 2540
    },
    {
      "epoch": 0.5901411710252257,
      "grad_norm": 0.18801788985729218,
      "learning_rate": 3.7993630877329124e-05,
      "loss": 1.1715,
      "step": 2550
    },
    {
      "epoch": 0.5924554501272854,
      "grad_norm": 0.2128693163394928,
      "learning_rate": 3.763013552778774e-05,
      "loss": 1.179,
      "step": 2560
    },
    {
      "epoch": 0.594769729229345,
      "grad_norm": 0.19203241169452667,
      "learning_rate": 3.726733524630535e-05,
      "loss": 1.1838,
      "step": 2570
    },
    {
      "epoch": 0.5970840083314047,
      "grad_norm": 0.20480811595916748,
      "learning_rate": 3.690525041878743e-05,
      "loss": 1.1616,
      "step": 2580
    },
    {
      "epoch": 0.5993982874334645,
      "grad_norm": 0.19616341590881348,
      "learning_rate": 3.6543901390937754e-05,
      "loss": 1.1416,
      "step": 2590
    },
    {
      "epoch": 0.6017125665355242,
      "grad_norm": 0.1999153196811676,
      "learning_rate": 3.6183308467115175e-05,
      "loss": 1.1659,
      "step": 2600
    },
    {
      "epoch": 0.6040268456375839,
      "grad_norm": 0.19980020821094513,
      "learning_rate": 3.582349190919275e-05,
      "loss": 1.1657,
      "step": 2610
    },
    {
      "epoch": 0.6063411247396436,
      "grad_norm": 0.19510309398174286,
      "learning_rate": 3.546447193541922e-05,
      "loss": 1.1701,
      "step": 2620
    },
    {
      "epoch": 0.6086554038417034,
      "grad_norm": 0.18956266343593597,
      "learning_rate": 3.510626871928287e-05,
      "loss": 1.1663,
      "step": 2630
    },
    {
      "epoch": 0.610969682943763,
      "grad_norm": 0.18637120723724365,
      "learning_rate": 3.474890238837806e-05,
      "loss": 1.1731,
      "step": 2640
    },
    {
      "epoch": 0.6132839620458227,
      "grad_norm": 0.19002896547317505,
      "learning_rate": 3.439239302327417e-05,
      "loss": 1.1683,
      "step": 2650
    },
    {
      "epoch": 0.6155982411478824,
      "grad_norm": 0.19537580013275146,
      "learning_rate": 3.403676065638735e-05,
      "loss": 1.1652,
      "step": 2660
    },
    {
      "epoch": 0.6179125202499421,
      "grad_norm": 0.1950923502445221,
      "learning_rate": 3.368202527085476e-05,
      "loss": 1.1778,
      "step": 2670
    },
    {
      "epoch": 0.6202267993520019,
      "grad_norm": 0.19736339151859283,
      "learning_rate": 3.332820679941186e-05,
      "loss": 1.179,
      "step": 2680
    },
    {
      "epoch": 0.6225410784540616,
      "grad_norm": 0.19073107838630676,
      "learning_rate": 3.297532512327231e-05,
      "loss": 1.162,
      "step": 2690
    },
    {
      "epoch": 0.6248553575561213,
      "grad_norm": 0.1941593438386917,
      "learning_rate": 3.262340007101076e-05,
      "loss": 1.1592,
      "step": 2700
    },
    {
      "epoch": 0.6271696366581809,
      "grad_norm": 0.1990540772676468,
      "learning_rate": 3.227245141744882e-05,
      "loss": 1.1571,
      "step": 2710
    },
    {
      "epoch": 0.6294839157602407,
      "grad_norm": 0.19624970853328705,
      "learning_rate": 3.192249888254381e-05,
      "loss": 1.1582,
      "step": 2720
    },
    {
      "epoch": 0.6317981948623004,
      "grad_norm": 0.18591170012950897,
      "learning_rate": 3.157356213028072e-05,
      "loss": 1.1518,
      "step": 2730
    },
    {
      "epoch": 0.6341124739643601,
      "grad_norm": 0.1997700184583664,
      "learning_rate": 3.122566076756724e-05,
      "loss": 1.1689,
      "step": 2740
    },
    {
      "epoch": 0.6364267530664198,
      "grad_norm": 0.18985426425933838,
      "learning_rate": 3.087881434313212e-05,
      "loss": 1.1693,
      "step": 2750
    },
    {
      "epoch": 0.6387410321684796,
      "grad_norm": 0.19050361216068268,
      "learning_rate": 3.053304234642661e-05,
      "loss": 1.1651,
      "step": 2760
    },
    {
      "epoch": 0.6410553112705393,
      "grad_norm": 0.19750134646892548,
      "learning_rate": 3.0188364206529467e-05,
      "loss": 1.1657,
      "step": 2770
    },
    {
      "epoch": 0.6433695903725989,
      "grad_norm": 0.18156161904335022,
      "learning_rate": 2.9844799291055083e-05,
      "loss": 1.1792,
      "step": 2780
    },
    {
      "epoch": 0.6456838694746586,
      "grad_norm": 0.19130638241767883,
      "learning_rate": 2.950236690506537e-05,
      "loss": 1.1623,
      "step": 2790
    },
    {
      "epoch": 0.6479981485767183,
      "grad_norm": 0.20145711302757263,
      "learning_rate": 2.916108628998484e-05,
      "loss": 1.162,
      "step": 2800
    },
    {
      "epoch": 0.6503124276787781,
      "grad_norm": 0.18573534488677979,
      "learning_rate": 2.8820976622519558e-05,
      "loss": 1.1724,
      "step": 2810
    },
    {
      "epoch": 0.6526267067808378,
      "grad_norm": 0.19041724503040314,
      "learning_rate": 2.84820570135795e-05,
      "loss": 1.1567,
      "step": 2820
    },
    {
      "epoch": 0.6549409858828975,
      "grad_norm": 0.19331230223178864,
      "learning_rate": 2.8144346507204728e-05,
      "loss": 1.1722,
      "step": 2830
    },
    {
      "epoch": 0.6572552649849572,
      "grad_norm": 0.19323979318141937,
      "learning_rate": 2.7807864079495306e-05,
      "loss": 1.1637,
      "step": 2840
    },
    {
      "epoch": 0.6595695440870168,
      "grad_norm": 0.18545861542224884,
      "learning_rate": 2.7472628637545082e-05,
      "loss": 1.1634,
      "step": 2850
    },
    {
      "epoch": 0.6618838231890766,
      "grad_norm": 0.19878439605236053,
      "learning_rate": 2.7138659018379144e-05,
      "loss": 1.169,
      "step": 2860
    },
    {
      "epoch": 0.6641981022911363,
      "grad_norm": 0.19122658669948578,
      "learning_rate": 2.680597398789554e-05,
      "loss": 1.1779,
      "step": 2870
    },
    {
      "epoch": 0.666512381393196,
      "grad_norm": 0.19803395867347717,
      "learning_rate": 2.647459223981064e-05,
      "loss": 1.1523,
      "step": 2880
    },
    {
      "epoch": 0.6688266604952557,
      "grad_norm": 0.19722239673137665,
      "learning_rate": 2.614453239460884e-05,
      "loss": 1.1596,
      "step": 2890
    },
    {
      "epoch": 0.6711409395973155,
      "grad_norm": 0.2012769877910614,
      "learning_rate": 2.581581299849627e-05,
      "loss": 1.1675,
      "step": 2900
    },
    {
      "epoch": 0.6734552186993752,
      "grad_norm": 0.19577769935131073,
      "learning_rate": 2.5488452522358585e-05,
      "loss": 1.167,
      "step": 2910
    },
    {
      "epoch": 0.6757694978014348,
      "grad_norm": 0.19656306505203247,
      "learning_rate": 2.5162469360723208e-05,
      "loss": 1.1737,
      "step": 2920
    },
    {
      "epoch": 0.6780837769034945,
      "grad_norm": 0.19946037232875824,
      "learning_rate": 2.4837881830725584e-05,
      "loss": 1.1509,
      "step": 2930
    },
    {
      "epoch": 0.6803980560055543,
      "grad_norm": 0.1928076148033142,
      "learning_rate": 2.451470817108007e-05,
      "loss": 1.1595,
      "step": 2940
    },
    {
      "epoch": 0.682712335107614,
      "grad_norm": 0.19065019488334656,
      "learning_rate": 2.4192966541054977e-05,
      "loss": 1.1651,
      "step": 2950
    },
    {
      "epoch": 0.6850266142096737,
      "grad_norm": 0.198676198720932,
      "learning_rate": 2.387267501945233e-05,
      "loss": 1.1487,
      "step": 2960
    },
    {
      "epoch": 0.6873408933117334,
      "grad_norm": 0.19842711091041565,
      "learning_rate": 2.3553851603591837e-05,
      "loss": 1.1606,
      "step": 2970
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.20439574122428894,
      "learning_rate": 2.3236514208299796e-05,
      "loss": 1.1464,
      "step": 2980
    },
    {
      "epoch": 0.6919694515158528,
      "grad_norm": 0.18908947706222534,
      "learning_rate": 2.2920680664902304e-05,
      "loss": 1.1608,
      "step": 2990
    },
    {
      "epoch": 0.6942837306179125,
      "grad_norm": 0.20464125275611877,
      "learning_rate": 2.260636872022339e-05,
      "loss": 1.1482,
      "step": 3000
    },
    {
      "epoch": 0.6942837306179125,
      "eval_loss": 1.1457551717758179,
      "eval_runtime": 21.6541,
      "eval_samples_per_second": 17.733,
      "eval_steps_per_second": 0.554,
      "step": 3000
    }
  ],
  "logging_steps": 10,
  "max_steps": 4321,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.914805497032868e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}