| { |
| "best_global_step": 1200, |
| "best_metric": 1.1594480276107788, |
| "best_model_checkpoint": "/workspace/second_half_run/checkpoint-1200", |
| "epoch": 0.8658008658008658, |
| "eval_steps": 300, |
| "global_step": 1200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007215007215007215, |
| "grad_norm": 0.20672309398651123, |
| "learning_rate": 1.0714285714285714e-05, |
| "loss": 1.2235, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01443001443001443, |
| "grad_norm": 0.20048637688159943, |
| "learning_rate": 2.261904761904762e-05, |
| "loss": 1.1998, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.021645021645021644, |
| "grad_norm": 0.2042124718427658, |
| "learning_rate": 3.4523809523809526e-05, |
| "loss": 1.1912, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02886002886002886, |
| "grad_norm": 0.2033877670764923, |
| "learning_rate": 4.642857142857143e-05, |
| "loss": 1.162, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03607503607503607, |
| "grad_norm": 0.20020800828933716, |
| "learning_rate": 5.833333333333334e-05, |
| "loss": 1.1723, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04329004329004329, |
| "grad_norm": 0.2054547369480133, |
| "learning_rate": 7.023809523809524e-05, |
| "loss": 1.1688, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.050505050505050504, |
| "grad_norm": 0.20819878578186035, |
| "learning_rate": 8.214285714285714e-05, |
| "loss": 1.1621, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05772005772005772, |
| "grad_norm": 0.20804323256015778, |
| "learning_rate": 9.404761904761905e-05, |
| "loss": 1.169, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06493506493506493, |
| "grad_norm": 0.2028702348470688, |
| "learning_rate": 9.999914627107077e-05, |
| "loss": 1.1745, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07215007215007214, |
| "grad_norm": 0.20979009568691254, |
| "learning_rate": 9.999231661456054e-05, |
| "loss": 1.1829, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07936507936507936, |
| "grad_norm": 0.20109142363071442, |
| "learning_rate": 9.997865823444018e-05, |
| "loss": 1.1669, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.08658008658008658, |
| "grad_norm": 0.21667157113552094, |
| "learning_rate": 9.995817299638243e-05, |
| "loss": 1.1771, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09379509379509379, |
| "grad_norm": 0.21592780947685242, |
| "learning_rate": 9.99308636985779e-05, |
| "loss": 1.1784, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.10101010101010101, |
| "grad_norm": 0.21357479691505432, |
| "learning_rate": 9.989673407135269e-05, |
| "loss": 1.1827, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10822510822510822, |
| "grad_norm": 0.220377117395401, |
| "learning_rate": 9.985578877665905e-05, |
| "loss": 1.1766, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.11544011544011544, |
| "grad_norm": 0.22202736139297485, |
| "learning_rate": 9.980803340743843e-05, |
| "loss": 1.1845, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.12265512265512266, |
| "grad_norm": 0.22033752501010895, |
| "learning_rate": 9.97534744868576e-05, |
| "loss": 1.1784, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12987012987012986, |
| "grad_norm": 0.21248167753219604, |
| "learning_rate": 9.969211946741755e-05, |
| "loss": 1.1799, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1370851370851371, |
| "grad_norm": 0.23226460814476013, |
| "learning_rate": 9.96239767299355e-05, |
| "loss": 1.176, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1443001443001443, |
| "grad_norm": 0.21726985275745392, |
| "learning_rate": 9.954905558240025e-05, |
| "loss": 1.1885, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.15151515151515152, |
| "grad_norm": 0.21341446042060852, |
| "learning_rate": 9.946736625870055e-05, |
| "loss": 1.1773, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15873015873015872, |
| "grad_norm": 0.21727606654167175, |
| "learning_rate": 9.937891991722736e-05, |
| "loss": 1.18, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.16594516594516595, |
| "grad_norm": 0.2137938290834427, |
| "learning_rate": 9.928372863934965e-05, |
| "loss": 1.1852, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.17316017316017315, |
| "grad_norm": 0.20841728150844574, |
| "learning_rate": 9.918180542776399e-05, |
| "loss": 1.1768, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.18037518037518038, |
| "grad_norm": 0.2213844656944275, |
| "learning_rate": 9.907316420471863e-05, |
| "loss": 1.1858, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.18759018759018758, |
| "grad_norm": 0.221885547041893, |
| "learning_rate": 9.895781981011169e-05, |
| "loss": 1.1796, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.19480519480519481, |
| "grad_norm": 0.21279644966125488, |
| "learning_rate": 9.883578799946409e-05, |
| "loss": 1.182, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.20202020202020202, |
| "grad_norm": 0.1996086686849594, |
| "learning_rate": 9.870708544176745e-05, |
| "loss": 1.1792, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.20923520923520925, |
| "grad_norm": 0.20936301350593567, |
| "learning_rate": 9.857172971720715e-05, |
| "loss": 1.1862, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.21645021645021645, |
| "grad_norm": 0.2194942831993103, |
| "learning_rate": 9.842973931476101e-05, |
| "loss": 1.1715, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.21645021645021645, |
| "eval_loss": 1.175282597541809, |
| "eval_runtime": 24.0253, |
| "eval_samples_per_second": 15.983, |
| "eval_steps_per_second": 0.499, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.22366522366522368, |
| "grad_norm": 0.20830006897449493, |
| "learning_rate": 9.828113362967372e-05, |
| "loss": 1.1837, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.23088023088023088, |
| "grad_norm": 0.19409529864788055, |
| "learning_rate": 9.812593296080757e-05, |
| "loss": 1.1749, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.23809523809523808, |
| "grad_norm": 0.21279384195804596, |
| "learning_rate": 9.796415850786968e-05, |
| "loss": 1.1752, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2453102453102453, |
| "grad_norm": 0.20255984365940094, |
| "learning_rate": 9.779583236851631e-05, |
| "loss": 1.1772, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.25252525252525254, |
| "grad_norm": 0.20980049669742584, |
| "learning_rate": 9.76209775353343e-05, |
| "loss": 1.167, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2597402597402597, |
| "grad_norm": 0.2113814800977707, |
| "learning_rate": 9.743961789270047e-05, |
| "loss": 1.1735, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.26695526695526695, |
| "grad_norm": 0.2063579112291336, |
| "learning_rate": 9.725177821351907e-05, |
| "loss": 1.1638, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2741702741702742, |
| "grad_norm": 0.20841461420059204, |
| "learning_rate": 9.705748415583797e-05, |
| "loss": 1.172, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2813852813852814, |
| "grad_norm": 0.21439822018146515, |
| "learning_rate": 9.685676225934383e-05, |
| "loss": 1.1929, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2886002886002886, |
| "grad_norm": 0.2044590413570404, |
| "learning_rate": 9.664963994173695e-05, |
| "loss": 1.1688, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2958152958152958, |
| "grad_norm": 0.21234646439552307, |
| "learning_rate": 9.643614549498609e-05, |
| "loss": 1.1809, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 0.2080051600933075, |
| "learning_rate": 9.621630808146397e-05, |
| "loss": 1.1667, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.31024531024531027, |
| "grad_norm": 0.20338976383209229, |
| "learning_rate": 9.599015772996375e-05, |
| "loss": 1.1836, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.31746031746031744, |
| "grad_norm": 0.20682169497013092, |
| "learning_rate": 9.57577253315973e-05, |
| "loss": 1.1732, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3246753246753247, |
| "grad_norm": 0.2108171582221985, |
| "learning_rate": 9.551904263557558e-05, |
| "loss": 1.1886, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.3318903318903319, |
| "grad_norm": 0.21451614797115326, |
| "learning_rate": 9.527414224487182e-05, |
| "loss": 1.1756, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.33910533910533913, |
| "grad_norm": 0.2017257660627365, |
| "learning_rate": 9.502305761176818e-05, |
| "loss": 1.1884, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3463203463203463, |
| "grad_norm": 0.19700396060943604, |
| "learning_rate": 9.476582303328626e-05, |
| "loss": 1.1786, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.35353535353535354, |
| "grad_norm": 0.22976048290729523, |
| "learning_rate": 9.450247364650227e-05, |
| "loss": 1.1682, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.36075036075036077, |
| "grad_norm": 0.19582359492778778, |
| "learning_rate": 9.423304542374749e-05, |
| "loss": 1.1799, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.36796536796536794, |
| "grad_norm": 0.21123509109020233, |
| "learning_rate": 9.395757516769464e-05, |
| "loss": 1.1892, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.37518037518037517, |
| "grad_norm": 0.2202620804309845, |
| "learning_rate": 9.367610050633075e-05, |
| "loss": 1.1847, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3823953823953824, |
| "grad_norm": 0.20339728891849518, |
| "learning_rate": 9.338865988781736e-05, |
| "loss": 1.1823, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.38961038961038963, |
| "grad_norm": 0.2076507806777954, |
| "learning_rate": 9.309529257523872e-05, |
| "loss": 1.1622, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3968253968253968, |
| "grad_norm": 0.21566936373710632, |
| "learning_rate": 9.279603864123858e-05, |
| "loss": 1.192, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "grad_norm": 0.21168456971645355, |
| "learning_rate": 9.249093896254643e-05, |
| "loss": 1.1725, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.41125541125541126, |
| "grad_norm": 0.2162477672100067, |
| "learning_rate": 9.218003521439404e-05, |
| "loss": 1.1688, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4184704184704185, |
| "grad_norm": 0.20714333653450012, |
| "learning_rate": 9.186336986482267e-05, |
| "loss": 1.1738, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.42568542568542567, |
| "grad_norm": 0.1988476812839508, |
| "learning_rate": 9.154098616888219e-05, |
| "loss": 1.168, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.4329004329004329, |
| "grad_norm": 0.21605175733566284, |
| "learning_rate": 9.121292816272267e-05, |
| "loss": 1.1653, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4329004329004329, |
| "eval_loss": 1.1736949682235718, |
| "eval_runtime": 21.5888, |
| "eval_samples_per_second": 17.787, |
| "eval_steps_per_second": 0.556, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4401154401154401, |
| "grad_norm": 0.21150672435760498, |
| "learning_rate": 9.087924065757919e-05, |
| "loss": 1.1744, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.44733044733044736, |
| "grad_norm": 0.1942724734544754, |
| "learning_rate": 9.053996923365084e-05, |
| "loss": 1.1798, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 0.20731911063194275, |
| "learning_rate": 9.019516023387473e-05, |
| "loss": 1.1738, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.46176046176046176, |
| "grad_norm": 0.20489269495010376, |
| "learning_rate": 8.98448607575956e-05, |
| "loss": 1.1714, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.468975468975469, |
| "grad_norm": 0.20433245599269867, |
| "learning_rate": 8.948911865413248e-05, |
| "loss": 1.1719, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 0.21800757944583893, |
| "learning_rate": 8.912798251624251e-05, |
| "loss": 1.1717, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4834054834054834, |
| "grad_norm": 0.19788488745689392, |
| "learning_rate": 8.876150167348348e-05, |
| "loss": 1.1781, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4906204906204906, |
| "grad_norm": 0.22694332897663116, |
| "learning_rate": 8.838972618547561e-05, |
| "loss": 1.1507, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.49783549783549785, |
| "grad_norm": 0.21313263475894928, |
| "learning_rate": 8.801270683506362e-05, |
| "loss": 1.1711, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5050505050505051, |
| "grad_norm": 0.20376580953598022, |
| "learning_rate": 8.763049512138008e-05, |
| "loss": 1.1699, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5122655122655123, |
| "grad_norm": 0.20033618807792664, |
| "learning_rate": 8.724314325281078e-05, |
| "loss": 1.1777, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5194805194805194, |
| "grad_norm": 0.2068224996328354, |
| "learning_rate": 8.685070413986338e-05, |
| "loss": 1.1796, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5266955266955267, |
| "grad_norm": 0.2078939974308014, |
| "learning_rate": 8.645323138794001e-05, |
| "loss": 1.1748, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5339105339105339, |
| "grad_norm": 0.21399526298046112, |
| "learning_rate": 8.605077929001508e-05, |
| "loss": 1.1665, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5411255411255411, |
| "grad_norm": 0.2034529596567154, |
| "learning_rate": 8.5643402819219e-05, |
| "loss": 1.1763, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5483405483405484, |
| "grad_norm": 0.2097165733575821, |
| "learning_rate": 8.523115762132925e-05, |
| "loss": 1.1652, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.20482751727104187, |
| "learning_rate": 8.48141000071693e-05, |
| "loss": 1.1604, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5627705627705628, |
| "grad_norm": 0.1976885050535202, |
| "learning_rate": 8.439228694491683e-05, |
| "loss": 1.1803, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.56998556998557, |
| "grad_norm": 0.21513821184635162, |
| "learning_rate": 8.39657760523222e-05, |
| "loss": 1.1726, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5772005772005772, |
| "grad_norm": 0.20503391325473785, |
| "learning_rate": 8.353462558883806e-05, |
| "loss": 1.1788, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5844155844155844, |
| "grad_norm": 0.2092897891998291, |
| "learning_rate": 8.309889444766135e-05, |
| "loss": 1.1754, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.5916305916305916, |
| "grad_norm": 0.21620312333106995, |
| "learning_rate": 8.265864214768883e-05, |
| "loss": 1.1653, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5988455988455988, |
| "grad_norm": 0.20629604160785675, |
| "learning_rate": 8.221392882538708e-05, |
| "loss": 1.157, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 0.19690963625907898, |
| "learning_rate": 8.176481522657801e-05, |
| "loss": 1.1704, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6132756132756133, |
| "grad_norm": 0.213628888130188, |
| "learning_rate": 8.131136269814139e-05, |
| "loss": 1.1799, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6204906204906205, |
| "grad_norm": 0.2039552927017212, |
| "learning_rate": 8.085363317963505e-05, |
| "loss": 1.1731, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6277056277056277, |
| "grad_norm": 0.2059933841228485, |
| "learning_rate": 8.039168919483428e-05, |
| "loss": 1.1563, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "grad_norm": 0.19335155189037323, |
| "learning_rate": 7.992559384319137e-05, |
| "loss": 1.1685, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6421356421356421, |
| "grad_norm": 0.20764502882957458, |
| "learning_rate": 7.945541079121641e-05, |
| "loss": 1.162, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6493506493506493, |
| "grad_norm": 0.20053254067897797, |
| "learning_rate": 7.898120426378088e-05, |
| "loss": 1.184, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6493506493506493, |
| "eval_loss": 1.1676597595214844, |
| "eval_runtime": 21.5481, |
| "eval_samples_per_second": 17.821, |
| "eval_steps_per_second": 0.557, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6565656565656566, |
| "grad_norm": 0.20835046470165253, |
| "learning_rate": 7.850303903534473e-05, |
| "loss": 1.1692, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6637806637806638, |
| "grad_norm": 0.20102348923683167, |
| "learning_rate": 7.802098042110846e-05, |
| "loss": 1.1531, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.670995670995671, |
| "grad_norm": 0.21355856955051422, |
| "learning_rate": 7.753509426809147e-05, |
| "loss": 1.1632, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.6782106782106783, |
| "grad_norm": 0.2094719111919403, |
| "learning_rate": 7.704544694613755e-05, |
| "loss": 1.1534, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.6854256854256854, |
| "grad_norm": 0.21322083473205566, |
| "learning_rate": 7.655210533884912e-05, |
| "loss": 1.1697, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.6926406926406926, |
| "grad_norm": 0.20414821803569794, |
| "learning_rate": 7.605513683445118e-05, |
| "loss": 1.1677, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6998556998556998, |
| "grad_norm": 0.20397794246673584, |
| "learning_rate": 7.555460931658647e-05, |
| "loss": 1.1581, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7070707070707071, |
| "grad_norm": 0.21406404674053192, |
| "learning_rate": 7.505059115504279e-05, |
| "loss": 1.1696, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.1953253448009491, |
| "learning_rate": 7.454315119641403e-05, |
| "loss": 1.1626, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7215007215007215, |
| "grad_norm": 0.2156262844800949, |
| "learning_rate": 7.403235875469603e-05, |
| "loss": 1.1674, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7287157287157288, |
| "grad_norm": 0.19182373583316803, |
| "learning_rate": 7.351828360181862e-05, |
| "loss": 1.1735, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7359307359307359, |
| "grad_norm": 0.1975948065519333, |
| "learning_rate": 7.300099595811506e-05, |
| "loss": 1.167, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7431457431457431, |
| "grad_norm": 0.19967731833457947, |
| "learning_rate": 7.248056648273034e-05, |
| "loss": 1.1631, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7503607503607503, |
| "grad_norm": 0.19803431630134583, |
| "learning_rate": 7.19570662639693e-05, |
| "loss": 1.1638, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 0.20940466225147247, |
| "learning_rate": 7.14305668095865e-05, |
| "loss": 1.1676, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7647907647907648, |
| "grad_norm": 0.20945611596107483, |
| "learning_rate": 7.090114003701838e-05, |
| "loss": 1.1798, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.772005772005772, |
| "grad_norm": 0.20469647645950317, |
| "learning_rate": 7.03688582635598e-05, |
| "loss": 1.1502, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.7792207792207793, |
| "grad_norm": 0.19986766576766968, |
| "learning_rate": 6.983379419648586e-05, |
| "loss": 1.159, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.7864357864357865, |
| "grad_norm": 0.20784717798233032, |
| "learning_rate": 6.929602092312023e-05, |
| "loss": 1.1593, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.7936507936507936, |
| "grad_norm": 0.20664818584918976, |
| "learning_rate": 6.87556119008519e-05, |
| "loss": 1.17, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.8008658008658008, |
| "grad_norm": 0.20311331748962402, |
| "learning_rate": 6.821264094710125e-05, |
| "loss": 1.1516, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "grad_norm": 0.20795664191246033, |
| "learning_rate": 6.76671822292368e-05, |
| "loss": 1.1488, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8152958152958153, |
| "grad_norm": 0.1966182142496109, |
| "learning_rate": 6.711931025444444e-05, |
| "loss": 1.1633, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8225108225108225, |
| "grad_norm": 0.22438089549541473, |
| "learning_rate": 6.656909985954994e-05, |
| "loss": 1.1661, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8297258297258298, |
| "grad_norm": 0.2064308524131775, |
| "learning_rate": 6.601662620079669e-05, |
| "loss": 1.1622, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.836940836940837, |
| "grad_norm": 0.20877982676029205, |
| "learning_rate": 6.546196474357961e-05, |
| "loss": 1.1759, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8441558441558441, |
| "grad_norm": 0.21362321078777313, |
| "learning_rate": 6.490519125213701e-05, |
| "loss": 1.161, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8513708513708513, |
| "grad_norm": 0.20032328367233276, |
| "learning_rate": 6.434638177920144e-05, |
| "loss": 1.1633, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8585858585858586, |
| "grad_norm": 0.21117402613162994, |
| "learning_rate": 6.378561265561134e-05, |
| "loss": 1.1693, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8658008658008658, |
| "grad_norm": 0.19220539927482605, |
| "learning_rate": 6.32229604798845e-05, |
| "loss": 1.1766, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8658008658008658, |
| "eval_loss": 1.1594480276107788, |
| "eval_runtime": 21.5758, |
| "eval_samples_per_second": 17.798, |
| "eval_steps_per_second": 0.556, |
| "step": 1200 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2772, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1659221988131471e+19, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |