| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 3728, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.000536480686695279, | |
| "grad_norm": 2.0863339521447086, | |
| "learning_rate": 2.6809651474530834e-08, | |
| "loss": 0.2851, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.002682403433476395, | |
| "grad_norm": 2.019867688346717, | |
| "learning_rate": 1.3404825737265417e-07, | |
| "loss": 0.281, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.00536480686695279, | |
| "grad_norm": 2.043241454451721, | |
| "learning_rate": 2.6809651474530835e-07, | |
| "loss": 0.272, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008047210300429184, | |
| "grad_norm": 1.8858706299088983, | |
| "learning_rate": 4.021447721179625e-07, | |
| "loss": 0.2711, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.01072961373390558, | |
| "grad_norm": 1.7673503995687727, | |
| "learning_rate": 5.361930294906167e-07, | |
| "loss": 0.2735, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.013412017167381975, | |
| "grad_norm": 2.323464675632955, | |
| "learning_rate": 6.702412868632709e-07, | |
| "loss": 0.2725, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.016094420600858368, | |
| "grad_norm": 2.5673838750685647, | |
| "learning_rate": 8.04289544235925e-07, | |
| "loss": 0.2744, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.018776824034334765, | |
| "grad_norm": 1.9187222847932, | |
| "learning_rate": 9.383378016085791e-07, | |
| "loss": 0.2761, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.02145922746781116, | |
| "grad_norm": 1.8984966074581404, | |
| "learning_rate": 1.0723860589812334e-06, | |
| "loss": 0.2655, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.024141630901287552, | |
| "grad_norm": 1.8659220642427434, | |
| "learning_rate": 1.2064343163538874e-06, | |
| "loss": 0.2793, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.02682403433476395, | |
| "grad_norm": 1.9238255304447283, | |
| "learning_rate": 1.3404825737265418e-06, | |
| "loss": 0.2669, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.029506437768240343, | |
| "grad_norm": 1.8501661181906428, | |
| "learning_rate": 1.4745308310991958e-06, | |
| "loss": 0.2711, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.032188841201716736, | |
| "grad_norm": 1.9878940051253793, | |
| "learning_rate": 1.60857908847185e-06, | |
| "loss": 0.26, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03487124463519313, | |
| "grad_norm": 2.094483621764117, | |
| "learning_rate": 1.7426273458445042e-06, | |
| "loss": 0.2694, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.03755364806866953, | |
| "grad_norm": 2.058467307343078, | |
| "learning_rate": 1.8766756032171582e-06, | |
| "loss": 0.266, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.040236051502145924, | |
| "grad_norm": 2.008096065466058, | |
| "learning_rate": 2.0107238605898126e-06, | |
| "loss": 0.2623, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.04291845493562232, | |
| "grad_norm": 1.849039686452229, | |
| "learning_rate": 2.1447721179624668e-06, | |
| "loss": 0.2677, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04560085836909871, | |
| "grad_norm": 2.092389670059048, | |
| "learning_rate": 2.278820375335121e-06, | |
| "loss": 0.2618, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.048283261802575105, | |
| "grad_norm": 1.8886682449515093, | |
| "learning_rate": 2.4128686327077747e-06, | |
| "loss": 0.2657, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.050965665236051505, | |
| "grad_norm": 2.2114135594284305, | |
| "learning_rate": 2.5469168900804294e-06, | |
| "loss": 0.2676, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.0536480686695279, | |
| "grad_norm": 1.9846013216504284, | |
| "learning_rate": 2.6809651474530836e-06, | |
| "loss": 0.269, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05633047210300429, | |
| "grad_norm": 1.9945348953325985, | |
| "learning_rate": 2.8150134048257378e-06, | |
| "loss": 0.2681, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.059012875536480686, | |
| "grad_norm": 1.8180659313873635, | |
| "learning_rate": 2.9490616621983915e-06, | |
| "loss": 0.2704, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06169527896995708, | |
| "grad_norm": 2.225519442017157, | |
| "learning_rate": 3.0831099195710457e-06, | |
| "loss": 0.2731, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.06437768240343347, | |
| "grad_norm": 2.2160771277529117, | |
| "learning_rate": 3.2171581769437e-06, | |
| "loss": 0.2686, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06706008583690987, | |
| "grad_norm": 1.9249390642006592, | |
| "learning_rate": 3.351206434316354e-06, | |
| "loss": 0.2761, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.06974248927038626, | |
| "grad_norm": 1.9328682746531594, | |
| "learning_rate": 3.4852546916890083e-06, | |
| "loss": 0.2714, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07242489270386267, | |
| "grad_norm": 2.1186813690148214, | |
| "learning_rate": 3.6193029490616625e-06, | |
| "loss": 0.2701, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.07510729613733906, | |
| "grad_norm": 2.1933333909107287, | |
| "learning_rate": 3.7533512064343163e-06, | |
| "loss": 0.2791, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07778969957081545, | |
| "grad_norm": 2.129819677044279, | |
| "learning_rate": 3.8873994638069705e-06, | |
| "loss": 0.2797, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.08047210300429185, | |
| "grad_norm": 2.067811024124003, | |
| "learning_rate": 4.021447721179625e-06, | |
| "loss": 0.2762, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08315450643776824, | |
| "grad_norm": 2.1707210015708083, | |
| "learning_rate": 4.155495978552279e-06, | |
| "loss": 0.279, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.08583690987124463, | |
| "grad_norm": 2.2019940721149007, | |
| "learning_rate": 4.2895442359249335e-06, | |
| "loss": 0.2738, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08851931330472103, | |
| "grad_norm": 2.2099864687326662, | |
| "learning_rate": 4.423592493297587e-06, | |
| "loss": 0.268, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.09120171673819742, | |
| "grad_norm": 2.22468718531686, | |
| "learning_rate": 4.557640750670242e-06, | |
| "loss": 0.2829, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.09388412017167382, | |
| "grad_norm": 2.1800134691918767, | |
| "learning_rate": 4.691689008042896e-06, | |
| "loss": 0.2856, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.09656652360515021, | |
| "grad_norm": 2.103884261534838, | |
| "learning_rate": 4.8257372654155495e-06, | |
| "loss": 0.2791, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0992489270386266, | |
| "grad_norm": 2.0714593934541408, | |
| "learning_rate": 4.959785522788204e-06, | |
| "loss": 0.273, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.10193133047210301, | |
| "grad_norm": 2.058846978716926, | |
| "learning_rate": 5.093833780160859e-06, | |
| "loss": 0.284, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1046137339055794, | |
| "grad_norm": 2.076409950200697, | |
| "learning_rate": 5.2278820375335125e-06, | |
| "loss": 0.2792, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.1072961373390558, | |
| "grad_norm": 2.1243909119008766, | |
| "learning_rate": 5.361930294906167e-06, | |
| "loss": 0.282, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.10997854077253219, | |
| "grad_norm": 2.3388393818726585, | |
| "learning_rate": 5.495978552278821e-06, | |
| "loss": 0.2925, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.11266094420600858, | |
| "grad_norm": 2.451045371521958, | |
| "learning_rate": 5.6300268096514755e-06, | |
| "loss": 0.2994, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.11534334763948498, | |
| "grad_norm": 2.1487473131618255, | |
| "learning_rate": 5.764075067024129e-06, | |
| "loss": 0.2848, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.11802575107296137, | |
| "grad_norm": 2.4814960698366604, | |
| "learning_rate": 5.898123324396783e-06, | |
| "loss": 0.2774, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12070815450643776, | |
| "grad_norm": 2.410915656859707, | |
| "learning_rate": 6.032171581769437e-06, | |
| "loss": 0.2809, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.12339055793991416, | |
| "grad_norm": 2.396133050866378, | |
| "learning_rate": 6.1662198391420915e-06, | |
| "loss": 0.2849, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.12607296137339055, | |
| "grad_norm": 2.2801377751578578, | |
| "learning_rate": 6.300268096514745e-06, | |
| "loss": 0.2918, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.12875536480686695, | |
| "grad_norm": 2.34490728705815, | |
| "learning_rate": 6.4343163538874e-06, | |
| "loss": 0.2987, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.13143776824034334, | |
| "grad_norm": 2.3642964302216214, | |
| "learning_rate": 6.5683646112600545e-06, | |
| "loss": 0.2889, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.13412017167381973, | |
| "grad_norm": 2.380305337416477, | |
| "learning_rate": 6.702412868632708e-06, | |
| "loss": 0.2938, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.13680257510729613, | |
| "grad_norm": 2.2797615102374853, | |
| "learning_rate": 6.836461126005363e-06, | |
| "loss": 0.2839, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.13948497854077252, | |
| "grad_norm": 2.3413285353478455, | |
| "learning_rate": 6.970509383378017e-06, | |
| "loss": 0.2919, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.14216738197424894, | |
| "grad_norm": 2.3862345681550874, | |
| "learning_rate": 7.104557640750671e-06, | |
| "loss": 0.2937, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.14484978540772533, | |
| "grad_norm": 2.3320150218284303, | |
| "learning_rate": 7.238605898123325e-06, | |
| "loss": 0.2985, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.14753218884120173, | |
| "grad_norm": 2.35670634905087, | |
| "learning_rate": 7.37265415549598e-06, | |
| "loss": 0.2989, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.15021459227467812, | |
| "grad_norm": 2.416487625597099, | |
| "learning_rate": 7.506702412868633e-06, | |
| "loss": 0.3064, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.15289699570815452, | |
| "grad_norm": 2.433303207584211, | |
| "learning_rate": 7.640750670241287e-06, | |
| "loss": 0.2919, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.1555793991416309, | |
| "grad_norm": 2.3339942787804766, | |
| "learning_rate": 7.774798927613941e-06, | |
| "loss": 0.2992, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1582618025751073, | |
| "grad_norm": 2.359252347466936, | |
| "learning_rate": 7.908847184986595e-06, | |
| "loss": 0.3022, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.1609442060085837, | |
| "grad_norm": 2.584700516100613, | |
| "learning_rate": 8.04289544235925e-06, | |
| "loss": 0.2936, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1636266094420601, | |
| "grad_norm": 2.6035628076524473, | |
| "learning_rate": 8.176943699731904e-06, | |
| "loss": 0.3063, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.16630901287553648, | |
| "grad_norm": 2.344260798418792, | |
| "learning_rate": 8.310991957104558e-06, | |
| "loss": 0.3025, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.16899141630901288, | |
| "grad_norm": 2.3092835960344074, | |
| "learning_rate": 8.445040214477213e-06, | |
| "loss": 0.3088, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.17167381974248927, | |
| "grad_norm": 2.4791987185928663, | |
| "learning_rate": 8.579088471849867e-06, | |
| "loss": 0.3066, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.17435622317596566, | |
| "grad_norm": 2.4438632774093674, | |
| "learning_rate": 8.71313672922252e-06, | |
| "loss": 0.3023, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.17703862660944206, | |
| "grad_norm": 2.6299682555734147, | |
| "learning_rate": 8.847184986595175e-06, | |
| "loss": 0.3067, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.17972103004291845, | |
| "grad_norm": 2.3929731274938097, | |
| "learning_rate": 8.98123324396783e-06, | |
| "loss": 0.2967, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.18240343347639484, | |
| "grad_norm": 2.489569539616038, | |
| "learning_rate": 9.115281501340484e-06, | |
| "loss": 0.3028, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.18508583690987124, | |
| "grad_norm": 2.4774372151229436, | |
| "learning_rate": 9.249329758713138e-06, | |
| "loss": 0.3184, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.18776824034334763, | |
| "grad_norm": 2.6390058785683244, | |
| "learning_rate": 9.383378016085791e-06, | |
| "loss": 0.3129, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.19045064377682402, | |
| "grad_norm": 2.559704702422249, | |
| "learning_rate": 9.517426273458445e-06, | |
| "loss": 0.3189, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.19313304721030042, | |
| "grad_norm": 2.5923004925849202, | |
| "learning_rate": 9.651474530831099e-06, | |
| "loss": 0.3179, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1958154506437768, | |
| "grad_norm": 2.5402736760924305, | |
| "learning_rate": 9.785522788203754e-06, | |
| "loss": 0.3149, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.1984978540772532, | |
| "grad_norm": 2.425562826716764, | |
| "learning_rate": 9.919571045576408e-06, | |
| "loss": 0.3199, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.20118025751072963, | |
| "grad_norm": 2.5852834491116146, | |
| "learning_rate": 9.999991231716779e-06, | |
| "loss": 0.3229, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.20386266094420602, | |
| "grad_norm": 2.5995942631655193, | |
| "learning_rate": 9.999892588883699e-06, | |
| "loss": 0.318, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2065450643776824, | |
| "grad_norm": 2.7426449146592917, | |
| "learning_rate": 9.99968434503304e-06, | |
| "loss": 0.3207, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.2092274678111588, | |
| "grad_norm": 2.589479755333145, | |
| "learning_rate": 9.999366504729645e-06, | |
| "loss": 0.3261, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2119098712446352, | |
| "grad_norm": 2.5197646152950184, | |
| "learning_rate": 9.998939074940788e-06, | |
| "loss": 0.3156, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.2145922746781116, | |
| "grad_norm": 2.652204652785218, | |
| "learning_rate": 9.998402065036018e-06, | |
| "loss": 0.3287, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.217274678111588, | |
| "grad_norm": 2.941963697553996, | |
| "learning_rate": 9.997755486786954e-06, | |
| "loss": 0.3152, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.21995708154506438, | |
| "grad_norm": 2.514230531663027, | |
| "learning_rate": 9.996999354367028e-06, | |
| "loss": 0.3224, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.22263948497854077, | |
| "grad_norm": 2.613820892247126, | |
| "learning_rate": 9.996133684351172e-06, | |
| "loss": 0.3223, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.22532188841201717, | |
| "grad_norm": 2.60586168357607, | |
| "learning_rate": 9.995158495715459e-06, | |
| "loss": 0.3304, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.22800429184549356, | |
| "grad_norm": 2.73014784630188, | |
| "learning_rate": 9.994073809836677e-06, | |
| "loss": 0.3233, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.23068669527896996, | |
| "grad_norm": 2.808246799344738, | |
| "learning_rate": 9.992879650491877e-06, | |
| "loss": 0.3287, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.23336909871244635, | |
| "grad_norm": 2.4427023808762733, | |
| "learning_rate": 9.991576043857833e-06, | |
| "loss": 0.3246, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.23605150214592274, | |
| "grad_norm": 2.8001081105122734, | |
| "learning_rate": 9.990163018510484e-06, | |
| "loss": 0.3254, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.23873390557939914, | |
| "grad_norm": 2.5756753850013236, | |
| "learning_rate": 9.988640605424298e-06, | |
| "loss": 0.3303, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.24141630901287553, | |
| "grad_norm": 2.6382834663452464, | |
| "learning_rate": 9.987008837971595e-06, | |
| "loss": 0.3259, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.24409871244635192, | |
| "grad_norm": 2.874308167284836, | |
| "learning_rate": 9.98526775192182e-06, | |
| "loss": 0.3287, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.24678111587982832, | |
| "grad_norm": 2.577949722207028, | |
| "learning_rate": 9.983417385440755e-06, | |
| "loss": 0.3285, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2494635193133047, | |
| "grad_norm": 2.605209899345705, | |
| "learning_rate": 9.981457779089678e-06, | |
| "loss": 0.3393, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.2521459227467811, | |
| "grad_norm": 2.669405429442268, | |
| "learning_rate": 9.979388975824485e-06, | |
| "loss": 0.3297, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2548283261802575, | |
| "grad_norm": 2.6050099070733737, | |
| "learning_rate": 9.977211020994735e-06, | |
| "loss": 0.3267, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.2575107296137339, | |
| "grad_norm": 2.5738484109153066, | |
| "learning_rate": 9.97492396234267e-06, | |
| "loss": 0.3241, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2601931330472103, | |
| "grad_norm": 2.6751920468972887, | |
| "learning_rate": 9.972527850002154e-06, | |
| "loss": 0.3211, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.2628755364806867, | |
| "grad_norm": 2.6515899868690687, | |
| "learning_rate": 9.970022736497588e-06, | |
| "loss": 0.3343, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2655579399141631, | |
| "grad_norm": 2.833435887671815, | |
| "learning_rate": 9.96740867674275e-06, | |
| "loss": 0.3213, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.26824034334763946, | |
| "grad_norm": 2.5723877096036127, | |
| "learning_rate": 9.964685728039596e-06, | |
| "loss": 0.3273, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2709227467811159, | |
| "grad_norm": 2.421773456878221, | |
| "learning_rate": 9.961853950076992e-06, | |
| "loss": 0.325, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.27360515021459225, | |
| "grad_norm": 2.72433255261523, | |
| "learning_rate": 9.958913404929423e-06, | |
| "loss": 0.3259, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2762875536480687, | |
| "grad_norm": 2.511740357955884, | |
| "learning_rate": 9.955864157055623e-06, | |
| "loss": 0.3335, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.27896995708154504, | |
| "grad_norm": 2.570325380578064, | |
| "learning_rate": 9.95270627329716e-06, | |
| "loss": 0.3238, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.28165236051502146, | |
| "grad_norm": 2.807983699039292, | |
| "learning_rate": 9.949439822876975e-06, | |
| "loss": 0.3338, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.2843347639484979, | |
| "grad_norm": 2.66736822736115, | |
| "learning_rate": 9.94606487739787e-06, | |
| "loss": 0.3377, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.28701716738197425, | |
| "grad_norm": 2.603191440095429, | |
| "learning_rate": 9.942581510840919e-06, | |
| "loss": 0.3348, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.28969957081545067, | |
| "grad_norm": 2.5473646746035414, | |
| "learning_rate": 9.93898979956387e-06, | |
| "loss": 0.3261, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.29238197424892703, | |
| "grad_norm": 2.599401887659365, | |
| "learning_rate": 9.935289822299456e-06, | |
| "loss": 0.33, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.29506437768240346, | |
| "grad_norm": 2.40975591177543, | |
| "learning_rate": 9.931481660153672e-06, | |
| "loss": 0.3286, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2977467811158798, | |
| "grad_norm": 2.4894490507567197, | |
| "learning_rate": 9.927565396604001e-06, | |
| "loss": 0.3291, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.30042918454935624, | |
| "grad_norm": 2.5722016173007125, | |
| "learning_rate": 9.923541117497586e-06, | |
| "loss": 0.337, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3031115879828326, | |
| "grad_norm": 2.5550725925072877, | |
| "learning_rate": 9.919408911049333e-06, | |
| "loss": 0.3346, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.30579399141630903, | |
| "grad_norm": 2.5433106872337072, | |
| "learning_rate": 9.915168867839997e-06, | |
| "loss": 0.3364, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3084763948497854, | |
| "grad_norm": 2.450943392872261, | |
| "learning_rate": 9.910821080814184e-06, | |
| "loss": 0.3354, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.3111587982832618, | |
| "grad_norm": 2.546643411521105, | |
| "learning_rate": 9.90636564527832e-06, | |
| "loss": 0.3325, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3138412017167382, | |
| "grad_norm": 2.4957026778211526, | |
| "learning_rate": 9.901802658898552e-06, | |
| "loss": 0.3279, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.3165236051502146, | |
| "grad_norm": 2.6557291209936413, | |
| "learning_rate": 9.897132221698624e-06, | |
| "loss": 0.3401, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.31920600858369097, | |
| "grad_norm": 2.6636098661539283, | |
| "learning_rate": 9.892354436057665e-06, | |
| "loss": 0.3265, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3218884120171674, | |
| "grad_norm": 2.4076735348215395, | |
| "learning_rate": 9.887469406707962e-06, | |
| "loss": 0.3384, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.32457081545064376, | |
| "grad_norm": 2.6766617663933006, | |
| "learning_rate": 9.882477240732652e-06, | |
| "loss": 0.3364, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.3272532188841202, | |
| "grad_norm": 2.3629027511324665, | |
| "learning_rate": 9.877378047563378e-06, | |
| "loss": 0.3377, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.32993562231759654, | |
| "grad_norm": 2.454480581820658, | |
| "learning_rate": 9.872171938977895e-06, | |
| "loss": 0.3472, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.33261802575107297, | |
| "grad_norm": 2.467294504215762, | |
| "learning_rate": 9.866859029097613e-06, | |
| "loss": 0.3394, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.33530042918454933, | |
| "grad_norm": 2.434454219419277, | |
| "learning_rate": 9.8614394343851e-06, | |
| "loss": 0.3384, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.33798283261802575, | |
| "grad_norm": 2.588554106379298, | |
| "learning_rate": 9.855913273641531e-06, | |
| "loss": 0.3486, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3406652360515021, | |
| "grad_norm": 2.559726676405475, | |
| "learning_rate": 9.850280668004072e-06, | |
| "loss": 0.3368, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.34334763948497854, | |
| "grad_norm": 2.5431858822060085, | |
| "learning_rate": 9.844541740943239e-06, | |
| "loss": 0.3212, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.34603004291845496, | |
| "grad_norm": 2.5877373233512446, | |
| "learning_rate": 9.838696618260182e-06, | |
| "loss": 0.3326, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.3487124463519313, | |
| "grad_norm": 2.5309813288979917, | |
| "learning_rate": 9.832745428083934e-06, | |
| "loss": 0.3331, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.35139484978540775, | |
| "grad_norm": 2.4354927332842364, | |
| "learning_rate": 9.826688300868597e-06, | |
| "loss": 0.3413, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.3540772532188841, | |
| "grad_norm": 2.5015034823966893, | |
| "learning_rate": 9.820525369390486e-06, | |
| "loss": 0.3409, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.35675965665236054, | |
| "grad_norm": 2.5950385006418766, | |
| "learning_rate": 9.814256768745212e-06, | |
| "loss": 0.333, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.3594420600858369, | |
| "grad_norm": 2.661302238907792, | |
| "learning_rate": 9.80788263634473e-06, | |
| "loss": 0.3332, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3621244635193133, | |
| "grad_norm": 2.684828328695924, | |
| "learning_rate": 9.801403111914324e-06, | |
| "loss": 0.3358, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.3648068669527897, | |
| "grad_norm": 2.4226357461522245, | |
| "learning_rate": 9.794818337489535e-06, | |
| "loss": 0.3395, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3674892703862661, | |
| "grad_norm": 2.633280418543926, | |
| "learning_rate": 9.788128457413064e-06, | |
| "loss": 0.3415, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.3701716738197425, | |
| "grad_norm": 2.7709279052969924, | |
| "learning_rate": 9.78133361833159e-06, | |
| "loss": 0.3396, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.3728540772532189, | |
| "grad_norm": 2.4891302152361683, | |
| "learning_rate": 9.774433969192569e-06, | |
| "loss": 0.3413, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.37553648068669526, | |
| "grad_norm": 2.448140021927178, | |
| "learning_rate": 9.767429661240966e-06, | |
| "loss": 0.337, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3782188841201717, | |
| "grad_norm": 2.4979263971763292, | |
| "learning_rate": 9.760320848015932e-06, | |
| "loss": 0.3356, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.38090128755364805, | |
| "grad_norm": 2.5647155012401575, | |
| "learning_rate": 9.75310768534745e-06, | |
| "loss": 0.3276, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.38358369098712447, | |
| "grad_norm": 2.678305589560622, | |
| "learning_rate": 9.745790331352907e-06, | |
| "loss": 0.3332, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.38626609442060084, | |
| "grad_norm": 2.444142509679277, | |
| "learning_rate": 9.73836894643364e-06, | |
| "loss": 0.3411, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.38894849785407726, | |
| "grad_norm": 2.5901281336103605, | |
| "learning_rate": 9.730843693271413e-06, | |
| "loss": 0.3433, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.3916309012875536, | |
| "grad_norm": 2.6069745191548277, | |
| "learning_rate": 9.723214736824847e-06, | |
| "loss": 0.3269, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.39431330472103004, | |
| "grad_norm": 2.466120600187352, | |
| "learning_rate": 9.715482244325816e-06, | |
| "loss": 0.3438, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.3969957081545064, | |
| "grad_norm": 2.704267296891685, | |
| "learning_rate": 9.707646385275766e-06, | |
| "loss": 0.3408, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.39967811158798283, | |
| "grad_norm": 2.4389005799413037, | |
| "learning_rate": 9.699707331442016e-06, | |
| "loss": 0.3438, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.40236051502145925, | |
| "grad_norm": 2.5076690789214364, | |
| "learning_rate": 9.691665256853978e-06, | |
| "loss": 0.3329, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.4050429184549356, | |
| "grad_norm": 2.4796685866213717, | |
| "learning_rate": 9.683520337799353e-06, | |
| "loss": 0.3379, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.40772532188841204, | |
| "grad_norm": 4.153485428842475, | |
| "learning_rate": 9.675272752820258e-06, | |
| "loss": 0.3319, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4104077253218884, | |
| "grad_norm": 2.5641045685817727, | |
| "learning_rate": 9.666922682709317e-06, | |
| "loss": 0.3434, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.4130901287553648, | |
| "grad_norm": 2.5739549498843477, | |
| "learning_rate": 9.6584703105057e-06, | |
| "loss": 0.3367, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4157725321888412, | |
| "grad_norm": 2.506896854676805, | |
| "learning_rate": 9.649915821491107e-06, | |
| "loss": 0.3362, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.4184549356223176, | |
| "grad_norm": 2.4070337960809294, | |
| "learning_rate": 9.641259403185706e-06, | |
| "loss": 0.3352, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.421137339055794, | |
| "grad_norm": 2.4353089647919783, | |
| "learning_rate": 9.632501245344024e-06, | |
| "loss": 0.3397, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.4238197424892704, | |
| "grad_norm": 2.643735395039157, | |
| "learning_rate": 9.623641539950787e-06, | |
| "loss": 0.3495, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.42650214592274677, | |
| "grad_norm": 2.478560964786462, | |
| "learning_rate": 9.614680481216712e-06, | |
| "loss": 0.3432, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.4291845493562232, | |
| "grad_norm": 2.577614990446464, | |
| "learning_rate": 9.60561826557425e-06, | |
| "loss": 0.3383, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.43186695278969955, | |
| "grad_norm": 2.5172456862717825, | |
| "learning_rate": 9.596455091673282e-06, | |
| "loss": 0.3373, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.434549356223176, | |
| "grad_norm": 2.4359928535441138, | |
| "learning_rate": 9.587191160376758e-06, | |
| "loss": 0.3407, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.43723175965665234, | |
| "grad_norm": 2.465539807171554, | |
| "learning_rate": 9.577826674756301e-06, | |
| "loss": 0.3344, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.43991416309012876, | |
| "grad_norm": 2.2960811499386153, | |
| "learning_rate": 9.56836184008775e-06, | |
| "loss": 0.3382, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.44259656652360513, | |
| "grad_norm": 2.372331098589979, | |
| "learning_rate": 9.558796863846663e-06, | |
| "loss": 0.3339, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.44527896995708155, | |
| "grad_norm": 2.581631110852197, | |
| "learning_rate": 9.549131955703772e-06, | |
| "loss": 0.323, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4479613733905579, | |
| "grad_norm": 2.3697193776417707, | |
| "learning_rate": 9.539367327520382e-06, | |
| "loss": 0.3458, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.45064377682403434, | |
| "grad_norm": 2.5130569622507135, | |
| "learning_rate": 9.529503193343726e-06, | |
| "loss": 0.3418, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.4533261802575107, | |
| "grad_norm": 2.4362641659919806, | |
| "learning_rate": 9.519539769402282e-06, | |
| "loss": 0.3454, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.4560085836909871, | |
| "grad_norm": 2.6421344257402106, | |
| "learning_rate": 9.509477274101019e-06, | |
| "loss": 0.3373, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.45869098712446355, | |
| "grad_norm": 2.38501065072516, | |
| "learning_rate": 9.499315928016619e-06, | |
| "loss": 0.3282, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.4613733905579399, | |
| "grad_norm": 2.414855309500928, | |
| "learning_rate": 9.489055953892644e-06, | |
| "loss": 0.3254, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.46405579399141633, | |
| "grad_norm": 2.5052929819879077, | |
| "learning_rate": 9.478697576634646e-06, | |
| "loss": 0.3326, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.4667381974248927, | |
| "grad_norm": 2.489365469431507, | |
| "learning_rate": 9.46824102330524e-06, | |
| "loss": 0.3393, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.4694206008583691, | |
| "grad_norm": 2.4291904681349568, | |
| "learning_rate": 9.457686523119128e-06, | |
| "loss": 0.335, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.4721030042918455, | |
| "grad_norm": 2.4057320722711757, | |
| "learning_rate": 9.447034307438068e-06, | |
| "loss": 0.3269, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4747854077253219, | |
| "grad_norm": 2.5244213882736237, | |
| "learning_rate": 9.436284609765818e-06, | |
| "loss": 0.335, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.47746781115879827, | |
| "grad_norm": 2.429979427144697, | |
| "learning_rate": 9.425437665742998e-06, | |
| "loss": 0.3376, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4801502145922747, | |
| "grad_norm": 2.50403465078303, | |
| "learning_rate": 9.414493713141936e-06, | |
| "loss": 0.3317, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.48283261802575106, | |
| "grad_norm": 2.3406910645072374, | |
| "learning_rate": 9.403452991861452e-06, | |
| "loss": 0.3323, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4855150214592275, | |
| "grad_norm": 2.4499576375372776, | |
| "learning_rate": 9.392315743921606e-06, | |
| "loss": 0.3434, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.48819742489270385, | |
| "grad_norm": 2.3837396316239925, | |
| "learning_rate": 9.381082213458384e-06, | |
| "loss": 0.3379, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.49087982832618027, | |
| "grad_norm": 2.283083285021325, | |
| "learning_rate": 9.36975264671835e-06, | |
| "loss": 0.3416, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.49356223175965663, | |
| "grad_norm": 2.453493510513127, | |
| "learning_rate": 9.358327292053244e-06, | |
| "loss": 0.3332, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.49624463519313305, | |
| "grad_norm": 2.497590155715813, | |
| "learning_rate": 9.346806399914547e-06, | |
| "loss": 0.332, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.4989270386266094, | |
| "grad_norm": 2.4578743205162854, | |
| "learning_rate": 9.335190222847988e-06, | |
| "loss": 0.3221, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5016094420600858, | |
| "grad_norm": 2.441472272597816, | |
| "learning_rate": 9.323479015488e-06, | |
| "loss": 0.3268, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.5042918454935622, | |
| "grad_norm": 2.319230879720979, | |
| "learning_rate": 9.311673034552146e-06, | |
| "loss": 0.3289, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5069742489270386, | |
| "grad_norm": 2.4121543980436524, | |
| "learning_rate": 9.299772538835492e-06, | |
| "loss": 0.3343, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.509656652360515, | |
| "grad_norm": 2.394868024721506, | |
| "learning_rate": 9.28777778920493e-06, | |
| "loss": 0.3271, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5123390557939914, | |
| "grad_norm": 2.4436982704679786, | |
| "learning_rate": 9.27568904859346e-06, | |
| "loss": 0.3363, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.5150214592274678, | |
| "grad_norm": 2.3832231268645057, | |
| "learning_rate": 9.26350658199443e-06, | |
| "loss": 0.3362, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5177038626609443, | |
| "grad_norm": 2.327822759382941, | |
| "learning_rate": 9.251230656455722e-06, | |
| "loss": 0.3398, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.5203862660944206, | |
| "grad_norm": 2.439342840620097, | |
| "learning_rate": 9.238861541073909e-06, | |
| "loss": 0.3416, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.523068669527897, | |
| "grad_norm": 2.385481519390706, | |
| "learning_rate": 9.226399506988336e-06, | |
| "loss": 0.3307, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.5257510729613734, | |
| "grad_norm": 2.4266226741554187, | |
| "learning_rate": 9.213844827375196e-06, | |
| "loss": 0.3389, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5284334763948498, | |
| "grad_norm": 2.466403363273194, | |
| "learning_rate": 9.201197777441533e-06, | |
| "loss": 0.3266, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.5311158798283262, | |
| "grad_norm": 2.356016335190441, | |
| "learning_rate": 9.188458634419213e-06, | |
| "loss": 0.333, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5337982832618026, | |
| "grad_norm": 2.434189564629439, | |
| "learning_rate": 9.175627677558842e-06, | |
| "loss": 0.3231, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.5364806866952789, | |
| "grad_norm": 2.335622267331965, | |
| "learning_rate": 9.162705188123647e-06, | |
| "loss": 0.319, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5391630901287554, | |
| "grad_norm": 2.3022256652508886, | |
| "learning_rate": 9.149691449383313e-06, | |
| "loss": 0.3329, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.5418454935622318, | |
| "grad_norm": 2.352723118113408, | |
| "learning_rate": 9.136586746607767e-06, | |
| "loss": 0.3243, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.5445278969957081, | |
| "grad_norm": 2.3738093381466197, | |
| "learning_rate": 9.123391367060937e-06, | |
| "loss": 0.3403, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.5472103004291845, | |
| "grad_norm": 2.486621636388831, | |
| "learning_rate": 9.110105599994436e-06, | |
| "loss": 0.3416, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.549892703862661, | |
| "grad_norm": 2.183874840676628, | |
| "learning_rate": 9.096729736641242e-06, | |
| "loss": 0.3334, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.5525751072961373, | |
| "grad_norm": 2.350951978916979, | |
| "learning_rate": 9.0832640702093e-06, | |
| "loss": 0.3379, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.5552575107296137, | |
| "grad_norm": 2.4066282347575676, | |
| "learning_rate": 9.0697088958751e-06, | |
| "loss": 0.3354, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.5579399141630901, | |
| "grad_norm": 2.1575463339610113, | |
| "learning_rate": 9.056064510777204e-06, | |
| "loss": 0.3248, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.5606223175965666, | |
| "grad_norm": 2.616297568647876, | |
| "learning_rate": 9.042331214009736e-06, | |
| "loss": 0.3309, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.5633047210300429, | |
| "grad_norm": 2.327723872267469, | |
| "learning_rate": 9.028509306615825e-06, | |
| "loss": 0.3301, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5659871244635193, | |
| "grad_norm": 2.3750699885317426, | |
| "learning_rate": 9.014599091581e-06, | |
| "loss": 0.3305, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.5686695278969958, | |
| "grad_norm": 2.3155068078689074, | |
| "learning_rate": 9.000600873826558e-06, | |
| "loss": 0.3376, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.5713519313304721, | |
| "grad_norm": 2.3330209228120538, | |
| "learning_rate": 8.98651496020287e-06, | |
| "loss": 0.3328, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.5740343347639485, | |
| "grad_norm": 2.3406393186838663, | |
| "learning_rate": 8.972341659482666e-06, | |
| "loss": 0.3316, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.5767167381974249, | |
| "grad_norm": 2.234164678010478, | |
| "learning_rate": 8.958081282354253e-06, | |
| "loss": 0.3344, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.5793991416309013, | |
| "grad_norm": 2.350491195121751, | |
| "learning_rate": 8.943734141414719e-06, | |
| "loss": 0.3326, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.5820815450643777, | |
| "grad_norm": 2.194661519862346, | |
| "learning_rate": 8.929300551163068e-06, | |
| "loss": 0.3287, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.5847639484978541, | |
| "grad_norm": 2.306271735341111, | |
| "learning_rate": 8.914780827993332e-06, | |
| "loss": 0.3246, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.5874463519313304, | |
| "grad_norm": 2.3938196044575055, | |
| "learning_rate": 8.900175290187636e-06, | |
| "loss": 0.3354, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.5901287553648069, | |
| "grad_norm": 2.393591199824571, | |
| "learning_rate": 8.885484257909218e-06, | |
| "loss": 0.3339, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5928111587982833, | |
| "grad_norm": 2.29507846222384, | |
| "learning_rate": 8.870708053195414e-06, | |
| "loss": 0.3276, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.5954935622317596, | |
| "grad_norm": 2.386042824631869, | |
| "learning_rate": 8.855846999950595e-06, | |
| "loss": 0.3331, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.598175965665236, | |
| "grad_norm": 2.3737048550845308, | |
| "learning_rate": 8.840901423939075e-06, | |
| "loss": 0.3267, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.6008583690987125, | |
| "grad_norm": 2.3457094796065947, | |
| "learning_rate": 8.825871652777955e-06, | |
| "loss": 0.332, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6035407725321889, | |
| "grad_norm": 2.469177839155163, | |
| "learning_rate": 8.81075801592996e-06, | |
| "loss": 0.3306, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.6062231759656652, | |
| "grad_norm": 2.185053219295239, | |
| "learning_rate": 8.795560844696198e-06, | |
| "loss": 0.3293, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.6089055793991416, | |
| "grad_norm": 2.387598614909069, | |
| "learning_rate": 8.780280472208915e-06, | |
| "loss": 0.3216, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.6115879828326181, | |
| "grad_norm": 2.173546188624302, | |
| "learning_rate": 8.764917233424179e-06, | |
| "loss": 0.3238, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.6142703862660944, | |
| "grad_norm": 2.3583055905659687, | |
| "learning_rate": 8.749471465114548e-06, | |
| "loss": 0.3333, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.6169527896995708, | |
| "grad_norm": 2.360546414469146, | |
| "learning_rate": 8.73394350586168e-06, | |
| "loss": 0.3286, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6196351931330472, | |
| "grad_norm": 2.318106769491821, | |
| "learning_rate": 8.71833369604891e-06, | |
| "loss": 0.3311, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.6223175965665236, | |
| "grad_norm": 2.2690977329855584, | |
| "learning_rate": 8.702642377853803e-06, | |
| "loss": 0.3278, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 2.3528719925610533, | |
| "learning_rate": 8.686869895240631e-06, | |
| "loss": 0.3252, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.6276824034334764, | |
| "grad_norm": 2.3175542680444017, | |
| "learning_rate": 8.671016593952853e-06, | |
| "loss": 0.3307, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6303648068669528, | |
| "grad_norm": 2.263552176980709, | |
| "learning_rate": 8.655082821505524e-06, | |
| "loss": 0.3258, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.6330472103004292, | |
| "grad_norm": 2.343567579732669, | |
| "learning_rate": 8.639068927177684e-06, | |
| "loss": 0.3211, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6357296137339056, | |
| "grad_norm": 2.2780220598628365, | |
| "learning_rate": 8.622975262004694e-06, | |
| "loss": 0.3298, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.6384120171673819, | |
| "grad_norm": 2.2947818816488272, | |
| "learning_rate": 8.606802178770551e-06, | |
| "loss": 0.315, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6410944206008584, | |
| "grad_norm": 2.365849669313569, | |
| "learning_rate": 8.590550032000146e-06, | |
| "loss": 0.3232, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.6437768240343348, | |
| "grad_norm": 2.144107893647448, | |
| "learning_rate": 8.574219177951495e-06, | |
| "loss": 0.3181, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6464592274678111, | |
| "grad_norm": 2.416375076775248, | |
| "learning_rate": 8.557809974607936e-06, | |
| "loss": 0.3229, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.6491416309012875, | |
| "grad_norm": 2.302421552968707, | |
| "learning_rate": 8.541322781670272e-06, | |
| "loss": 0.3245, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.651824034334764, | |
| "grad_norm": 2.230585990950688, | |
| "learning_rate": 8.524757960548888e-06, | |
| "loss": 0.328, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.6545064377682404, | |
| "grad_norm": 2.2888151168997597, | |
| "learning_rate": 8.50811587435584e-06, | |
| "loss": 0.33, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.6571888412017167, | |
| "grad_norm": 2.2856839548113164, | |
| "learning_rate": 8.491396887896878e-06, | |
| "loss": 0.3224, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.6598712446351931, | |
| "grad_norm": 2.120698181701614, | |
| "learning_rate": 8.474601367663463e-06, | |
| "loss": 0.3292, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.6625536480686696, | |
| "grad_norm": 2.372814557865873, | |
| "learning_rate": 8.457729681824722e-06, | |
| "loss": 0.3338, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.6652360515021459, | |
| "grad_norm": 2.2493974335307847, | |
| "learning_rate": 8.440782200219391e-06, | |
| "loss": 0.3316, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.6679184549356223, | |
| "grad_norm": 2.277320210657522, | |
| "learning_rate": 8.423759294347693e-06, | |
| "loss": 0.3341, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.6706008583690987, | |
| "grad_norm": 2.3582542622866898, | |
| "learning_rate": 8.40666133736321e-06, | |
| "loss": 0.3311, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6732832618025751, | |
| "grad_norm": 2.0865613012796804, | |
| "learning_rate": 8.389488704064686e-06, | |
| "loss": 0.3227, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.6759656652360515, | |
| "grad_norm": 2.4350698606722823, | |
| "learning_rate": 8.372241770887826e-06, | |
| "loss": 0.3358, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.6786480686695279, | |
| "grad_norm": 2.3729375593599755, | |
| "learning_rate": 8.354920915897038e-06, | |
| "loss": 0.3238, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.6813304721030042, | |
| "grad_norm": 1.9989806654658056, | |
| "learning_rate": 8.337526518777143e-06, | |
| "loss": 0.3216, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.6840128755364807, | |
| "grad_norm": 2.136326711892197, | |
| "learning_rate": 8.32005896082506e-06, | |
| "loss": 0.3226, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.6866952789699571, | |
| "grad_norm": 2.191149877600861, | |
| "learning_rate": 8.302518624941435e-06, | |
| "loss": 0.3289, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.6893776824034334, | |
| "grad_norm": 2.411570960647483, | |
| "learning_rate": 8.284905895622265e-06, | |
| "loss": 0.3253, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.6920600858369099, | |
| "grad_norm": 2.0894935734976445, | |
| "learning_rate": 8.26722115895045e-06, | |
| "loss": 0.3188, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.6947424892703863, | |
| "grad_norm": 2.335048531249719, | |
| "learning_rate": 8.249464802587353e-06, | |
| "loss": 0.3344, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.6974248927038627, | |
| "grad_norm": 2.1431080115701286, | |
| "learning_rate": 8.231637215764273e-06, | |
| "loss": 0.3335, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.700107296137339, | |
| "grad_norm": 2.1729056458527616, | |
| "learning_rate": 8.21373878927394e-06, | |
| "loss": 0.3142, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.7027896995708155, | |
| "grad_norm": 2.0992512222503885, | |
| "learning_rate": 8.195769915461931e-06, | |
| "loss": 0.3199, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.7054721030042919, | |
| "grad_norm": 2.4125083176551505, | |
| "learning_rate": 8.177730988218083e-06, | |
| "loss": 0.3222, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.7081545064377682, | |
| "grad_norm": 2.1167993655688337, | |
| "learning_rate": 8.159622402967841e-06, | |
| "loss": 0.3207, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.7108369098712446, | |
| "grad_norm": 2.132358313527919, | |
| "learning_rate": 8.141444556663612e-06, | |
| "loss": 0.3302, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.7135193133047211, | |
| "grad_norm": 2.2305452399275474, | |
| "learning_rate": 8.123197847776043e-06, | |
| "loss": 0.3186, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.7162017167381974, | |
| "grad_norm": 2.1240401193752465, | |
| "learning_rate": 8.104882676285301e-06, | |
| "loss": 0.3224, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.7188841201716738, | |
| "grad_norm": 2.427318922218822, | |
| "learning_rate": 8.086499443672297e-06, | |
| "loss": 0.3273, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.7215665236051502, | |
| "grad_norm": 2.20325872744776, | |
| "learning_rate": 8.068048552909887e-06, | |
| "loss": 0.3156, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.7242489270386266, | |
| "grad_norm": 2.2501615842663214, | |
| "learning_rate": 8.049530408454041e-06, | |
| "loss": 0.3196, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.726931330472103, | |
| "grad_norm": 2.01520964420955, | |
| "learning_rate": 8.030945416234971e-06, | |
| "loss": 0.3134, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.7296137339055794, | |
| "grad_norm": 2.092562343745031, | |
| "learning_rate": 8.012293983648247e-06, | |
| "loss": 0.3109, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.7322961373390557, | |
| "grad_norm": 2.13112956895763, | |
| "learning_rate": 7.993576519545844e-06, | |
| "loss": 0.3113, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.7349785407725322, | |
| "grad_norm": 2.342314257305073, | |
| "learning_rate": 7.974793434227203e-06, | |
| "loss": 0.324, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.7376609442060086, | |
| "grad_norm": 2.199143638436571, | |
| "learning_rate": 7.955945139430221e-06, | |
| "loss": 0.315, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.740343347639485, | |
| "grad_norm": 2.0119638311974386, | |
| "learning_rate": 7.937032048322231e-06, | |
| "loss": 0.3087, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.7430257510729614, | |
| "grad_norm": 2.1782754033524845, | |
| "learning_rate": 7.918054575490943e-06, | |
| "loss": 0.3242, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.7457081545064378, | |
| "grad_norm": 2.206496414169751, | |
| "learning_rate": 7.899013136935365e-06, | |
| "loss": 0.3063, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.7483905579399142, | |
| "grad_norm": 2.0423709558066405, | |
| "learning_rate": 7.879908150056668e-06, | |
| "loss": 0.3117, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.7510729613733905, | |
| "grad_norm": 2.095655750811691, | |
| "learning_rate": 7.860740033649053e-06, | |
| "loss": 0.3229, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.753755364806867, | |
| "grad_norm": 2.0335948169947526, | |
| "learning_rate": 7.841509207890555e-06, | |
| "loss": 0.3047, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.7564377682403434, | |
| "grad_norm": 2.21206434496724, | |
| "learning_rate": 7.822216094333847e-06, | |
| "loss": 0.3098, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.7591201716738197, | |
| "grad_norm": 2.1541492290043376, | |
| "learning_rate": 7.802861115896988e-06, | |
| "loss": 0.3081, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.7618025751072961, | |
| "grad_norm": 2.0550566165604756, | |
| "learning_rate": 7.783444696854161e-06, | |
| "loss": 0.3095, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.7644849785407726, | |
| "grad_norm": 2.0920475443521744, | |
| "learning_rate": 7.763967262826363e-06, | |
| "loss": 0.3055, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.7671673819742489, | |
| "grad_norm": 2.1615944487046708, | |
| "learning_rate": 7.74442924077209e-06, | |
| "loss": 0.3093, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.7698497854077253, | |
| "grad_norm": 2.1144096218335613, | |
| "learning_rate": 7.724831058977955e-06, | |
| "loss": 0.3176, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.7725321888412017, | |
| "grad_norm": 2.1346312946899455, | |
| "learning_rate": 7.705173147049326e-06, | |
| "loss": 0.3156, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.7752145922746781, | |
| "grad_norm": 2.047468450694068, | |
| "learning_rate": 7.685455935900886e-06, | |
| "loss": 0.3052, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.7778969957081545, | |
| "grad_norm": 2.0538961869366643, | |
| "learning_rate": 7.665679857747204e-06, | |
| "loss": 0.3273, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7805793991416309, | |
| "grad_norm": 2.1286681747583254, | |
| "learning_rate": 7.645845346093246e-06, | |
| "loss": 0.3179, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.7832618025751072, | |
| "grad_norm": 1.994233749675141, | |
| "learning_rate": 7.625952835724892e-06, | |
| "loss": 0.3165, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.7859442060085837, | |
| "grad_norm": 2.2976339300960373, | |
| "learning_rate": 7.606002762699378e-06, | |
| "loss": 0.3184, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.7886266094420601, | |
| "grad_norm": 2.039358308364566, | |
| "learning_rate": 7.585995564335764e-06, | |
| "loss": 0.3109, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.7913090128755365, | |
| "grad_norm": 2.2075630804735056, | |
| "learning_rate": 7.565931679205329e-06, | |
| "loss": 0.319, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.7939914163090128, | |
| "grad_norm": 2.0106829444211733, | |
| "learning_rate": 7.545811547121969e-06, | |
| "loss": 0.3119, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.7966738197424893, | |
| "grad_norm": 2.0666007146751553, | |
| "learning_rate": 7.525635609132543e-06, | |
| "loss": 0.3251, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.7993562231759657, | |
| "grad_norm": 2.088216008028052, | |
| "learning_rate": 7.505404307507227e-06, | |
| "loss": 0.3073, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.802038626609442, | |
| "grad_norm": 2.06365869935419, | |
| "learning_rate": 7.48511808572979e-06, | |
| "loss": 0.3159, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.8047210300429185, | |
| "grad_norm": 2.1479596414620246, | |
| "learning_rate": 7.464777388487899e-06, | |
| "loss": 0.3135, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.8074034334763949, | |
| "grad_norm": 2.140064633721639, | |
| "learning_rate": 7.4443826616633555e-06, | |
| "loss": 0.319, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.8100858369098712, | |
| "grad_norm": 2.2172941608185814, | |
| "learning_rate": 7.423934352322324e-06, | |
| "loss": 0.3202, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.8127682403433476, | |
| "grad_norm": 2.0860696548068343, | |
| "learning_rate": 7.403432908705537e-06, | |
| "loss": 0.315, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.8154506437768241, | |
| "grad_norm": 2.0693450416677712, | |
| "learning_rate": 7.382878780218466e-06, | |
| "loss": 0.3144, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.8181330472103004, | |
| "grad_norm": 2.2841738710721353, | |
| "learning_rate": 7.362272417421467e-06, | |
| "loss": 0.3063, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.8208154506437768, | |
| "grad_norm": 2.147885334701556, | |
| "learning_rate": 7.341614272019912e-06, | |
| "loss": 0.3074, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.8234978540772532, | |
| "grad_norm": 2.0237250051045392, | |
| "learning_rate": 7.3209047968542815e-06, | |
| "loss": 0.3148, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.8261802575107297, | |
| "grad_norm": 2.0887045166680323, | |
| "learning_rate": 7.300144445890236e-06, | |
| "loss": 0.3109, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.828862660944206, | |
| "grad_norm": 2.0283593925356262, | |
| "learning_rate": 7.279333674208671e-06, | |
| "loss": 0.3149, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.8315450643776824, | |
| "grad_norm": 2.0419728278236513, | |
| "learning_rate": 7.258472937995736e-06, | |
| "loss": 0.303, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.8342274678111588, | |
| "grad_norm": 2.1405345264719915, | |
| "learning_rate": 7.23756269453284e-06, | |
| "loss": 0.3206, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.8369098712446352, | |
| "grad_norm": 2.1040140267852863, | |
| "learning_rate": 7.216603402186618e-06, | |
| "loss": 0.3168, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.8395922746781116, | |
| "grad_norm": 2.1273226915661914, | |
| "learning_rate": 7.195595520398898e-06, | |
| "loss": 0.3187, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.842274678111588, | |
| "grad_norm": 2.0095214683675424, | |
| "learning_rate": 7.174539509676612e-06, | |
| "loss": 0.3097, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.8449570815450643, | |
| "grad_norm": 2.0016814081151892, | |
| "learning_rate": 7.153435831581722e-06, | |
| "loss": 0.3198, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.8476394849785408, | |
| "grad_norm": 1.965454808053445, | |
| "learning_rate": 7.132284948721079e-06, | |
| "loss": 0.31, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.8503218884120172, | |
| "grad_norm": 2.136352585229065, | |
| "learning_rate": 7.1110873247363035e-06, | |
| "loss": 0.3056, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.8530042918454935, | |
| "grad_norm": 2.0773545777295976, | |
| "learning_rate": 7.089843424293606e-06, | |
| "loss": 0.3116, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.85568669527897, | |
| "grad_norm": 1.998502422749954, | |
| "learning_rate": 7.0685537130736145e-06, | |
| "loss": 0.3206, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.8583690987124464, | |
| "grad_norm": 2.0273327421570153, | |
| "learning_rate": 7.047218657761156e-06, | |
| "loss": 0.3061, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8610515021459227, | |
| "grad_norm": 2.0600139958469823, | |
| "learning_rate": 7.025838726035032e-06, | |
| "loss": 0.3122, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.8637339055793991, | |
| "grad_norm": 1.9151781235741088, | |
| "learning_rate": 7.004414386557765e-06, | |
| "loss": 0.308, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.8664163090128756, | |
| "grad_norm": 2.165460530379817, | |
| "learning_rate": 6.982946108965326e-06, | |
| "loss": 0.3129, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.869098712446352, | |
| "grad_norm": 1.9524659521047858, | |
| "learning_rate": 6.961434363856836e-06, | |
| "loss": 0.3032, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.8717811158798283, | |
| "grad_norm": 2.062634602507452, | |
| "learning_rate": 6.939879622784259e-06, | |
| "loss": 0.3138, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.8744635193133047, | |
| "grad_norm": 1.9574471265790148, | |
| "learning_rate": 6.918282358242053e-06, | |
| "loss": 0.3074, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.8771459227467812, | |
| "grad_norm": 1.9771976107010916, | |
| "learning_rate": 6.896643043656826e-06, | |
| "loss": 0.3013, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.8798283261802575, | |
| "grad_norm": 1.937187823090658, | |
| "learning_rate": 6.874962153376945e-06, | |
| "loss": 0.3016, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.8825107296137339, | |
| "grad_norm": 1.990154459364653, | |
| "learning_rate": 6.853240162662149e-06, | |
| "loss": 0.3017, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.8851931330472103, | |
| "grad_norm": 1.951646105453254, | |
| "learning_rate": 6.831477547673122e-06, | |
| "loss": 0.3053, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8878755364806867, | |
| "grad_norm": 1.9520892795805238, | |
| "learning_rate": 6.8096747854610634e-06, | |
| "loss": 0.3089, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.8905579399141631, | |
| "grad_norm": 2.0126176266313807, | |
| "learning_rate": 6.787832353957225e-06, | |
| "loss": 0.3103, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.8932403433476395, | |
| "grad_norm": 1.9125432962564908, | |
| "learning_rate": 6.7659507319624355e-06, | |
| "loss": 0.3069, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.8959227467811158, | |
| "grad_norm": 2.052079654464445, | |
| "learning_rate": 6.744030399136606e-06, | |
| "loss": 0.3033, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.8986051502145923, | |
| "grad_norm": 2.050877524519969, | |
| "learning_rate": 6.722071835988217e-06, | |
| "loss": 0.3106, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.9012875536480687, | |
| "grad_norm": 1.9919732658432157, | |
| "learning_rate": 6.700075523863783e-06, | |
| "loss": 0.3025, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.903969957081545, | |
| "grad_norm": 1.9896839822490924, | |
| "learning_rate": 6.678041944937297e-06, | |
| "loss": 0.305, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.9066523605150214, | |
| "grad_norm": 1.9158011231101228, | |
| "learning_rate": 6.655971582199672e-06, | |
| "loss": 0.3049, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.9093347639484979, | |
| "grad_norm": 2.038467794641684, | |
| "learning_rate": 6.633864919448143e-06, | |
| "loss": 0.3139, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.9120171673819742, | |
| "grad_norm": 1.9786294167283611, | |
| "learning_rate": 6.611722441275666e-06, | |
| "loss": 0.2998, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.9146995708154506, | |
| "grad_norm": 1.9967546997105834, | |
| "learning_rate": 6.589544633060298e-06, | |
| "loss": 0.3061, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.9173819742489271, | |
| "grad_norm": 1.9765034643155208, | |
| "learning_rate": 6.5673319809545496e-06, | |
| "loss": 0.3103, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.9200643776824035, | |
| "grad_norm": 1.9778652823225957, | |
| "learning_rate": 6.545084971874738e-06, | |
| "loss": 0.3181, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.9227467811158798, | |
| "grad_norm": 1.973804092462756, | |
| "learning_rate": 6.522804093490305e-06, | |
| "loss": 0.3051, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.9254291845493562, | |
| "grad_norm": 1.9300897085251152, | |
| "learning_rate": 6.50048983421313e-06, | |
| "loss": 0.3132, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.9281115879828327, | |
| "grad_norm": 2.0163968945583575, | |
| "learning_rate": 6.478142683186827e-06, | |
| "loss": 0.3029, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.930793991416309, | |
| "grad_norm": 2.094134693771536, | |
| "learning_rate": 6.455763130276019e-06, | |
| "loss": 0.3089, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.9334763948497854, | |
| "grad_norm": 1.9641907295696857, | |
| "learning_rate": 6.433351666055598e-06, | |
| "loss": 0.2969, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.9361587982832618, | |
| "grad_norm": 1.9534671249367734, | |
| "learning_rate": 6.410908781799974e-06, | |
| "loss": 0.2992, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.9388412017167382, | |
| "grad_norm": 1.9928634051631366, | |
| "learning_rate": 6.388434969472307e-06, | |
| "loss": 0.3076, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.9415236051502146, | |
| "grad_norm": 1.8618767245463352, | |
| "learning_rate": 6.365930721713718e-06, | |
| "loss": 0.3073, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.944206008583691, | |
| "grad_norm": 1.898851709477168, | |
| "learning_rate": 6.343396531832497e-06, | |
| "loss": 0.3117, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.9468884120171673, | |
| "grad_norm": 1.9713962762389659, | |
| "learning_rate": 6.320832893793285e-06, | |
| "loss": 0.313, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.9495708154506438, | |
| "grad_norm": 1.9747041589737182, | |
| "learning_rate": 6.298240302206242e-06, | |
| "loss": 0.3023, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.9522532188841202, | |
| "grad_norm": 1.8841058711871417, | |
| "learning_rate": 6.275619252316213e-06, | |
| "loss": 0.3067, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.9549356223175965, | |
| "grad_norm": 1.897284633014723, | |
| "learning_rate": 6.25297023999187e-06, | |
| "loss": 0.3126, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.9576180257510729, | |
| "grad_norm": 1.9041195266763296, | |
| "learning_rate": 6.2302937617148365e-06, | |
| "loss": 0.3109, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.9603004291845494, | |
| "grad_norm": 1.83771743974229, | |
| "learning_rate": 6.20759031456881e-06, | |
| "loss": 0.3096, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.9629828326180258, | |
| "grad_norm": 1.80625064211799, | |
| "learning_rate": 6.184860396228664e-06, | |
| "loss": 0.31, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.9656652360515021, | |
| "grad_norm": 1.8621679216479052, | |
| "learning_rate": 6.1621045049495376e-06, | |
| "loss": 0.2862, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.9683476394849786, | |
| "grad_norm": 1.8652988069119067, | |
| "learning_rate": 6.139323139555914e-06, | |
| "loss": 0.3074, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.971030042918455, | |
| "grad_norm": 1.8792185612098447, | |
| "learning_rate": 6.116516799430689e-06, | |
| "loss": 0.3022, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.9737124463519313, | |
| "grad_norm": 1.9111804324769437, | |
| "learning_rate": 6.0936859845042164e-06, | |
| "loss": 0.2989, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.9763948497854077, | |
| "grad_norm": 1.8921650943031454, | |
| "learning_rate": 6.07083119524336e-06, | |
| "loss": 0.3011, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.9790772532188842, | |
| "grad_norm": 1.9316953376536945, | |
| "learning_rate": 6.047952932640513e-06, | |
| "loss": 0.2955, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.9817596566523605, | |
| "grad_norm": 1.8846297197157076, | |
| "learning_rate": 6.0250516982026205e-06, | |
| "loss": 0.3123, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.9844420600858369, | |
| "grad_norm": 1.8897008544103802, | |
| "learning_rate": 6.002127993940187e-06, | |
| "loss": 0.2921, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.9871244635193133, | |
| "grad_norm": 1.9963273617506934, | |
| "learning_rate": 5.979182322356269e-06, | |
| "loss": 0.3186, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.9898068669527897, | |
| "grad_norm": 1.9405309808504616, | |
| "learning_rate": 5.956215186435464e-06, | |
| "loss": 0.3043, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.9924892703862661, | |
| "grad_norm": 1.9668200808889422, | |
| "learning_rate": 5.9332270896328815e-06, | |
| "loss": 0.2928, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.9951716738197425, | |
| "grad_norm": 1.9086363268892765, | |
| "learning_rate": 5.910218535863106e-06, | |
| "loss": 0.2956, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.9978540772532188, | |
| "grad_norm": 1.937226981087176, | |
| "learning_rate": 5.8871900294891525e-06, | |
| "loss": 0.3003, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_runtime": 263.7729, | |
| "eval_samples_per_second": 3.791, | |
| "eval_steps_per_second": 0.948, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 1.0005364806866952, | |
| "grad_norm": 2.1676471565285427, | |
| "learning_rate": 5.864142075311414e-06, | |
| "loss": 0.282, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 1.0032188841201717, | |
| "grad_norm": 2.5924971299208486, | |
| "learning_rate": 5.84107517855659e-06, | |
| "loss": 0.2023, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.0059012875536482, | |
| "grad_norm": 1.9918660104270434, | |
| "learning_rate": 5.817989844866613e-06, | |
| "loss": 0.1922, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 1.0085836909871244, | |
| "grad_norm": 2.0582911095011767, | |
| "learning_rate": 5.794886580287565e-06, | |
| "loss": 0.2015, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.011266094420601, | |
| "grad_norm": 1.9383711709784341, | |
| "learning_rate": 5.77176589125859e-06, | |
| "loss": 0.1984, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 1.0139484978540771, | |
| "grad_norm": 2.201190788077746, | |
| "learning_rate": 5.7486282846007835e-06, | |
| "loss": 0.1889, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.0166309012875536, | |
| "grad_norm": 1.9001195225997398, | |
| "learning_rate": 5.725474267506088e-06, | |
| "loss": 0.2041, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 1.01931330472103, | |
| "grad_norm": 2.3294489874909132, | |
| "learning_rate": 5.702304347526172e-06, | |
| "loss": 0.1902, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.0219957081545064, | |
| "grad_norm": 2.121399773248172, | |
| "learning_rate": 5.679119032561311e-06, | |
| "loss": 0.1842, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 1.0246781115879828, | |
| "grad_norm": 1.9845020506511497, | |
| "learning_rate": 5.655918830849243e-06, | |
| "loss": 0.1882, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.0273605150214593, | |
| "grad_norm": 1.8436034837755606, | |
| "learning_rate": 5.632704250954039e-06, | |
| "loss": 0.1988, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 1.0300429184549356, | |
| "grad_norm": 1.9694560666463294, | |
| "learning_rate": 5.6094758017549436e-06, | |
| "loss": 0.1916, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.032725321888412, | |
| "grad_norm": 1.9394642490903848, | |
| "learning_rate": 5.5862339924352306e-06, | |
| "loss": 0.1952, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 1.0354077253218885, | |
| "grad_norm": 1.8828700417191846, | |
| "learning_rate": 5.562979332471035e-06, | |
| "loss": 0.1833, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.0380901287553648, | |
| "grad_norm": 1.8908778787975598, | |
| "learning_rate": 5.539712331620186e-06, | |
| "loss": 0.1951, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 1.0407725321888412, | |
| "grad_norm": 1.9116753445216206, | |
| "learning_rate": 5.516433499911035e-06, | |
| "loss": 0.1907, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.0434549356223175, | |
| "grad_norm": 1.9940047534149794, | |
| "learning_rate": 5.493143347631272e-06, | |
| "loss": 0.1886, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 1.046137339055794, | |
| "grad_norm": 1.9320996949249498, | |
| "learning_rate": 5.4698423853167425e-06, | |
| "loss": 0.1919, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.0488197424892705, | |
| "grad_norm": 1.9586772638426304, | |
| "learning_rate": 5.446531123740257e-06, | |
| "loss": 0.1935, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 1.0515021459227467, | |
| "grad_norm": 2.0011255021437075, | |
| "learning_rate": 5.4232100739003855e-06, | |
| "loss": 0.1945, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.0541845493562232, | |
| "grad_norm": 1.9483623165568755, | |
| "learning_rate": 5.399879747010275e-06, | |
| "loss": 0.2003, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 1.0568669527896997, | |
| "grad_norm": 1.9115878997120979, | |
| "learning_rate": 5.376540654486422e-06, | |
| "loss": 0.1883, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.059549356223176, | |
| "grad_norm": 2.0116661404263145, | |
| "learning_rate": 5.353193307937477e-06, | |
| "loss": 0.2008, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 1.0622317596566524, | |
| "grad_norm": 2.0832605757484397, | |
| "learning_rate": 5.32983821915302e-06, | |
| "loss": 0.1891, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.0649141630901287, | |
| "grad_norm": 1.8531543345713375, | |
| "learning_rate": 5.306475900092348e-06, | |
| "loss": 0.1882, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 1.0675965665236051, | |
| "grad_norm": 1.8450655041844881, | |
| "learning_rate": 5.283106862873253e-06, | |
| "loss": 0.1833, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.0702789699570816, | |
| "grad_norm": 1.9044883788251383, | |
| "learning_rate": 5.259731619760792e-06, | |
| "loss": 0.1865, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 1.0729613733905579, | |
| "grad_norm": 2.0028672116669997, | |
| "learning_rate": 5.236350683156055e-06, | |
| "loss": 0.1902, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0756437768240343, | |
| "grad_norm": 1.814402898247639, | |
| "learning_rate": 5.212964565584944e-06, | |
| "loss": 0.1905, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 1.0783261802575108, | |
| "grad_norm": 1.8872679325335933, | |
| "learning_rate": 5.189573779686929e-06, | |
| "loss": 0.1949, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.081008583690987, | |
| "grad_norm": 1.8891608815102554, | |
| "learning_rate": 5.166178838203808e-06, | |
| "loss": 0.1924, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 1.0836909871244635, | |
| "grad_norm": 1.8837503323500486, | |
| "learning_rate": 5.142780253968481e-06, | |
| "loss": 0.1861, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.0863733905579398, | |
| "grad_norm": 1.9577533659989275, | |
| "learning_rate": 5.119378539893693e-06, | |
| "loss": 0.1926, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 1.0890557939914163, | |
| "grad_norm": 1.8789212764123366, | |
| "learning_rate": 5.095974208960799e-06, | |
| "loss": 0.194, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.0917381974248928, | |
| "grad_norm": 2.0837412968641074, | |
| "learning_rate": 5.072567774208518e-06, | |
| "loss": 0.1938, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 1.094420600858369, | |
| "grad_norm": 1.806432095417855, | |
| "learning_rate": 5.049159748721685e-06, | |
| "loss": 0.1875, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.0971030042918455, | |
| "grad_norm": 2.0304775866784204, | |
| "learning_rate": 5.025750645620004e-06, | |
| "loss": 0.1865, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 1.099785407725322, | |
| "grad_norm": 1.9275239991524047, | |
| "learning_rate": 5.002340978046807e-06, | |
| "loss": 0.1852, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.1024678111587982, | |
| "grad_norm": 1.90287230101775, | |
| "learning_rate": 4.978931259157791e-06, | |
| "loss": 0.1835, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 1.1051502145922747, | |
| "grad_norm": 1.890259840627779, | |
| "learning_rate": 4.955522002109782e-06, | |
| "loss": 0.189, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.1078326180257512, | |
| "grad_norm": 1.90298316348042, | |
| "learning_rate": 4.932113720049485e-06, | |
| "loss": 0.1952, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 1.1105150214592274, | |
| "grad_norm": 1.8449671760022834, | |
| "learning_rate": 4.908706926102229e-06, | |
| "loss": 0.1827, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.113197424892704, | |
| "grad_norm": 1.9616445138070648, | |
| "learning_rate": 4.885302133360722e-06, | |
| "loss": 0.1952, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 1.1158798283261802, | |
| "grad_norm": 1.926872317223682, | |
| "learning_rate": 4.8618998548738065e-06, | |
| "loss": 0.1912, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.1185622317596566, | |
| "grad_norm": 1.8440433865572448, | |
| "learning_rate": 4.8385006036352104e-06, | |
| "loss": 0.1795, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 1.121244635193133, | |
| "grad_norm": 1.8722843865210008, | |
| "learning_rate": 4.8151048925723014e-06, | |
| "loss": 0.194, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.1239270386266094, | |
| "grad_norm": 1.9272856053757133, | |
| "learning_rate": 4.791713234534844e-06, | |
| "loss": 0.1846, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 1.1266094420600858, | |
| "grad_norm": 1.874257158338157, | |
| "learning_rate": 4.768326142283757e-06, | |
| "loss": 0.1833, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.1292918454935623, | |
| "grad_norm": 1.8978133368632915, | |
| "learning_rate": 4.744944128479879e-06, | |
| "loss": 0.1873, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 1.1319742489270386, | |
| "grad_norm": 1.8595773129657196, | |
| "learning_rate": 4.7215677056727185e-06, | |
| "loss": 0.1904, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.134656652360515, | |
| "grad_norm": 1.9790845158535326, | |
| "learning_rate": 4.698197386289232e-06, | |
| "loss": 0.1908, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 1.1373390557939915, | |
| "grad_norm": 2.0046956900246267, | |
| "learning_rate": 4.674833682622577e-06, | |
| "loss": 0.1935, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.1400214592274678, | |
| "grad_norm": 2.098142362831301, | |
| "learning_rate": 4.6514771068209e-06, | |
| "loss": 0.1856, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 1.1427038626609443, | |
| "grad_norm": 1.8329956977422865, | |
| "learning_rate": 4.628128170876093e-06, | |
| "loss": 0.1896, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.1453862660944205, | |
| "grad_norm": 1.949948729000319, | |
| "learning_rate": 4.604787386612579e-06, | |
| "loss": 0.1893, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 1.148068669527897, | |
| "grad_norm": 1.8445942344820512, | |
| "learning_rate": 4.581455265676089e-06, | |
| "loss": 0.1941, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.1507510729613735, | |
| "grad_norm": 1.805772422930948, | |
| "learning_rate": 4.558132319522451e-06, | |
| "loss": 0.1736, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 1.1534334763948497, | |
| "grad_norm": 1.7406255756178988, | |
| "learning_rate": 4.534819059406374e-06, | |
| "loss": 0.1866, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.1561158798283262, | |
| "grad_norm": 2.1014658354178644, | |
| "learning_rate": 4.511515996370244e-06, | |
| "loss": 0.1953, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 1.1587982832618025, | |
| "grad_norm": 1.9708726994392791, | |
| "learning_rate": 4.488223641232915e-06, | |
| "loss": 0.1896, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.161480686695279, | |
| "grad_norm": 1.9787947613530399, | |
| "learning_rate": 4.464942504578524e-06, | |
| "loss": 0.1934, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 1.1641630901287554, | |
| "grad_norm": 1.992107685089113, | |
| "learning_rate": 4.441673096745287e-06, | |
| "loss": 0.1925, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.1668454935622319, | |
| "grad_norm": 2.016896097316237, | |
| "learning_rate": 4.418415927814315e-06, | |
| "loss": 0.1848, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 1.1695278969957081, | |
| "grad_norm": 1.858625974711402, | |
| "learning_rate": 4.395171507598441e-06, | |
| "loss": 0.1854, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.1722103004291846, | |
| "grad_norm": 2.02254903047215, | |
| "learning_rate": 4.371940345631027e-06, | |
| "loss": 0.195, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 1.1748927038626609, | |
| "grad_norm": 1.8176535693204507, | |
| "learning_rate": 4.348722951154816e-06, | |
| "loss": 0.1832, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.1775751072961373, | |
| "grad_norm": 1.8304723547384139, | |
| "learning_rate": 4.3255198331107485e-06, | |
| "loss": 0.1879, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 1.1802575107296138, | |
| "grad_norm": 1.8955122948963208, | |
| "learning_rate": 4.302331500126824e-06, | |
| "loss": 0.1871, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.18293991416309, | |
| "grad_norm": 1.89004399640177, | |
| "learning_rate": 4.279158460506939e-06, | |
| "loss": 0.1819, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 1.1856223175965666, | |
| "grad_norm": 1.8317609875432832, | |
| "learning_rate": 4.256001222219751e-06, | |
| "loss": 0.1831, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.1883047210300428, | |
| "grad_norm": 1.9374939589512181, | |
| "learning_rate": 4.232860292887537e-06, | |
| "loss": 0.1806, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 1.1909871244635193, | |
| "grad_norm": 2.0303723616744778, | |
| "learning_rate": 4.2097361797750815e-06, | |
| "loss": 0.1822, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.1936695278969958, | |
| "grad_norm": 1.8192962113282214, | |
| "learning_rate": 4.1866293897785356e-06, | |
| "loss": 0.1844, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.196351931330472, | |
| "grad_norm": 1.8604357202266497, | |
| "learning_rate": 4.16354042941432e-06, | |
| "loss": 0.1827, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.1990343347639485, | |
| "grad_norm": 1.7595957688522177, | |
| "learning_rate": 4.1404698048080175e-06, | |
| "loss": 0.1934, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 1.201716738197425, | |
| "grad_norm": 1.973110320134582, | |
| "learning_rate": 4.117418021683278e-06, | |
| "loss": 0.1906, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.2043991416309012, | |
| "grad_norm": 1.8884430376988128, | |
| "learning_rate": 4.094385585350736e-06, | |
| "loss": 0.189, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 1.2070815450643777, | |
| "grad_norm": 1.8745063485974562, | |
| "learning_rate": 4.0713730006969285e-06, | |
| "loss": 0.1844, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.2097639484978542, | |
| "grad_norm": 1.8721552889135795, | |
| "learning_rate": 4.048380772173231e-06, | |
| "loss": 0.1922, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 1.2124463519313304, | |
| "grad_norm": 2.0429736332378536, | |
| "learning_rate": 4.0254094037848005e-06, | |
| "loss": 0.1857, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.215128755364807, | |
| "grad_norm": 1.8897709376195266, | |
| "learning_rate": 4.002459399079523e-06, | |
| "loss": 0.1853, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 1.2178111587982832, | |
| "grad_norm": 1.9011418004749525, | |
| "learning_rate": 3.979531261136981e-06, | |
| "loss": 0.1886, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.2204935622317596, | |
| "grad_norm": 1.969594278349951, | |
| "learning_rate": 3.956625492557417e-06, | |
| "loss": 0.1857, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.2231759656652361, | |
| "grad_norm": 1.912032996448432, | |
| "learning_rate": 3.933742595450733e-06, | |
| "loss": 0.1849, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.2258583690987124, | |
| "grad_norm": 1.916906298425393, | |
| "learning_rate": 3.910883071425463e-06, | |
| "loss": 0.1873, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 1.2285407725321889, | |
| "grad_norm": 1.8578700716982628, | |
| "learning_rate": 3.8880474215777915e-06, | |
| "loss": 0.1838, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.2312231759656653, | |
| "grad_norm": 1.776030564762745, | |
| "learning_rate": 3.865236146480562e-06, | |
| "loss": 0.1851, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 1.2339055793991416, | |
| "grad_norm": 1.9137765272421998, | |
| "learning_rate": 3.842449746172311e-06, | |
| "loss": 0.1799, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.236587982832618, | |
| "grad_norm": 2.0605656430444546, | |
| "learning_rate": 3.8196887201463e-06, | |
| "loss": 0.191, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 1.2392703862660945, | |
| "grad_norm": 1.915088708869453, | |
| "learning_rate": 3.796953567339571e-06, | |
| "loss": 0.1952, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.2419527896995708, | |
| "grad_norm": 1.8829918728592658, | |
| "learning_rate": 3.7742447861220027e-06, | |
| "loss": 0.1858, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 1.2446351931330473, | |
| "grad_norm": 1.9585388606793075, | |
| "learning_rate": 3.7515628742854006e-06, | |
| "loss": 0.1858, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.2473175965665235, | |
| "grad_norm": 1.8552310125655131, | |
| "learning_rate": 3.7289083290325668e-06, | |
| "loss": 0.1847, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.7754071029501433, | |
| "learning_rate": 3.706281646966409e-06, | |
| "loss": 0.1812, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.2526824034334765, | |
| "grad_norm": 1.875617151757241, | |
| "learning_rate": 3.6836833240790625e-06, | |
| "loss": 0.185, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 1.2553648068669527, | |
| "grad_norm": 1.8611799174871593, | |
| "learning_rate": 3.6611138557410047e-06, | |
| "loss": 0.1856, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.2580472103004292, | |
| "grad_norm": 1.8285828739346939, | |
| "learning_rate": 3.638573736690202e-06, | |
| "loss": 0.1857, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 1.2607296137339055, | |
| "grad_norm": 1.892962676656495, | |
| "learning_rate": 3.6160634610212642e-06, | |
| "loss": 0.1811, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.263412017167382, | |
| "grad_norm": 1.9947034876105998, | |
| "learning_rate": 3.5935835221746183e-06, | |
| "loss": 0.1797, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 1.2660944206008584, | |
| "grad_norm": 1.8965138864268372, | |
| "learning_rate": 3.5711344129256832e-06, | |
| "loss": 0.1858, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.268776824034335, | |
| "grad_norm": 1.8162594165458399, | |
| "learning_rate": 3.548716625374074e-06, | |
| "loss": 0.1852, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 1.2714592274678111, | |
| "grad_norm": 1.8466606151367597, | |
| "learning_rate": 3.5263306509328103e-06, | |
| "loss": 0.1905, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.2741416309012876, | |
| "grad_norm": 1.804299478995684, | |
| "learning_rate": 3.5039769803175545e-06, | |
| "loss": 0.1849, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.2768240343347639, | |
| "grad_norm": 1.8598714876367384, | |
| "learning_rate": 3.481656103535839e-06, | |
| "loss": 0.1863, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.2795064377682404, | |
| "grad_norm": 1.8324237852834893, | |
| "learning_rate": 3.459368509876338e-06, | |
| "loss": 0.1872, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 1.2821888412017168, | |
| "grad_norm": 1.9428876845890815, | |
| "learning_rate": 3.437114687898132e-06, | |
| "loss": 0.1848, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.284871244635193, | |
| "grad_norm": 1.813341484571633, | |
| "learning_rate": 3.414895125420013e-06, | |
| "loss": 0.1794, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 1.2875536480686696, | |
| "grad_norm": 1.891484957705928, | |
| "learning_rate": 3.3927103095097725e-06, | |
| "loss": 0.1826, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.2902360515021458, | |
| "grad_norm": 1.8787480346224221, | |
| "learning_rate": 3.370560726473537e-06, | |
| "loss": 0.1871, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 1.2929184549356223, | |
| "grad_norm": 1.856366211207286, | |
| "learning_rate": 3.348446861845106e-06, | |
| "loss": 0.1826, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.2956008583690988, | |
| "grad_norm": 1.7873050403851052, | |
| "learning_rate": 3.3263692003753056e-06, | |
| "loss": 0.1805, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 1.298283261802575, | |
| "grad_norm": 1.9278434650179597, | |
| "learning_rate": 3.304328226021365e-06, | |
| "loss": 0.1927, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.3009656652360515, | |
| "grad_norm": 1.705467306294372, | |
| "learning_rate": 3.282324421936307e-06, | |
| "loss": 0.1807, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.3036480686695278, | |
| "grad_norm": 2.0175168076056136, | |
| "learning_rate": 3.2603582704583547e-06, | |
| "loss": 0.1814, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.3063304721030042, | |
| "grad_norm": 1.9599572180968123, | |
| "learning_rate": 3.2384302531003676e-06, | |
| "loss": 0.1809, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 1.3090128755364807, | |
| "grad_norm": 1.88144040592366, | |
| "learning_rate": 3.216540850539272e-06, | |
| "loss": 0.1859, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.3116952789699572, | |
| "grad_norm": 1.9414190234849964, | |
| "learning_rate": 3.1946905426055353e-06, | |
| "loss": 0.1855, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 1.3143776824034334, | |
| "grad_norm": 1.7891027254290737, | |
| "learning_rate": 3.172879808272642e-06, | |
| "loss": 0.1886, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.31706008583691, | |
| "grad_norm": 1.779801789067273, | |
| "learning_rate": 3.151109125646601e-06, | |
| "loss": 0.1804, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 1.3197424892703862, | |
| "grad_norm": 1.6910321536471429, | |
| "learning_rate": 3.1293789719554562e-06, | |
| "loss": 0.1888, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.3224248927038627, | |
| "grad_norm": 1.8978060550474511, | |
| "learning_rate": 3.107689823538833e-06, | |
| "loss": 0.1845, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 1.3251072961373391, | |
| "grad_norm": 1.7996160906604526, | |
| "learning_rate": 3.086042155837491e-06, | |
| "loss": 0.182, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.3277896995708154, | |
| "grad_norm": 1.8604318440083263, | |
| "learning_rate": 3.0644364433829076e-06, | |
| "loss": 0.1868, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.3304721030042919, | |
| "grad_norm": 1.7265652913644474, | |
| "learning_rate": 3.0428731597868706e-06, | |
| "loss": 0.1791, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.3331545064377681, | |
| "grad_norm": 1.8572102327854523, | |
| "learning_rate": 3.021352777731096e-06, | |
| "loss": 0.1889, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 1.3358369098712446, | |
| "grad_norm": 1.978318614280375, | |
| "learning_rate": 2.9998757689568775e-06, | |
| "loss": 0.1826, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.338519313304721, | |
| "grad_norm": 1.814421622601161, | |
| "learning_rate": 2.978442604254729e-06, | |
| "loss": 0.1811, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 1.3412017167381975, | |
| "grad_norm": 1.8994448714761314, | |
| "learning_rate": 2.9570537534540765e-06, | |
| "loss": 0.1885, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.3438841201716738, | |
| "grad_norm": 1.7626420531321445, | |
| "learning_rate": 2.935709685412954e-06, | |
| "loss": 0.1805, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 1.3465665236051503, | |
| "grad_norm": 1.8364054864432415, | |
| "learning_rate": 2.9144108680077288e-06, | |
| "loss": 0.1826, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.3492489270386265, | |
| "grad_norm": 1.847180352588691, | |
| "learning_rate": 2.8931577681228407e-06, | |
| "loss": 0.1807, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 1.351931330472103, | |
| "grad_norm": 1.9524895693219806, | |
| "learning_rate": 2.871950851640577e-06, | |
| "loss": 0.1854, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.3546137339055795, | |
| "grad_norm": 1.7963474200991176, | |
| "learning_rate": 2.8507905834308417e-06, | |
| "loss": 0.1877, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.3572961373390557, | |
| "grad_norm": 1.7833300163922787, | |
| "learning_rate": 2.8296774273409944e-06, | |
| "loss": 0.1823, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.3599785407725322, | |
| "grad_norm": 1.9793055958250187, | |
| "learning_rate": 2.8086118461856494e-06, | |
| "loss": 0.1804, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 1.3626609442060085, | |
| "grad_norm": 1.8681720017812595, | |
| "learning_rate": 2.787594301736556e-06, | |
| "loss": 0.1898, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.365343347639485, | |
| "grad_norm": 2.05995430381249, | |
| "learning_rate": 2.7666252547124596e-06, | |
| "loss": 0.1833, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 1.3680257510729614, | |
| "grad_norm": 1.7854047855241022, | |
| "learning_rate": 2.745705164769015e-06, | |
| "loss": 0.179, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.370708154506438, | |
| "grad_norm": 1.8166180302356945, | |
| "learning_rate": 2.724834490488705e-06, | |
| "loss": 0.1813, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 1.3733905579399142, | |
| "grad_norm": 1.8066041043622898, | |
| "learning_rate": 2.7040136893707813e-06, | |
| "loss": 0.1789, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.3760729613733906, | |
| "grad_norm": 1.903301979210332, | |
| "learning_rate": 2.683243217821248e-06, | |
| "loss": 0.1769, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 1.378755364806867, | |
| "grad_norm": 1.7466773845577472, | |
| "learning_rate": 2.66252353114285e-06, | |
| "loss": 0.1841, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.3814377682403434, | |
| "grad_norm": 1.8171236308268557, | |
| "learning_rate": 2.6418550835250946e-06, | |
| "loss": 0.1816, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.3841201716738198, | |
| "grad_norm": 1.8941618390274477, | |
| "learning_rate": 2.621238328034289e-06, | |
| "loss": 0.1792, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.386802575107296, | |
| "grad_norm": 1.688474610894831, | |
| "learning_rate": 2.60067371660362e-06, | |
| "loss": 0.172, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 1.3894849785407726, | |
| "grad_norm": 1.918062296580036, | |
| "learning_rate": 2.5801617000232416e-06, | |
| "loss": 0.1875, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.3921673819742488, | |
| "grad_norm": 1.8511243429388167, | |
| "learning_rate": 2.559702727930386e-06, | |
| "loss": 0.1765, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 1.3948497854077253, | |
| "grad_norm": 1.7999901888436554, | |
| "learning_rate": 2.5392972487995247e-06, | |
| "loss": 0.1785, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.3975321888412018, | |
| "grad_norm": 1.7053303808234024, | |
| "learning_rate": 2.5189457099325153e-06, | |
| "loss": 0.1756, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 1.400214592274678, | |
| "grad_norm": 1.8663136591492098, | |
| "learning_rate": 2.498648557448824e-06, | |
| "loss": 0.1753, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.4028969957081545, | |
| "grad_norm": 1.7472818518305704, | |
| "learning_rate": 2.4784062362757156e-06, | |
| "loss": 0.176, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 1.4055793991416308, | |
| "grad_norm": 1.769076507145111, | |
| "learning_rate": 2.458219190138526e-06, | |
| "loss": 0.1815, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.4082618025751072, | |
| "grad_norm": 1.8072957625179205, | |
| "learning_rate": 2.4380878615509156e-06, | |
| "loss": 0.1792, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.4109442060085837, | |
| "grad_norm": 1.7819943428340819, | |
| "learning_rate": 2.418012691805191e-06, | |
| "loss": 0.1784, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.4136266094420602, | |
| "grad_norm": 1.805441370100766, | |
| "learning_rate": 2.3979941209626072e-06, | |
| "loss": 0.1763, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 1.4163090128755365, | |
| "grad_norm": 1.9782749901291634, | |
| "learning_rate": 2.3780325878437415e-06, | |
| "loss": 0.1781, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.418991416309013, | |
| "grad_norm": 1.852385566702638, | |
| "learning_rate": 2.358128530018858e-06, | |
| "loss": 0.1836, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 1.4216738197424892, | |
| "grad_norm": 2.0347102265117227, | |
| "learning_rate": 2.3382823837983314e-06, | |
| "loss": 0.178, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.4243562231759657, | |
| "grad_norm": 1.8292259452694213, | |
| "learning_rate": 2.318494584223072e-06, | |
| "loss": 0.1863, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 1.4270386266094421, | |
| "grad_norm": 1.8186682872751867, | |
| "learning_rate": 2.2987655650549862e-06, | |
| "loss": 0.1779, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.4297210300429184, | |
| "grad_norm": 1.8137249215325242, | |
| "learning_rate": 2.2790957587674876e-06, | |
| "loss": 0.1773, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 1.4324034334763949, | |
| "grad_norm": 1.657463931666108, | |
| "learning_rate": 2.2594855965359906e-06, | |
| "loss": 0.181, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.4350858369098711, | |
| "grad_norm": 1.8416553188106275, | |
| "learning_rate": 2.2399355082284804e-06, | |
| "loss": 0.1837, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.4377682403433476, | |
| "grad_norm": 1.8567384673546208, | |
| "learning_rate": 2.2204459223960716e-06, | |
| "loss": 0.1753, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.440450643776824, | |
| "grad_norm": 1.8228824438814255, | |
| "learning_rate": 2.2010172662636377e-06, | |
| "loss": 0.1876, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 1.4431330472103006, | |
| "grad_norm": 1.913150616422004, | |
| "learning_rate": 2.1816499657204183e-06, | |
| "loss": 0.1757, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.4458154506437768, | |
| "grad_norm": 1.7948486965689003, | |
| "learning_rate": 2.1623444453107067e-06, | |
| "loss": 0.1811, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 1.4484978540772533, | |
| "grad_norm": 1.7836453698169266, | |
| "learning_rate": 2.1431011282245274e-06, | |
| "loss": 0.1839, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.4511802575107295, | |
| "grad_norm": 1.755770567268044, | |
| "learning_rate": 2.12392043628837e-06, | |
| "loss": 0.1772, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 1.453862660944206, | |
| "grad_norm": 1.773946245819418, | |
| "learning_rate": 2.10480278995594e-06, | |
| "loss": 0.1793, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.4565450643776825, | |
| "grad_norm": 1.7879606075165497, | |
| "learning_rate": 2.0857486082989344e-06, | |
| "loss": 0.183, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 1.4592274678111588, | |
| "grad_norm": 1.7610119078458486, | |
| "learning_rate": 2.0667583089978673e-06, | |
| "loss": 0.1808, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.4619098712446352, | |
| "grad_norm": 1.92829549876405, | |
| "learning_rate": 2.0478323083329072e-06, | |
| "loss": 0.1743, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.4645922746781115, | |
| "grad_norm": 1.8350507362803108, | |
| "learning_rate": 2.028971021174754e-06, | |
| "loss": 0.1846, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.467274678111588, | |
| "grad_norm": 1.7803185569940212, | |
| "learning_rate": 2.0101748609755407e-06, | |
| "loss": 0.1838, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 1.4699570815450644, | |
| "grad_norm": 2.1675460899901, | |
| "learning_rate": 1.9914442397597756e-06, | |
| "loss": 0.176, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.4726394849785407, | |
| "grad_norm": 1.8724514691396603, | |
| "learning_rate": 1.9727795681153083e-06, | |
| "loss": 0.1687, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 1.4753218884120172, | |
| "grad_norm": 1.6802576933125253, | |
| "learning_rate": 1.954181255184331e-06, | |
| "loss": 0.1752, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.4780042918454936, | |
| "grad_norm": 1.6820388901704724, | |
| "learning_rate": 1.935649708654403e-06, | |
| "loss": 0.1792, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 1.48068669527897, | |
| "grad_norm": 1.7979050607058435, | |
| "learning_rate": 1.9171853347495234e-06, | |
| "loss": 0.1756, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.4833690987124464, | |
| "grad_norm": 1.7614860072950995, | |
| "learning_rate": 1.8987885382212235e-06, | |
| "loss": 0.1793, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 1.4860515021459229, | |
| "grad_norm": 1.7504341827692425, | |
| "learning_rate": 1.8804597223396865e-06, | |
| "loss": 0.1819, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.488733905579399, | |
| "grad_norm": 1.8669933379497394, | |
| "learning_rate": 1.8621992888849217e-06, | |
| "loss": 0.1807, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.4914163090128756, | |
| "grad_norm": 1.725707828875719, | |
| "learning_rate": 1.8440076381379395e-06, | |
| "loss": 0.1774, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.4940987124463518, | |
| "grad_norm": 1.7856774718839294, | |
| "learning_rate": 1.8258851688720009e-06, | |
| "loss": 0.1703, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 1.4967811158798283, | |
| "grad_norm": 1.9173370042151976, | |
| "learning_rate": 1.807832278343849e-06, | |
| "loss": 0.1825, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.4994635193133048, | |
| "grad_norm": 1.8338932891918263, | |
| "learning_rate": 1.7898493622850227e-06, | |
| "loss": 0.1771, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 1.5021459227467813, | |
| "grad_norm": 1.6990771812486194, | |
| "learning_rate": 1.771936814893167e-06, | |
| "loss": 0.1776, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.5048283261802575, | |
| "grad_norm": 1.9186848087579995, | |
| "learning_rate": 1.7540950288234033e-06, | |
| "loss": 0.1868, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 1.5075107296137338, | |
| "grad_norm": 1.7935654243257526, | |
| "learning_rate": 1.7363243951797155e-06, | |
| "loss": 0.1757, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.5101931330472103, | |
| "grad_norm": 1.7009512554045776, | |
| "learning_rate": 1.7186253035063738e-06, | |
| "loss": 0.1683, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 1.5128755364806867, | |
| "grad_norm": 1.6934539545605378, | |
| "learning_rate": 1.7009981417794114e-06, | |
| "loss": 0.1726, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.5155579399141632, | |
| "grad_norm": 1.7582377866252281, | |
| "learning_rate": 1.6834432963980957e-06, | |
| "loss": 0.1778, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.5182403433476395, | |
| "grad_norm": 1.8632756654647402, | |
| "learning_rate": 1.6659611521764807e-06, | |
| "loss": 0.1768, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.5209227467811157, | |
| "grad_norm": 1.8284613621943484, | |
| "learning_rate": 1.6485520923349529e-06, | |
| "loss": 0.1721, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 1.5236051502145922, | |
| "grad_norm": 1.7899204272020013, | |
| "learning_rate": 1.6312164984918516e-06, | |
| "loss": 0.1626, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.5262875536480687, | |
| "grad_norm": 1.770033358177422, | |
| "learning_rate": 1.6139547506550808e-06, | |
| "loss": 0.1815, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 1.5289699570815452, | |
| "grad_norm": 1.7649583624949958, | |
| "learning_rate": 1.5967672272137968e-06, | |
| "loss": 0.1847, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.5316523605150214, | |
| "grad_norm": 1.7927358149656596, | |
| "learning_rate": 1.5796543049301033e-06, | |
| "loss": 0.1752, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 1.5343347639484979, | |
| "grad_norm": 2.0042367491150577, | |
| "learning_rate": 1.5626163589307991e-06, | |
| "loss": 0.1822, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.5370171673819741, | |
| "grad_norm": 1.7107730523248998, | |
| "learning_rate": 1.5456537626991525e-06, | |
| "loss": 0.1694, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 1.5396995708154506, | |
| "grad_norm": 1.7441203072639992, | |
| "learning_rate": 1.5287668880667107e-06, | |
| "loss": 0.1708, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.542381974248927, | |
| "grad_norm": 1.8476477662752824, | |
| "learning_rate": 1.5119561052051546e-06, | |
| "loss": 0.1735, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.5450643776824036, | |
| "grad_norm": 1.8492021302759543, | |
| "learning_rate": 1.495221782618183e-06, | |
| "loss": 0.1792, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.5477467811158798, | |
| "grad_norm": 1.8722713008127667, | |
| "learning_rate": 1.4785642871334349e-06, | |
| "loss": 0.1699, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 1.550429184549356, | |
| "grad_norm": 1.8179023947737942, | |
| "learning_rate": 1.4619839838944416e-06, | |
| "loss": 0.1763, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.5531115879828326, | |
| "grad_norm": 1.7771808696153055, | |
| "learning_rate": 1.4454812363526339e-06, | |
| "loss": 0.1732, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 1.555793991416309, | |
| "grad_norm": 1.7067802122286628, | |
| "learning_rate": 1.429056406259368e-06, | |
| "loss": 0.1628, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.5584763948497855, | |
| "grad_norm": 1.8463964562057142, | |
| "learning_rate": 1.4127098536579982e-06, | |
| "loss": 0.1819, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 1.5611587982832618, | |
| "grad_norm": 1.7971311069451972, | |
| "learning_rate": 1.3964419368759786e-06, | |
| "loss": 0.1706, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.5638412017167382, | |
| "grad_norm": 1.789561911147988, | |
| "learning_rate": 1.380253012517019e-06, | |
| "loss": 0.1684, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 1.5665236051502145, | |
| "grad_norm": 1.7986915556625842, | |
| "learning_rate": 1.3641434354532595e-06, | |
| "loss": 0.1749, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.569206008583691, | |
| "grad_norm": 1.728513882390203, | |
| "learning_rate": 1.3481135588174926e-06, | |
| "loss": 0.1717, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.5718884120171674, | |
| "grad_norm": 1.8474557981862971, | |
| "learning_rate": 1.332163733995427e-06, | |
| "loss": 0.1797, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.574570815450644, | |
| "grad_norm": 1.8124533163869139, | |
| "learning_rate": 1.3162943106179748e-06, | |
| "loss": 0.175, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 1.5772532188841202, | |
| "grad_norm": 1.8363148844561816, | |
| "learning_rate": 1.3005056365536067e-06, | |
| "loss": 0.1763, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.5799356223175964, | |
| "grad_norm": 1.7667627502771912, | |
| "learning_rate": 1.2847980579007003e-06, | |
| "loss": 0.1709, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 1.582618025751073, | |
| "grad_norm": 1.8384934616186792, | |
| "learning_rate": 1.2691719189799774e-06, | |
| "loss": 0.1809, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.5853004291845494, | |
| "grad_norm": 1.7432995409757972, | |
| "learning_rate": 1.253627562326936e-06, | |
| "loss": 0.1712, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 1.5879828326180259, | |
| "grad_norm": 1.6497919957428313, | |
| "learning_rate": 1.2381653286843648e-06, | |
| "loss": 0.1695, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.5906652360515021, | |
| "grad_norm": 1.8578458306330736, | |
| "learning_rate": 1.2227855569948477e-06, | |
| "loss": 0.1788, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 1.5933476394849786, | |
| "grad_norm": 1.7101495845126193, | |
| "learning_rate": 1.2074885843933542e-06, | |
| "loss": 0.1668, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.5960300429184548, | |
| "grad_norm": 1.8706776246586578, | |
| "learning_rate": 1.1922747461998425e-06, | |
| "loss": 0.1792, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.5987124463519313, | |
| "grad_norm": 1.7146003209383944, | |
| "learning_rate": 1.1771443759119028e-06, | |
| "loss": 0.1762, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.6013948497854078, | |
| "grad_norm": 1.7587464469782965, | |
| "learning_rate": 1.162097805197459e-06, | |
| "loss": 0.1721, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 1.6040772532188843, | |
| "grad_norm": 1.671773670726898, | |
| "learning_rate": 1.147135363887485e-06, | |
| "loss": 0.1736, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.6067596566523605, | |
| "grad_norm": 1.7831449677470894, | |
| "learning_rate": 1.1322573799687904e-06, | |
| "loss": 0.1821, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 1.6094420600858368, | |
| "grad_norm": 1.749081297778821, | |
| "learning_rate": 1.1174641795768132e-06, | |
| "loss": 0.1749, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.6121244635193133, | |
| "grad_norm": 1.6885288263413876, | |
| "learning_rate": 1.1027560869884845e-06, | |
| "loss": 0.1803, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 1.6148068669527897, | |
| "grad_norm": 1.7789601448566963, | |
| "learning_rate": 1.0881334246151114e-06, | |
| "loss": 0.1792, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.6174892703862662, | |
| "grad_norm": 1.797127788770941, | |
| "learning_rate": 1.073596512995317e-06, | |
| "loss": 0.1766, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 1.6201716738197425, | |
| "grad_norm": 1.6913152928957935, | |
| "learning_rate": 1.0591456707880077e-06, | |
| "loss": 0.1677, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.6228540772532187, | |
| "grad_norm": 1.9388861906655925, | |
| "learning_rate": 1.0447812147653885e-06, | |
| "loss": 0.1693, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.6255364806866952, | |
| "grad_norm": 1.8287339322524556, | |
| "learning_rate": 1.0305034598060254e-06, | |
| "loss": 0.185, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.6282188841201717, | |
| "grad_norm": 1.7359121362575998, | |
| "learning_rate": 1.0163127188879352e-06, | |
| "loss": 0.1696, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 1.6309012875536482, | |
| "grad_norm": 1.7976891275154785, | |
| "learning_rate": 1.0022093030817316e-06, | |
| "loss": 0.1734, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.6335836909871244, | |
| "grad_norm": 1.7789540175564873, | |
| "learning_rate": 9.88193521543797e-07, | |
| "loss": 0.1717, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 1.636266094420601, | |
| "grad_norm": 1.7088064901560376, | |
| "learning_rate": 9.742656815095175e-07, | |
| "loss": 0.1693, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.6389484978540771, | |
| "grad_norm": 1.6486482877435467, | |
| "learning_rate": 9.604260882865395e-07, | |
| "loss": 0.1743, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 1.6416309012875536, | |
| "grad_norm": 1.7097523703340793, | |
| "learning_rate": 9.466750452480816e-07, | |
| "loss": 0.1675, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.64431330472103, | |
| "grad_norm": 1.8142751972429312, | |
| "learning_rate": 9.330128538262784e-07, | |
| "loss": 0.1673, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 1.6469957081545066, | |
| "grad_norm": 1.7335787796330107, | |
| "learning_rate": 9.194398135055815e-07, | |
| "loss": 0.1744, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.6496781115879828, | |
| "grad_norm": 1.7933634237592693, | |
| "learning_rate": 9.059562218161894e-07, | |
| "loss": 0.1731, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.652360515021459, | |
| "grad_norm": 1.7144165521860568, | |
| "learning_rate": 8.925623743275235e-07, | |
| "loss": 0.1794, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.6550429184549356, | |
| "grad_norm": 1.7854348497119321, | |
| "learning_rate": 8.792585646417568e-07, | |
| "loss": 0.1775, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 1.657725321888412, | |
| "grad_norm": 1.7455552036989075, | |
| "learning_rate": 8.660450843873647e-07, | |
| "loss": 0.1787, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.6604077253218885, | |
| "grad_norm": 1.7895684168501305, | |
| "learning_rate": 8.529222232127526e-07, | |
| "loss": 0.1748, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 1.6630901287553648, | |
| "grad_norm": 1.8436594740961931, | |
| "learning_rate": 8.398902687798832e-07, | |
| "loss": 0.1695, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.6657725321888412, | |
| "grad_norm": 1.7479630576592862, | |
| "learning_rate": 8.269495067579891e-07, | |
| "loss": 0.184, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 1.6684549356223175, | |
| "grad_norm": 1.7572581212303144, | |
| "learning_rate": 8.141002208172977e-07, | |
| "loss": 0.1761, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.671137339055794, | |
| "grad_norm": 1.754681973827706, | |
| "learning_rate": 8.013426926228274e-07, | |
| "loss": 0.1735, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 1.6738197424892705, | |
| "grad_norm": 1.9053550149600742, | |
| "learning_rate": 7.886772018281969e-07, | |
| "loss": 0.1796, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.676502145922747, | |
| "grad_norm": 1.7066025328597623, | |
| "learning_rate": 7.761040260695074e-07, | |
| "loss": 0.168, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.6791845493562232, | |
| "grad_norm": 1.8140596030248617, | |
| "learning_rate": 7.636234409592524e-07, | |
| "loss": 0.1685, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.6818669527896994, | |
| "grad_norm": 1.823899761561944, | |
| "learning_rate": 7.512357200802722e-07, | |
| "loss": 0.174, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 1.684549356223176, | |
| "grad_norm": 1.8364772641113434, | |
| "learning_rate": 7.389411349797654e-07, | |
| "loss": 0.1705, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.6872317596566524, | |
| "grad_norm": 1.6806223651736183, | |
| "learning_rate": 7.267399551633253e-07, | |
| "loss": 0.1706, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 1.6899141630901289, | |
| "grad_norm": 1.891605548895355, | |
| "learning_rate": 7.146324480890476e-07, | |
| "loss": 0.1779, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.6925965665236051, | |
| "grad_norm": 1.788446168769966, | |
| "learning_rate": 7.026188791616484e-07, | |
| "loss": 0.1789, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 1.6952789699570814, | |
| "grad_norm": 1.6493280124322705, | |
| "learning_rate": 6.906995117266641e-07, | |
| "loss": 0.1719, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.6979613733905579, | |
| "grad_norm": 1.7790805165351633, | |
| "learning_rate": 6.788746070646646e-07, | |
| "loss": 0.1785, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 1.7006437768240343, | |
| "grad_norm": 2.173524080131244, | |
| "learning_rate": 6.671444243855368e-07, | |
| "loss": 0.1694, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.7033261802575108, | |
| "grad_norm": 1.7419492752952908, | |
| "learning_rate": 6.555092208227953e-07, | |
| "loss": 0.1729, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.7060085836909873, | |
| "grad_norm": 1.7470702873021877, | |
| "learning_rate": 6.439692514279516e-07, | |
| "loss": 0.1664, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.7086909871244635, | |
| "grad_norm": 1.7707011703025006, | |
| "learning_rate": 6.325247691649139e-07, | |
| "loss": 0.1673, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 1.7113733905579398, | |
| "grad_norm": 1.8390785050997485, | |
| "learning_rate": 6.211760249044535e-07, | |
| "loss": 0.1677, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.7140557939914163, | |
| "grad_norm": 1.6988621285323615, | |
| "learning_rate": 6.099232674187e-07, | |
| "loss": 0.1697, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 1.7167381974248928, | |
| "grad_norm": 1.666246086200389, | |
| "learning_rate": 5.987667433756844e-07, | |
| "loss": 0.1706, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.7194206008583692, | |
| "grad_norm": 1.8080719591700882, | |
| "learning_rate": 5.877066973339379e-07, | |
| "loss": 0.1745, | |
| "step": 3205 | |
| }, | |
| { | |
| "epoch": 1.7221030042918455, | |
| "grad_norm": 1.721957786717317, | |
| "learning_rate": 5.767433717371301e-07, | |
| "loss": 0.1713, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.7247854077253217, | |
| "grad_norm": 1.717789424603017, | |
| "learning_rate": 5.658770069087521e-07, | |
| "loss": 0.1678, | |
| "step": 3215 | |
| }, | |
| { | |
| "epoch": 1.7274678111587982, | |
| "grad_norm": 1.7053886441436985, | |
| "learning_rate": 5.551078410468486e-07, | |
| "loss": 0.1672, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.7301502145922747, | |
| "grad_norm": 1.819056657288576, | |
| "learning_rate": 5.444361102187979e-07, | |
| "loss": 0.1714, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.7328326180257512, | |
| "grad_norm": 1.7995104574020357, | |
| "learning_rate": 5.338620483561386e-07, | |
| "loss": 0.1752, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.7355150214592274, | |
| "grad_norm": 1.6994520700013371, | |
| "learning_rate": 5.233858872494357e-07, | |
| "loss": 0.1753, | |
| "step": 3235 | |
| }, | |
| { | |
| "epoch": 1.738197424892704, | |
| "grad_norm": 1.8671476647333491, | |
| "learning_rate": 5.130078565432089e-07, | |
| "loss": 0.1703, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.7408798283261802, | |
| "grad_norm": 1.6772491004527577, | |
| "learning_rate": 5.027281837308873e-07, | |
| "loss": 0.1628, | |
| "step": 3245 | |
| }, | |
| { | |
| "epoch": 1.7435622317596566, | |
| "grad_norm": 1.754411882099184, | |
| "learning_rate": 4.925470941498345e-07, | |
| "loss": 0.1689, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.746244635193133, | |
| "grad_norm": 1.6353039993025373, | |
| "learning_rate": 4.824648109763991e-07, | |
| "loss": 0.1692, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 1.7489270386266096, | |
| "grad_norm": 1.7434544510607133, | |
| "learning_rate": 4.724815552210288e-07, | |
| "loss": 0.1704, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.7516094420600858, | |
| "grad_norm": 1.6704471472999787, | |
| "learning_rate": 4.6259754572342e-07, | |
| "loss": 0.1716, | |
| "step": 3265 | |
| }, | |
| { | |
| "epoch": 1.754291845493562, | |
| "grad_norm": 1.7516950888101692, | |
| "learning_rate": 4.5281299914773146e-07, | |
| "loss": 0.1741, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.7569742489270386, | |
| "grad_norm": 1.7040325954172368, | |
| "learning_rate": 4.43128129977819e-07, | |
| "loss": 0.1748, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.759656652360515, | |
| "grad_norm": 1.7021022207307568, | |
| "learning_rate": 4.3354315051254927e-07, | |
| "loss": 0.1765, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.7623390557939915, | |
| "grad_norm": 1.7196523854425305, | |
| "learning_rate": 4.2405827086113406e-07, | |
| "loss": 0.1698, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 1.7650214592274678, | |
| "grad_norm": 1.702915568531188, | |
| "learning_rate": 4.146736989385336e-07, | |
| "loss": 0.1598, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.7677038626609443, | |
| "grad_norm": 1.6995494482853168, | |
| "learning_rate": 4.0538964046089426e-07, | |
| "loss": 0.1756, | |
| "step": 3295 | |
| }, | |
| { | |
| "epoch": 1.7703862660944205, | |
| "grad_norm": 1.7641412694504541, | |
| "learning_rate": 3.962062989410359e-07, | |
| "loss": 0.1726, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.773068669527897, | |
| "grad_norm": 1.7780397843294091, | |
| "learning_rate": 3.871238756840029e-07, | |
| "loss": 0.1743, | |
| "step": 3305 | |
| }, | |
| { | |
| "epoch": 1.7757510729613735, | |
| "grad_norm": 1.812082659957161, | |
| "learning_rate": 3.7814256978263465e-07, | |
| "loss": 0.1701, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.77843347639485, | |
| "grad_norm": 1.7020500555044733, | |
| "learning_rate": 3.6926257811321585e-07, | |
| "loss": 0.1656, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 1.7811158798283262, | |
| "grad_norm": 1.8126139098321488, | |
| "learning_rate": 3.604840953311506e-07, | |
| "loss": 0.1676, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.7837982832618025, | |
| "grad_norm": 1.7560277944500373, | |
| "learning_rate": 3.518073138667044e-07, | |
| "loss": 0.1711, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.786480686695279, | |
| "grad_norm": 1.763432081520576, | |
| "learning_rate": 3.4323242392077737e-07, | |
| "loss": 0.1711, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.7891630901287554, | |
| "grad_norm": 1.6297190806246264, | |
| "learning_rate": 3.347596134607406e-07, | |
| "loss": 0.1682, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 1.7918454935622319, | |
| "grad_norm": 1.8062944470139457, | |
| "learning_rate": 3.263890682163129e-07, | |
| "loss": 0.1749, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.7945278969957081, | |
| "grad_norm": 1.7881739648077752, | |
| "learning_rate": 3.1812097167549127e-07, | |
| "loss": 0.1735, | |
| "step": 3345 | |
| }, | |
| { | |
| "epoch": 1.7972103004291844, | |
| "grad_norm": 1.6572256761747672, | |
| "learning_rate": 3.0995550508052976e-07, | |
| "loss": 0.1727, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.7998927038626609, | |
| "grad_norm": 1.6259819425995783, | |
| "learning_rate": 3.018928474239613e-07, | |
| "loss": 0.1652, | |
| "step": 3355 | |
| }, | |
| { | |
| "epoch": 1.8025751072961373, | |
| "grad_norm": 1.7420901379335734, | |
| "learning_rate": 2.9393317544468003e-07, | |
| "loss": 0.1777, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.8052575107296138, | |
| "grad_norm": 1.7760823431379773, | |
| "learning_rate": 2.860766636240636e-07, | |
| "loss": 0.1739, | |
| "step": 3365 | |
| }, | |
| { | |
| "epoch": 1.80793991416309, | |
| "grad_norm": 1.643271804266458, | |
| "learning_rate": 2.7832348418215084e-07, | |
| "loss": 0.1705, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.8106223175965666, | |
| "grad_norm": 1.7104159344251477, | |
| "learning_rate": 2.7067380707386235e-07, | |
| "loss": 0.1735, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.8133047210300428, | |
| "grad_norm": 1.6791868888450323, | |
| "learning_rate": 2.631277999852799e-07, | |
| "loss": 0.1724, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.8159871244635193, | |
| "grad_norm": 1.7140730397456243, | |
| "learning_rate": 2.556856283299691e-07, | |
| "loss": 0.1676, | |
| "step": 3385 | |
| }, | |
| { | |
| "epoch": 1.8186695278969958, | |
| "grad_norm": 1.6077185197341657, | |
| "learning_rate": 2.483474552453513e-07, | |
| "loss": 0.1746, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.8213519313304722, | |
| "grad_norm": 1.7870974144659697, | |
| "learning_rate": 2.4111344158912863e-07, | |
| "loss": 0.1721, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 1.8240343347639485, | |
| "grad_norm": 1.6827957273473264, | |
| "learning_rate": 2.3398374593576022e-07, | |
| "loss": 0.1614, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.8267167381974247, | |
| "grad_norm": 1.6876082896664166, | |
| "learning_rate": 2.2695852457298328e-07, | |
| "loss": 0.1725, | |
| "step": 3405 | |
| }, | |
| { | |
| "epoch": 1.8293991416309012, | |
| "grad_norm": 1.7726305091723993, | |
| "learning_rate": 2.2003793149838692e-07, | |
| "loss": 0.1703, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.8320815450643777, | |
| "grad_norm": 1.8349898381032062, | |
| "learning_rate": 2.1322211841604046e-07, | |
| "loss": 0.1684, | |
| "step": 3415 | |
| }, | |
| { | |
| "epoch": 1.8347639484978542, | |
| "grad_norm": 1.8816226837004817, | |
| "learning_rate": 2.0651123473316103e-07, | |
| "loss": 0.18, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.8374463519313304, | |
| "grad_norm": 1.7671307777064689, | |
| "learning_rate": 1.9990542755684738e-07, | |
| "loss": 0.1673, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 1.840128755364807, | |
| "grad_norm": 1.6997077829480403, | |
| "learning_rate": 1.9340484169084627e-07, | |
| "loss": 0.1695, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.8428111587982832, | |
| "grad_norm": 1.7386958389708476, | |
| "learning_rate": 1.870096196323856e-07, | |
| "loss": 0.175, | |
| "step": 3435 | |
| }, | |
| { | |
| "epoch": 1.8454935622317596, | |
| "grad_norm": 1.679414270357438, | |
| "learning_rate": 1.8071990156904362e-07, | |
| "loss": 0.1676, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.8481759656652361, | |
| "grad_norm": 1.7180649273928403, | |
| "learning_rate": 1.7453582537568404e-07, | |
| "loss": 0.171, | |
| "step": 3445 | |
| }, | |
| { | |
| "epoch": 1.8508583690987126, | |
| "grad_norm": 1.761958633555483, | |
| "learning_rate": 1.6845752661142744e-07, | |
| "loss": 0.1727, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.8535407725321889, | |
| "grad_norm": 1.723808407240347, | |
| "learning_rate": 1.624851385166809e-07, | |
| "loss": 0.1688, | |
| "step": 3455 | |
| }, | |
| { | |
| "epoch": 1.856223175965665, | |
| "grad_norm": 1.7305314750444647, | |
| "learning_rate": 1.5661879201022135e-07, | |
| "loss": 0.1642, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.8589055793991416, | |
| "grad_norm": 1.8485234245901698, | |
| "learning_rate": 1.5085861568631845e-07, | |
| "loss": 0.1748, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 1.861587982832618, | |
| "grad_norm": 1.6803556355862883, | |
| "learning_rate": 1.4520473581192407e-07, | |
| "loss": 0.1644, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.8642703862660945, | |
| "grad_norm": 1.870312744732992, | |
| "learning_rate": 1.396572763238957e-07, | |
| "loss": 0.1699, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 1.8669527896995708, | |
| "grad_norm": 1.678351269677729, | |
| "learning_rate": 1.3421635882628958e-07, | |
| "loss": 0.1701, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.869635193133047, | |
| "grad_norm": 1.900918288526919, | |
| "learning_rate": 1.2888210258768464e-07, | |
| "loss": 0.1737, | |
| "step": 3485 | |
| }, | |
| { | |
| "epoch": 1.8723175965665235, | |
| "grad_norm": 1.7501849452560176, | |
| "learning_rate": 1.2365462453857612e-07, | |
| "loss": 0.171, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.875, | |
| "grad_norm": 1.7643463297208026, | |
| "learning_rate": 1.1853403926880725e-07, | |
| "loss": 0.1803, | |
| "step": 3495 | |
| }, | |
| { | |
| "epoch": 1.8776824034334765, | |
| "grad_norm": 1.7456320007378667, | |
| "learning_rate": 1.1352045902506158e-07, | |
| "loss": 0.1772, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.880364806866953, | |
| "grad_norm": 1.6733217604292803, | |
| "learning_rate": 1.0861399370839964e-07, | |
| "loss": 0.1708, | |
| "step": 3505 | |
| }, | |
| { | |
| "epoch": 1.8830472103004292, | |
| "grad_norm": 1.7601489763981855, | |
| "learning_rate": 1.03814750871849e-07, | |
| "loss": 0.1729, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.8857296137339055, | |
| "grad_norm": 1.7554273513816643, | |
| "learning_rate": 9.912283571805015e-08, | |
| "loss": 0.1725, | |
| "step": 3515 | |
| }, | |
| { | |
| "epoch": 1.888412017167382, | |
| "grad_norm": 1.8155918592259217, | |
| "learning_rate": 9.45383510969472e-08, | |
| "loss": 0.179, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.8910944206008584, | |
| "grad_norm": 1.724970161445885, | |
| "learning_rate": 9.006139750353526e-08, | |
| "loss": 0.1719, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.893776824034335, | |
| "grad_norm": 1.7143787726536936, | |
| "learning_rate": 8.569207307565664e-08, | |
| "loss": 0.178, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.8964592274678111, | |
| "grad_norm": 1.7675589277617567, | |
| "learning_rate": 8.143047359184863e-08, | |
| "loss": 0.1767, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 1.8991416309012874, | |
| "grad_norm": 1.7988231066894382, | |
| "learning_rate": 7.727669246924697e-08, | |
| "loss": 0.1721, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.9018240343347639, | |
| "grad_norm": 1.7917443907599273, | |
| "learning_rate": 7.32308207615351e-08, | |
| "loss": 0.1729, | |
| "step": 3545 | |
| }, | |
| { | |
| "epoch": 1.9045064377682404, | |
| "grad_norm": 1.7247547707337405, | |
| "learning_rate": 6.929294715694923e-08, | |
| "loss": 0.1675, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.9071888412017168, | |
| "grad_norm": 1.7092676553238282, | |
| "learning_rate": 6.54631579763343e-08, | |
| "loss": 0.1732, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 1.909871244635193, | |
| "grad_norm": 1.6963652820446242, | |
| "learning_rate": 6.174153717125264e-08, | |
| "loss": 0.1648, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.9125536480686696, | |
| "grad_norm": 1.6728309532039858, | |
| "learning_rate": 5.812816632214169e-08, | |
| "loss": 0.1744, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 1.9152360515021458, | |
| "grad_norm": 1.8234146842434622, | |
| "learning_rate": 5.4623124636528635e-08, | |
| "loss": 0.1743, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.9179184549356223, | |
| "grad_norm": 1.7337294621225616, | |
| "learning_rate": 5.122648894728854e-08, | |
| "loss": 0.1583, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.9206008583690988, | |
| "grad_norm": 1.8631680318842838, | |
| "learning_rate": 4.7938333710969564e-08, | |
| "loss": 0.1678, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.9232832618025753, | |
| "grad_norm": 1.6286398974109075, | |
| "learning_rate": 4.4758731006149804e-08, | |
| "loss": 0.1655, | |
| "step": 3585 | |
| }, | |
| { | |
| "epoch": 1.9259656652360515, | |
| "grad_norm": 1.6651386998279623, | |
| "learning_rate": 4.16877505318658e-08, | |
| "loss": 0.1649, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.9286480686695278, | |
| "grad_norm": 1.7215780598096833, | |
| "learning_rate": 3.872545960608099e-08, | |
| "loss": 0.1716, | |
| "step": 3595 | |
| }, | |
| { | |
| "epoch": 1.9313304721030042, | |
| "grad_norm": 1.8496948473611403, | |
| "learning_rate": 3.587192316420962e-08, | |
| "loss": 0.1836, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.9340128755364807, | |
| "grad_norm": 1.789269898205407, | |
| "learning_rate": 3.312720375769518e-08, | |
| "loss": 0.1711, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 1.9366952789699572, | |
| "grad_norm": 1.7661067152019674, | |
| "learning_rate": 3.04913615526381e-08, | |
| "loss": 0.1711, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.9393776824034334, | |
| "grad_norm": 1.7277593438439514, | |
| "learning_rate": 2.796445432847794e-08, | |
| "loss": 0.1683, | |
| "step": 3615 | |
| }, | |
| { | |
| "epoch": 1.94206008583691, | |
| "grad_norm": 1.666312242597901, | |
| "learning_rate": 2.554653747672442e-08, | |
| "loss": 0.168, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.9447424892703862, | |
| "grad_norm": 1.7365407287758425, | |
| "learning_rate": 2.323766399974614e-08, | |
| "loss": 0.1703, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.9474248927038627, | |
| "grad_norm": 1.7377335353091357, | |
| "learning_rate": 2.1037884509605976e-08, | |
| "loss": 0.1748, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.9501072961373391, | |
| "grad_norm": 1.8309351566068142, | |
| "learning_rate": 1.8947247226954736e-08, | |
| "loss": 0.1713, | |
| "step": 3635 | |
| }, | |
| { | |
| "epoch": 1.9527896995708156, | |
| "grad_norm": 1.776864049048742, | |
| "learning_rate": 1.6965797979971442e-08, | |
| "loss": 0.1773, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.9554721030042919, | |
| "grad_norm": 1.6767580087158616, | |
| "learning_rate": 1.509358020336027e-08, | |
| "loss": 0.1666, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 1.9581545064377681, | |
| "grad_norm": 1.817374124019115, | |
| "learning_rate": 1.3330634937396835e-08, | |
| "loss": 0.1616, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.9608369098712446, | |
| "grad_norm": 1.7773594339045193, | |
| "learning_rate": 1.1677000827030604e-08, | |
| "loss": 0.1727, | |
| "step": 3655 | |
| }, | |
| { | |
| "epoch": 1.963519313304721, | |
| "grad_norm": 1.7837035371769219, | |
| "learning_rate": 1.0132714121037223e-08, | |
| "loss": 0.175, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.9662017167381975, | |
| "grad_norm": 1.6240013537396132, | |
| "learning_rate": 8.697808671221385e-09, | |
| "loss": 0.1707, | |
| "step": 3665 | |
| }, | |
| { | |
| "epoch": 1.9688841201716738, | |
| "grad_norm": 1.6065913572501274, | |
| "learning_rate": 7.3723159316796414e-09, | |
| "loss": 0.1727, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.97156652360515, | |
| "grad_norm": 1.7024659714900665, | |
| "learning_rate": 6.1562649581059505e-09, | |
| "loss": 0.168, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.9742489270386265, | |
| "grad_norm": 1.6959109954027434, | |
| "learning_rate": 5.049682407157752e-09, | |
| "loss": 0.1685, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.976931330472103, | |
| "grad_norm": 1.6969423589462422, | |
| "learning_rate": 4.052592535871425e-09, | |
| "loss": 0.1694, | |
| "step": 3685 | |
| }, | |
| { | |
| "epoch": 1.9796137339055795, | |
| "grad_norm": 1.8395970305653224, | |
| "learning_rate": 3.1650172011293834e-09, | |
| "loss": 0.1794, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.9822961373390557, | |
| "grad_norm": 1.6774442707442048, | |
| "learning_rate": 2.3869758591810177e-09, | |
| "loss": 0.1662, | |
| "step": 3695 | |
| }, | |
| { | |
| "epoch": 1.9849785407725322, | |
| "grad_norm": 1.6932165167708886, | |
| "learning_rate": 1.718485565218031e-09, | |
| "loss": 0.1676, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.9876609442060085, | |
| "grad_norm": 1.7427139009863861, | |
| "learning_rate": 1.15956097299752e-09, | |
| "loss": 0.1648, | |
| "step": 3705 | |
| }, | |
| { | |
| "epoch": 1.990343347639485, | |
| "grad_norm": 1.701084604202847, | |
| "learning_rate": 7.102143345238955e-10, | |
| "loss": 0.1702, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.9930257510729614, | |
| "grad_norm": 1.703457401005494, | |
| "learning_rate": 3.7045549977909877e-10, | |
| "loss": 0.1695, | |
| "step": 3715 | |
| }, | |
| { | |
| "epoch": 1.995708154506438, | |
| "grad_norm": 1.6886515476568755, | |
| "learning_rate": 1.4029191650555274e-10, | |
| "loss": 0.1751, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.9983905579399142, | |
| "grad_norm": 1.7655789088614147, | |
| "learning_rate": 1.9728630044069107e-11, | |
| "loss": 0.1713, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_runtime": 263.6121, | |
| "eval_samples_per_second": 3.793, | |
| "eval_steps_per_second": 0.948, | |
| "step": 3728 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 3728, | |
| "total_flos": 390283678187520.0, | |
| "train_loss": 0.24746462182329704, | |
| "train_runtime": 33763.0865, | |
| "train_samples_per_second": 1.766, | |
| "train_steps_per_second": 0.11 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3728, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 390283678187520.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |