{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.25250227479526843,
  "eval_steps": 500,
  "global_step": 555,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00045495905368516835,
      "grad_norm": 11.705517638294971,
      "learning_rate": 5e-05,
      "loss": 0.3487,
      "step": 1
    },
    {
      "epoch": 0.0009099181073703367,
      "grad_norm": 5.625533664291238,
      "learning_rate": 4.999999897855645e-05,
      "loss": 0.3475,
      "step": 2
    },
    {
      "epoch": 0.001364877161055505,
      "grad_norm": 3.050231759218471,
      "learning_rate": 4.9999995914225884e-05,
      "loss": 0.2509,
      "step": 3
    },
    {
      "epoch": 0.0018198362147406734,
      "grad_norm": 2.8736287685120083,
      "learning_rate": 4.999999080700855e-05,
      "loss": 0.2033,
      "step": 4
    },
    {
      "epoch": 0.0022747952684258415,
      "grad_norm": 5.641734287312084,
      "learning_rate": 4.9999983656904865e-05,
      "loss": 0.2507,
      "step": 5
    },
    {
      "epoch": 0.00272975432211101,
      "grad_norm": 21.95717145768768,
      "learning_rate": 4.999997446391541e-05,
      "loss": 0.272,
      "step": 6
    },
    {
      "epoch": 0.0031847133757961785,
      "grad_norm": 1.7784000344733917,
      "learning_rate": 4.999996322804095e-05,
      "loss": 0.2085,
      "step": 7
    },
    {
      "epoch": 0.003639672429481347,
      "grad_norm": 1.5833500335813995,
      "learning_rate": 4.999994994928239e-05,
      "loss": 0.192,
      "step": 8
    },
    {
      "epoch": 0.004094631483166515,
      "grad_norm": 2.130403672123271,
      "learning_rate": 4.999993462764082e-05,
      "loss": 0.2674,
      "step": 9
    },
    {
      "epoch": 0.004549590536851683,
      "grad_norm": 1.496781741273883,
      "learning_rate": 4.9999917263117485e-05,
      "loss": 0.1765,
      "step": 10
    },
    {
      "epoch": 0.005004549590536852,
      "grad_norm": 2.0423071988934147,
      "learning_rate": 4.9999897855713816e-05,
      "loss": 0.2361,
      "step": 11
    },
    {
      "epoch": 0.00545950864422202,
      "grad_norm": 2.451764386913058,
      "learning_rate": 4.999987640543139e-05,
      "loss": 0.2698,
      "step": 12
    },
    {
      "epoch": 0.005914467697907188,
      "grad_norm": 3.972515109472195,
      "learning_rate": 4.999985291227196e-05,
      "loss": 0.2393,
      "step": 13
    },
    {
      "epoch": 0.006369426751592357,
      "grad_norm": 2.0773905159558765,
      "learning_rate": 4.999982737623745e-05,
      "loss": 0.3335,
      "step": 14
    },
    {
      "epoch": 0.006824385805277525,
      "grad_norm": 1.5506423565582652,
      "learning_rate": 4.999979979732995e-05,
      "loss": 0.2581,
      "step": 15
    },
    {
      "epoch": 0.007279344858962694,
      "grad_norm": 1.2950827876188111,
      "learning_rate": 4.9999770175551705e-05,
      "loss": 0.175,
      "step": 16
    },
    {
      "epoch": 0.0077343039126478615,
      "grad_norm": 1.2558416571692808,
      "learning_rate": 4.999973851090514e-05,
      "loss": 0.2674,
      "step": 17
    },
    {
      "epoch": 0.00818926296633303,
      "grad_norm": 1.1485865015052659,
      "learning_rate": 4.999970480339285e-05,
      "loss": 0.2361,
      "step": 18
    },
    {
      "epoch": 0.008644222020018199,
      "grad_norm": 2.1071510228354704,
      "learning_rate": 4.9999669053017564e-05,
      "loss": 0.2395,
      "step": 19
    },
    {
      "epoch": 0.009099181073703366,
      "grad_norm": 1.7585226852484823,
      "learning_rate": 4.999963125978223e-05,
      "loss": 0.1951,
      "step": 20
    },
    {
      "epoch": 0.009554140127388535,
      "grad_norm": 1.619518025852925,
      "learning_rate": 4.999959142368992e-05,
      "loss": 0.2135,
      "step": 21
    },
    {
      "epoch": 0.010009099181073703,
      "grad_norm": 7.23106820979905,
      "learning_rate": 4.9999549544743906e-05,
      "loss": 0.2357,
      "step": 22
    },
    {
      "epoch": 0.010464058234758872,
      "grad_norm": 1.0407840606007694,
      "learning_rate": 4.99995056229476e-05,
      "loss": 0.1978,
      "step": 23
    },
    {
      "epoch": 0.01091901728844404,
      "grad_norm": 1.2281492084048224,
      "learning_rate": 4.999945965830458e-05,
      "loss": 0.236,
      "step": 24
    },
    {
      "epoch": 0.011373976342129208,
      "grad_norm": 1.3164987460279216,
      "learning_rate": 4.9999411650818623e-05,
      "loss": 0.2037,
      "step": 25
    },
    {
      "epoch": 0.011828935395814377,
      "grad_norm": 1.0633028261158373,
      "learning_rate": 4.999936160049364e-05,
      "loss": 0.219,
      "step": 26
    },
    {
      "epoch": 0.012283894449499545,
      "grad_norm": 1.2941998128360934,
      "learning_rate": 4.999930950733372e-05,
      "loss": 0.2755,
      "step": 27
    },
    {
      "epoch": 0.012738853503184714,
      "grad_norm": 1.1685438468523945,
      "learning_rate": 4.9999255371343125e-05,
      "loss": 0.2103,
      "step": 28
    },
    {
      "epoch": 0.013193812556869881,
      "grad_norm": 1.1942326366823812,
      "learning_rate": 4.999919919252628e-05,
      "loss": 0.2538,
      "step": 29
    },
    {
      "epoch": 0.01364877161055505,
      "grad_norm": 1.1062150278612115,
      "learning_rate": 4.999914097088777e-05,
      "loss": 0.2381,
      "step": 30
    },
    {
      "epoch": 0.014103730664240218,
      "grad_norm": 1.37690938622929,
      "learning_rate": 4.9999080706432355e-05,
      "loss": 0.2374,
      "step": 31
    },
    {
      "epoch": 0.014558689717925387,
      "grad_norm": 1.5049840020802696,
      "learning_rate": 4.999901839916496e-05,
      "loss": 0.2026,
      "step": 32
    },
    {
      "epoch": 0.015013648771610554,
      "grad_norm": 1.269006405808121,
      "learning_rate": 4.9998954049090676e-05,
      "loss": 0.2178,
      "step": 33
    },
    {
      "epoch": 0.015468607825295723,
      "grad_norm": 1.3598431657906636,
      "learning_rate": 4.999888765621475e-05,
      "loss": 0.2078,
      "step": 34
    },
    {
      "epoch": 0.01592356687898089,
      "grad_norm": 1.2486022776853285,
      "learning_rate": 4.999881922054264e-05,
      "loss": 0.1988,
      "step": 35
    },
    {
      "epoch": 0.01637852593266606,
      "grad_norm": 1.0168849023345046,
      "learning_rate": 4.9998748742079904e-05,
      "loss": 0.17,
      "step": 36
    },
    {
      "epoch": 0.01683348498635123,
      "grad_norm": 1.3378814176122384,
      "learning_rate": 4.999867622083232e-05,
      "loss": 0.2478,
      "step": 37
    },
    {
      "epoch": 0.017288444040036398,
      "grad_norm": 1.0143269949823155,
      "learning_rate": 4.99986016568058e-05,
      "loss": 0.2275,
      "step": 38
    },
    {
      "epoch": 0.017743403093721567,
      "grad_norm": 1.5776510807323647,
      "learning_rate": 4.999852505000645e-05,
      "loss": 0.2522,
      "step": 39
    },
    {
      "epoch": 0.018198362147406732,
      "grad_norm": 0.9272338649997853,
      "learning_rate": 4.999844640044052e-05,
      "loss": 0.1526,
      "step": 40
    },
    {
      "epoch": 0.0186533212010919,
      "grad_norm": 1.3110035115076597,
      "learning_rate": 4.999836570811445e-05,
      "loss": 0.2594,
      "step": 41
    },
    {
      "epoch": 0.01910828025477707,
      "grad_norm": 1.102238378713438,
      "learning_rate": 4.999828297303483e-05,
      "loss": 0.2455,
      "step": 42
    },
    {
      "epoch": 0.019563239308462238,
      "grad_norm": 1.2400668175759613,
      "learning_rate": 4.9998198195208405e-05,
      "loss": 0.234,
      "step": 43
    },
    {
      "epoch": 0.020018198362147407,
      "grad_norm": 1.2706427374996991,
      "learning_rate": 4.999811137464212e-05,
      "loss": 0.2333,
      "step": 44
    },
    {
      "epoch": 0.020473157415832575,
      "grad_norm": 1.0227954451905796,
      "learning_rate": 4.999802251134307e-05,
      "loss": 0.2718,
      "step": 45
    },
    {
      "epoch": 0.020928116469517744,
      "grad_norm": 1.129854136221206,
      "learning_rate": 4.99979316053185e-05,
      "loss": 0.1993,
      "step": 46
    },
    {
      "epoch": 0.021383075523202913,
      "grad_norm": 1.0566869480663346,
      "learning_rate": 4.999783865657585e-05,
      "loss": 0.3201,
      "step": 47
    },
    {
      "epoch": 0.02183803457688808,
      "grad_norm": 1.3644671171523401,
      "learning_rate": 4.9997743665122723e-05,
      "loss": 0.2672,
      "step": 48
    },
    {
      "epoch": 0.022292993630573247,
      "grad_norm": 1.3371621386186787,
      "learning_rate": 4.999764663096686e-05,
      "loss": 0.2982,
      "step": 49
    },
    {
      "epoch": 0.022747952684258416,
      "grad_norm": 0.9819251723210562,
      "learning_rate": 4.999754755411621e-05,
      "loss": 0.1932,
      "step": 50
    },
    {
      "epoch": 0.023202911737943584,
      "grad_norm": 1.272139842264722,
      "learning_rate": 4.999744643457886e-05,
      "loss": 0.2352,
      "step": 51
    },
    {
      "epoch": 0.023657870791628753,
      "grad_norm": 1.4633848011272832,
      "learning_rate": 4.999734327236307e-05,
      "loss": 0.2331,
      "step": 52
    },
    {
      "epoch": 0.024112829845313922,
      "grad_norm": 0.8534385316122761,
      "learning_rate": 4.999723806747729e-05,
      "loss": 0.1465,
      "step": 53
    },
    {
      "epoch": 0.02456778889899909,
      "grad_norm": 1.0949860395099258,
      "learning_rate": 4.999713081993009e-05,
      "loss": 0.1731,
      "step": 54
    },
    {
      "epoch": 0.02502274795268426,
      "grad_norm": 1.1829726026120988,
      "learning_rate": 4.999702152973025e-05,
      "loss": 0.1986,
      "step": 55
    },
    {
      "epoch": 0.025477707006369428,
      "grad_norm": 1.4588831566316596,
      "learning_rate": 4.999691019688669e-05,
      "loss": 0.2027,
      "step": 56
    },
    {
      "epoch": 0.025932666060054597,
      "grad_norm": 0.9041150159503183,
      "learning_rate": 4.999679682140852e-05,
      "loss": 0.136,
      "step": 57
    },
    {
      "epoch": 0.026387625113739762,
      "grad_norm": 1.184894654186384,
      "learning_rate": 4.9996681403305e-05,
      "loss": 0.2959,
      "step": 58
    },
    {
      "epoch": 0.02684258416742493,
      "grad_norm": 1.7698815697030899,
      "learning_rate": 4.999656394258555e-05,
      "loss": 0.2511,
      "step": 59
    },
    {
      "epoch": 0.0272975432211101,
      "grad_norm": 0.9092461242140687,
      "learning_rate": 4.999644443925978e-05,
      "loss": 0.2103,
      "step": 60
    },
    {
      "epoch": 0.027752502274795268,
      "grad_norm": 1.1062857329941562,
      "learning_rate": 4.999632289333746e-05,
      "loss": 0.23,
      "step": 61
    },
    {
      "epoch": 0.028207461328480437,
      "grad_norm": 1.4212490673236147,
      "learning_rate": 4.9996199304828514e-05,
      "loss": 0.2075,
      "step": 62
    },
    {
      "epoch": 0.028662420382165606,
      "grad_norm": 1.5285991392407892,
      "learning_rate": 4.999607367374304e-05,
      "loss": 0.2937,
      "step": 63
    },
    {
      "epoch": 0.029117379435850774,
      "grad_norm": 1.535895466709353,
      "learning_rate": 4.999594600009131e-05,
      "loss": 0.2176,
      "step": 64
    },
    {
      "epoch": 0.029572338489535943,
      "grad_norm": 1.3542478803064237,
      "learning_rate": 4.999581628388375e-05,
      "loss": 0.2478,
      "step": 65
    },
    {
      "epoch": 0.03002729754322111,
      "grad_norm": 1.005104821194542,
      "learning_rate": 4.999568452513097e-05,
      "loss": 0.2768,
      "step": 66
    },
    {
      "epoch": 0.030482256596906277,
      "grad_norm": 1.0446720362047501,
      "learning_rate": 4.999555072384372e-05,
      "loss": 0.2457,
      "step": 67
    },
    {
      "epoch": 0.030937215650591446,
      "grad_norm": 1.2903168733267987,
      "learning_rate": 4.999541488003295e-05,
      "loss": 0.2146,
      "step": 68
    },
    {
      "epoch": 0.03139217470427662,
      "grad_norm": 0.9466626958786747,
      "learning_rate": 4.999527699370975e-05,
      "loss": 0.2266,
      "step": 69
    },
    {
      "epoch": 0.03184713375796178,
      "grad_norm": 1.0943470369291173,
      "learning_rate": 4.99951370648854e-05,
      "loss": 0.216,
      "step": 70
    },
    {
      "epoch": 0.03230209281164695,
      "grad_norm": 1.0584653377465871,
      "learning_rate": 4.9994995093571314e-05,
      "loss": 0.2074,
      "step": 71
    },
    {
      "epoch": 0.03275705186533212,
      "grad_norm": 1.3190163096713692,
      "learning_rate": 4.999485107977912e-05,
      "loss": 0.2571,
      "step": 72
    },
    {
      "epoch": 0.033212010919017286,
      "grad_norm": 1.0710531576617517,
      "learning_rate": 4.999470502352056e-05,
      "loss": 0.2183,
      "step": 73
    },
    {
      "epoch": 0.03366696997270246,
      "grad_norm": 1.171898418000334,
      "learning_rate": 4.9994556924807584e-05,
      "loss": 0.257,
      "step": 74
    },
    {
      "epoch": 0.034121929026387623,
      "grad_norm": 0.9318333327642333,
      "learning_rate": 4.999440678365229e-05,
      "loss": 0.2151,
      "step": 75
    },
    {
      "epoch": 0.034576888080072796,
      "grad_norm": 1.0540431316354797,
      "learning_rate": 4.999425460006695e-05,
      "loss": 0.2168,
      "step": 76
    },
    {
      "epoch": 0.03503184713375796,
      "grad_norm": 0.9359829647848551,
      "learning_rate": 4.999410037406399e-05,
      "loss": 0.2089,
      "step": 77
    },
    {
      "epoch": 0.03548680618744313,
      "grad_norm": 1.6371641130606778,
      "learning_rate": 4.999394410565603e-05,
      "loss": 0.3167,
      "step": 78
    },
    {
      "epoch": 0.0359417652411283,
      "grad_norm": 1.6259583096921861,
      "learning_rate": 4.999378579485583e-05,
      "loss": 0.2061,
      "step": 79
    },
    {
      "epoch": 0.036396724294813464,
      "grad_norm": 1.0290562777993864,
      "learning_rate": 4.9993625441676315e-05,
      "loss": 0.2324,
      "step": 80
    },
    {
      "epoch": 0.036851683348498636,
      "grad_norm": 0.8318367643913458,
      "learning_rate": 4.999346304613061e-05,
      "loss": 0.2347,
      "step": 81
    },
    {
      "epoch": 0.0373066424021838,
      "grad_norm": 1.4118508193951111,
      "learning_rate": 4.9993298608231966e-05,
      "loss": 0.2771,
      "step": 82
    },
    {
      "epoch": 0.03776160145586897,
      "grad_norm": 1.058471337149328,
      "learning_rate": 4.999313212799382e-05,
      "loss": 0.2237,
      "step": 83
    },
    {
      "epoch": 0.03821656050955414,
      "grad_norm": 1.4253258272976375,
      "learning_rate": 4.99929636054298e-05,
      "loss": 0.2862,
      "step": 84
    },
    {
      "epoch": 0.03867151956323931,
      "grad_norm": 1.0894164736230274,
      "learning_rate": 4.999279304055366e-05,
      "loss": 0.2713,
      "step": 85
    },
    {
      "epoch": 0.039126478616924476,
      "grad_norm": 0.93733361977436,
      "learning_rate": 4.999262043337933e-05,
      "loss": 0.2419,
      "step": 86
    },
    {
      "epoch": 0.03958143767060965,
      "grad_norm": 1.0408412273845644,
      "learning_rate": 4.999244578392094e-05,
      "loss": 0.2187,
      "step": 87
    },
    {
      "epoch": 0.040036396724294813,
      "grad_norm": 0.869634294861204,
      "learning_rate": 4.9992269092192734e-05,
      "loss": 0.2132,
      "step": 88
    },
    {
      "epoch": 0.04049135577797998,
      "grad_norm": 1.052994124344848,
      "learning_rate": 4.999209035820917e-05,
      "loss": 0.2339,
      "step": 89
    },
    {
      "epoch": 0.04094631483166515,
      "grad_norm": 0.8888036175609889,
      "learning_rate": 4.999190958198483e-05,
      "loss": 0.1959,
      "step": 90
    },
    {
      "epoch": 0.041401273885350316,
      "grad_norm": 1.1422410696009744,
      "learning_rate": 4.999172676353451e-05,
      "loss": 0.1869,
      "step": 91
    },
    {
      "epoch": 0.04185623293903549,
      "grad_norm": 0.8567024180556292,
      "learning_rate": 4.999154190287314e-05,
      "loss": 0.2041,
      "step": 92
    },
    {
      "epoch": 0.042311191992720654,
      "grad_norm": 1.1583793253948331,
      "learning_rate": 4.999135500001583e-05,
      "loss": 0.2838,
      "step": 93
    },
    {
      "epoch": 0.042766151046405826,
      "grad_norm": 0.8971093580462918,
      "learning_rate": 4.999116605497784e-05,
      "loss": 0.1783,
      "step": 94
    },
    {
      "epoch": 0.04322111010009099,
      "grad_norm": 1.1454812103959264,
      "learning_rate": 4.999097506777463e-05,
      "loss": 0.2448,
      "step": 95
    },
    {
      "epoch": 0.04367606915377616,
      "grad_norm": 1.1673198678019097,
      "learning_rate": 4.9990782038421794e-05,
      "loss": 0.2797,
      "step": 96
    },
    {
      "epoch": 0.04413102820746133,
      "grad_norm": 0.9411242321621639,
      "learning_rate": 4.9990586966935107e-05,
      "loss": 0.1617,
      "step": 97
    },
    {
      "epoch": 0.044585987261146494,
      "grad_norm": 1.1181547368420937,
      "learning_rate": 4.99903898533305e-05,
      "loss": 0.237,
      "step": 98
    },
    {
      "epoch": 0.045040946314831666,
      "grad_norm": 1.1224266668961962,
      "learning_rate": 4.9990190697624095e-05,
      "loss": 0.2244,
      "step": 99
    },
    {
      "epoch": 0.04549590536851683,
      "grad_norm": 0.9107011406834199,
      "learning_rate": 4.998998949983217e-05,
      "loss": 0.1685,
      "step": 100
    },
    {
      "epoch": 0.045950864422202004,
      "grad_norm": 1.1715429796165449,
      "learning_rate": 4.998978625997115e-05,
      "loss": 0.2729,
      "step": 101
    },
    {
      "epoch": 0.04640582347588717,
      "grad_norm": 1.1980256314734712,
      "learning_rate": 4.998958097805765e-05,
      "loss": 0.2302,
      "step": 102
    },
    {
      "epoch": 0.04686078252957234,
      "grad_norm": 1.1482917301959503,
      "learning_rate": 4.998937365410844e-05,
      "loss": 0.1985,
      "step": 103
    },
    {
      "epoch": 0.047315741583257506,
      "grad_norm": 1.1228041619613163,
      "learning_rate": 4.9989164288140463e-05,
      "loss": 0.1813,
      "step": 104
    },
    {
      "epoch": 0.04777070063694268,
      "grad_norm": 1.1989246971355767,
      "learning_rate": 4.998895288017085e-05,
      "loss": 0.2369,
      "step": 105
    },
    {
      "epoch": 0.048225659690627844,
      "grad_norm": 1.2592548396604681,
      "learning_rate": 4.9988739430216834e-05,
      "loss": 0.2173,
      "step": 106
    },
    {
      "epoch": 0.04868061874431301,
      "grad_norm": 1.087082920682345,
      "learning_rate": 4.9988523938295896e-05,
      "loss": 0.2001,
      "step": 107
    },
    {
      "epoch": 0.04913557779799818,
      "grad_norm": 1.3222813789625618,
      "learning_rate": 4.9988306404425625e-05,
      "loss": 0.2867,
      "step": 108
    },
    {
      "epoch": 0.049590536851683346,
      "grad_norm": 0.8632279272612142,
      "learning_rate": 4.9988086828623796e-05,
      "loss": 0.2089,
      "step": 109
    },
    {
      "epoch": 0.05004549590536852,
      "grad_norm": 1.5618851887465837,
      "learning_rate": 4.998786521090836e-05,
      "loss": 0.2855,
      "step": 110
    },
    {
      "epoch": 0.050500454959053684,
      "grad_norm": 1.0324519425398178,
      "learning_rate": 4.998764155129742e-05,
      "loss": 0.2673,
      "step": 111
    },
    {
      "epoch": 0.050955414012738856,
      "grad_norm": 1.6911595693690926,
      "learning_rate": 4.998741584980926e-05,
      "loss": 0.26,
      "step": 112
    },
    {
      "epoch": 0.05141037306642402,
      "grad_norm": 1.2144334923385869,
      "learning_rate": 4.998718810646231e-05,
      "loss": 0.2418,
      "step": 113
    },
    {
      "epoch": 0.051865332120109194,
      "grad_norm": 0.6311838497342976,
      "learning_rate": 4.99869583212752e-05,
      "loss": 0.1868,
      "step": 114
    },
    {
      "epoch": 0.05232029117379436,
      "grad_norm": 1.0223030232681207,
      "learning_rate": 4.998672649426669e-05,
      "loss": 0.3238,
      "step": 115
    },
    {
      "epoch": 0.052775250227479524,
      "grad_norm": 1.0506982443808877,
      "learning_rate": 4.998649262545574e-05,
      "loss": 0.2441,
      "step": 116
    },
    {
      "epoch": 0.053230209281164696,
      "grad_norm": 1.3332445373041606,
      "learning_rate": 4.998625671486144e-05,
      "loss": 0.2522,
      "step": 117
    },
    {
      "epoch": 0.05368516833484986,
      "grad_norm": 1.6324668993126747,
      "learning_rate": 4.998601876250308e-05,
      "loss": 0.236,
      "step": 118
    },
    {
      "epoch": 0.054140127388535034,
      "grad_norm": 0.9485734961244265,
      "learning_rate": 4.9985778768400105e-05,
      "loss": 0.1682,
      "step": 119
    },
    {
      "epoch": 0.0545950864422202,
      "grad_norm": 1.0098569259084629,
      "learning_rate": 4.998553673257212e-05,
      "loss": 0.2621,
      "step": 120
    },
    {
      "epoch": 0.05505004549590537,
      "grad_norm": 1.022249078413692,
      "learning_rate": 4.9985292655038905e-05,
      "loss": 0.2101,
      "step": 121
    },
    {
      "epoch": 0.055505004549590536,
      "grad_norm": 0.8802940837276332,
      "learning_rate": 4.9985046535820414e-05,
      "loss": 0.261,
      "step": 122
    },
    {
      "epoch": 0.05595996360327571,
      "grad_norm": 0.7428228951488147,
      "learning_rate": 4.9984798374936746e-05,
      "loss": 0.1769,
      "step": 123
    },
    {
      "epoch": 0.056414922656960874,
      "grad_norm": 1.26166207922868,
      "learning_rate": 4.998454817240819e-05,
      "loss": 0.228,
      "step": 124
    },
    {
      "epoch": 0.05686988171064604,
      "grad_norm": 1.0488702039329023,
      "learning_rate": 4.998429592825519e-05,
      "loss": 0.2232,
      "step": 125
    },
    {
      "epoch": 0.05732484076433121,
      "grad_norm": 1.1303588453646947,
      "learning_rate": 4.998404164249835e-05,
      "loss": 0.2916,
      "step": 126
    },
    {
      "epoch": 0.05777979981801638,
      "grad_norm": 0.9768207513960141,
      "learning_rate": 4.998378531515845e-05,
      "loss": 0.2007,
      "step": 127
    },
    {
      "epoch": 0.05823475887170155,
      "grad_norm": 1.3353829960148134,
      "learning_rate": 4.9983526946256445e-05,
      "loss": 0.2173,
      "step": 128
    },
    {
      "epoch": 0.058689717925386714,
      "grad_norm": 0.7001213335640923,
      "learning_rate": 4.998326653581343e-05,
      "loss": 0.166,
      "step": 129
    },
    {
      "epoch": 0.059144676979071886,
      "grad_norm": 1.2238624903386932,
      "learning_rate": 4.9983004083850715e-05,
      "loss": 0.2569,
      "step": 130
    },
    {
      "epoch": 0.05959963603275705,
      "grad_norm": 1.3684464189312067,
      "learning_rate": 4.9982739590389715e-05,
      "loss": 0.3189,
      "step": 131
    },
    {
      "epoch": 0.06005459508644222,
      "grad_norm": 0.7768254773526782,
      "learning_rate": 4.9982473055452066e-05,
      "loss": 0.1713,
      "step": 132
    },
    {
      "epoch": 0.06050955414012739,
      "grad_norm": 0.6728637449638813,
      "learning_rate": 4.9982204479059536e-05,
      "loss": 0.2166,
      "step": 133
    },
    {
      "epoch": 0.060964513193812554,
      "grad_norm": 1.6115148715610486,
      "learning_rate": 4.998193386123408e-05,
      "loss": 0.2796,
      "step": 134
    },
    {
      "epoch": 0.061419472247497726,
      "grad_norm": 1.108628942130757,
      "learning_rate": 4.99816612019978e-05,
      "loss": 0.2579,
      "step": 135
    },
    {
      "epoch": 0.06187443130118289,
      "grad_norm": 1.1878551775987127,
      "learning_rate": 4.998138650137298e-05,
      "loss": 0.2313,
      "step": 136
    },
    {
      "epoch": 0.062329390354868064,
      "grad_norm": 1.0455316421319232,
      "learning_rate": 4.998110975938207e-05,
      "loss": 0.2745,
      "step": 137
    },
    {
      "epoch": 0.06278434940855324,
      "grad_norm": 1.0745831940051274,
      "learning_rate": 4.998083097604769e-05,
      "loss": 0.2655,
      "step": 138
    },
    {
      "epoch": 0.0632393084622384,
      "grad_norm": 1.087020436890146,
      "learning_rate": 4.9980550151392615e-05,
      "loss": 0.1969,
      "step": 139
    },
    {
      "epoch": 0.06369426751592357,
      "grad_norm": 1.2353262431194252,
      "learning_rate": 4.9980267285439786e-05,
      "loss": 0.2487,
      "step": 140
    },
    {
      "epoch": 0.06414922656960874,
      "grad_norm": 1.004527220834801,
      "learning_rate": 4.997998237821233e-05,
      "loss": 0.2572,
      "step": 141
    },
    {
      "epoch": 0.0646041856232939,
      "grad_norm": 0.6481729806282698,
      "learning_rate": 4.997969542973352e-05,
      "loss": 0.1804,
      "step": 142
    },
    {
      "epoch": 0.06505914467697907,
      "grad_norm": 0.9165848395572888,
      "learning_rate": 4.997940644002681e-05,
      "loss": 0.2285,
      "step": 143
    },
    {
      "epoch": 0.06551410373066424,
      "grad_norm": 1.3735320351537703,
      "learning_rate": 4.997911540911581e-05,
      "loss": 0.3031,
      "step": 144
    },
    {
      "epoch": 0.06596906278434941,
      "grad_norm": 0.8961782777502066,
      "learning_rate": 4.99788223370243e-05,
      "loss": 0.1931,
      "step": 145
    },
    {
      "epoch": 0.06642402183803457,
      "grad_norm": 0.9534806680319905,
      "learning_rate": 4.997852722377624e-05,
      "loss": 0.2524,
      "step": 146
    },
    {
      "epoch": 0.06687898089171974,
      "grad_norm": 0.8781945527977916,
      "learning_rate": 4.997823006939573e-05,
      "loss": 0.2098,
      "step": 147
    },
    {
      "epoch": 0.06733393994540492,
      "grad_norm": 1.0789792103175795,
      "learning_rate": 4.997793087390706e-05,
      "loss": 0.2162,
      "step": 148
    },
    {
      "epoch": 0.06778889899909009,
      "grad_norm": 0.9657175690419708,
      "learning_rate": 4.997762963733468e-05,
      "loss": 0.2094,
      "step": 149
    },
    {
      "epoch": 0.06824385805277525,
      "grad_norm": 1.2297473059622832,
      "learning_rate": 4.997732635970321e-05,
      "loss": 0.2803,
      "step": 150
    },
    {
      "epoch": 0.06869881710646042,
      "grad_norm": 0.6893752587378985,
      "learning_rate": 4.997702104103742e-05,
      "loss": 0.2118,
      "step": 151
    },
    {
      "epoch": 0.06915377616014559,
      "grad_norm": 1.1015130942838443,
      "learning_rate": 4.997671368136226e-05,
      "loss": 0.3035,
      "step": 152
    },
    {
      "epoch": 0.06960873521383075,
      "grad_norm": 1.0061561004814914,
      "learning_rate": 4.997640428070286e-05,
      "loss": 0.2895,
      "step": 153
    },
    {
      "epoch": 0.07006369426751592,
      "grad_norm": 0.9743072677983932,
      "learning_rate": 4.99760928390845e-05,
      "loss": 0.1877,
      "step": 154
    },
    {
      "epoch": 0.0705186533212011,
      "grad_norm": 1.0058819296060422,
      "learning_rate": 4.997577935653261e-05,
      "loss": 0.2035,
      "step": 155
    },
    {
      "epoch": 0.07097361237488627,
      "grad_norm": 1.1103663912790263,
      "learning_rate": 4.9975463833072836e-05,
      "loss": 0.2053,
      "step": 156
    },
    {
      "epoch": 0.07142857142857142,
      "grad_norm": 0.7832277046257746,
      "learning_rate": 4.9975146268730934e-05,
      "loss": 0.1901,
      "step": 157
    },
    {
      "epoch": 0.0718835304822566,
      "grad_norm": 0.9268069397893121,
      "learning_rate": 4.997482666353287e-05,
      "loss": 0.1847,
      "step": 158
    },
    {
      "epoch": 0.07233848953594177,
      "grad_norm": 0.9868441187983981,
      "learning_rate": 4.997450501750476e-05,
      "loss": 0.2229,
      "step": 159
    },
    {
      "epoch": 0.07279344858962693,
      "grad_norm": 0.8267098621379654,
      "learning_rate": 4.9974181330672875e-05,
      "loss": 0.1871,
      "step": 160
    },
    {
      "epoch": 0.0732484076433121,
      "grad_norm": 1.2522404547410633,
      "learning_rate": 4.997385560306368e-05,
      "loss": 0.2523,
      "step": 161
    },
    {
      "epoch": 0.07370336669699727,
      "grad_norm": 0.918397795660698,
      "learning_rate": 4.997352783470378e-05,
      "loss": 0.216,
      "step": 162
    },
    {
      "epoch": 0.07415832575068244,
      "grad_norm": 0.9403402374937561,
      "learning_rate": 4.9973198025619974e-05,
      "loss": 0.1893,
      "step": 163
    },
    {
      "epoch": 0.0746132848043676,
      "grad_norm": 0.8198319378738574,
      "learning_rate": 4.997286617583919e-05,
      "loss": 0.1564,
      "step": 164
    },
    {
      "epoch": 0.07506824385805277,
      "grad_norm": 0.949925121676635,
      "learning_rate": 4.997253228538857e-05,
      "loss": 0.2288,
      "step": 165
    },
    {
      "epoch": 0.07552320291173795,
      "grad_norm": 1.1504182808013323,
      "learning_rate": 4.997219635429539e-05,
      "loss": 0.2445,
      "step": 166
    },
    {
      "epoch": 0.07597816196542312,
      "grad_norm": 1.0240326241437807,
      "learning_rate": 4.997185838258709e-05,
      "loss": 0.2218,
      "step": 167
    },
    {
      "epoch": 0.07643312101910828,
      "grad_norm": 1.2834368493772934,
      "learning_rate": 4.997151837029129e-05,
      "loss": 0.2349,
      "step": 168
    },
    {
      "epoch": 0.07688808007279345,
      "grad_norm": 0.8593772097880827,
      "learning_rate": 4.997117631743579e-05,
      "loss": 0.195,
      "step": 169
    },
    {
      "epoch": 0.07734303912647862,
      "grad_norm": 1.1249976489970943,
      "learning_rate": 4.997083222404852e-05,
      "loss": 0.229,
      "step": 170
    },
    {
      "epoch": 0.07779799818016378,
      "grad_norm": 0.9365739267800142,
      "learning_rate": 4.997048609015762e-05,
      "loss": 0.1726,
      "step": 171
    },
    {
      "epoch": 0.07825295723384895,
      "grad_norm": 1.1274650771481407,
      "learning_rate": 4.997013791579136e-05,
      "loss": 0.2207,
      "step": 172
    },
    {
      "epoch": 0.07870791628753412,
      "grad_norm": 1.0674007035527455,
      "learning_rate": 4.996978770097819e-05,
      "loss": 0.2367,
      "step": 173
    },
    {
      "epoch": 0.0791628753412193,
      "grad_norm": 0.98369179113072,
      "learning_rate": 4.996943544574672e-05,
      "loss": 0.2159,
      "step": 174
    },
    {
      "epoch": 0.07961783439490445,
      "grad_norm": 0.9766441420396662,
      "learning_rate": 4.9969081150125765e-05,
      "loss": 0.2068,
      "step": 175
    },
    {
      "epoch": 0.08007279344858963,
      "grad_norm": 1.0882037334216006,
      "learning_rate": 4.996872481414425e-05,
      "loss": 0.2406,
      "step": 176
    },
    {
      "epoch": 0.0805277525022748,
      "grad_norm": 1.3698592883299627,
      "learning_rate": 4.9968366437831305e-05,
      "loss": 0.2928,
      "step": 177
    },
    {
      "epoch": 0.08098271155595996,
      "grad_norm": 0.6987520632973869,
      "learning_rate": 4.99680060212162e-05,
      "loss": 0.1764,
      "step": 178
    },
    {
      "epoch": 0.08143767060964513,
      "grad_norm": 1.3027058622416998,
      "learning_rate": 4.9967643564328407e-05,
      "loss": 0.2724,
      "step": 179
    },
    {
      "epoch": 0.0818926296633303,
      "grad_norm": 0.8947764994248179,
      "learning_rate": 4.996727906719754e-05,
      "loss": 0.2063,
      "step": 180
    },
    {
      "epoch": 0.08234758871701547,
      "grad_norm": 0.9763597484914425,
      "learning_rate": 4.996691252985336e-05,
      "loss": 0.2028,
      "step": 181
    },
    {
      "epoch": 0.08280254777070063,
      "grad_norm": 0.9150363076013049,
      "learning_rate": 4.996654395232585e-05,
      "loss": 0.2276,
      "step": 182
    },
    {
      "epoch": 0.0832575068243858,
      "grad_norm": 0.9103497357752012,
      "learning_rate": 4.9966173334645115e-05,
      "loss": 0.2185,
      "step": 183
    },
    {
      "epoch": 0.08371246587807098,
      "grad_norm": 0.8247430263974811,
      "learning_rate": 4.9965800676841445e-05,
      "loss": 0.2071,
      "step": 184
    },
    {
      "epoch": 0.08416742493175614,
      "grad_norm": 0.7480935933799238,
      "learning_rate": 4.996542597894529e-05,
      "loss": 0.235,
      "step": 185
    },
    {
      "epoch": 0.08462238398544131,
      "grad_norm": 0.7351885920182925,
      "learning_rate": 4.996504924098726e-05,
      "loss": 0.2277,
      "step": 186
    },
    {
      "epoch": 0.08507734303912648,
      "grad_norm": 0.6852087113550299,
      "learning_rate": 4.996467046299814e-05,
      "loss": 0.1962,
      "step": 187
    },
    {
      "epoch": 0.08553230209281165,
      "grad_norm": 0.7911136395423962,
      "learning_rate": 4.9964289645008896e-05,
      "loss": 0.2893,
      "step": 188
    },
    {
      "epoch": 0.08598726114649681,
      "grad_norm": 1.0822147942792688,
      "learning_rate": 4.996390678705065e-05,
      "loss": 0.3031,
      "step": 189
    },
    {
      "epoch": 0.08644222020018198,
      "grad_norm": 0.8503276338672248,
      "learning_rate": 4.996352188915467e-05,
      "loss": 0.2383,
      "step": 190
    },
    {
      "epoch": 0.08689717925386715,
      "grad_norm": 1.2786178320902941,
      "learning_rate": 4.9963134951352416e-05,
      "loss": 0.312,
      "step": 191
    },
    {
      "epoch": 0.08735213830755233,
      "grad_norm": 1.0179661012144667,
      "learning_rate": 4.99627459736755e-05,
      "loss": 0.2831,
      "step": 192
    },
    {
      "epoch": 0.08780709736123748,
      "grad_norm": 1.0898701286404884,
      "learning_rate": 4.996235495615572e-05,
      "loss": 0.2295,
      "step": 193
    },
    {
      "epoch": 0.08826205641492266,
      "grad_norm": 1.3217944667311619,
      "learning_rate": 4.996196189882502e-05,
      "loss": 0.2423,
      "step": 194
    },
    {
      "epoch": 0.08871701546860783,
      "grad_norm": 1.4153844083331932,
      "learning_rate": 4.996156680171552e-05,
      "loss": 0.3296,
      "step": 195
    },
    {
      "epoch": 0.08917197452229299,
      "grad_norm": 1.0161813818526169,
      "learning_rate": 4.996116966485951e-05,
      "loss": 0.242,
      "step": 196
    },
    {
      "epoch": 0.08962693357597816,
      "grad_norm": 1.1502736682331154,
      "learning_rate": 4.996077048828944e-05,
      "loss": 0.3,
      "step": 197
    },
    {
      "epoch": 0.09008189262966333,
      "grad_norm": 0.8340955562868464,
      "learning_rate": 4.996036927203792e-05,
      "loss": 0.2298,
      "step": 198
    },
    {
      "epoch": 0.0905368516833485,
      "grad_norm": 1.178331791996245,
      "learning_rate": 4.995996601613775e-05,
      "loss": 0.2546,
      "step": 199
    },
    {
      "epoch": 0.09099181073703366,
      "grad_norm": 0.9820106506239689,
      "learning_rate": 4.995956072062187e-05,
      "loss": 0.2085,
      "step": 200
    },
    {
      "epoch": 0.09144676979071883,
      "grad_norm": 1.524177968916044,
      "learning_rate": 4.995915338552341e-05,
      "loss": 0.2749,
      "step": 201
    },
    {
      "epoch": 0.09190172884440401,
      "grad_norm": 2.7908020909742537,
      "learning_rate": 4.9958744010875646e-05,
      "loss": 0.2282,
      "step": 202
    },
    {
      "epoch": 0.09235668789808917,
      "grad_norm": 1.161533853858501,
      "learning_rate": 4.995833259671203e-05,
      "loss": 0.272,
      "step": 203
    },
    {
      "epoch": 0.09281164695177434,
      "grad_norm": 1.3305550414451668,
      "learning_rate": 4.995791914306619e-05,
      "loss": 0.2615,
      "step": 204
    },
    {
      "epoch": 0.09326660600545951,
      "grad_norm": 0.9446324909125462,
      "learning_rate": 4.995750364997192e-05,
      "loss": 0.1978,
      "step": 205
    },
    {
      "epoch": 0.09372156505914468,
      "grad_norm": 2.025455569978896,
      "learning_rate": 4.995708611746314e-05,
      "loss": 0.2749,
      "step": 206
    },
    {
      "epoch": 0.09417652411282984,
      "grad_norm": 1.0190667998091618,
      "learning_rate": 4.995666654557399e-05,
      "loss": 0.2572,
      "step": 207
    },
    {
      "epoch": 0.09463148316651501,
      "grad_norm": 0.9876461146649904,
      "learning_rate": 4.9956244934338756e-05,
      "loss": 0.267,
      "step": 208
    },
    {
      "epoch": 0.09508644222020018,
      "grad_norm": 0.9085146041356152,
      "learning_rate": 4.9955821283791895e-05,
      "loss": 0.2624,
      "step": 209
    },
    {
      "epoch": 0.09554140127388536,
      "grad_norm": 1.0259809279791963,
      "learning_rate": 4.9955395593968e-05,
      "loss": 0.3108,
      "step": 210
    },
    {
      "epoch": 0.09599636032757052,
      "grad_norm": 1.2613005448897872,
      "learning_rate": 4.995496786490189e-05,
      "loss": 0.284,
      "step": 211
    },
    {
      "epoch": 0.09645131938125569,
      "grad_norm": 0.9490464963042589,
      "learning_rate": 4.9954538096628504e-05,
      "loss": 0.2577,
      "step": 212
    },
    {
      "epoch": 0.09690627843494086,
      "grad_norm": 1.094140068752516,
      "learning_rate": 4.995410628918294e-05,
      "loss": 0.2277,
      "step": 213
    },
    {
      "epoch": 0.09736123748862602,
      "grad_norm": 0.9564858342611784,
      "learning_rate": 4.995367244260052e-05,
      "loss": 0.252,
      "step": 214
    },
    {
      "epoch": 0.09781619654231119,
      "grad_norm": 0.9326203499108876,
      "learning_rate": 4.9953236556916675e-05,
      "loss": 0.1728,
      "step": 215
    },
    {
      "epoch": 0.09827115559599636,
      "grad_norm": 1.13491356573659,
      "learning_rate": 4.9952798632167016e-05,
      "loss": 0.2961,
      "step": 216
    },
    {
      "epoch": 0.09872611464968153,
      "grad_norm": 0.9866062616028243,
      "learning_rate": 4.995235866838735e-05,
      "loss": 0.2416,
      "step": 217
    },
    {
      "epoch": 0.09918107370336669,
      "grad_norm": 0.9499384498938079,
      "learning_rate": 4.995191666561361e-05,
      "loss": 0.2232,
      "step": 218
    },
    {
      "epoch": 0.09963603275705187,
      "grad_norm": 0.8667087934433083,
      "learning_rate": 4.995147262388192e-05,
      "loss": 0.166,
      "step": 219
    },
    {
      "epoch": 0.10009099181073704,
      "grad_norm": 1.2643430421929842,
      "learning_rate": 4.9951026543228576e-05,
      "loss": 0.2608,
      "step": 220
    },
    {
      "epoch": 0.1005459508644222,
      "grad_norm": 0.8444091468054182,
      "learning_rate": 4.995057842369002e-05,
      "loss": 0.1427,
      "step": 221
    },
    {
      "epoch": 0.10100090991810737,
      "grad_norm": 1.0191236814021594,
      "learning_rate": 4.995012826530287e-05,
      "loss": 0.2715,
      "step": 222
    },
    {
      "epoch": 0.10145586897179254,
      "grad_norm": 0.9113183017750295,
      "learning_rate": 4.9949676068103904e-05,
      "loss": 0.2562,
      "step": 223
    },
    {
      "epoch": 0.10191082802547771,
      "grad_norm": 0.9633172136168865,
      "learning_rate": 4.994922183213009e-05,
      "loss": 0.2465,
      "step": 224
    },
    {
      "epoch": 0.10236578707916287,
      "grad_norm": 0.8031972085872137,
      "learning_rate": 4.994876555741853e-05,
      "loss": 0.1843,
      "step": 225
    },
    {
      "epoch": 0.10282074613284804,
      "grad_norm": 1.223758887068779,
      "learning_rate": 4.994830724400652e-05,
      "loss": 0.3388,
      "step": 226
    },
    {
      "epoch": 0.10327570518653321,
      "grad_norm": 0.8627575799289282,
      "learning_rate": 4.9947846891931517e-05,
      "loss": 0.1879,
      "step": 227
    },
    {
      "epoch": 0.10373066424021839,
      "grad_norm": 0.9083456008163456,
      "learning_rate": 4.9947384501231115e-05,
      "loss": 0.2431,
      "step": 228
    },
    {
      "epoch": 0.10418562329390355,
      "grad_norm": 0.967227861205465,
      "learning_rate": 4.994692007194312e-05,
      "loss": 0.2291,
      "step": 229
    },
    {
      "epoch": 0.10464058234758872,
      "grad_norm": 0.6397850394521112,
      "learning_rate": 4.9946453604105475e-05,
      "loss": 0.172,
      "step": 230
    },
    {
      "epoch": 0.10509554140127389,
      "grad_norm": 0.8612207427346141,
      "learning_rate": 4.99459850977563e-05,
      "loss": 0.193,
      "step": 231
    },
    {
      "epoch": 0.10555050045495905,
      "grad_norm": 1.2587193005638013,
      "learning_rate": 4.9945514552933875e-05,
      "loss": 0.2351,
      "step": 232
    },
    {
      "epoch": 0.10600545950864422,
      "grad_norm": 0.8448212904705226,
      "learning_rate": 4.994504196967665e-05,
      "loss": 0.2292,
      "step": 233
    },
    {
      "epoch": 0.10646041856232939,
      "grad_norm": 0.9479092781594234,
      "learning_rate": 4.994456734802325e-05,
      "loss": 0.2009,
      "step": 234
    },
    {
      "epoch": 0.10691537761601456,
      "grad_norm": 0.7949133571568955,
      "learning_rate": 4.994409068801246e-05,
      "loss": 0.2409,
      "step": 235
    },
    {
      "epoch": 0.10737033666969972,
      "grad_norm": 0.6674087296660387,
      "learning_rate": 4.994361198968323e-05,
      "loss": 0.1628,
      "step": 236
    },
    {
      "epoch": 0.1078252957233849,
      "grad_norm": 1.201564983604896,
      "learning_rate": 4.994313125307466e-05,
      "loss": 0.2677,
      "step": 237
    },
    {
      "epoch": 0.10828025477707007,
      "grad_norm": 1.0431680248749846,
      "learning_rate": 4.994264847822605e-05,
      "loss": 0.289,
      "step": 238
    },
    {
      "epoch": 0.10873521383075523,
      "grad_norm": 0.9653424594034078,
      "learning_rate": 4.994216366517685e-05,
      "loss": 0.1968,
      "step": 239
    },
    {
      "epoch": 0.1091901728844404,
      "grad_norm": 0.9125387759488122,
      "learning_rate": 4.994167681396666e-05,
      "loss": 0.2617,
      "step": 240
    },
    {
      "epoch": 0.10964513193812557,
      "grad_norm": 0.9352652381091711,
      "learning_rate": 4.9941187924635294e-05,
      "loss": 0.2033,
      "step": 241
    },
    {
      "epoch": 0.11010009099181074,
      "grad_norm": 0.8486085196314103,
      "learning_rate": 4.9940696997222667e-05,
      "loss": 0.2521,
      "step": 242
    },
    {
      "epoch": 0.1105550500454959,
      "grad_norm": 0.7169086937032756,
      "learning_rate": 4.994020403176893e-05,
      "loss": 0.1947,
      "step": 243
    },
    {
      "epoch": 0.11101000909918107,
      "grad_norm": 0.7848172375628268,
      "learning_rate": 4.993970902831434e-05,
      "loss": 0.2079,
      "step": 244
    },
    {
      "epoch": 0.11146496815286625,
      "grad_norm": 1.1215727046768524,
      "learning_rate": 4.993921198689935e-05,
      "loss": 0.1722,
      "step": 245
    },
    {
      "epoch": 0.11191992720655142,
      "grad_norm": 0.8394128217152594,
      "learning_rate": 4.993871290756459e-05,
      "loss": 0.2277,
      "step": 246
    },
    {
      "epoch": 0.11237488626023658,
      "grad_norm": 1.0300622720949144,
      "learning_rate": 4.993821179035083e-05,
      "loss": 0.2363,
      "step": 247
    },
    {
      "epoch": 0.11282984531392175,
      "grad_norm": 1.0103941806691277,
      "learning_rate": 4.993770863529902e-05,
      "loss": 0.2604,
      "step": 248
    },
    {
      "epoch": 0.11328480436760692,
      "grad_norm": 1.5350533138761926,
      "learning_rate": 4.9937203442450284e-05,
      "loss": 0.2361,
      "step": 249
    },
    {
      "epoch": 0.11373976342129208,
      "grad_norm": 0.8993849316410959,
      "learning_rate": 4.993669621184589e-05,
      "loss": 0.2132,
      "step": 250
    },
    {
      "epoch": 0.11419472247497725,
      "grad_norm": 1.0288784473172377,
      "learning_rate": 4.99361869435273e-05,
      "loss": 0.1867,
      "step": 251
    },
    {
      "epoch": 0.11464968152866242,
      "grad_norm": 0.9642598487133237,
      "learning_rate": 4.993567563753613e-05,
      "loss": 0.2327,
      "step": 252
    },
    {
      "epoch": 0.1151046405823476,
      "grad_norm": 0.9627848388659644,
      "learning_rate": 4.9935162293914136e-05,
      "loss": 0.1813,
      "step": 253
    },
    {
      "epoch": 0.11555959963603275,
      "grad_norm": 1.1969772124808065,
      "learning_rate": 4.993464691270331e-05,
      "loss": 0.2246,
      "step": 254
    },
    {
      "epoch": 0.11601455868971793,
      "grad_norm": 0.8925549446282988,
      "learning_rate": 4.9934129493945724e-05,
      "loss": 0.2193,
      "step": 255
    },
    {
      "epoch": 0.1164695177434031,
      "grad_norm": 0.9009083702443086,
      "learning_rate": 4.993361003768369e-05,
      "loss": 0.1619,
      "step": 256
    },
    {
      "epoch": 0.11692447679708826,
      "grad_norm": 0.926318675344456,
      "learning_rate": 4.9933088543959624e-05,
      "loss": 0.2685,
      "step": 257
    },
    {
      "epoch": 0.11737943585077343,
      "grad_norm": 0.9816855771807739,
      "learning_rate": 4.993256501281618e-05,
      "loss": 0.2272,
      "step": 258
    },
    {
      "epoch": 0.1178343949044586,
      "grad_norm": 0.8392667690733058,
      "learning_rate": 4.9932039444296105e-05,
      "loss": 0.2492,
      "step": 259
    },
    {
      "epoch": 0.11828935395814377,
      "grad_norm": 1.189859214740981,
      "learning_rate": 4.9931511838442364e-05,
      "loss": 0.2537,
      "step": 260
    },
    {
      "epoch": 0.11874431301182893,
      "grad_norm": 0.7328258163767538,
      "learning_rate": 4.993098219529807e-05,
      "loss": 0.2301,
      "step": 261
    },
    {
      "epoch": 0.1191992720655141,
      "grad_norm": 0.9123068157116897,
      "learning_rate": 4.9930450514906486e-05,
      "loss": 0.2432,
      "step": 262
    },
    {
      "epoch": 0.11965423111919928,
      "grad_norm": 1.0391414474764322,
      "learning_rate": 4.992991679731108e-05,
      "loss": 0.2477,
      "step": 263
    },
    {
      "epoch": 0.12010919017288443,
      "grad_norm": 0.9268303743602482,
      "learning_rate": 4.992938104255545e-05,
      "loss": 0.215,
      "step": 264
    },
    {
      "epoch": 0.1205641492265696,
      "grad_norm": 1.013893387467843,
      "learning_rate": 4.9928843250683385e-05,
      "loss": 0.2857,
      "step": 265
    },
    {
      "epoch": 0.12101910828025478,
      "grad_norm": 0.8602676777999626,
      "learning_rate": 4.9928303421738825e-05,
      "loss": 0.2401,
      "step": 266
    },
    {
      "epoch": 0.12147406733393995,
      "grad_norm": 0.6803832120130948,
      "learning_rate": 4.992776155576588e-05,
      "loss": 0.1871,
      "step": 267
    },
    {
      "epoch": 0.12192902638762511,
      "grad_norm": 0.6598469160631033,
      "learning_rate": 4.9927217652808847e-05,
      "loss": 0.174,
      "step": 268
    },
    {
      "epoch": 0.12238398544131028,
      "grad_norm": 1.2973520906729556,
      "learning_rate": 4.992667171291215e-05,
      "loss": 0.2946,
      "step": 269
    },
    {
      "epoch": 0.12283894449499545,
      "grad_norm": 1.2009193015365256,
      "learning_rate": 4.992612373612041e-05,
      "loss": 0.245,
      "step": 270
    },
    {
      "epoch": 0.12329390354868063,
      "grad_norm": 1.1244329576372483,
      "learning_rate": 4.99255737224784e-05,
      "loss": 0.2643,
      "step": 271
    },
    {
      "epoch": 0.12374886260236578,
      "grad_norm": 0.6441013893547167,
      "learning_rate": 4.9925021672031066e-05,
      "loss": 0.1954,
      "step": 272
    },
    {
      "epoch": 0.12420382165605096,
      "grad_norm": 1.0576659351271296,
      "learning_rate": 4.992446758482353e-05,
      "loss": 0.2442,
      "step": 273
    },
    {
      "epoch": 0.12465878070973613,
      "grad_norm": 0.9585929099664303,
      "learning_rate": 4.992391146090106e-05,
      "loss": 0.2368,
      "step": 274
    },
    {
      "epoch": 0.1251137397634213,
      "grad_norm": 0.7049091722308071,
      "learning_rate": 4.9923353300309096e-05,
      "loss": 0.1842,
      "step": 275
    },
    {
      "epoch": 0.12556869881710647,
      "grad_norm": 0.7320434028517127,
      "learning_rate": 4.992279310309326e-05,
      "loss": 0.2441,
      "step": 276
    },
    {
      "epoch": 0.12602365787079162,
      "grad_norm": 1.0894014477962464,
      "learning_rate": 4.992223086929931e-05,
      "loss": 0.3448,
      "step": 277
    },
    {
      "epoch": 0.1264786169244768,
      "grad_norm": 0.9204993506670345,
      "learning_rate": 4.99216665989732e-05,
      "loss": 0.242,
      "step": 278
    },
    {
      "epoch": 0.12693357597816196,
      "grad_norm": 0.940564609463127,
      "learning_rate": 4.992110029216106e-05,
      "loss": 0.2307,
      "step": 279
    },
    {
      "epoch": 0.12738853503184713,
      "grad_norm": 0.7446733630340908,
      "learning_rate": 4.992053194890913e-05,
      "loss": 0.1732,
      "step": 280
    },
    {
      "epoch": 0.1278434940855323,
      "grad_norm": 0.9067882861735025,
      "learning_rate": 4.991996156926387e-05,
      "loss": 0.2074,
      "step": 281
    },
    {
      "epoch": 0.12829845313921748,
      "grad_norm": 1.1603498037062319,
      "learning_rate": 4.99193891532719e-05,
      "loss": 0.2815,
      "step": 282
    },
    {
      "epoch": 0.12875341219290265,
      "grad_norm": 0.609720699641334,
      "learning_rate": 4.9918814700979977e-05,
      "loss": 0.25,
      "step": 283
    },
    {
      "epoch": 0.1292083712465878,
      "grad_norm": 0.9279501297825673,
      "learning_rate": 4.9918238212435046e-05,
      "loss": 0.2151,
      "step": 284
    },
    {
      "epoch": 0.12966333030027297,
      "grad_norm": 1.145147710706986,
      "learning_rate": 4.991765968768422e-05,
      "loss": 0.2294,
      "step": 285
    },
    {
      "epoch": 0.13011828935395814,
      "grad_norm": 1.0963594661629925,
      "learning_rate": 4.991707912677478e-05,
      "loss": 0.2154,
      "step": 286
    },
    {
      "epoch": 0.1305732484076433,
      "grad_norm": 0.8368215676876812,
      "learning_rate": 4.991649652975414e-05,
      "loss": 0.1895,
      "step": 287
    },
    {
      "epoch": 0.13102820746132848,
      "grad_norm": 1.2728391512581174,
      "learning_rate": 4.991591189666994e-05,
      "loss": 0.336,
      "step": 288
    },
    {
      "epoch": 0.13148316651501366,
      "grad_norm": 0.9217489919301896,
      "learning_rate": 4.991532522756993e-05,
      "loss": 0.2335,
      "step": 289
    },
    {
      "epoch": 0.13193812556869883,
      "grad_norm": 0.7733762857858478,
      "learning_rate": 4.991473652250207e-05,
      "loss": 0.2109,
      "step": 290
    },
    {
      "epoch": 0.13239308462238397,
      "grad_norm": 0.9885925779386018,
      "learning_rate": 4.991414578151444e-05,
      "loss": 0.1912,
      "step": 291
    },
    {
      "epoch": 0.13284804367606914,
      "grad_norm": 1.1103839302767298,
      "learning_rate": 4.991355300465534e-05,
      "loss": 0.2717,
      "step": 292
    },
    {
      "epoch": 0.13330300272975432,
      "grad_norm": 1.115391456880468,
      "learning_rate": 4.99129581919732e-05,
      "loss": 0.2988,
      "step": 293
    },
    {
      "epoch": 0.1337579617834395,
      "grad_norm": 1.0310643412765783,
      "learning_rate": 4.991236134351661e-05,
      "loss": 0.1864,
      "step": 294
    },
    {
      "epoch": 0.13421292083712466,
      "grad_norm": 0.9946918597809171,
      "learning_rate": 4.991176245933437e-05,
      "loss": 0.2637,
      "step": 295
    },
    {
      "epoch": 0.13466787989080983,
      "grad_norm": 0.9411380293671334,
      "learning_rate": 4.991116153947539e-05,
      "loss": 0.2349,
      "step": 296
    },
    {
      "epoch": 0.135122838944495,
      "grad_norm": 1.0955222841539587,
      "learning_rate": 4.9910558583988784e-05,
      "loss": 0.2716,
      "step": 297
    },
    {
      "epoch": 0.13557779799818018,
      "grad_norm": 1.1869760751324405,
      "learning_rate": 4.9909953592923835e-05,
      "loss": 0.2991,
      "step": 298
    },
    {
      "epoch": 0.13603275705186532,
      "grad_norm": 0.8305527348011584,
      "learning_rate": 4.990934656632997e-05,
      "loss": 0.2322,
      "step": 299
    },
    {
      "epoch": 0.1364877161055505,
      "grad_norm": 1.0771681383539558,
      "learning_rate": 4.9908737504256786e-05,
      "loss": 0.282,
      "step": 300
    },
    {
      "epoch": 0.13694267515923567,
      "grad_norm": 1.0219579324950732,
      "learning_rate": 4.9908126406754066e-05,
      "loss": 0.1965,
      "step": 301
    },
    {
      "epoch": 0.13739763421292084,
      "grad_norm": 1.0882518015247715,
      "learning_rate": 4.9907513273871744e-05,
      "loss": 0.2154,
      "step": 302
    },
    {
      "epoch": 0.137852593266606,
      "grad_norm": 1.0267367119820836,
      "learning_rate": 4.99068981056599e-05,
      "loss": 0.2081,
      "step": 303
    },
    {
      "epoch": 0.13830755232029118,
      "grad_norm": 1.0586655405854108,
      "learning_rate": 4.990628090216885e-05,
      "loss": 0.2764,
      "step": 304
    },
    {
      "epoch": 0.13876251137397635,
      "grad_norm": 1.1488116625073979,
      "learning_rate": 4.990566166344898e-05,
      "loss": 0.2099,
      "step": 305
    },
    {
      "epoch": 0.1392174704276615,
      "grad_norm": 1.0108872679147511,
      "learning_rate": 4.9905040389550913e-05,
      "loss": 0.219,
      "step": 306
    },
    {
      "epoch": 0.13967242948134667,
      "grad_norm": 1.128979049955556,
      "learning_rate": 4.9904417080525426e-05,
      "loss": 0.2271,
      "step": 307
    },
    {
      "epoch": 0.14012738853503184,
      "grad_norm": 0.9184211601572116,
      "learning_rate": 4.990379173642343e-05,
      "loss": 0.2153,
      "step": 308
    },
    {
      "epoch": 0.14058234758871702,
      "grad_norm": 0.7498191369944927,
      "learning_rate": 4.990316435729604e-05,
      "loss": 0.1545,
      "step": 309
    },
    {
      "epoch": 0.1410373066424022,
      "grad_norm": 0.7359338352556679,
      "learning_rate": 4.990253494319453e-05,
      "loss": 0.2569,
      "step": 310
    },
    {
      "epoch": 0.14149226569608736,
      "grad_norm": 1.1586293882987138,
      "learning_rate": 4.990190349417032e-05,
      "loss": 0.3302,
      "step": 311
    },
    {
      "epoch": 0.14194722474977253,
      "grad_norm": 1.0303817169406913,
      "learning_rate": 4.990127001027501e-05,
      "loss": 0.2244,
      "step": 312
    },
    {
      "epoch": 0.14240218380345768,
      "grad_norm": 0.9208675144447142,
      "learning_rate": 4.9900634491560366e-05,
      "loss": 0.2853,
      "step": 313
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 1.2080650894164364,
      "learning_rate": 4.989999693807832e-05,
      "loss": 0.2598,
      "step": 314
    },
    {
      "epoch": 0.14331210191082802,
      "grad_norm": 1.3176583061924925,
      "learning_rate": 4.989935734988098e-05,
      "loss": 0.2665,
      "step": 315
    },
    {
      "epoch": 0.1437670609645132,
      "grad_norm": 1.155936755367866,
      "learning_rate": 4.9898715727020594e-05,
      "loss": 0.2152,
      "step": 316
    },
    {
      "epoch": 0.14422202001819837,
      "grad_norm": 1.244015310208092,
      "learning_rate": 4.9898072069549604e-05,
      "loss": 0.3472,
      "step": 317
    },
    {
      "epoch": 0.14467697907188354,
      "grad_norm": 0.6619110373379009,
      "learning_rate": 4.9897426377520605e-05,
      "loss": 0.1743,
      "step": 318
    },
    {
      "epoch": 0.1451319381255687,
      "grad_norm": 0.863849098825801,
      "learning_rate": 4.989677865098635e-05,
      "loss": 0.2153,
      "step": 319
    },
    {
      "epoch": 0.14558689717925385,
      "grad_norm": 0.7649715189892816,
      "learning_rate": 4.989612888999978e-05,
      "loss": 0.1691,
      "step": 320
    },
    {
      "epoch": 0.14604185623293903,
      "grad_norm": 0.8713448612177858,
      "learning_rate": 4.9895477094613994e-05,
      "loss": 0.2083,
      "step": 321
    },
    {
      "epoch": 0.1464968152866242,
      "grad_norm": 0.728985195295809,
      "learning_rate": 4.989482326488224e-05,
      "loss": 0.203,
      "step": 322
    },
    {
      "epoch": 0.14695177434030937,
      "grad_norm": 0.888181720232883,
      "learning_rate": 4.989416740085796e-05,
      "loss": 0.2295,
      "step": 323
    },
    {
      "epoch": 0.14740673339399454,
      "grad_norm": 1.0922587657288432,
      "learning_rate": 4.9893509502594735e-05,
      "loss": 0.2684,
      "step": 324
    },
    {
      "epoch": 0.14786169244767972,
      "grad_norm": 0.9971220529421283,
      "learning_rate": 4.989284957014633e-05,
      "loss": 0.2617,
      "step": 325
    },
    {
      "epoch": 0.1483166515013649,
      "grad_norm": 0.9870448743720823,
      "learning_rate": 4.989218760356668e-05,
      "loss": 0.2318,
      "step": 326
    },
    {
      "epoch": 0.14877161055505003,
      "grad_norm": 0.9018589981975008,
      "learning_rate": 4.9891523602909864e-05,
      "loss": 0.2247,
      "step": 327
    },
    {
      "epoch": 0.1492265696087352,
      "grad_norm": 0.8598483031018114,
      "learning_rate": 4.989085756823015e-05,
      "loss": 0.2632,
      "step": 328
    },
    {
      "epoch": 0.14968152866242038,
      "grad_norm": 0.8643781078050121,
      "learning_rate": 4.9890189499581966e-05,
      "loss": 0.2734,
      "step": 329
    },
    {
      "epoch": 0.15013648771610555,
      "grad_norm": 0.7687464204962443,
      "learning_rate": 4.9889519397019897e-05,
      "loss": 0.1697,
      "step": 330
    },
    {
      "epoch": 0.15059144676979072,
      "grad_norm": 1.0106162938009775,
      "learning_rate": 4.98888472605987e-05,
      "loss": 0.2288,
      "step": 331
    },
    {
      "epoch": 0.1510464058234759,
      "grad_norm": 1.0671673871836995,
      "learning_rate": 4.98881730903733e-05,
      "loss": 0.2301,
      "step": 332
    },
    {
      "epoch": 0.15150136487716107,
      "grad_norm": 0.8400240886557742,
      "learning_rate": 4.98874968863988e-05,
      "loss": 0.2399,
      "step": 333
    },
    {
      "epoch": 0.15195632393084624,
      "grad_norm": 0.9664168166027034,
      "learning_rate": 4.988681864873044e-05,
      "loss": 0.2194,
      "step": 334
    },
    {
      "epoch": 0.15241128298453138,
      "grad_norm": 0.969020664042685,
      "learning_rate": 4.988613837742364e-05,
      "loss": 0.2592,
      "step": 335
    },
    {
      "epoch": 0.15286624203821655,
      "grad_norm": 1.0823638241901714,
      "learning_rate": 4.988545607253401e-05,
      "loss": 0.2276,
      "step": 336
    },
    {
      "epoch": 0.15332120109190173,
      "grad_norm": 1.2217498650546794,
      "learning_rate": 4.9884771734117283e-05,
      "loss": 0.2991,
      "step": 337
    },
    {
      "epoch": 0.1537761601455869,
      "grad_norm": 0.7501090403731316,
      "learning_rate": 4.988408536222939e-05,
      "loss": 0.1973,
      "step": 338
    },
    {
      "epoch": 0.15423111919927207,
      "grad_norm": 0.8921758076703502,
      "learning_rate": 4.988339695692641e-05,
      "loss": 0.2525,
      "step": 339
    },
    {
      "epoch": 0.15468607825295724,
      "grad_norm": 0.9226622299381118,
      "learning_rate": 4.988270651826461e-05,
      "loss": 0.2292,
      "step": 340
    },
    {
      "epoch": 0.15514103730664242,
      "grad_norm": 1.152103512210007,
      "learning_rate": 4.9882014046300406e-05,
      "loss": 0.2679,
      "step": 341
    },
    {
      "epoch": 0.15559599636032756,
      "grad_norm": 0.9363582022115696,
      "learning_rate": 4.988131954109038e-05,
      "loss": 0.2523,
      "step": 342
    },
    {
      "epoch": 0.15605095541401273,
      "grad_norm": 0.7051604140254526,
      "learning_rate": 4.988062300269128e-05,
      "loss": 0.2668,
      "step": 343
    },
    {
      "epoch": 0.1565059144676979,
      "grad_norm": 0.784310468103235,
      "learning_rate": 4.987992443116003e-05,
      "loss": 0.1844,
      "step": 344
    },
    {
      "epoch": 0.15696087352138308,
      "grad_norm": 1.081270224193415,
      "learning_rate": 4.9879223826553715e-05,
      "loss": 0.2158,
      "step": 345
    },
    {
      "epoch": 0.15741583257506825,
      "grad_norm": 0.9309294315799115,
      "learning_rate": 4.987852118892957e-05,
      "loss": 0.3169,
      "step": 346
    },
    {
      "epoch": 0.15787079162875342,
      "grad_norm": 0.8582087279169507,
      "learning_rate": 4.987781651834503e-05,
      "loss": 0.2792,
      "step": 347
    },
    {
      "epoch": 0.1583257506824386,
      "grad_norm": 0.9222514197836991,
      "learning_rate": 4.9877109814857684e-05,
      "loss": 0.2174,
      "step": 348
    },
    {
      "epoch": 0.15878070973612374,
      "grad_norm": 1.2123517073269288,
      "learning_rate": 4.987640107852525e-05,
      "loss": 0.2949,
      "step": 349
    },
    {
      "epoch": 0.1592356687898089,
      "grad_norm": 0.8447356256741352,
      "learning_rate": 4.987569030940567e-05,
      "loss": 0.2381,
      "step": 350
    },
    {
      "epoch": 0.15969062784349408,
      "grad_norm": 0.748932841941387,
      "learning_rate": 4.987497750755702e-05,
      "loss": 0.1292,
      "step": 351
    },
    {
      "epoch": 0.16014558689717925,
      "grad_norm": 0.898610169466206,
      "learning_rate": 4.9874262673037534e-05,
      "loss": 0.2873,
      "step": 352
    },
    {
      "epoch": 0.16060054595086443,
      "grad_norm": 1.0240724236709602,
      "learning_rate": 4.987354580590563e-05,
      "loss": 0.2417,
      "step": 353
    },
    {
      "epoch": 0.1610555050045496,
      "grad_norm": 0.8622421630738083,
      "learning_rate": 4.98728269062199e-05,
      "loss": 0.2551,
      "step": 354
    },
    {
      "epoch": 0.16151046405823477,
      "grad_norm": 0.8134532647830077,
      "learning_rate": 4.987210597403907e-05,
      "loss": 0.2174,
      "step": 355
    },
    {
      "epoch": 0.16196542311191992,
      "grad_norm": 0.9534453999496592,
      "learning_rate": 4.987138300942208e-05,
      "loss": 0.2366,
      "step": 356
    },
    {
      "epoch": 0.1624203821656051,
      "grad_norm": 1.0371844945284734,
      "learning_rate": 4.9870658012427974e-05,
      "loss": 0.2335,
      "step": 357
    },
    {
      "epoch": 0.16287534121929026,
      "grad_norm": 0.9004202299256066,
      "learning_rate": 4.986993098311601e-05,
      "loss": 0.242,
      "step": 358
    },
    {
      "epoch": 0.16333030027297543,
      "grad_norm": 0.9781473802486437,
      "learning_rate": 4.9869201921545605e-05,
      "loss": 0.2204,
      "step": 359
    },
    {
      "epoch": 0.1637852593266606,
      "grad_norm": 0.8274281201466194,
      "learning_rate": 4.986847082777632e-05,
      "loss": 0.2348,
      "step": 360
    },
    {
      "epoch": 0.16424021838034578,
      "grad_norm": 0.9334603870585367,
      "learning_rate": 4.9867737701867904e-05,
      "loss": 0.2563,
      "step": 361
    },
    {
      "epoch": 0.16469517743403095,
      "grad_norm": 0.9312184132525261,
      "learning_rate": 4.986700254388027e-05,
      "loss": 0.3632,
      "step": 362
    },
    {
      "epoch": 0.1651501364877161,
      "grad_norm": 0.8232839110015855,
      "learning_rate": 4.9866265353873484e-05,
      "loss": 0.2639,
      "step": 363
    },
    {
      "epoch": 0.16560509554140126,
      "grad_norm": 1.2193684853775268,
      "learning_rate": 4.9865526131907794e-05,
      "loss": 0.2573,
      "step": 364
    },
    {
      "epoch": 0.16606005459508644,
      "grad_norm": 0.9585919856440537,
      "learning_rate": 4.986478487804359e-05,
      "loss": 0.2898,
      "step": 365
    },
    {
      "epoch": 0.1665150136487716,
      "grad_norm": 1.1391585911626447,
      "learning_rate": 4.986404159234146e-05,
      "loss": 0.2924,
      "step": 366
    },
    {
      "epoch": 0.16696997270245678,
      "grad_norm": 0.8553657475720432,
      "learning_rate": 4.9863296274862134e-05,
      "loss": 0.2228,
      "step": 367
    },
    {
      "epoch": 0.16742493175614195,
      "grad_norm": 0.7763035651416975,
      "learning_rate": 4.9862548925666516e-05,
| "loss": 0.2544, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.16787989080982713, |
| "grad_norm": 0.9598552659524475, |
| "learning_rate": 4.986179954481568e-05, |
| "loss": 0.2266, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.16833484986351227, |
| "grad_norm": 0.7915076916881143, |
| "learning_rate": 4.986104813237086e-05, |
| "loss": 0.2141, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16878980891719744, |
| "grad_norm": 0.93062460876264, |
| "learning_rate": 4.986029468839346e-05, |
| "loss": 0.2217, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.16924476797088261, |
| "grad_norm": 0.9695657860712834, |
| "learning_rate": 4.985953921294505e-05, |
| "loss": 0.3365, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1696997270245678, |
| "grad_norm": 1.242731787426362, |
| "learning_rate": 4.9858781706087355e-05, |
| "loss": 0.2818, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.17015468607825296, |
| "grad_norm": 0.8957753411556577, |
| "learning_rate": 4.985802216788228e-05, |
| "loss": 0.2692, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.17060964513193813, |
| "grad_norm": 1.0273281941675514, |
| "learning_rate": 4.985726059839189e-05, |
| "loss": 0.2713, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1710646041856233, |
| "grad_norm": 0.5842102943211992, |
| "learning_rate": 4.985649699767842e-05, |
| "loss": 0.1419, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.17151956323930848, |
| "grad_norm": 0.7692282188943281, |
| "learning_rate": 4.985573136580427e-05, |
| "loss": 0.2284, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.17197452229299362, |
| "grad_norm": 1.4274488633088522, |
| "learning_rate": 4.985496370283199e-05, |
| "loss": 0.2241, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1724294813466788, |
| "grad_norm": 0.9628915318446656, |
| "learning_rate": 4.9854194008824326e-05, |
| "loss": 0.2869, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.17288444040036396, |
| "grad_norm": 0.9678778944028185, |
| "learning_rate": 4.9853422283844176e-05, |
| "loss": 0.2386, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17333939945404914, |
| "grad_norm": 1.2851973245888821, |
| "learning_rate": 4.985264852795459e-05, |
| "loss": 0.3403, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1737943585077343, |
| "grad_norm": 1.1512549644049486, |
| "learning_rate": 4.98518727412188e-05, |
| "loss": 0.272, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.17424931756141948, |
| "grad_norm": 1.1034128519120743, |
| "learning_rate": 4.9851094923700194e-05, |
| "loss": 0.2314, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.17470427661510465, |
| "grad_norm": 1.0792900244616377, |
| "learning_rate": 4.985031507546234e-05, |
| "loss": 0.2241, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1751592356687898, |
| "grad_norm": 1.1833709250710265, |
| "learning_rate": 4.984953319656896e-05, |
| "loss": 0.2627, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.17561419472247497, |
| "grad_norm": 1.526133028770585, |
| "learning_rate": 4.9848749287083945e-05, |
| "loss": 0.3124, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.17606915377616014, |
| "grad_norm": 0.8253890824154251, |
| "learning_rate": 4.984796334707136e-05, |
| "loss": 0.2363, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.17652411282984531, |
| "grad_norm": 1.3213065879963553, |
| "learning_rate": 4.984717537659542e-05, |
| "loss": 0.2883, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1769790718835305, |
| "grad_norm": 1.2277238402287651, |
| "learning_rate": 4.9846385375720515e-05, |
| "loss": 0.2034, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.17743403093721566, |
| "grad_norm": 0.9029988848875732, |
| "learning_rate": 4.984559334451121e-05, |
| "loss": 0.1922, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.17788898999090083, |
| "grad_norm": 1.011318694572055, |
| "learning_rate": 4.984479928303221e-05, |
| "loss": 0.16, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.17834394904458598, |
| "grad_norm": 0.7842684083287716, |
| "learning_rate": 4.984400319134841e-05, |
| "loss": 0.1513, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.17879890809827115, |
| "grad_norm": 1.0047121229003204, |
| "learning_rate": 4.984320506952487e-05, |
| "loss": 0.2775, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.17925386715195632, |
| "grad_norm": 0.96838578798944, |
| "learning_rate": 4.9842404917626794e-05, |
| "loss": 0.2581, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1797088262056415, |
| "grad_norm": 0.8552424874926928, |
| "learning_rate": 4.984160273571958e-05, |
| "loss": 0.2185, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.18016378525932666, |
| "grad_norm": 0.7215923747123871, |
| "learning_rate": 4.9840798523868783e-05, |
| "loss": 0.2648, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.18061874431301184, |
| "grad_norm": 0.8273795348852334, |
| "learning_rate": 4.9839992282140104e-05, |
| "loss": 0.2044, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.181073703366697, |
| "grad_norm": 0.9785918148494038, |
| "learning_rate": 4.983918401059943e-05, |
| "loss": 0.2477, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.18152866242038215, |
| "grad_norm": 1.107312964787855, |
| "learning_rate": 4.983837370931282e-05, |
| "loss": 0.281, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.18198362147406733, |
| "grad_norm": 0.9090559022978792, |
| "learning_rate": 4.983756137834646e-05, |
| "loss": 0.2436, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1824385805277525, |
| "grad_norm": 1.2144867941478938, |
| "learning_rate": 4.9836747017766765e-05, |
| "loss": 0.2858, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.18289353958143767, |
| "grad_norm": 1.111249776533361, |
| "learning_rate": 4.9835930627640264e-05, |
| "loss": 0.2887, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.18334849863512284, |
| "grad_norm": 1.2328261736683936, |
| "learning_rate": 4.983511220803367e-05, |
| "loss": 0.3321, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.18380345768880801, |
| "grad_norm": 1.0256852027579921, |
| "learning_rate": 4.9834291759013864e-05, |
| "loss": 0.2447, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1842584167424932, |
| "grad_norm": 0.8916587179445885, |
| "learning_rate": 4.983346928064788e-05, |
| "loss": 0.235, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.18471337579617833, |
| "grad_norm": 0.8010374639077099, |
| "learning_rate": 4.983264477300293e-05, |
| "loss": 0.2049, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1851683348498635, |
| "grad_norm": 0.9259601734043552, |
| "learning_rate": 4.98318182361464e-05, |
| "loss": 0.2666, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.18562329390354868, |
| "grad_norm": 1.0996800951329422, |
| "learning_rate": 4.9830989670145825e-05, |
| "loss": 0.3126, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.18607825295723385, |
| "grad_norm": 0.9825813831593347, |
| "learning_rate": 4.98301590750689e-05, |
| "loss": 0.2945, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.18653321201091902, |
| "grad_norm": 0.826298037357574, |
| "learning_rate": 4.9829326450983514e-05, |
| "loss": 0.2436, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1869881710646042, |
| "grad_norm": 1.0778605984812317, |
| "learning_rate": 4.98284917979577e-05, |
| "loss": 0.244, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.18744313011828936, |
| "grad_norm": 0.8826503961722449, |
| "learning_rate": 4.9827655116059656e-05, |
| "loss": 0.2285, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.18789808917197454, |
| "grad_norm": 0.6783880431833905, |
| "learning_rate": 4.982681640535776e-05, |
| "loss": 0.2263, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.18835304822565968, |
| "grad_norm": 0.850607062096716, |
| "learning_rate": 4.9825975665920544e-05, |
| "loss": 0.1964, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.18880800727934485, |
| "grad_norm": 1.2053147239277264, |
| "learning_rate": 4.9825132897816705e-05, |
| "loss": 0.2821, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.18926296633303002, |
| "grad_norm": 0.9124205049308536, |
| "learning_rate": 4.982428810111512e-05, |
| "loss": 0.2914, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1897179253867152, |
| "grad_norm": 0.6319974506555571, |
| "learning_rate": 4.982344127588481e-05, |
| "loss": 0.1761, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.19017288444040037, |
| "grad_norm": 1.112189111727749, |
| "learning_rate": 4.982259242219499e-05, |
| "loss": 0.2411, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.19062784349408554, |
| "grad_norm": 0.8057841883773158, |
| "learning_rate": 4.982174154011501e-05, |
| "loss": 0.1791, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1910828025477707, |
| "grad_norm": 0.9489628556352278, |
| "learning_rate": 4.982088862971441e-05, |
| "loss": 0.2822, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19153776160145586, |
| "grad_norm": 1.2587143584633964, |
| "learning_rate": 4.982003369106287e-05, |
| "loss": 0.2872, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.19199272065514103, |
| "grad_norm": 1.0751564520471593, |
| "learning_rate": 4.981917672423027e-05, |
| "loss": 0.1679, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1924476797088262, |
| "grad_norm": 0.8596781622427906, |
| "learning_rate": 4.9818317729286637e-05, |
| "loss": 0.201, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.19290263876251137, |
| "grad_norm": 0.800295294104614, |
| "learning_rate": 4.981745670630216e-05, |
| "loss": 0.2201, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.19335759781619655, |
| "grad_norm": 0.825800169362288, |
| "learning_rate": 4.981659365534718e-05, |
| "loss": 0.2608, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.19381255686988172, |
| "grad_norm": 1.2537016281438078, |
| "learning_rate": 4.981572857649225e-05, |
| "loss": 0.3201, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.1942675159235669, |
| "grad_norm": 0.6630773711070428, |
| "learning_rate": 4.981486146980804e-05, |
| "loss": 0.1721, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.19472247497725204, |
| "grad_norm": 1.0456272834524787, |
| "learning_rate": 4.981399233536541e-05, |
| "loss": 0.2339, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1951774340309372, |
| "grad_norm": 0.7588258951531742, |
| "learning_rate": 4.98131211732354e-05, |
| "loss": 0.2164, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.19563239308462238, |
| "grad_norm": 1.1842167043892005, |
| "learning_rate": 4.981224798348917e-05, |
| "loss": 0.2695, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.19608735213830755, |
| "grad_norm": 0.7817668941153749, |
| "learning_rate": 4.981137276619809e-05, |
| "loss": 0.2618, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.19654231119199272, |
| "grad_norm": 1.6115466860711452, |
| "learning_rate": 4.981049552143368e-05, |
| "loss": 0.2529, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1969972702456779, |
| "grad_norm": 4.0902956604962775, |
| "learning_rate": 4.980961624926761e-05, |
| "loss": 0.4753, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.19745222929936307, |
| "grad_norm": 0.7960678611320803, |
| "learning_rate": 4.980873494977174e-05, |
| "loss": 0.2948, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1979071883530482, |
| "grad_norm": 1.0070958810069257, |
| "learning_rate": 4.980785162301809e-05, |
| "loss": 0.2567, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.19836214740673339, |
| "grad_norm": 0.8191302299252047, |
| "learning_rate": 4.980696626907883e-05, |
| "loss": 0.2168, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.19881710646041856, |
| "grad_norm": 0.7937387047871365, |
| "learning_rate": 4.980607888802633e-05, |
| "loss": 0.1955, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.19927206551410373, |
| "grad_norm": 0.8432008094692945, |
| "learning_rate": 4.9805189479933075e-05, |
| "loss": 0.2084, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1997270245677889, |
| "grad_norm": 0.8073292212036548, |
| "learning_rate": 4.9804298044871755e-05, |
| "loss": 0.2026, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.20018198362147407, |
| "grad_norm": 1.1300035170898861, |
| "learning_rate": 4.9803404582915216e-05, |
| "loss": 0.289, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20063694267515925, |
| "grad_norm": 0.8002343868088962, |
| "learning_rate": 4.9802509094136464e-05, |
| "loss": 0.2003, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2010919017288444, |
| "grad_norm": 1.1332893383280338, |
| "learning_rate": 4.980161157860868e-05, |
| "loss": 0.2597, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.20154686078252956, |
| "grad_norm": 1.3383795894954178, |
| "learning_rate": 4.98007120364052e-05, |
| "loss": 0.2769, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.20200181983621474, |
| "grad_norm": 0.7103836091735324, |
| "learning_rate": 4.9799810467599515e-05, |
| "loss": 0.1927, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2024567788898999, |
| "grad_norm": 0.9076048167447247, |
| "learning_rate": 4.979890687226533e-05, |
| "loss": 0.2232, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.20291173794358508, |
| "grad_norm": 1.0814403812455224, |
| "learning_rate": 4.979800125047647e-05, |
| "loss": 0.2275, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.20336669699727025, |
| "grad_norm": 0.9521195412119995, |
| "learning_rate": 4.979709360230692e-05, |
| "loss": 0.2505, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.20382165605095542, |
| "grad_norm": 0.8132871987985927, |
| "learning_rate": 4.9796183927830874e-05, |
| "loss": 0.1968, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.20427661510464057, |
| "grad_norm": 0.7688587774051666, |
| "learning_rate": 4.979527222712266e-05, |
| "loss": 0.1934, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.20473157415832574, |
| "grad_norm": 0.9493574268445784, |
| "learning_rate": 4.979435850025676e-05, |
| "loss": 0.2343, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2051865332120109, |
| "grad_norm": 1.3577065524441854, |
| "learning_rate": 4.979344274730786e-05, |
| "loss": 0.2941, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.20564149226569609, |
| "grad_norm": 1.51315156565673, |
| "learning_rate": 4.979252496835079e-05, |
| "loss": 0.3188, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.20609645131938126, |
| "grad_norm": 1.293280483202906, |
| "learning_rate": 4.979160516346053e-05, |
| "loss": 0.3135, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.20655141037306643, |
| "grad_norm": 0.9037700868581328, |
| "learning_rate": 4.979068333271227e-05, |
| "loss": 0.234, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2070063694267516, |
| "grad_norm": 1.573289370729168, |
| "learning_rate": 4.9789759476181306e-05, |
| "loss": 0.3095, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.20746132848043677, |
| "grad_norm": 0.8457915006097118, |
| "learning_rate": 4.9788833593943166e-05, |
| "loss": 0.2473, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.20791628753412192, |
| "grad_norm": 0.8884327578476467, |
| "learning_rate": 4.978790568607347e-05, |
| "loss": 0.2191, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2083712465878071, |
| "grad_norm": 0.9155886552129602, |
| "learning_rate": 4.9786975752648074e-05, |
| "loss": 0.2867, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.20882620564149226, |
| "grad_norm": 0.9084469838644137, |
| "learning_rate": 4.978604379374295e-05, |
| "loss": 0.2251, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.20928116469517744, |
| "grad_norm": 0.9575633260624943, |
| "learning_rate": 4.978510980943427e-05, |
| "loss": 0.2169, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2097361237488626, |
| "grad_norm": 1.1825466207472919, |
| "learning_rate": 4.978417379979834e-05, |
| "loss": 0.3653, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.21019108280254778, |
| "grad_norm": 0.8295971856442961, |
| "learning_rate": 4.978323576491164e-05, |
| "loss": 0.1957, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.21064604185623295, |
| "grad_norm": 0.8642494306586788, |
| "learning_rate": 4.978229570485085e-05, |
| "loss": 0.2874, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2111010009099181, |
| "grad_norm": 0.726091863884625, |
| "learning_rate": 4.978135361969276e-05, |
| "loss": 0.2367, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.21155595996360327, |
| "grad_norm": 0.8327914763971832, |
| "learning_rate": 4.978040950951437e-05, |
| "loss": 0.2376, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.21201091901728844, |
| "grad_norm": 0.8801851999194134, |
| "learning_rate": 4.9779463374392824e-05, |
| "loss": 0.2941, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2124658780709736, |
| "grad_norm": 0.7662817087663885, |
| "learning_rate": 4.977851521440543e-05, |
| "loss": 0.236, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.21292083712465878, |
| "grad_norm": 0.6744745228210125, |
| "learning_rate": 4.977756502962967e-05, |
| "loss": 0.1539, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.21337579617834396, |
| "grad_norm": 0.8640316984393297, |
| "learning_rate": 4.9776612820143195e-05, |
| "loss": 0.1792, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.21383075523202913, |
| "grad_norm": 0.8306296700939642, |
| "learning_rate": 4.977565858602381e-05, |
| "loss": 0.2239, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.8473860965213286, |
| "learning_rate": 4.9774702327349484e-05, |
| "loss": 0.1683, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.21474067333939945, |
| "grad_norm": 1.0675935292798793, |
| "learning_rate": 4.977374404419837e-05, |
| "loss": 0.2583, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.21519563239308462, |
| "grad_norm": 0.8722899471545442, |
| "learning_rate": 4.977278373664877e-05, |
| "loss": 0.2007, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2156505914467698, |
| "grad_norm": 0.8736194584238443, |
| "learning_rate": 4.977182140477916e-05, |
| "loss": 0.2573, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.21610555050045496, |
| "grad_norm": 0.8929224577448658, |
| "learning_rate": 4.9770857048668166e-05, |
| "loss": 0.2224, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.21656050955414013, |
| "grad_norm": 1.1703430933958943, |
| "learning_rate": 4.9769890668394605e-05, |
| "loss": 0.2598, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2170154686078253, |
| "grad_norm": 1.1687716722578678, |
| "learning_rate": 4.976892226403743e-05, |
| "loss": 0.2949, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.21747042766151045, |
| "grad_norm": 0.7851741818146389, |
| "learning_rate": 4.976795183567579e-05, |
| "loss": 0.1967, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.21792538671519562, |
| "grad_norm": 1.3368751218650363, |
| "learning_rate": 4.976697938338898e-05, |
| "loss": 0.1944, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2183803457688808, |
| "grad_norm": 0.7804098679474321, |
| "learning_rate": 4.976600490725645e-05, |
| "loss": 0.2162, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.21883530482256597, |
| "grad_norm": 0.8067282580539209, |
| "learning_rate": 4.976502840735785e-05, |
| "loss": 0.2696, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.21929026387625114, |
| "grad_norm": 0.881636931021168, |
| "learning_rate": 4.976404988377297e-05, |
| "loss": 0.1693, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2197452229299363, |
| "grad_norm": 0.9778356024485488, |
| "learning_rate": 4.9763069336581755e-05, |
| "loss": 0.3015, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.22020018198362148, |
| "grad_norm": 1.5158998257963476, |
| "learning_rate": 4.976208676586435e-05, |
| "loss": 0.2786, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.22065514103730663, |
| "grad_norm": 1.4003653927664346, |
| "learning_rate": 4.976110217170104e-05, |
| "loss": 0.2286, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2211101000909918, |
| "grad_norm": 1.113968195239722, |
| "learning_rate": 4.976011555417228e-05, |
| "loss": 0.2918, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.22156505914467697, |
| "grad_norm": 0.6814550890931536, |
| "learning_rate": 4.975912691335869e-05, |
| "loss": 0.1758, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.22202001819836215, |
| "grad_norm": 1.0250197993224708, |
| "learning_rate": 4.975813624934107e-05, |
| "loss": 0.2103, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.22247497725204732, |
| "grad_norm": 0.8530099663867843, |
| "learning_rate": 4.975714356220035e-05, |
| "loss": 0.1978, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2229299363057325, |
| "grad_norm": 0.8857383817331087, |
| "learning_rate": 4.9756148852017656e-05, |
| "loss": 0.214, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22338489535941766, |
| "grad_norm": 1.0067157373964037, |
| "learning_rate": 4.9755152118874294e-05, |
| "loss": 0.2172, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.22383985441310283, |
| "grad_norm": 0.7700875743307447, |
| "learning_rate": 4.975415336285168e-05, |
| "loss": 0.1673, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.22429481346678798, |
| "grad_norm": 0.8793173662516012, |
| "learning_rate": 4.9753152584031445e-05, |
| "loss": 0.213, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.22474977252047315, |
| "grad_norm": 0.7806046431854715, |
| "learning_rate": 4.975214978249537e-05, |
| "loss": 0.2404, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.22520473157415832, |
| "grad_norm": 1.0408321729609522, |
| "learning_rate": 4.975114495832539e-05, |
| "loss": 0.3285, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2256596906278435, |
| "grad_norm": 0.9763010914811852, |
| "learning_rate": 4.975013811160362e-05, |
| "loss": 0.2676, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.22611464968152867, |
| "grad_norm": 0.9362866491749535, |
| "learning_rate": 4.9749129242412326e-05, |
| "loss": 0.2318, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.22656960873521384, |
| "grad_norm": 0.9030290795617781, |
| "learning_rate": 4.9748118350833974e-05, |
| "loss": 0.2987, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.227024567788899, |
| "grad_norm": 1.0989605755069412, |
| "learning_rate": 4.974710543695114e-05, |
| "loss": 0.2956, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.22747952684258416, |
| "grad_norm": 1.1146444201421968, |
| "learning_rate": 4.974609050084661e-05, |
| "loss": 0.2891, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22793448589626933, |
| "grad_norm": 0.806494567271344, |
| "learning_rate": 4.9745073542603314e-05, |
| "loss": 0.2719, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2283894449499545, |
| "grad_norm": 0.9335000122264804, |
| "learning_rate": 4.974405456230435e-05, |
| "loss": 0.2584, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.22884440400363967, |
| "grad_norm": 1.1989868007046802, |
| "learning_rate": 4.9743033560033e-05, |
| "loss": 0.1895, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.22929936305732485, |
| "grad_norm": 0.8206793184670869, |
| "learning_rate": 4.974201053587268e-05, |
| "loss": 0.2312, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.22975432211101002, |
| "grad_norm": 0.8762870112318909, |
| "learning_rate": 4.974098548990701e-05, |
| "loss": 0.1712, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2302092811646952, |
| "grad_norm": 0.8810795527204738, |
| "learning_rate": 4.9739958422219714e-05, |
| "loss": 0.2485, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.23066424021838033, |
| "grad_norm": 0.9131927875121051, |
| "learning_rate": 4.9738929332894755e-05, |
| "loss": 0.2081, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2311191992720655, |
| "grad_norm": 1.1875363818207734, |
| "learning_rate": 4.97378982220162e-05, |
| "loss": 0.244, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.23157415832575068, |
| "grad_norm": 1.0859170773747107, |
| "learning_rate": 4.973686508966832e-05, |
| "loss": 0.2055, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.23202911737943585, |
| "grad_norm": 0.9133043813219233, |
| "learning_rate": 4.973582993593554e-05, |
| "loss": 0.2148, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.23248407643312102, |
| "grad_norm": 1.310737464485677, |
| "learning_rate": 4.973479276090244e-05, |
| "loss": 0.2494, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2329390354868062, |
| "grad_norm": 0.8529092254297224, |
| "learning_rate": 4.973375356465378e-05, |
| "loss": 0.1725, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.23339399454049137, |
| "grad_norm": 0.9465212302042785, |
| "learning_rate": 4.973271234727447e-05, |
| "loss": 0.2329, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2338489535941765, |
| "grad_norm": 0.7256334452449963, |
| "learning_rate": 4.973166910884961e-05, |
| "loss": 0.1795, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.23430391264786168, |
| "grad_norm": 1.093598727987344, |
| "learning_rate": 4.973062384946442e-05, |
| "loss": 0.3119, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.23475887170154686, |
| "grad_norm": 0.9015553238361032, |
| "learning_rate": 4.9729576569204345e-05, |
| "loss": 0.1417, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.23521383075523203, |
| "grad_norm": 0.7225604951862246, |
| "learning_rate": 4.972852726815495e-05, |
| "loss": 0.1946, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2356687898089172, |
| "grad_norm": 2.0905779078889766, |
| "learning_rate": 4.9727475946401966e-05, |
| "loss": 0.2124, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.23612374886260237, |
| "grad_norm": 0.9099581003839937, |
| "learning_rate": 4.972642260403133e-05, |
| "loss": 0.2184, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.23657870791628755, |
| "grad_norm": 0.7192008189313542, |
| "learning_rate": 4.9725367241129104e-05, |
| "loss": 0.2284, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2370336669699727, |
| "grad_norm": 0.6847301751652567, |
| "learning_rate": 4.972430985778152e-05, |
| "loss": 0.1635, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.23748862602365786, |
| "grad_norm": 1.0884703037776753, |
| "learning_rate": 4.9723250454074985e-05, |
| "loss": 0.2305, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.23794358507734303, |
| "grad_norm": 1.4823406398429884, |
| "learning_rate": 4.9722189030096076e-05, |
| "loss": 0.3141, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2383985441310282, |
| "grad_norm": 0.9543228410916551, |
| "learning_rate": 4.972112558593153e-05, |
| "loss": 0.24, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.23885350318471338, |
| "grad_norm": 0.8547072683379531, |
| "learning_rate": 4.9720060121668235e-05, |
| "loss": 0.2701, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.23930846223839855, |
| "grad_norm": 0.9503547053850286, |
| "learning_rate": 4.9718992637393256e-05, |
| "loss": 0.2121, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.23976342129208372, |
| "grad_norm": 0.9098591591106474, |
| "learning_rate": 4.971792313319384e-05, |
| "loss": 0.2523, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.24021838034576887, |
| "grad_norm": 0.7280425034062946, |
| "learning_rate": 4.971685160915737e-05, |
| "loss": 0.2164, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.24067333939945404, |
| "grad_norm": 0.8961690750579513, |
| "learning_rate": 4.9715778065371396e-05, |
| "loss": 0.2212, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2411282984531392, |
| "grad_norm": 0.8118127840747844, |
| "learning_rate": 4.971470250192366e-05, |
| "loss": 0.2166, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24158325750682438, |
| "grad_norm": 1.318299849716252, |
| "learning_rate": 4.971362491890205e-05, |
| "loss": 0.3136, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.24203821656050956, |
| "grad_norm": 1.0081824604890162, |
| "learning_rate": 4.971254531639461e-05, |
| "loss": 0.2202, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.24249317561419473, |
| "grad_norm": 1.2126916853817178, |
| "learning_rate": 4.971146369448957e-05, |
| "loss": 0.2829, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2429481346678799, |
| "grad_norm": 0.8472581042257032, |
| "learning_rate": 4.971038005327531e-05, |
| "loss": 0.2084, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.24340309372156507, |
| "grad_norm": 0.7594809604183035, |
| "learning_rate": 4.970929439284039e-05, |
| "loss": 0.2295, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.24385805277525022, |
| "grad_norm": 0.8636454287212753, |
| "learning_rate": 4.970820671327351e-05, |
| "loss": 0.2478, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2443130118289354, |
| "grad_norm": 0.7813693526252882, |
| "learning_rate": 4.970711701466357e-05, |
| "loss": 0.1914, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.24476797088262056, |
| "grad_norm": 0.7683586186622385, |
| "learning_rate": 4.9706025297099595e-05, |
| "loss": 0.2627, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.24522292993630573, |
| "grad_norm": 0.9410000807935135, |
| "learning_rate": 4.970493156067081e-05, |
| "loss": 0.2666, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2456778889899909, |
| "grad_norm": 0.8580159793846174, |
| "learning_rate": 4.970383580546658e-05, |
| "loss": 0.2128, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24613284804367608, |
| "grad_norm": 1.1638124362464524, |
| "learning_rate": 4.9702738031576445e-05, |
| "loss": 0.2837, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.24658780709736125, |
| "grad_norm": 0.8867809918481854, |
| "learning_rate": 4.970163823909013e-05, |
| "loss": 0.2137, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2470427661510464, |
| "grad_norm": 1.1121220889601369, |
| "learning_rate": 4.970053642809748e-05, |
| "loss": 0.2494, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.24749772520473157, |
| "grad_norm": 1.0107895407168916, |
| "learning_rate": 4.969943259868853e-05, |
| "loss": 0.2514, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.24795268425841674, |
| "grad_norm": 0.77613339632877, |
| "learning_rate": 4.969832675095351e-05, |
| "loss": 0.1802, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2484076433121019, |
| "grad_norm": 0.8752967804017977, |
| "learning_rate": 4.969721888498275e-05, |
| "loss": 0.2978, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.24886260236578708, |
| "grad_norm": 0.8441570308900757, |
| "learning_rate": 4.96961090008668e-05, |
| "loss": 0.199, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.24931756141947226, |
| "grad_norm": 0.6963956824698917, |
| "learning_rate": 4.969499709869635e-05, |
| "loss": 0.2844, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.24977252047315743, |
| "grad_norm": 0.8791555529767203, |
| "learning_rate": 4.969388317856225e-05, |
| "loss": 0.2021, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2502274795268426, |
| "grad_norm": 1.015606520913881, |
| "learning_rate": 4.969276724055554e-05, |
| "loss": 0.3095, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.25068243858052774, |
| "grad_norm": 0.7831490460351049, |
| "learning_rate": 4.9691649284767406e-05, |
| "loss": 0.1886, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.25113739763421294, |
| "grad_norm": 0.983327555515575, |
| "learning_rate": 4.969052931128919e-05, |
| "loss": 0.2694, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2515923566878981, |
| "grad_norm": 0.755569830796656, |
| "learning_rate": 4.968940732021243e-05, |
| "loss": 0.2242, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.25204731574158323, |
| "grad_norm": 0.9138676059935472, |
| "learning_rate": 4.968828331162879e-05, |
| "loss": 0.2757, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.25250227479526843, |
| "grad_norm": 0.9345695141979689, |
| "learning_rate": 4.968715728563014e-05, |
| "loss": 0.2751, |
| "step": 555 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 10990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 555, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4896118628352.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |