| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.262511373976342, |
| "eval_steps": 500, |
| "global_step": 2775, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00045495905368516835, |
| "grad_norm": 10.08474414591373, |
| "learning_rate": 5e-06, |
| "loss": 0.4268, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0009099181073703367, |
| "grad_norm": 7.187634396883529, |
| "learning_rate": 4.999999897855645e-06, |
| "loss": 0.4238, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001364877161055505, |
| "grad_norm": 3.8327630883917294, |
| "learning_rate": 4.9999995914225884e-06, |
| "loss": 0.2838, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0018198362147406734, |
| "grad_norm": 4.248807424602059, |
| "learning_rate": 4.999999080700855e-06, |
| "loss": 0.236, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0022747952684258415, |
| "grad_norm": 4.089663323785212, |
| "learning_rate": 4.999998365690486e-06, |
| "loss": 0.2601, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00272975432211101, |
| "grad_norm": 3.9876649053708864, |
| "learning_rate": 4.999997446391542e-06, |
| "loss": 0.2326, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0031847133757961785, |
| "grad_norm": 2.9111466473566785, |
| "learning_rate": 4.999996322804095e-06, |
| "loss": 0.2269, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.003639672429481347, |
| "grad_norm": 2.5524867538991827, |
| "learning_rate": 4.999994994928239e-06, |
| "loss": 0.2052, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.004094631483166515, |
| "grad_norm": 2.5545174637937094, |
| "learning_rate": 4.999993462764082e-06, |
| "loss": 0.2696, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.004549590536851683, |
| "grad_norm": 2.4518559406151006, |
| "learning_rate": 4.999991726311749e-06, |
| "loss": 0.1618, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005004549590536852, |
| "grad_norm": 3.405646599606387, |
| "learning_rate": 4.999989785571382e-06, |
| "loss": 0.2355, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00545950864422202, |
| "grad_norm": 3.4944361203618186, |
| "learning_rate": 4.999987640543139e-06, |
| "loss": 0.2585, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.005914467697907188, |
| "grad_norm": 2.5057214280143674, |
| "learning_rate": 4.999985291227196e-06, |
| "loss": 0.2235, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.006369426751592357, |
| "grad_norm": 3.617819326198201, |
| "learning_rate": 4.999982737623746e-06, |
| "loss": 0.3207, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.006824385805277525, |
| "grad_norm": 2.953536091708363, |
| "learning_rate": 4.999979979732995e-06, |
| "loss": 0.2543, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007279344858962694, |
| "grad_norm": 2.6415876340824465, |
| "learning_rate": 4.999977017555171e-06, |
| "loss": 0.174, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0077343039126478615, |
| "grad_norm": 2.62032982183088, |
| "learning_rate": 4.999973851090514e-06, |
| "loss": 0.2526, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.00818926296633303, |
| "grad_norm": 2.208495589846344, |
| "learning_rate": 4.999970480339284e-06, |
| "loss": 0.2381, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.008644222020018199, |
| "grad_norm": 4.827328107147866, |
| "learning_rate": 4.9999669053017564e-06, |
| "loss": 0.2259, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.009099181073703366, |
| "grad_norm": 3.293711347019613, |
| "learning_rate": 4.9999631259782235e-06, |
| "loss": 0.1889, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009554140127388535, |
| "grad_norm": 2.9449987435140708, |
| "learning_rate": 4.999959142368993e-06, |
| "loss": 0.1916, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.010009099181073703, |
| "grad_norm": 2.4684804441032533, |
| "learning_rate": 4.999954954474391e-06, |
| "loss": 0.2267, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.010464058234758872, |
| "grad_norm": 2.420072565048825, |
| "learning_rate": 4.9999505622947594e-06, |
| "loss": 0.1781, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01091901728844404, |
| "grad_norm": 3.190045330917334, |
| "learning_rate": 4.999945965830458e-06, |
| "loss": 0.204, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.011373976342129208, |
| "grad_norm": 3.144753224980832, |
| "learning_rate": 4.999941165081863e-06, |
| "loss": 0.1837, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.011828935395814377, |
| "grad_norm": 2.2772166419161026, |
| "learning_rate": 4.999936160049364e-06, |
| "loss": 0.203, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.012283894449499545, |
| "grad_norm": 2.842182064416549, |
| "learning_rate": 4.999930950733373e-06, |
| "loss": 0.2594, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.012738853503184714, |
| "grad_norm": 2.689259909233601, |
| "learning_rate": 4.999925537134312e-06, |
| "loss": 0.1829, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.013193812556869881, |
| "grad_norm": 2.6543387078431233, |
| "learning_rate": 4.9999199192526286e-06, |
| "loss": 0.209, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01364877161055505, |
| "grad_norm": 2.660710953873218, |
| "learning_rate": 4.9999140970887775e-06, |
| "loss": 0.2084, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014103730664240218, |
| "grad_norm": 3.1124474906382065, |
| "learning_rate": 4.999908070643236e-06, |
| "loss": 0.2088, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.014558689717925387, |
| "grad_norm": 2.750714892828661, |
| "learning_rate": 4.999901839916495e-06, |
| "loss": 0.1738, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.015013648771610554, |
| "grad_norm": 2.6053321715737314, |
| "learning_rate": 4.999895404909067e-06, |
| "loss": 0.1723, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.015468607825295723, |
| "grad_norm": 2.8576481166567587, |
| "learning_rate": 4.999888765621476e-06, |
| "loss": 0.1729, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01592356687898089, |
| "grad_norm": 2.773654545068012, |
| "learning_rate": 4.999881922054264e-06, |
| "loss": 0.1453, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01637852593266606, |
| "grad_norm": 2.037109443657936, |
| "learning_rate": 4.999874874207991e-06, |
| "loss": 0.1197, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01683348498635123, |
| "grad_norm": 2.6994551736744268, |
| "learning_rate": 4.999867622083232e-06, |
| "loss": 0.2238, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.017288444040036398, |
| "grad_norm": 2.634969731102202, |
| "learning_rate": 4.99986016568058e-06, |
| "loss": 0.2118, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.017743403093721567, |
| "grad_norm": 2.955393409573457, |
| "learning_rate": 4.999852505000646e-06, |
| "loss": 0.2215, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.018198362147406732, |
| "grad_norm": 2.0111122791563285, |
| "learning_rate": 4.999844640044053e-06, |
| "loss": 0.1216, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0186533212010919, |
| "grad_norm": 2.7660608350268077, |
| "learning_rate": 4.999836570811445e-06, |
| "loss": 0.1948, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01910828025477707, |
| "grad_norm": 2.581238704515564, |
| "learning_rate": 4.999828297303483e-06, |
| "loss": 0.2053, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.019563239308462238, |
| "grad_norm": 2.921825171868496, |
| "learning_rate": 4.9998198195208405e-06, |
| "loss": 0.2124, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.020018198362147407, |
| "grad_norm": 2.5257433259743145, |
| "learning_rate": 4.999811137464212e-06, |
| "loss": 0.1754, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.020473157415832575, |
| "grad_norm": 2.4051206013490947, |
| "learning_rate": 4.999802251134307e-06, |
| "loss": 0.2384, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.020928116469517744, |
| "grad_norm": 2.824019582183984, |
| "learning_rate": 4.99979316053185e-06, |
| "loss": 0.1845, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.021383075523202913, |
| "grad_norm": 2.4758052686748395, |
| "learning_rate": 4.999783865657585e-06, |
| "loss": 0.2639, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.02183803457688808, |
| "grad_norm": 3.3028306393170053, |
| "learning_rate": 4.999774366512272e-06, |
| "loss": 0.221, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.022292993630573247, |
| "grad_norm": 3.108709580219038, |
| "learning_rate": 4.9997646630966865e-06, |
| "loss": 0.2205, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.022747952684258416, |
| "grad_norm": 2.076369424843288, |
| "learning_rate": 4.999754755411621e-06, |
| "loss": 0.1336, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.023202911737943584, |
| "grad_norm": 2.7444959299225715, |
| "learning_rate": 4.9997446434578865e-06, |
| "loss": 0.1836, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.023657870791628753, |
| "grad_norm": 3.2836031890921418, |
| "learning_rate": 4.999734327236307e-06, |
| "loss": 0.1877, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.024112829845313922, |
| "grad_norm": 1.951056721435438, |
| "learning_rate": 4.999723806747728e-06, |
| "loss": 0.1151, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.02456778889899909, |
| "grad_norm": 2.6138639966442203, |
| "learning_rate": 4.99971308199301e-06, |
| "loss": 0.1363, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02502274795268426, |
| "grad_norm": 2.444124379430723, |
| "learning_rate": 4.999702152973025e-06, |
| "loss": 0.1482, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.025477707006369428, |
| "grad_norm": 2.4597235759126987, |
| "learning_rate": 4.9996910196886694e-06, |
| "loss": 0.133, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.025932666060054597, |
| "grad_norm": 2.6784146485916343, |
| "learning_rate": 4.999679682140852e-06, |
| "loss": 0.1174, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.026387625113739762, |
| "grad_norm": 2.7424790633709564, |
| "learning_rate": 4.999668140330499e-06, |
| "loss": 0.252, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02684258416742493, |
| "grad_norm": 3.348265074283292, |
| "learning_rate": 4.999656394258555e-06, |
| "loss": 0.1925, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0272975432211101, |
| "grad_norm": 2.1154638113016193, |
| "learning_rate": 4.999644443925978e-06, |
| "loss": 0.1836, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.027752502274795268, |
| "grad_norm": 2.4179191653959484, |
| "learning_rate": 4.999632289333746e-06, |
| "loss": 0.153, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.028207461328480437, |
| "grad_norm": 3.9087207564649495, |
| "learning_rate": 4.999619930482852e-06, |
| "loss": 0.17, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.028662420382165606, |
| "grad_norm": 3.9984836138839994, |
| "learning_rate": 4.999607367374304e-06, |
| "loss": 0.2311, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.029117379435850774, |
| "grad_norm": 3.296600637312694, |
| "learning_rate": 4.999594600009131e-06, |
| "loss": 0.1665, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.029572338489535943, |
| "grad_norm": 3.086306216989983, |
| "learning_rate": 4.999581628388375e-06, |
| "loss": 0.212, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.03002729754322111, |
| "grad_norm": 2.48917207768275, |
| "learning_rate": 4.999568452513097e-06, |
| "loss": 0.236, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.030482256596906277, |
| "grad_norm": 2.42340749830043, |
| "learning_rate": 4.9995550723843726e-06, |
| "loss": 0.1917, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.030937215650591446, |
| "grad_norm": 3.0972614391682396, |
| "learning_rate": 4.999541488003295e-06, |
| "loss": 0.1765, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.03139217470427662, |
| "grad_norm": 2.3696589048498193, |
| "learning_rate": 4.999527699370975e-06, |
| "loss": 0.1814, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.03184713375796178, |
| "grad_norm": 2.875746597678631, |
| "learning_rate": 4.99951370648854e-06, |
| "loss": 0.1878, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03230209281164695, |
| "grad_norm": 2.4253311315699606, |
| "learning_rate": 4.999499509357132e-06, |
| "loss": 0.15, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.03275705186533212, |
| "grad_norm": 2.766432808739805, |
| "learning_rate": 4.999485107977912e-06, |
| "loss": 0.1889, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.033212010919017286, |
| "grad_norm": 2.625328870617005, |
| "learning_rate": 4.999470502352057e-06, |
| "loss": 0.1719, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.03366696997270246, |
| "grad_norm": 2.982643055808138, |
| "learning_rate": 4.999455692480759e-06, |
| "loss": 0.2113, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.034121929026387623, |
| "grad_norm": 2.242621960634031, |
| "learning_rate": 4.999440678365229e-06, |
| "loss": 0.1721, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.034576888080072796, |
| "grad_norm": 2.4926186894362976, |
| "learning_rate": 4.999425460006695e-06, |
| "loss": 0.173, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03503184713375796, |
| "grad_norm": 2.3671699591796305, |
| "learning_rate": 4.9994100374063995e-06, |
| "loss": 0.1687, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03548680618744313, |
| "grad_norm": 3.4429608280507216, |
| "learning_rate": 4.9993944105656035e-06, |
| "loss": 0.2649, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0359417652411283, |
| "grad_norm": 2.0807531109765987, |
| "learning_rate": 4.999378579485582e-06, |
| "loss": 0.1476, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.036396724294813464, |
| "grad_norm": 2.5883097677868334, |
| "learning_rate": 4.999362544167632e-06, |
| "loss": 0.162, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.036851683348498636, |
| "grad_norm": 1.9494729618347428, |
| "learning_rate": 4.99934630461306e-06, |
| "loss": 0.1869, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0373066424021838, |
| "grad_norm": 3.2846426885249205, |
| "learning_rate": 4.999329860823197e-06, |
| "loss": 0.203, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03776160145586897, |
| "grad_norm": 2.6587615060855616, |
| "learning_rate": 4.999313212799383e-06, |
| "loss": 0.1773, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03821656050955414, |
| "grad_norm": 3.210244688238914, |
| "learning_rate": 4.99929636054298e-06, |
| "loss": 0.2184, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03867151956323931, |
| "grad_norm": 2.2958732125888224, |
| "learning_rate": 4.999279304055366e-06, |
| "loss": 0.2084, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.039126478616924476, |
| "grad_norm": 2.3139948703024857, |
| "learning_rate": 4.999262043337933e-06, |
| "loss": 0.1973, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.03958143767060965, |
| "grad_norm": 2.6677501256903002, |
| "learning_rate": 4.999244578392094e-06, |
| "loss": 0.1808, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.040036396724294813, |
| "grad_norm": 2.1844571391295524, |
| "learning_rate": 4.9992269092192736e-06, |
| "loss": 0.1761, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.04049135577797998, |
| "grad_norm": 2.4616623603088947, |
| "learning_rate": 4.9992090358209166e-06, |
| "loss": 0.1731, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.04094631483166515, |
| "grad_norm": 2.337094817685032, |
| "learning_rate": 4.9991909581984835e-06, |
| "loss": 0.1714, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.041401273885350316, |
| "grad_norm": 2.769205118473802, |
| "learning_rate": 4.999172676353451e-06, |
| "loss": 0.1286, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.04185623293903549, |
| "grad_norm": 1.993822184781022, |
| "learning_rate": 4.999154190287314e-06, |
| "loss": 0.1722, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.042311191992720654, |
| "grad_norm": 2.4020441009943716, |
| "learning_rate": 4.999135500001583e-06, |
| "loss": 0.2235, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.042766151046405826, |
| "grad_norm": 2.0794454896454013, |
| "learning_rate": 4.9991166054977844e-06, |
| "loss": 0.1424, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.04322111010009099, |
| "grad_norm": 2.5362620116303636, |
| "learning_rate": 4.999097506777463e-06, |
| "loss": 0.1878, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04367606915377616, |
| "grad_norm": 2.3575608544869393, |
| "learning_rate": 4.999078203842179e-06, |
| "loss": 0.2241, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.04413102820746133, |
| "grad_norm": 2.0445052328297217, |
| "learning_rate": 4.999058696693511e-06, |
| "loss": 0.1196, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.044585987261146494, |
| "grad_norm": 2.7989157148193615, |
| "learning_rate": 4.99903898533305e-06, |
| "loss": 0.186, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.045040946314831666, |
| "grad_norm": 2.6048410678209177, |
| "learning_rate": 4.99901906976241e-06, |
| "loss": 0.1675, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.04549590536851683, |
| "grad_norm": 2.232255651321915, |
| "learning_rate": 4.998998949983217e-06, |
| "loss": 0.1379, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.045950864422202004, |
| "grad_norm": 2.8190134265237203, |
| "learning_rate": 4.998978625997115e-06, |
| "loss": 0.2079, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.04640582347588717, |
| "grad_norm": 2.3706098438086003, |
| "learning_rate": 4.998958097805765e-06, |
| "loss": 0.141, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.04686078252957234, |
| "grad_norm": 2.44520778150716, |
| "learning_rate": 4.9989373654108445e-06, |
| "loss": 0.164, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.047315741583257506, |
| "grad_norm": 3.5342837078815115, |
| "learning_rate": 4.9989164288140465e-06, |
| "loss": 0.1548, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04777070063694268, |
| "grad_norm": 2.0458160494053836, |
| "learning_rate": 4.998895288017085e-06, |
| "loss": 0.179, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.048225659690627844, |
| "grad_norm": 2.205598400099282, |
| "learning_rate": 4.998873943021684e-06, |
| "loss": 0.1614, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.04868061874431301, |
| "grad_norm": 2.511554629528065, |
| "learning_rate": 4.998852393829589e-06, |
| "loss": 0.1659, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.04913557779799818, |
| "grad_norm": 3.219796004043862, |
| "learning_rate": 4.9988306404425625e-06, |
| "loss": 0.2276, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.049590536851683346, |
| "grad_norm": 1.752131198173806, |
| "learning_rate": 4.99880868286238e-06, |
| "loss": 0.1742, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.05004549590536852, |
| "grad_norm": 3.361908404370123, |
| "learning_rate": 4.998786521090836e-06, |
| "loss": 0.1724, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.050500454959053684, |
| "grad_norm": 2.360660279895669, |
| "learning_rate": 4.9987641551297426e-06, |
| "loss": 0.1999, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.050955414012738856, |
| "grad_norm": 2.307324595436377, |
| "learning_rate": 4.998741584980926e-06, |
| "loss": 0.2101, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.05141037306642402, |
| "grad_norm": 2.6034298836542247, |
| "learning_rate": 4.9987188106462314e-06, |
| "loss": 0.167, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.051865332120109194, |
| "grad_norm": 1.5842459657245014, |
| "learning_rate": 4.99869583212752e-06, |
| "loss": 0.1538, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.05232029117379436, |
| "grad_norm": 2.627805184680893, |
| "learning_rate": 4.9986726494266694e-06, |
| "loss": 0.2522, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.052775250227479524, |
| "grad_norm": 2.5410809044474907, |
| "learning_rate": 4.998649262545574e-06, |
| "loss": 0.1776, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.053230209281164696, |
| "grad_norm": 2.076630177156468, |
| "learning_rate": 4.998625671486144e-06, |
| "loss": 0.1828, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.05368516833484986, |
| "grad_norm": 2.5484627386038343, |
| "learning_rate": 4.998601876250308e-06, |
| "loss": 0.1781, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.054140127388535034, |
| "grad_norm": 2.0245969343413983, |
| "learning_rate": 4.998577876840011e-06, |
| "loss": 0.1157, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0545950864422202, |
| "grad_norm": 2.1240696181789143, |
| "learning_rate": 4.9985536732572124e-06, |
| "loss": 0.2097, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05505004549590537, |
| "grad_norm": 2.4280518543324776, |
| "learning_rate": 4.998529265503891e-06, |
| "loss": 0.1631, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.055505004549590536, |
| "grad_norm": 2.203499108228096, |
| "learning_rate": 4.9985046535820416e-06, |
| "loss": 0.2094, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.05595996360327571, |
| "grad_norm": 1.7616968616285278, |
| "learning_rate": 4.998479837493675e-06, |
| "loss": 0.1265, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.056414922656960874, |
| "grad_norm": 2.790115396130319, |
| "learning_rate": 4.9984548172408195e-06, |
| "loss": 0.162, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.05686988171064604, |
| "grad_norm": 2.7234581680187087, |
| "learning_rate": 4.998429592825519e-06, |
| "loss": 0.1901, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.05732484076433121, |
| "grad_norm": 2.7369239231742375, |
| "learning_rate": 4.998404164249835e-06, |
| "loss": 0.2289, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.05777979981801638, |
| "grad_norm": 2.145081624481222, |
| "learning_rate": 4.998378531515845e-06, |
| "loss": 0.1267, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.05823475887170155, |
| "grad_norm": 3.4112888898442586, |
| "learning_rate": 4.998352694625645e-06, |
| "loss": 0.1536, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.058689717925386714, |
| "grad_norm": 1.8616422473229426, |
| "learning_rate": 4.998326653581343e-06, |
| "loss": 0.1342, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.059144676979071886, |
| "grad_norm": 2.107533644057457, |
| "learning_rate": 4.998300408385072e-06, |
| "loss": 0.1774, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05959963603275705, |
| "grad_norm": 3.079768243729869, |
| "learning_rate": 4.998273959038972e-06, |
| "loss": 0.228, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.06005459508644222, |
| "grad_norm": 1.7403897659478, |
| "learning_rate": 4.998247305545207e-06, |
| "loss": 0.1257, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.06050955414012739, |
| "grad_norm": 1.663929944748691, |
| "learning_rate": 4.998220447905953e-06, |
| "loss": 0.1857, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.060964513193812554, |
| "grad_norm": 2.604082553460826, |
| "learning_rate": 4.998193386123408e-06, |
| "loss": 0.1724, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.061419472247497726, |
| "grad_norm": 2.662434521006077, |
| "learning_rate": 4.99816612019978e-06, |
| "loss": 0.1858, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.06187443130118289, |
| "grad_norm": 2.747586314783755, |
| "learning_rate": 4.998138650137298e-06, |
| "loss": 0.1764, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.062329390354868064, |
| "grad_norm": 2.299433423879838, |
| "learning_rate": 4.998110975938208e-06, |
| "loss": 0.2321, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.06278434940855324, |
| "grad_norm": 2.527715242455789, |
| "learning_rate": 4.998083097604769e-06, |
| "loss": 0.2159, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0632393084622384, |
| "grad_norm": 2.5218619075726285, |
| "learning_rate": 4.998055015139261e-06, |
| "loss": 0.1608, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.06369426751592357, |
| "grad_norm": 3.0047644164754495, |
| "learning_rate": 4.998026728543979e-06, |
| "loss": 0.2065, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06414922656960874, |
| "grad_norm": 2.178572369709547, |
| "learning_rate": 4.997998237821233e-06, |
| "loss": 0.1865, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0646041856232939, |
| "grad_norm": 1.5759272732327654, |
| "learning_rate": 4.997969542973352e-06, |
| "loss": 0.141, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.06505914467697907, |
| "grad_norm": 2.0811820514545554, |
| "learning_rate": 4.997940644002681e-06, |
| "loss": 0.1676, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.06551410373066424, |
| "grad_norm": 3.4671123551644403, |
| "learning_rate": 4.997911540911581e-06, |
| "loss": 0.2163, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.06596906278434941, |
| "grad_norm": 2.2842746412883312, |
| "learning_rate": 4.99788223370243e-06, |
| "loss": 0.1677, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.06642402183803457, |
| "grad_norm": 2.3367815299616734, |
| "learning_rate": 4.9978527223776245e-06, |
| "loss": 0.1811, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.06687898089171974, |
| "grad_norm": 2.088943555321838, |
| "learning_rate": 4.9978230069395735e-06, |
| "loss": 0.1627, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.06733393994540492, |
| "grad_norm": 2.5972570174963474, |
| "learning_rate": 4.9977930873907065e-06, |
| "loss": 0.1415, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.06778889899909009, |
| "grad_norm": 2.3401595363726595, |
| "learning_rate": 4.997762963733468e-06, |
| "loss": 0.148, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.06824385805277525, |
| "grad_norm": 2.894021920414895, |
| "learning_rate": 4.997732635970321e-06, |
| "loss": 0.2262, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06869881710646042, |
| "grad_norm": 1.7373422038949267, |
| "learning_rate": 4.9977021041037425e-06, |
| "loss": 0.1697, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.06915377616014559, |
| "grad_norm": 2.5175987385537697, |
| "learning_rate": 4.9976713681362265e-06, |
| "loss": 0.2353, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.06960873521383075, |
| "grad_norm": 2.4396682297474563, |
| "learning_rate": 4.997640428070286e-06, |
| "loss": 0.2143, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.07006369426751592, |
| "grad_norm": 2.2947939267715087, |
| "learning_rate": 4.99760928390845e-06, |
| "loss": 0.1369, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0705186533212011, |
| "grad_norm": 2.4758802729165326, |
| "learning_rate": 4.997577935653262e-06, |
| "loss": 0.1498, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.07097361237488627, |
| "grad_norm": 2.283530414912182, |
| "learning_rate": 4.9975463833072835e-06, |
| "loss": 0.1558, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 1.785546461501872, |
| "learning_rate": 4.997514626873093e-06, |
| "loss": 0.1548, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0718835304822566, |
| "grad_norm": 2.5778925367686645, |
| "learning_rate": 4.997482666353287e-06, |
| "loss": 0.1568, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.07233848953594177, |
| "grad_norm": 2.14376664899083, |
| "learning_rate": 4.997450501750476e-06, |
| "loss": 0.169, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.07279344858962693, |
| "grad_norm": 1.7889496418860382, |
| "learning_rate": 4.997418133067288e-06, |
| "loss": 0.1178, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0732484076433121, |
| "grad_norm": 2.734023407734539, |
| "learning_rate": 4.997385560306368e-06, |
| "loss": 0.2024, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.07370336669699727, |
| "grad_norm": 2.438529690680932, |
| "learning_rate": 4.997352783470379e-06, |
| "loss": 0.1877, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.07415832575068244, |
| "grad_norm": 2.358353345441234, |
| "learning_rate": 4.997319802561997e-06, |
| "loss": 0.1349, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0746132848043676, |
| "grad_norm": 2.1448042331352677, |
| "learning_rate": 4.9972866175839196e-06, |
| "loss": 0.1268, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.07506824385805277, |
| "grad_norm": 2.279102892849676, |
| "learning_rate": 4.9972532285388575e-06, |
| "loss": 0.1799, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.07552320291173795, |
| "grad_norm": 2.5140889210625543, |
| "learning_rate": 4.997219635429538e-06, |
| "loss": 0.1876, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.07597816196542312, |
| "grad_norm": 2.6687467871063664, |
| "learning_rate": 4.997185838258709e-06, |
| "loss": 0.1787, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.07643312101910828, |
| "grad_norm": 3.3415050416363354, |
| "learning_rate": 4.997151837029129e-06, |
| "loss": 0.1799, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.07688808007279345, |
| "grad_norm": 1.9269629920973084, |
| "learning_rate": 4.997117631743579e-06, |
| "loss": 0.1397, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.07734303912647862, |
| "grad_norm": 3.00621227688512, |
| "learning_rate": 4.997083222404852e-06, |
| "loss": 0.1967, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07779799818016378, |
| "grad_norm": 2.2615169475731327, |
| "learning_rate": 4.997048609015762e-06, |
| "loss": 0.1288, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.07825295723384895, |
| "grad_norm": 2.4342779650863724, |
| "learning_rate": 4.997013791579136e-06, |
| "loss": 0.186, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.07870791628753412, |
| "grad_norm": 2.4576007392784542, |
| "learning_rate": 4.996978770097819e-06, |
| "loss": 0.1577, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.0791628753412193, |
| "grad_norm": 2.4106466164039766, |
| "learning_rate": 4.996943544574673e-06, |
| "loss": 0.1886, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.07961783439490445, |
| "grad_norm": 2.5961861603572225, |
| "learning_rate": 4.996908115012576e-06, |
| "loss": 0.1621, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08007279344858963, |
| "grad_norm": 2.833499016976519, |
| "learning_rate": 4.996872481414425e-06, |
| "loss": 0.1818, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0805277525022748, |
| "grad_norm": 3.5757833649912834, |
| "learning_rate": 4.9968366437831305e-06, |
| "loss": 0.2517, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.08098271155595996, |
| "grad_norm": 1.5552303076468192, |
| "learning_rate": 4.99680060212162e-06, |
| "loss": 0.1245, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.08143767060964513, |
| "grad_norm": 2.2202920086611213, |
| "learning_rate": 4.996764356432841e-06, |
| "loss": 0.2174, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.0818926296633303, |
| "grad_norm": 2.1293059669722196, |
| "learning_rate": 4.996727906719754e-06, |
| "loss": 0.1605, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08234758871701547, |
| "grad_norm": 2.212380091830394, |
| "learning_rate": 4.9966912529853365e-06, |
| "loss": 0.125, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.08280254777070063, |
| "grad_norm": 2.1098748731042507, |
| "learning_rate": 4.996654395232585e-06, |
| "loss": 0.17, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0832575068243858, |
| "grad_norm": 2.3315908475718183, |
| "learning_rate": 4.996617333464512e-06, |
| "loss": 0.1678, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.08371246587807098, |
| "grad_norm": 2.100678357161413, |
| "learning_rate": 4.996580067684145e-06, |
| "loss": 0.1512, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.08416742493175614, |
| "grad_norm": 1.6542642571071706, |
| "learning_rate": 4.996542597894528e-06, |
| "loss": 0.1875, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.08462238398544131, |
| "grad_norm": 1.500567296289452, |
| "learning_rate": 4.996504924098726e-06, |
| "loss": 0.1579, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.08507734303912648, |
| "grad_norm": 1.5859042172394868, |
| "learning_rate": 4.9964670462998145e-06, |
| "loss": 0.146, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.08553230209281165, |
| "grad_norm": 1.7178165607526288, |
| "learning_rate": 4.99642896450089e-06, |
| "loss": 0.2372, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.08598726114649681, |
| "grad_norm": 2.8492778772061484, |
| "learning_rate": 4.9963906787050656e-06, |
| "loss": 0.2504, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.08644222020018198, |
| "grad_norm": 1.9406179967433874, |
| "learning_rate": 4.996352188915467e-06, |
| "loss": 0.1733, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.08689717925386715, |
| "grad_norm": 2.811015878830941, |
| "learning_rate": 4.996313495135242e-06, |
| "loss": 0.2133, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.08735213830755233, |
| "grad_norm": 2.222839682156962, |
| "learning_rate": 4.9962745973675505e-06, |
| "loss": 0.2113, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.08780709736123748, |
| "grad_norm": 2.6159522481523343, |
| "learning_rate": 4.996235495615572e-06, |
| "loss": 0.1622, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.08826205641492266, |
| "grad_norm": 2.3708185697184847, |
| "learning_rate": 4.996196189882503e-06, |
| "loss": 0.1685, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.08871701546860783, |
| "grad_norm": 3.228308382699869, |
| "learning_rate": 4.996156680171552e-06, |
| "loss": 0.2332, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.08917197452229299, |
| "grad_norm": 2.351705904801359, |
| "learning_rate": 4.996116966485951e-06, |
| "loss": 0.1816, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.08962693357597816, |
| "grad_norm": 2.320092450855665, |
| "learning_rate": 4.996077048828944e-06, |
| "loss": 0.2321, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.09008189262966333, |
| "grad_norm": 1.960036016410063, |
| "learning_rate": 4.996036927203793e-06, |
| "loss": 0.1745, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0905368516833485, |
| "grad_norm": 2.3679323522005573, |
| "learning_rate": 4.995996601613775e-06, |
| "loss": 0.1927, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.09099181073703366, |
| "grad_norm": 2.1775512973195723, |
| "learning_rate": 4.9959560720621875e-06, |
| "loss": 0.1576, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09144676979071883, |
| "grad_norm": 2.286317354363178, |
| "learning_rate": 4.995915338552341e-06, |
| "loss": 0.2184, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.09190172884440401, |
| "grad_norm": 2.0945800180559275, |
| "learning_rate": 4.995874401087565e-06, |
| "loss": 0.1572, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.09235668789808917, |
| "grad_norm": 2.741714725855865, |
| "learning_rate": 4.9958332596712035e-06, |
| "loss": 0.2087, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.09281164695177434, |
| "grad_norm": 3.0871074584367864, |
| "learning_rate": 4.99579191430662e-06, |
| "loss": 0.1968, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.09326660600545951, |
| "grad_norm": 1.9723075192584005, |
| "learning_rate": 4.995750364997192e-06, |
| "loss": 0.1507, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.09372156505914468, |
| "grad_norm": 1.8988997770559113, |
| "learning_rate": 4.995708611746314e-06, |
| "loss": 0.1288, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.09417652411282984, |
| "grad_norm": 2.420700916830186, |
| "learning_rate": 4.995666654557399e-06, |
| "loss": 0.1988, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.09463148316651501, |
| "grad_norm": 2.370720479747693, |
| "learning_rate": 4.995624493433876e-06, |
| "loss": 0.2215, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.09508644222020018, |
| "grad_norm": 2.2764445558307607, |
| "learning_rate": 4.995582128379189e-06, |
| "loss": 0.1984, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.09554140127388536, |
| "grad_norm": 2.382102062046725, |
| "learning_rate": 4.9955395593968e-06, |
| "loss": 0.2535, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.09599636032757052, |
| "grad_norm": 2.833827673252778, |
| "learning_rate": 4.99549678649019e-06, |
| "loss": 0.1998, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.09645131938125569, |
| "grad_norm": 2.486472694935685, |
| "learning_rate": 4.99545380966285e-06, |
| "loss": 0.2118, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.09690627843494086, |
| "grad_norm": 3.0088319794179883, |
| "learning_rate": 4.995410628918294e-06, |
| "loss": 0.1584, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.09736123748862602, |
| "grad_norm": 1.975326638907469, |
| "learning_rate": 4.995367244260052e-06, |
| "loss": 0.1832, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.09781619654231119, |
| "grad_norm": 1.9912128526989044, |
| "learning_rate": 4.995323655691667e-06, |
| "loss": 0.1346, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.09827115559599636, |
| "grad_norm": 2.603090937917312, |
| "learning_rate": 4.995279863216702e-06, |
| "loss": 0.2124, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.09872611464968153, |
| "grad_norm": 2.053886430988171, |
| "learning_rate": 4.995235866838735e-06, |
| "loss": 0.1567, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.09918107370336669, |
| "grad_norm": 2.039621450617981, |
| "learning_rate": 4.995191666561361e-06, |
| "loss": 0.1694, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.09963603275705187, |
| "grad_norm": 2.0601930905500394, |
| "learning_rate": 4.995147262388192e-06, |
| "loss": 0.1264, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.10009099181073704, |
| "grad_norm": 3.3199244613439802, |
| "learning_rate": 4.995102654322858e-06, |
| "loss": 0.2204, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1005459508644222, |
| "grad_norm": 2.1212806825874906, |
| "learning_rate": 4.995057842369002e-06, |
| "loss": 0.1122, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.10100090991810737, |
| "grad_norm": 2.157454599738766, |
| "learning_rate": 4.995012826530287e-06, |
| "loss": 0.1977, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.10145586897179254, |
| "grad_norm": 1.9698536511203952, |
| "learning_rate": 4.99496760681039e-06, |
| "loss": 0.1934, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.10191082802547771, |
| "grad_norm": 1.9533190562259675, |
| "learning_rate": 4.994922183213009e-06, |
| "loss": 0.1686, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.10236578707916287, |
| "grad_norm": 1.8311151598660917, |
| "learning_rate": 4.9948765557418535e-06, |
| "loss": 0.1376, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.10282074613284804, |
| "grad_norm": 2.6814547442935766, |
| "learning_rate": 4.994830724400653e-06, |
| "loss": 0.2536, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.10327570518653321, |
| "grad_norm": 2.065521179879655, |
| "learning_rate": 4.994784689193151e-06, |
| "loss": 0.1594, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.10373066424021839, |
| "grad_norm": 2.082741947039302, |
| "learning_rate": 4.994738450123111e-06, |
| "loss": 0.1792, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.10418562329390355, |
| "grad_norm": 2.1268040832192896, |
| "learning_rate": 4.994692007194312e-06, |
| "loss": 0.1746, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.10464058234758872, |
| "grad_norm": 1.6028966765046104, |
| "learning_rate": 4.994645360410547e-06, |
| "loss": 0.1442, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.10509554140127389, |
| "grad_norm": 2.051519186273431, |
| "learning_rate": 4.99459850977563e-06, |
| "loss": 0.1501, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.10555050045495905, |
| "grad_norm": 2.0348997381654774, |
| "learning_rate": 4.994551455293388e-06, |
| "loss": 0.1544, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.10600545950864422, |
| "grad_norm": 2.1087346651931758, |
| "learning_rate": 4.9945041969676654e-06, |
| "loss": 0.1768, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.10646041856232939, |
| "grad_norm": 2.2918772612100704, |
| "learning_rate": 4.994456734802325e-06, |
| "loss": 0.1361, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.10691537761601456, |
| "grad_norm": 1.6027315868889764, |
| "learning_rate": 4.994409068801247e-06, |
| "loss": 0.1905, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.10737033666969972, |
| "grad_norm": 1.3896946472755238, |
| "learning_rate": 4.994361198968323e-06, |
| "loss": 0.1282, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.1078252957233849, |
| "grad_norm": 2.8336860099519687, |
| "learning_rate": 4.994313125307466e-06, |
| "loss": 0.1795, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.10828025477707007, |
| "grad_norm": 2.3591551410924034, |
| "learning_rate": 4.994264847822605e-06, |
| "loss": 0.2012, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.10873521383075523, |
| "grad_norm": 1.963795078441063, |
| "learning_rate": 4.994216366517684e-06, |
| "loss": 0.122, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.1091901728844404, |
| "grad_norm": 2.2161995153888356, |
| "learning_rate": 4.994167681396667e-06, |
| "loss": 0.2013, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.10964513193812557, |
| "grad_norm": 2.116594401017286, |
| "learning_rate": 4.994118792463529e-06, |
| "loss": 0.1678, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.11010009099181074, |
| "grad_norm": 2.004374732998671, |
| "learning_rate": 4.994069699722267e-06, |
| "loss": 0.1937, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.1105550500454959, |
| "grad_norm": 1.8488901498313728, |
| "learning_rate": 4.994020403176893e-06, |
| "loss": 0.1668, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.11101000909918107, |
| "grad_norm": 1.9972157818131948, |
| "learning_rate": 4.9939709028314345e-06, |
| "loss": 0.1589, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.11146496815286625, |
| "grad_norm": 2.748474268313726, |
| "learning_rate": 4.993921198689935e-06, |
| "loss": 0.1244, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.11191992720655142, |
| "grad_norm": 2.2905102593877893, |
| "learning_rate": 4.993871290756459e-06, |
| "loss": 0.1828, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.11237488626023658, |
| "grad_norm": 2.4243824405880825, |
| "learning_rate": 4.9938211790350835e-06, |
| "loss": 0.1534, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.11282984531392175, |
| "grad_norm": 2.7563047154810767, |
| "learning_rate": 4.993770863529902e-06, |
| "loss": 0.2186, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.11328480436760692, |
| "grad_norm": 2.0782876036120044, |
| "learning_rate": 4.993720344245029e-06, |
| "loss": 0.1519, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.11373976342129208, |
| "grad_norm": 2.1737696697985065, |
| "learning_rate": 4.99366962118459e-06, |
| "loss": 0.1705, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11419472247497725, |
| "grad_norm": 2.117835290775163, |
| "learning_rate": 4.99361869435273e-06, |
| "loss": 0.1279, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.11464968152866242, |
| "grad_norm": 2.2816195263684906, |
| "learning_rate": 4.993567563753613e-06, |
| "loss": 0.1498, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.1151046405823476, |
| "grad_norm": 2.303960194203604, |
| "learning_rate": 4.993516229391414e-06, |
| "loss": 0.1505, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.11555959963603275, |
| "grad_norm": 2.932533158282557, |
| "learning_rate": 4.993464691270331e-06, |
| "loss": 0.1672, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.11601455868971793, |
| "grad_norm": 2.050977411803408, |
| "learning_rate": 4.993412949394572e-06, |
| "loss": 0.1511, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1164695177434031, |
| "grad_norm": 1.9367899744301398, |
| "learning_rate": 4.993361003768369e-06, |
| "loss": 0.1203, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.11692447679708826, |
| "grad_norm": 2.3417493914717027, |
| "learning_rate": 4.993308854395963e-06, |
| "loss": 0.1782, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.11737943585077343, |
| "grad_norm": 2.2791020802299498, |
| "learning_rate": 4.993256501281618e-06, |
| "loss": 0.1643, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1178343949044586, |
| "grad_norm": 2.051233293233244, |
| "learning_rate": 4.993203944429611e-06, |
| "loss": 0.1761, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.11828935395814377, |
| "grad_norm": 2.554462221777923, |
| "learning_rate": 4.993151183844236e-06, |
| "loss": 0.1654, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.11874431301182893, |
| "grad_norm": 1.8796649091666686, |
| "learning_rate": 4.9930982195298065e-06, |
| "loss": 0.1826, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.1191992720655141, |
| "grad_norm": 2.1843940505934336, |
| "learning_rate": 4.9930450514906484e-06, |
| "loss": 0.1755, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.11965423111919928, |
| "grad_norm": 2.600288448730721, |
| "learning_rate": 4.9929916797311075e-06, |
| "loss": 0.1758, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.12010919017288443, |
| "grad_norm": 2.0789865508427714, |
| "learning_rate": 4.992938104255545e-06, |
| "loss": 0.1571, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.1205641492265696, |
| "grad_norm": 2.6999799828889546, |
| "learning_rate": 4.992884325068339e-06, |
| "loss": 0.2177, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.12101910828025478, |
| "grad_norm": 2.1928099848185756, |
| "learning_rate": 4.992830342173882e-06, |
| "loss": 0.1831, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.12147406733393995, |
| "grad_norm": 1.6337451712782205, |
| "learning_rate": 4.992776155576589e-06, |
| "loss": 0.1506, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.12192902638762511, |
| "grad_norm": 1.2235042033062622, |
| "learning_rate": 4.992721765280884e-06, |
| "loss": 0.1214, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.12238398544131028, |
| "grad_norm": 2.8845660466122873, |
| "learning_rate": 4.992667171291215e-06, |
| "loss": 0.2148, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.12283894449499545, |
| "grad_norm": 2.7398139900638476, |
| "learning_rate": 4.992612373612042e-06, |
| "loss": 0.1661, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12329390354868063, |
| "grad_norm": 3.738889974273454, |
| "learning_rate": 4.99255737224784e-06, |
| "loss": 0.2297, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.12374886260236578, |
| "grad_norm": 1.5329721181759282, |
| "learning_rate": 4.9925021672031075e-06, |
| "loss": 0.1486, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.12420382165605096, |
| "grad_norm": 2.3823467276559875, |
| "learning_rate": 4.992446758482353e-06, |
| "loss": 0.1552, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.12465878070973613, |
| "grad_norm": 2.1454290127697924, |
| "learning_rate": 4.992391146090106e-06, |
| "loss": 0.1736, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.1251137397634213, |
| "grad_norm": 1.4949223744659494, |
| "learning_rate": 4.99233533003091e-06, |
| "loss": 0.1373, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.12556869881710647, |
| "grad_norm": 1.5553413773794396, |
| "learning_rate": 4.992279310309326e-06, |
| "loss": 0.1835, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.12602365787079162, |
| "grad_norm": 2.969806225573073, |
| "learning_rate": 4.9922230869299316e-06, |
| "loss": 0.2793, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.1264786169244768, |
| "grad_norm": 2.3168611268442763, |
| "learning_rate": 4.992166659897321e-06, |
| "loss": 0.1922, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.12693357597816196, |
| "grad_norm": 2.3995795142770455, |
| "learning_rate": 4.992110029216106e-06, |
| "loss": 0.1955, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.12738853503184713, |
| "grad_norm": 1.6975631974230885, |
| "learning_rate": 4.992053194890914e-06, |
| "loss": 0.1112, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1278434940855323, |
| "grad_norm": 2.087297197910066, |
| "learning_rate": 4.991996156926388e-06, |
| "loss": 0.1333, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.12829845313921748, |
| "grad_norm": 2.6326611217122475, |
| "learning_rate": 4.9919389153271904e-06, |
| "loss": 0.2017, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.12875341219290265, |
| "grad_norm": 1.4167548054089978, |
| "learning_rate": 4.991881470097998e-06, |
| "loss": 0.2074, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.1292083712465878, |
| "grad_norm": 2.325650637419427, |
| "learning_rate": 4.991823821243505e-06, |
| "loss": 0.1777, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.12966333030027297, |
| "grad_norm": 2.7279251785825, |
| "learning_rate": 4.991765968768422e-06, |
| "loss": 0.1801, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.13011828935395814, |
| "grad_norm": 2.9061020144564087, |
| "learning_rate": 4.991707912677477e-06, |
| "loss": 0.1702, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.1305732484076433, |
| "grad_norm": 1.8358268112205725, |
| "learning_rate": 4.991649652975414e-06, |
| "loss": 0.1433, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.13102820746132848, |
| "grad_norm": 2.5332736723438636, |
| "learning_rate": 4.991591189666994e-06, |
| "loss": 0.2469, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.13148316651501366, |
| "grad_norm": 2.1606263891645527, |
| "learning_rate": 4.991532522756993e-06, |
| "loss": 0.18, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.13193812556869883, |
| "grad_norm": 1.995831189895407, |
| "learning_rate": 4.991473652250207e-06, |
| "loss": 0.1577, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13239308462238397, |
| "grad_norm": 2.4955613558163754, |
| "learning_rate": 4.991414578151445e-06, |
| "loss": 0.1544, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.13284804367606914, |
| "grad_norm": 2.2942486381281326, |
| "learning_rate": 4.991355300465535e-06, |
| "loss": 0.1794, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.13330300272975432, |
| "grad_norm": 2.6074492667183486, |
| "learning_rate": 4.99129581919732e-06, |
| "loss": 0.2319, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.1337579617834395, |
| "grad_norm": 2.563328131279355, |
| "learning_rate": 4.9912361343516616e-06, |
| "loss": 0.1498, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.13421292083712466, |
| "grad_norm": 2.2818975551142535, |
| "learning_rate": 4.991176245933437e-06, |
| "loss": 0.1996, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.13466787989080983, |
| "grad_norm": 2.3084476659986874, |
| "learning_rate": 4.9911161539475385e-06, |
| "loss": 0.1837, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.135122838944495, |
| "grad_norm": 2.271697592195805, |
| "learning_rate": 4.991055858398879e-06, |
| "loss": 0.1839, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.13557779799818018, |
| "grad_norm": 2.7071752536725993, |
| "learning_rate": 4.990995359292384e-06, |
| "loss": 0.2051, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.13603275705186532, |
| "grad_norm": 2.1654433443615444, |
| "learning_rate": 4.990934656632997e-06, |
| "loss": 0.1845, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1364877161055505, |
| "grad_norm": 2.56820477539861, |
| "learning_rate": 4.990873750425679e-06, |
| "loss": 0.1987, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13694267515923567, |
| "grad_norm": 1.8972328280195017, |
| "learning_rate": 4.990812640675406e-06, |
| "loss": 0.1352, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.13739763421292084, |
| "grad_norm": 2.160948607003053, |
| "learning_rate": 4.990751327387174e-06, |
| "loss": 0.1788, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.137852593266606, |
| "grad_norm": 2.2034240871386026, |
| "learning_rate": 4.99068981056599e-06, |
| "loss": 0.14, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.13830755232029118, |
| "grad_norm": 2.273981179049363, |
| "learning_rate": 4.990628090216885e-06, |
| "loss": 0.1914, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.13876251137397635, |
| "grad_norm": 2.0189718711860096, |
| "learning_rate": 4.990566166344898e-06, |
| "loss": 0.1455, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.1392174704276615, |
| "grad_norm": 2.596979330537977, |
| "learning_rate": 4.990504038955092e-06, |
| "loss": 0.1503, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.13967242948134667, |
| "grad_norm": 2.694293011033057, |
| "learning_rate": 4.990441708052542e-06, |
| "loss": 0.1582, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.14012738853503184, |
| "grad_norm": 2.00968932243832, |
| "learning_rate": 4.9903791736423435e-06, |
| "loss": 0.1531, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.14058234758871702, |
| "grad_norm": 1.7247039385783955, |
| "learning_rate": 4.9903164357296044e-06, |
| "loss": 0.1258, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.1410373066424022, |
| "grad_norm": 1.4795211673422664, |
| "learning_rate": 4.990253494319453e-06, |
| "loss": 0.1918, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14149226569608736, |
| "grad_norm": 2.4289846785611573, |
| "learning_rate": 4.990190349417032e-06, |
| "loss": 0.264, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.14194722474977253, |
| "grad_norm": 2.1742573666821245, |
| "learning_rate": 4.990127001027501e-06, |
| "loss": 0.1382, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.14240218380345768, |
| "grad_norm": 2.051388070470128, |
| "learning_rate": 4.990063449156037e-06, |
| "loss": 0.234, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 2.3613735603207435, |
| "learning_rate": 4.989999693807832e-06, |
| "loss": 0.1963, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.14331210191082802, |
| "grad_norm": 3.162328527546947, |
| "learning_rate": 4.989935734988098e-06, |
| "loss": 0.1913, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.1437670609645132, |
| "grad_norm": 2.8669333432356967, |
| "learning_rate": 4.98987157270206e-06, |
| "loss": 0.15, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.14422202001819837, |
| "grad_norm": 2.383827835780797, |
| "learning_rate": 4.989807206954961e-06, |
| "loss": 0.2103, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.14467697907188354, |
| "grad_norm": 1.6341024017470744, |
| "learning_rate": 4.9897426377520605e-06, |
| "loss": 0.1393, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.1451319381255687, |
| "grad_norm": 2.146073254076934, |
| "learning_rate": 4.989677865098636e-06, |
| "loss": 0.1836, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.14558689717925385, |
| "grad_norm": 1.6889700199846902, |
| "learning_rate": 4.989612888999978e-06, |
| "loss": 0.1257, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14604185623293903, |
| "grad_norm": 1.6032091805420865, |
| "learning_rate": 4.9895477094614e-06, |
| "loss": 0.1578, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1464968152866242, |
| "grad_norm": 1.8161786006418608, |
| "learning_rate": 4.989482326488225e-06, |
| "loss": 0.1492, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.14695177434030937, |
| "grad_norm": 1.9978970628488169, |
| "learning_rate": 4.989416740085796e-06, |
| "loss": 0.1637, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.14740673339399454, |
| "grad_norm": 2.7066161025891335, |
| "learning_rate": 4.9893509502594735e-06, |
| "loss": 0.1963, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.14786169244767972, |
| "grad_norm": 2.420242793982077, |
| "learning_rate": 4.9892849570146335e-06, |
| "loss": 0.1877, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.1483166515013649, |
| "grad_norm": 2.153067326288121, |
| "learning_rate": 4.989218760356668e-06, |
| "loss": 0.1635, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.14877161055505003, |
| "grad_norm": 2.0543349130585216, |
| "learning_rate": 4.989152360290987e-06, |
| "loss": 0.1744, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1492265696087352, |
| "grad_norm": 2.1211312409383716, |
| "learning_rate": 4.989085756823015e-06, |
| "loss": 0.2104, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.14968152866242038, |
| "grad_norm": 1.9888085672791085, |
| "learning_rate": 4.989018949958197e-06, |
| "loss": 0.1876, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.15013648771610555, |
| "grad_norm": 1.7510207885281333, |
| "learning_rate": 4.98895193970199e-06, |
| "loss": 0.1251, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15059144676979072, |
| "grad_norm": 2.132384994640236, |
| "learning_rate": 4.9888847260598705e-06, |
| "loss": 0.154, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.1510464058234759, |
| "grad_norm": 2.323691709571053, |
| "learning_rate": 4.98881730903733e-06, |
| "loss": 0.1599, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.15150136487716107, |
| "grad_norm": 1.7667120167873211, |
| "learning_rate": 4.98874968863988e-06, |
| "loss": 0.1706, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.15195632393084624, |
| "grad_norm": 2.2465388060545424, |
| "learning_rate": 4.988681864873044e-06, |
| "loss": 0.152, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.15241128298453138, |
| "grad_norm": 2.150731238347554, |
| "learning_rate": 4.988613837742364e-06, |
| "loss": 0.1784, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.15286624203821655, |
| "grad_norm": 2.6552266788081913, |
| "learning_rate": 4.9885456072534015e-06, |
| "loss": 0.1692, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.15332120109190173, |
| "grad_norm": 2.6431963904654867, |
| "learning_rate": 4.988477173411728e-06, |
| "loss": 0.2313, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1537761601455869, |
| "grad_norm": 1.6862589720746106, |
| "learning_rate": 4.988408536222939e-06, |
| "loss": 0.1569, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.15423111919927207, |
| "grad_norm": 2.4287850849792343, |
| "learning_rate": 4.9883396956926416e-06, |
| "loss": 0.2077, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.15468607825295724, |
| "grad_norm": 2.1310532776354556, |
| "learning_rate": 4.988270651826462e-06, |
| "loss": 0.1603, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.15514103730664242, |
| "grad_norm": 2.426464258613891, |
| "learning_rate": 4.988201404630041e-06, |
| "loss": 0.1804, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.15559599636032756, |
| "grad_norm": 2.2461225244692966, |
| "learning_rate": 4.988131954109038e-06, |
| "loss": 0.1749, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.15605095541401273, |
| "grad_norm": 1.7543756867291544, |
| "learning_rate": 4.988062300269128e-06, |
| "loss": 0.2141, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.1565059144676979, |
| "grad_norm": 1.8842714079345257, |
| "learning_rate": 4.987992443116003e-06, |
| "loss": 0.1509, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.15696087352138308, |
| "grad_norm": 2.5046760683256917, |
| "learning_rate": 4.987922382655372e-06, |
| "loss": 0.1555, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.15741583257506825, |
| "grad_norm": 2.3171833195987186, |
| "learning_rate": 4.987852118892958e-06, |
| "loss": 0.259, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.15787079162875342, |
| "grad_norm": 1.7971407845013883, |
| "learning_rate": 4.987781651834503e-06, |
| "loss": 0.2111, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.1583257506824386, |
| "grad_norm": 2.229282526599637, |
| "learning_rate": 4.987710981485768e-06, |
| "loss": 0.1639, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.15878070973612374, |
| "grad_norm": 2.090625191317677, |
| "learning_rate": 4.987640107852525e-06, |
| "loss": 0.2123, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.1592356687898089, |
| "grad_norm": 2.117001720390773, |
| "learning_rate": 4.987569030940567e-06, |
| "loss": 0.1762, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.15969062784349408, |
| "grad_norm": 1.7897158962626623, |
| "learning_rate": 4.987497750755702e-06, |
| "loss": 0.0935, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.16014558689717925, |
| "grad_norm": 2.0946360877045906, |
| "learning_rate": 4.987426267303753e-06, |
| "loss": 0.2049, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.16060054595086443, |
| "grad_norm": 2.07614941330386, |
| "learning_rate": 4.987354580590563e-06, |
| "loss": 0.1858, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1610555050045496, |
| "grad_norm": 1.6797770286484157, |
| "learning_rate": 4.987282690621991e-06, |
| "loss": 0.1652, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.16151046405823477, |
| "grad_norm": 1.6413851962480772, |
| "learning_rate": 4.987210597403907e-06, |
| "loss": 0.156, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.16196542311191992, |
| "grad_norm": 2.5143144976994285, |
| "learning_rate": 4.987138300942208e-06, |
| "loss": 0.1804, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1624203821656051, |
| "grad_norm": 2.128297430906798, |
| "learning_rate": 4.987065801242798e-06, |
| "loss": 0.1634, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.16287534121929026, |
| "grad_norm": 2.039358127433988, |
| "learning_rate": 4.986993098311601e-06, |
| "loss": 0.172, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.16333030027297543, |
| "grad_norm": 2.2470477292441906, |
| "learning_rate": 4.986920192154561e-06, |
| "loss": 0.1419, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1637852593266606, |
| "grad_norm": 1.8708576936226033, |
| "learning_rate": 4.986847082777632e-06, |
| "loss": 0.165, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.16424021838034578, |
| "grad_norm": 2.2426713628374406, |
| "learning_rate": 4.986773770186791e-06, |
| "loss": 0.2113, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.16469517743403095, |
| "grad_norm": 2.1231842278965716, |
| "learning_rate": 4.986700254388027e-06, |
| "loss": 0.2583, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.1651501364877161, |
| "grad_norm": 1.9962414368551604, |
| "learning_rate": 4.986626535387349e-06, |
| "loss": 0.2146, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.16560509554140126, |
| "grad_norm": 2.7738560722941656, |
| "learning_rate": 4.9865526131907795e-06, |
| "loss": 0.1913, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.16606005459508644, |
| "grad_norm": 1.8910905183030835, |
| "learning_rate": 4.9864784878043595e-06, |
| "loss": 0.2243, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1665150136487716, |
| "grad_norm": 2.943803252646498, |
| "learning_rate": 4.986404159234146e-06, |
| "loss": 0.2169, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.16696997270245678, |
| "grad_norm": 2.067283855325497, |
| "learning_rate": 4.986329627486213e-06, |
| "loss": 0.1392, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.16742493175614195, |
| "grad_norm": 1.7900649282380081, |
| "learning_rate": 4.986254892566652e-06, |
| "loss": 0.1929, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.16787989080982713, |
| "grad_norm": 2.05364008592912, |
| "learning_rate": 4.9861799544815684e-06, |
| "loss": 0.1539, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.16833484986351227, |
| "grad_norm": 1.8722252354131819, |
| "learning_rate": 4.986104813237086e-06, |
| "loss": 0.1584, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16878980891719744, |
| "grad_norm": 2.127812745723865, |
| "learning_rate": 4.986029468839346e-06, |
| "loss": 0.1618, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.16924476797088261, |
| "grad_norm": 2.4926065420888643, |
| "learning_rate": 4.985953921294505e-06, |
| "loss": 0.2601, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.1696997270245678, |
| "grad_norm": 2.973425717527041, |
| "learning_rate": 4.985878170608736e-06, |
| "loss": 0.1919, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.17015468607825296, |
| "grad_norm": 2.1354583522718604, |
| "learning_rate": 4.985802216788228e-06, |
| "loss": 0.1904, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.17060964513193813, |
| "grad_norm": 2.4618549416407634, |
| "learning_rate": 4.98572605983919e-06, |
| "loss": 0.2137, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1710646041856233, |
| "grad_norm": 1.3365138469487268, |
| "learning_rate": 4.985649699767842e-06, |
| "loss": 0.1069, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.17151956323930848, |
| "grad_norm": 1.9602605162416638, |
| "learning_rate": 4.985573136580427e-06, |
| "loss": 0.1723, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.17197452229299362, |
| "grad_norm": 1.6915428216688142, |
| "learning_rate": 4.9854963702832e-06, |
| "loss": 0.1673, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.1724294813466788, |
| "grad_norm": 2.0131015516091875, |
| "learning_rate": 4.985419400882433e-06, |
| "loss": 0.2159, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.17288444040036396, |
| "grad_norm": 1.8436996177818286, |
| "learning_rate": 4.985342228384418e-06, |
| "loss": 0.1777, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17333939945404914, |
| "grad_norm": 3.2955423815059257, |
| "learning_rate": 4.985264852795459e-06, |
| "loss": 0.2759, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1737943585077343, |
| "grad_norm": 2.386347589584829, |
| "learning_rate": 4.98518727412188e-06, |
| "loss": 0.1958, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.17424931756141948, |
| "grad_norm": 2.5771465793014294, |
| "learning_rate": 4.98510949237002e-06, |
| "loss": 0.1861, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.17470427661510465, |
| "grad_norm": 2.420697255730561, |
| "learning_rate": 4.985031507546234e-06, |
| "loss": 0.1538, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.1751592356687898, |
| "grad_norm": 2.6016330527075895, |
| "learning_rate": 4.984953319656896e-06, |
| "loss": 0.1981, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.17561419472247497, |
| "grad_norm": 2.671850671096213, |
| "learning_rate": 4.984874928708395e-06, |
| "loss": 0.1802, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.17606915377616014, |
| "grad_norm": 2.329893515854394, |
| "learning_rate": 4.984796334707136e-06, |
| "loss": 0.1916, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.17652411282984531, |
| "grad_norm": 2.900381887848387, |
| "learning_rate": 4.984717537659542e-06, |
| "loss": 0.1851, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.1769790718835305, |
| "grad_norm": 2.8920348384518295, |
| "learning_rate": 4.984638537572052e-06, |
| "loss": 0.1614, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.17743403093721566, |
| "grad_norm": 1.7590905699687769, |
| "learning_rate": 4.984559334451121e-06, |
| "loss": 0.1182, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.17788898999090083, |
| "grad_norm": 1.992998204932115, |
| "learning_rate": 4.984479928303221e-06, |
| "loss": 0.1097, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.17834394904458598, |
| "grad_norm": 1.7032225308271054, |
| "learning_rate": 4.984400319134841e-06, |
| "loss": 0.1166, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.17879890809827115, |
| "grad_norm": 2.170562253873519, |
| "learning_rate": 4.984320506952487e-06, |
| "loss": 0.2253, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.17925386715195632, |
| "grad_norm": 2.237592089222373, |
| "learning_rate": 4.9842404917626796e-06, |
| "loss": 0.1949, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.1797088262056415, |
| "grad_norm": 2.0106916989450587, |
| "learning_rate": 4.984160273571959e-06, |
| "loss": 0.1681, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.18016378525932666, |
| "grad_norm": 1.5887484417784243, |
| "learning_rate": 4.9840798523868785e-06, |
| "loss": 0.1987, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.18061874431301184, |
| "grad_norm": 2.1863186231198677, |
| "learning_rate": 4.983999228214011e-06, |
| "loss": 0.1688, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.181073703366697, |
| "grad_norm": 1.73818173181658, |
| "learning_rate": 4.983918401059943e-06, |
| "loss": 0.1667, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.18152866242038215, |
| "grad_norm": 2.507383020515962, |
| "learning_rate": 4.983837370931282e-06, |
| "loss": 0.1969, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.18198362147406733, |
| "grad_norm": 2.0632014051403793, |
| "learning_rate": 4.983756137834647e-06, |
| "loss": 0.183, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1824385805277525, |
| "grad_norm": 2.830188740520148, |
| "learning_rate": 4.9836747017766765e-06, |
| "loss": 0.2093, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.18289353958143767, |
| "grad_norm": 2.5110616036547038, |
| "learning_rate": 4.983593062764027e-06, |
| "loss": 0.2322, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.18334849863512284, |
| "grad_norm": 3.686743248745681, |
| "learning_rate": 4.983511220803367e-06, |
| "loss": 0.2445, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.18380345768880801, |
| "grad_norm": 1.679708381839253, |
| "learning_rate": 4.983429175901386e-06, |
| "loss": 0.1796, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.1842584167424932, |
| "grad_norm": 2.1827593155516722, |
| "learning_rate": 4.983346928064788e-06, |
| "loss": 0.1674, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.18471337579617833, |
| "grad_norm": 1.60561536399989, |
| "learning_rate": 4.9832644773002935e-06, |
| "loss": 0.1696, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.1851683348498635, |
| "grad_norm": 2.3818871014331418, |
| "learning_rate": 4.98318182361464e-06, |
| "loss": 0.231, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.18562329390354868, |
| "grad_norm": 2.466498074147868, |
| "learning_rate": 4.9830989670145825e-06, |
| "loss": 0.2363, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.18607825295723385, |
| "grad_norm": 2.3360214493938485, |
| "learning_rate": 4.9830159075068905e-06, |
| "loss": 0.2211, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.18653321201091902, |
| "grad_norm": 1.8065829881072444, |
| "learning_rate": 4.9829326450983514e-06, |
| "loss": 0.1743, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1869881710646042, |
| "grad_norm": 2.69540573324766, |
| "learning_rate": 4.98284917979577e-06, |
| "loss": 0.1876, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.18744313011828936, |
| "grad_norm": 1.8906354216406325, |
| "learning_rate": 4.9827655116059656e-06, |
| "loss": 0.1592, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.18789808917197454, |
| "grad_norm": 1.743151148777257, |
| "learning_rate": 4.9826816405357755e-06, |
| "loss": 0.1746, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.18835304822565968, |
| "grad_norm": 1.5963849264202556, |
| "learning_rate": 4.982597566592054e-06, |
| "loss": 0.1244, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.18880800727934485, |
| "grad_norm": 2.7157092244830205, |
| "learning_rate": 4.982513289781671e-06, |
| "loss": 0.2332, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.18926296633303002, |
| "grad_norm": 1.9931400703765212, |
| "learning_rate": 4.982428810111512e-06, |
| "loss": 0.2113, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1897179253867152, |
| "grad_norm": 1.3604077425808516, |
| "learning_rate": 4.9823441275884814e-06, |
| "loss": 0.1305, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.19017288444040037, |
| "grad_norm": 2.2607598123619517, |
| "learning_rate": 4.982259242219499e-06, |
| "loss": 0.1723, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.19062784349408554, |
| "grad_norm": 1.867118589561207, |
| "learning_rate": 4.9821741540115006e-06, |
| "loss": 0.1355, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1910828025477707, |
| "grad_norm": 2.11150758750875, |
| "learning_rate": 4.982088862971441e-06, |
| "loss": 0.2181, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19153776160145586, |
| "grad_norm": 2.922634212063935, |
| "learning_rate": 4.982003369106287e-06, |
| "loss": 0.1935, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.19199272065514103, |
| "grad_norm": 1.8213621057521336, |
| "learning_rate": 4.981917672423028e-06, |
| "loss": 0.1159, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.1924476797088262, |
| "grad_norm": 1.9973203363112062, |
| "learning_rate": 4.981831772928664e-06, |
| "loss": 0.1644, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.19290263876251137, |
| "grad_norm": 1.6435298569620178, |
| "learning_rate": 4.981745670630216e-06, |
| "loss": 0.1676, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.19335759781619655, |
| "grad_norm": 1.7090737346215599, |
| "learning_rate": 4.981659365534718e-06, |
| "loss": 0.1947, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.19381255686988172, |
| "grad_norm": 2.8644071628055365, |
| "learning_rate": 4.981572857649225e-06, |
| "loss": 0.2412, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.1942675159235669, |
| "grad_norm": 1.5071870677678134, |
| "learning_rate": 4.981486146980804e-06, |
| "loss": 0.1247, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.19472247497725204, |
| "grad_norm": 2.5523639597283436, |
| "learning_rate": 4.9813992335365415e-06, |
| "loss": 0.1636, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.1951774340309372, |
| "grad_norm": 1.6766352791010617, |
| "learning_rate": 4.98131211732354e-06, |
| "loss": 0.1659, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.19563239308462238, |
| "grad_norm": 2.6626571731411985, |
| "learning_rate": 4.981224798348917e-06, |
| "loss": 0.1777, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.19608735213830755, |
| "grad_norm": 1.7748484056177547, |
| "learning_rate": 4.981137276619809e-06, |
| "loss": 0.2038, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.19654231119199272, |
| "grad_norm": 1.6726970249923665, |
| "learning_rate": 4.9810495521433675e-06, |
| "loss": 0.167, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1969972702456779, |
| "grad_norm": 2.3836088959731407, |
| "learning_rate": 4.9809616249267616e-06, |
| "loss": 0.1967, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.19745222929936307, |
| "grad_norm": 1.9478244630239012, |
| "learning_rate": 4.980873494977174e-06, |
| "loss": 0.2259, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1979071883530482, |
| "grad_norm": 2.601912538074716, |
| "learning_rate": 4.98078516230181e-06, |
| "loss": 0.196, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.19836214740673339, |
| "grad_norm": 1.8252963162031037, |
| "learning_rate": 4.980696626907884e-06, |
| "loss": 0.1551, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.19881710646041856, |
| "grad_norm": 1.7882792458437706, |
| "learning_rate": 4.980607888802633e-06, |
| "loss": 0.1547, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.19927206551410373, |
| "grad_norm": 1.8674433444840757, |
| "learning_rate": 4.980518947993307e-06, |
| "loss": 0.1625, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1997270245677889, |
| "grad_norm": 2.050135562104488, |
| "learning_rate": 4.980429804487176e-06, |
| "loss": 0.1706, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.20018198362147407, |
| "grad_norm": 3.040028729336044, |
| "learning_rate": 4.980340458291521e-06, |
| "loss": 0.2235, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20063694267515925, |
| "grad_norm": 1.755025572252995, |
| "learning_rate": 4.980250909413646e-06, |
| "loss": 0.1451, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.2010919017288444, |
| "grad_norm": 2.636610646301175, |
| "learning_rate": 4.980161157860867e-06, |
| "loss": 0.1869, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.20154686078252956, |
| "grad_norm": 2.5942914069340715, |
| "learning_rate": 4.980071203640519e-06, |
| "loss": 0.1633, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.20200181983621474, |
| "grad_norm": 1.5184266230548011, |
| "learning_rate": 4.979981046759952e-06, |
| "loss": 0.1441, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.2024567788898999, |
| "grad_norm": 1.8681142182661066, |
| "learning_rate": 4.979890687226533e-06, |
| "loss": 0.1596, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.20291173794358508, |
| "grad_norm": 2.48564323404002, |
| "learning_rate": 4.979800125047647e-06, |
| "loss": 0.1481, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.20336669699727025, |
| "grad_norm": 2.3390506413519514, |
| "learning_rate": 4.979709360230692e-06, |
| "loss": 0.1889, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.20382165605095542, |
| "grad_norm": 2.017468095007692, |
| "learning_rate": 4.979618392783087e-06, |
| "loss": 0.1417, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.20427661510464057, |
| "grad_norm": 1.729598330112352, |
| "learning_rate": 4.979527222712266e-06, |
| "loss": 0.142, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.20473157415832574, |
| "grad_norm": 2.1368144580931747, |
| "learning_rate": 4.9794358500256765e-06, |
| "loss": 0.1636, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2051865332120109, |
| "grad_norm": 1.9994448136168699, |
| "learning_rate": 4.979344274730786e-06, |
| "loss": 0.1604, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.20564149226569609, |
| "grad_norm": 3.428795563882251, |
| "learning_rate": 4.979252496835079e-06, |
| "loss": 0.2394, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.20609645131938126, |
| "grad_norm": 2.6996852974810768, |
| "learning_rate": 4.979160516346054e-06, |
| "loss": 0.2375, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.20655141037306643, |
| "grad_norm": 1.9797680166732188, |
| "learning_rate": 4.979068333271227e-06, |
| "loss": 0.1842, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.2070063694267516, |
| "grad_norm": 3.003957390141276, |
| "learning_rate": 4.978975947618131e-06, |
| "loss": 0.193, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.20746132848043677, |
| "grad_norm": 2.00845771414247, |
| "learning_rate": 4.978883359394316e-06, |
| "loss": 0.198, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.20791628753412192, |
| "grad_norm": 2.0203437551682186, |
| "learning_rate": 4.978790568607347e-06, |
| "loss": 0.1643, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.2083712465878071, |
| "grad_norm": 2.112746362210305, |
| "learning_rate": 4.9786975752648076e-06, |
| "loss": 0.2327, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.20882620564149226, |
| "grad_norm": 1.9220582393008747, |
| "learning_rate": 4.978604379374295e-06, |
| "loss": 0.1549, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.20928116469517744, |
| "grad_norm": 2.1402572457657545, |
| "learning_rate": 4.978510980943427e-06, |
| "loss": 0.139, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2097361237488626, |
| "grad_norm": 2.4018554173698914, |
| "learning_rate": 4.978417379979834e-06, |
| "loss": 0.2455, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.21019108280254778, |
| "grad_norm": 1.951258020011642, |
| "learning_rate": 4.978323576491165e-06, |
| "loss": 0.1552, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.21064604185623295, |
| "grad_norm": 2.1010768496853323, |
| "learning_rate": 4.978229570485085e-06, |
| "loss": 0.2383, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.2111010009099181, |
| "grad_norm": 1.5821441832613072, |
| "learning_rate": 4.978135361969276e-06, |
| "loss": 0.1851, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.21155595996360327, |
| "grad_norm": 1.6009355908322205, |
| "learning_rate": 4.9780409509514375e-06, |
| "loss": 0.175, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.21201091901728844, |
| "grad_norm": 1.8650365534886528, |
| "learning_rate": 4.977946337439282e-06, |
| "loss": 0.2302, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.2124658780709736, |
| "grad_norm": 1.6321720020750403, |
| "learning_rate": 4.9778515214405436e-06, |
| "loss": 0.1919, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.21292083712465878, |
| "grad_norm": 1.5102194582450883, |
| "learning_rate": 4.977756502962967e-06, |
| "loss": 0.1206, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.21337579617834396, |
| "grad_norm": 2.069100224324352, |
| "learning_rate": 4.97766128201432e-06, |
| "loss": 0.1429, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.21383075523202913, |
| "grad_norm": 1.8931152672148568, |
| "learning_rate": 4.977565858602381e-06, |
| "loss": 0.1634, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 1.9388931474803874, |
| "learning_rate": 4.977470232734949e-06, |
| "loss": 0.1138, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.21474067333939945, |
| "grad_norm": 2.52659442383892, |
| "learning_rate": 4.977374404419838e-06, |
| "loss": 0.2011, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.21519563239308462, |
| "grad_norm": 1.9831728669000206, |
| "learning_rate": 4.977278373664877e-06, |
| "loss": 0.1475, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.2156505914467698, |
| "grad_norm": 1.8342304339485977, |
| "learning_rate": 4.977182140477916e-06, |
| "loss": 0.1801, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.21610555050045496, |
| "grad_norm": 1.9321185937866436, |
| "learning_rate": 4.977085704866817e-06, |
| "loss": 0.1787, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.21656050955414013, |
| "grad_norm": 1.8230541452731504, |
| "learning_rate": 4.97698906683946e-06, |
| "loss": 0.202, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.2170154686078253, |
| "grad_norm": 2.4982489548908062, |
| "learning_rate": 4.9768922264037435e-06, |
| "loss": 0.2283, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.21747042766151045, |
| "grad_norm": 2.134742327126813, |
| "learning_rate": 4.976795183567579e-06, |
| "loss": 0.1544, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.21792538671519562, |
| "grad_norm": 2.9581764452635184, |
| "learning_rate": 4.976697938338898e-06, |
| "loss": 0.1674, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.2183803457688808, |
| "grad_norm": 1.712602080023381, |
| "learning_rate": 4.976600490725645e-06, |
| "loss": 0.1568, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.21883530482256597, |
| "grad_norm": 1.7418610812844693, |
| "learning_rate": 4.976502840735785e-06, |
| "loss": 0.1945, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.21929026387625114, |
| "grad_norm": 2.138071978494717, |
| "learning_rate": 4.976404988377297e-06, |
| "loss": 0.1512, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.2197452229299363, |
| "grad_norm": 2.346885929916554, |
| "learning_rate": 4.976306933658176e-06, |
| "loss": 0.2262, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.22020018198362148, |
| "grad_norm": 2.020074510485992, |
| "learning_rate": 4.976208676586435e-06, |
| "loss": 0.2141, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.22065514103730663, |
| "grad_norm": 1.8763221281396283, |
| "learning_rate": 4.976110217170104e-06, |
| "loss": 0.1491, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.2211101000909918, |
| "grad_norm": 2.235721601006219, |
| "learning_rate": 4.976011555417228e-06, |
| "loss": 0.2058, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.22156505914467697, |
| "grad_norm": 1.315034818762656, |
| "learning_rate": 4.975912691335869e-06, |
| "loss": 0.1244, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.22202001819836215, |
| "grad_norm": 2.1199398350029757, |
| "learning_rate": 4.975813624934106e-06, |
| "loss": 0.1412, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.22247497725204732, |
| "grad_norm": 1.8709221572870474, |
| "learning_rate": 4.975714356220035e-06, |
| "loss": 0.1527, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.2229299363057325, |
| "grad_norm": 2.2421419230230657, |
| "learning_rate": 4.975614885201766e-06, |
| "loss": 0.1608, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22338489535941766, |
| "grad_norm": 2.3078261939110454, |
| "learning_rate": 4.975515211887429e-06, |
| "loss": 0.1465, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.22383985441310283, |
| "grad_norm": 1.5895485837834087, |
| "learning_rate": 4.9754153362851684e-06, |
| "loss": 0.1197, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.22429481346678798, |
| "grad_norm": 1.7459488111256227, |
| "learning_rate": 4.975315258403145e-06, |
| "loss": 0.1528, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.22474977252047315, |
| "grad_norm": 1.7723162295113712, |
| "learning_rate": 4.975214978249537e-06, |
| "loss": 0.192, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.22520473157415832, |
| "grad_norm": 2.1669137038937905, |
| "learning_rate": 4.975114495832539e-06, |
| "loss": 0.2359, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.2256596906278435, |
| "grad_norm": 2.0603228355359535, |
| "learning_rate": 4.975013811160362e-06, |
| "loss": 0.1745, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.22611464968152867, |
| "grad_norm": 2.043894775326392, |
| "learning_rate": 4.974912924241233e-06, |
| "loss": 0.1624, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.22656960873521384, |
| "grad_norm": 1.6841728525009554, |
| "learning_rate": 4.974811835083397e-06, |
| "loss": 0.2189, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.227024567788899, |
| "grad_norm": 2.6366675854172335, |
| "learning_rate": 4.974710543695114e-06, |
| "loss": 0.2328, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.22747952684258416, |
| "grad_norm": 2.4052804548672304, |
| "learning_rate": 4.974609050084661e-06, |
| "loss": 0.1886, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22793448589626933, |
| "grad_norm": 2.0535117318370633, |
| "learning_rate": 4.974507354260332e-06, |
| "loss": 0.2303, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.2283894449499545, |
| "grad_norm": 2.0269029978513555, |
| "learning_rate": 4.974405456230436e-06, |
| "loss": 0.1671, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.22884440400363967, |
| "grad_norm": 2.7642802872985293, |
| "learning_rate": 4.974303356003301e-06, |
| "loss": 0.1344, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.22929936305732485, |
| "grad_norm": 1.7887955204908959, |
| "learning_rate": 4.974201053587268e-06, |
| "loss": 0.1681, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.22975432211101002, |
| "grad_norm": 1.9742201804444028, |
| "learning_rate": 4.9740985489907005e-06, |
| "loss": 0.138, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.2302092811646952, |
| "grad_norm": 2.166941374479256, |
| "learning_rate": 4.973995842221971e-06, |
| "loss": 0.1868, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.23066424021838033, |
| "grad_norm": 2.225119335059734, |
| "learning_rate": 4.973892933289476e-06, |
| "loss": 0.1567, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.2311191992720655, |
| "grad_norm": 1.8892762650773542, |
| "learning_rate": 4.97378982220162e-06, |
| "loss": 0.1488, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.23157415832575068, |
| "grad_norm": 1.8158100523332013, |
| "learning_rate": 4.973686508966832e-06, |
| "loss": 0.1301, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.23202911737943585, |
| "grad_norm": 2.0245407202628836, |
| "learning_rate": 4.973582993593554e-06, |
| "loss": 0.1695, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.23248407643312102, |
| "grad_norm": 2.7034498126253674, |
| "learning_rate": 4.973479276090244e-06, |
| "loss": 0.1737, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2329390354868062, |
| "grad_norm": 2.065622568041038, |
| "learning_rate": 4.973375356465378e-06, |
| "loss": 0.149, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.23339399454049137, |
| "grad_norm": 1.9812676900095911, |
| "learning_rate": 4.973271234727447e-06, |
| "loss": 0.173, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.2338489535941765, |
| "grad_norm": 1.5726806580344541, |
| "learning_rate": 4.97316691088496e-06, |
| "loss": 0.1254, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.23430391264786168, |
| "grad_norm": 2.191785122658953, |
| "learning_rate": 4.973062384946442e-06, |
| "loss": 0.2233, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.23475887170154686, |
| "grad_norm": 1.035062440323858, |
| "learning_rate": 4.9729576569204345e-06, |
| "loss": 0.1013, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.23521383075523203, |
| "grad_norm": 1.6618268618936451, |
| "learning_rate": 4.972852726815495e-06, |
| "loss": 0.1611, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.2356687898089172, |
| "grad_norm": 1.3381515796606562, |
| "learning_rate": 4.972747594640197e-06, |
| "loss": 0.1669, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.23612374886260237, |
| "grad_norm": 2.0887228759944327, |
| "learning_rate": 4.9726422604031335e-06, |
| "loss": 0.1718, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.23657870791628755, |
| "grad_norm": 1.424194176219749, |
| "learning_rate": 4.97253672411291e-06, |
| "loss": 0.1771, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2370336669699727, |
| "grad_norm": 1.5373795467776654, |
| "learning_rate": 4.972430985778152e-06, |
| "loss": 0.1118, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.23748862602365786, |
| "grad_norm": 2.6972031210443506, |
| "learning_rate": 4.972325045407499e-06, |
| "loss": 0.1702, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.23794358507734303, |
| "grad_norm": 3.1350549460340957, |
| "learning_rate": 4.972218903009608e-06, |
| "loss": 0.2161, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.2383985441310282, |
| "grad_norm": 2.1422204131037628, |
| "learning_rate": 4.972112558593153e-06, |
| "loss": 0.1902, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.23885350318471338, |
| "grad_norm": 2.041726060026698, |
| "learning_rate": 4.972006012166823e-06, |
| "loss": 0.2079, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.23930846223839855, |
| "grad_norm": 1.7346734861898188, |
| "learning_rate": 4.971899263739326e-06, |
| "loss": 0.1394, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.23976342129208372, |
| "grad_norm": 1.959916622945104, |
| "learning_rate": 4.971792313319384e-06, |
| "loss": 0.1901, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.24021838034576887, |
| "grad_norm": 1.6780700319385458, |
| "learning_rate": 4.971685160915737e-06, |
| "loss": 0.1623, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.24067333939945404, |
| "grad_norm": 2.08830134651656, |
| "learning_rate": 4.971577806537139e-06, |
| "loss": 0.1607, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.2411282984531392, |
| "grad_norm": 2.205231289993063, |
| "learning_rate": 4.971470250192366e-06, |
| "loss": 0.1851, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24158325750682438, |
| "grad_norm": 2.911292420170041, |
| "learning_rate": 4.9713624918902045e-06, |
| "loss": 0.2235, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.24203821656050956, |
| "grad_norm": 2.1164751998531344, |
| "learning_rate": 4.971254531639461e-06, |
| "loss": 0.1556, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.24249317561419473, |
| "grad_norm": 2.740398833115599, |
| "learning_rate": 4.971146369448957e-06, |
| "loss": 0.206, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.2429481346678799, |
| "grad_norm": 1.797962382814168, |
| "learning_rate": 4.971038005327532e-06, |
| "loss": 0.161, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.24340309372156507, |
| "grad_norm": 1.995555524717142, |
| "learning_rate": 4.970929439284039e-06, |
| "loss": 0.1808, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.24385805277525022, |
| "grad_norm": 2.1172122131281927, |
| "learning_rate": 4.970820671327351e-06, |
| "loss": 0.189, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.2443130118289354, |
| "grad_norm": 1.8090573461125563, |
| "learning_rate": 4.9707117014663565e-06, |
| "loss": 0.1522, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.24476797088262056, |
| "grad_norm": 1.8419040286839186, |
| "learning_rate": 4.97060252970996e-06, |
| "loss": 0.2046, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.24522292993630573, |
| "grad_norm": 2.268977185876009, |
| "learning_rate": 4.970493156067081e-06, |
| "loss": 0.2247, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.2456778889899909, |
| "grad_norm": 2.1193932268543314, |
| "learning_rate": 4.970383580546658e-06, |
| "loss": 0.159, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24613284804367608, |
| "grad_norm": 2.173218123449192, |
| "learning_rate": 4.970273803157645e-06, |
| "loss": 0.1851, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.24658780709736125, |
| "grad_norm": 1.9062873437813912, |
| "learning_rate": 4.970163823909013e-06, |
| "loss": 0.1431, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2470427661510464, |
| "grad_norm": 2.2598849919184936, |
| "learning_rate": 4.970053642809748e-06, |
| "loss": 0.1831, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.24749772520473157, |
| "grad_norm": 2.181038873894579, |
| "learning_rate": 4.969943259868853e-06, |
| "loss": 0.1924, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.24795268425841674, |
| "grad_norm": 1.8247639377537164, |
| "learning_rate": 4.969832675095351e-06, |
| "loss": 0.151, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2484076433121019, |
| "grad_norm": 1.9978374370947616, |
| "learning_rate": 4.969721888498275e-06, |
| "loss": 0.2343, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.24886260236578708, |
| "grad_norm": 2.0040249698932953, |
| "learning_rate": 4.96961090008668e-06, |
| "loss": 0.144, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.24931756141947226, |
| "grad_norm": 1.58491785029609, |
| "learning_rate": 4.969499709869635e-06, |
| "loss": 0.2297, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.24977252047315743, |
| "grad_norm": 1.9099928105281807, |
| "learning_rate": 4.969388317856225e-06, |
| "loss": 0.1643, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.2502274795268426, |
| "grad_norm": 2.506622826362881, |
| "learning_rate": 4.969276724055554e-06, |
| "loss": 0.2302, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.25068243858052774, |
| "grad_norm": 1.886779327578952, |
| "learning_rate": 4.969164928476741e-06, |
| "loss": 0.1305, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.25113739763421294, |
| "grad_norm": 2.193853436337964, |
| "learning_rate": 4.969052931128919e-06, |
| "loss": 0.1942, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2515923566878981, |
| "grad_norm": 1.696380624819296, |
| "learning_rate": 4.968940732021243e-06, |
| "loss": 0.1757, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.25204731574158323, |
| "grad_norm": 1.9308212907452063, |
| "learning_rate": 4.9688283311628795e-06, |
| "loss": 0.1953, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.25250227479526843, |
| "grad_norm": 2.2015952320833927, |
| "learning_rate": 4.968715728563014e-06, |
| "loss": 0.2188, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.2529572338489536, |
| "grad_norm": 1.8518723960249535, |
| "learning_rate": 4.968602924230847e-06, |
| "loss": 0.1439, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.2534121929026388, |
| "grad_norm": 3.211322079508386, |
| "learning_rate": 4.968489918175598e-06, |
| "loss": 0.1758, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.2538671519563239, |
| "grad_norm": 2.949982147696011, |
| "learning_rate": 4.9683767104065014e-06, |
| "loss": 0.1802, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.2543221110100091, |
| "grad_norm": 2.2092600896288697, |
| "learning_rate": 4.968263300932806e-06, |
| "loss": 0.1898, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.25477707006369427, |
| "grad_norm": 1.7931135921014567, |
| "learning_rate": 4.968149689763781e-06, |
| "loss": 0.1544, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2552320291173794, |
| "grad_norm": 1.7030840422806155, |
| "learning_rate": 4.968035876908708e-06, |
| "loss": 0.1639, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.2556869881710646, |
| "grad_norm": 1.8718848217622976, |
| "learning_rate": 4.967921862376889e-06, |
| "loss": 0.2434, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.25614194722474976, |
| "grad_norm": 2.2371670340279235, |
| "learning_rate": 4.9678076461776415e-06, |
| "loss": 0.2335, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.25659690627843496, |
| "grad_norm": 1.8393455682211606, |
| "learning_rate": 4.9676932283202965e-06, |
| "loss": 0.1499, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2570518653321201, |
| "grad_norm": 2.4142531578801387, |
| "learning_rate": 4.967578608814205e-06, |
| "loss": 0.1949, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.2575068243858053, |
| "grad_norm": 2.0642965255799735, |
| "learning_rate": 4.9674637876687345e-06, |
| "loss": 0.1858, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.25796178343949044, |
| "grad_norm": 1.2532956879082058, |
| "learning_rate": 4.967348764893265e-06, |
| "loss": 0.1256, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.2584167424931756, |
| "grad_norm": 2.1919850476807574, |
| "learning_rate": 4.967233540497197e-06, |
| "loss": 0.1554, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.2588717015468608, |
| "grad_norm": 2.1554599148015186, |
| "learning_rate": 4.967118114489946e-06, |
| "loss": 0.2131, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.25932666060054593, |
| "grad_norm": 1.7423629235975449, |
| "learning_rate": 4.967002486880944e-06, |
| "loss": 0.1488, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.25978161965423113, |
| "grad_norm": 2.7181048243188, |
| "learning_rate": 4.966886657679641e-06, |
| "loss": 0.2501, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.2602365787079163, |
| "grad_norm": 1.6717232797306434, |
| "learning_rate": 4.966770626895499e-06, |
| "loss": 0.1664, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.2606915377616015, |
| "grad_norm": 2.1767030645167162, |
| "learning_rate": 4.966654394538002e-06, |
| "loss": 0.1921, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.2611464968152866, |
| "grad_norm": 1.2471699088039891, |
| "learning_rate": 4.966537960616646e-06, |
| "loss": 0.0848, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.26160145586897177, |
| "grad_norm": 2.0523431055962402, |
| "learning_rate": 4.9664213251409486e-06, |
| "loss": 0.2032, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.26205641492265697, |
| "grad_norm": 1.9891959124678449, |
| "learning_rate": 4.9663044881204375e-06, |
| "loss": 0.1962, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2625113739763421, |
| "grad_norm": 2.0537761706631947, |
| "learning_rate": 4.9661874495646615e-06, |
| "loss": 0.1484, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2629663330300273, |
| "grad_norm": 1.7414302230897167, |
| "learning_rate": 4.9660702094831845e-06, |
| "loss": 0.1959, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.26342129208371245, |
| "grad_norm": 2.975109707839724, |
| "learning_rate": 4.965952767885587e-06, |
| "loss": 0.215, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.26387625113739765, |
| "grad_norm": 3.1187687651037126, |
| "learning_rate": 4.965835124781465e-06, |
| "loss": 0.2326, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2643312101910828, |
| "grad_norm": 1.7844067959067744, |
| "learning_rate": 4.965717280180432e-06, |
| "loss": 0.1616, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.26478616924476794, |
| "grad_norm": 1.981807539010698, |
| "learning_rate": 4.965599234092118e-06, |
| "loss": 0.1275, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.26524112829845314, |
| "grad_norm": 2.3418573353915964, |
| "learning_rate": 4.96548098652617e-06, |
| "loss": 0.2029, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.2656960873521383, |
| "grad_norm": 1.9501727944201128, |
| "learning_rate": 4.965362537492249e-06, |
| "loss": 0.1839, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.2661510464058235, |
| "grad_norm": 1.735679302563917, |
| "learning_rate": 4.9652438870000356e-06, |
| "loss": 0.185, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.26660600545950863, |
| "grad_norm": 1.3821743738209817, |
| "learning_rate": 4.965125035059224e-06, |
| "loss": 0.117, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.26706096451319383, |
| "grad_norm": 2.0524973617804196, |
| "learning_rate": 4.965005981679527e-06, |
| "loss": 0.1563, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.267515923566879, |
| "grad_norm": 2.2596791906895395, |
| "learning_rate": 4.964886726870673e-06, |
| "loss": 0.2165, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2679708826205642, |
| "grad_norm": 1.890432704603994, |
| "learning_rate": 4.964767270642407e-06, |
| "loss": 0.1884, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2684258416742493, |
| "grad_norm": 1.6149961858038402, |
| "learning_rate": 4.964647613004491e-06, |
| "loss": 0.1353, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.26888080072793447, |
| "grad_norm": 1.7116103543510561, |
| "learning_rate": 4.964527753966702e-06, |
| "loss": 0.1403, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.26933575978161967, |
| "grad_norm": 2.400216438390535, |
| "learning_rate": 4.964407693538834e-06, |
| "loss": 0.1712, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2697907188353048, |
| "grad_norm": 2.3569276822171012, |
| "learning_rate": 4.9642874317307e-06, |
| "loss": 0.2541, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.27024567788899, |
| "grad_norm": 1.3583233690609127, |
| "learning_rate": 4.964166968552124e-06, |
| "loss": 0.1881, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.27070063694267515, |
| "grad_norm": 2.041956563972623, |
| "learning_rate": 4.9640463040129525e-06, |
| "loss": 0.2013, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.27115559599636035, |
| "grad_norm": 2.1339742915351083, |
| "learning_rate": 4.963925438123044e-06, |
| "loss": 0.1486, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.2716105550500455, |
| "grad_norm": 2.3589739110244947, |
| "learning_rate": 4.963804370892276e-06, |
| "loss": 0.1671, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.27206551410373064, |
| "grad_norm": 2.041024711316621, |
| "learning_rate": 4.9636831023305405e-06, |
| "loss": 0.1773, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.27252047315741584, |
| "grad_norm": 1.6966086145560721, |
| "learning_rate": 4.963561632447748e-06, |
| "loss": 0.1536, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.272975432211101, |
| "grad_norm": 1.7956646862639238, |
| "learning_rate": 4.9634399612538255e-06, |
| "loss": 0.1665, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2734303912647862, |
| "grad_norm": 2.4039450245635816, |
| "learning_rate": 4.963318088758714e-06, |
| "loss": 0.186, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.27388535031847133, |
| "grad_norm": 2.573374996121704, |
| "learning_rate": 4.963196014972371e-06, |
| "loss": 0.181, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.27434030937215653, |
| "grad_norm": 2.3031446562333158, |
| "learning_rate": 4.963073739904775e-06, |
| "loss": 0.1896, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.2747952684258417, |
| "grad_norm": 2.9296704327439533, |
| "learning_rate": 4.962951263565915e-06, |
| "loss": 0.2168, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.2752502274795268, |
| "grad_norm": 2.3617995527569557, |
| "learning_rate": 4.962828585965801e-06, |
| "loss": 0.1815, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.275705186533212, |
| "grad_norm": 2.1546354601106956, |
| "learning_rate": 4.962705707114457e-06, |
| "loss": 0.1658, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.27616014558689717, |
| "grad_norm": 1.9872717123396686, |
| "learning_rate": 4.962582627021923e-06, |
| "loss": 0.1885, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.27661510464058237, |
| "grad_norm": 2.3902452238732077, |
| "learning_rate": 4.962459345698258e-06, |
| "loss": 0.1934, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2770700636942675, |
| "grad_norm": 2.6613012891469334, |
| "learning_rate": 4.962335863153537e-06, |
| "loss": 0.2002, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2775250227479527, |
| "grad_norm": 1.5351443788779375, |
| "learning_rate": 4.962212179397847e-06, |
| "loss": 0.1524, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.27797998180163785, |
| "grad_norm": 1.8149311646504362, |
| "learning_rate": 4.962088294441299e-06, |
| "loss": 0.1091, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.278434940855323, |
| "grad_norm": 1.6923849341814876, |
| "learning_rate": 4.9619642082940135e-06, |
| "loss": 0.2258, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.2788898999090082, |
| "grad_norm": 2.300540388195039, |
| "learning_rate": 4.9618399209661305e-06, |
| "loss": 0.1544, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.27934485896269334, |
| "grad_norm": 2.2841254960366375, |
| "learning_rate": 4.961715432467807e-06, |
| "loss": 0.1537, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.27979981801637854, |
| "grad_norm": 2.1565671846973764, |
| "learning_rate": 4.961590742809216e-06, |
| "loss": 0.1818, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2802547770700637, |
| "grad_norm": 1.4848634903553593, |
| "learning_rate": 4.961465852000545e-06, |
| "loss": 0.1379, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.2807097361237489, |
| "grad_norm": 2.886386939634882, |
| "learning_rate": 4.961340760052001e-06, |
| "loss": 0.2137, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.28116469517743403, |
| "grad_norm": 2.12342498143493, |
| "learning_rate": 4.961215466973806e-06, |
| "loss": 0.1609, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2816196542311192, |
| "grad_norm": 1.6272561537794945, |
| "learning_rate": 4.961089972776197e-06, |
| "loss": 0.1704, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.2820746132848044, |
| "grad_norm": 2.177134514236334, |
| "learning_rate": 4.9609642774694285e-06, |
| "loss": 0.1844, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2825295723384895, |
| "grad_norm": 2.0060823387879396, |
| "learning_rate": 4.960838381063774e-06, |
| "loss": 0.1639, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.2829845313921747, |
| "grad_norm": 2.0396430448753047, |
| "learning_rate": 4.960712283569521e-06, |
| "loss": 0.1832, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.28343949044585987, |
| "grad_norm": 2.1577816713540345, |
| "learning_rate": 4.960585984996971e-06, |
| "loss": 0.1795, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.28389444949954507, |
| "grad_norm": 2.1362683979802997, |
| "learning_rate": 4.960459485356447e-06, |
| "loss": 0.2442, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2843494085532302, |
| "grad_norm": 1.7854499328292173, |
| "learning_rate": 4.960332784658285e-06, |
| "loss": 0.1461, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.28480436760691535, |
| "grad_norm": 2.1713858060672218, |
| "learning_rate": 4.960205882912839e-06, |
| "loss": 0.1743, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.28525932666060055, |
| "grad_norm": 2.143444693552156, |
| "learning_rate": 4.9600787801304785e-06, |
| "loss": 0.2084, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 1.8522682250986475, |
| "learning_rate": 4.959951476321589e-06, |
| "loss": 0.1946, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.2861692447679709, |
| "grad_norm": 1.5982375639062243, |
| "learning_rate": 4.959823971496575e-06, |
| "loss": 0.1772, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.28662420382165604, |
| "grad_norm": 1.8898991951503732, |
| "learning_rate": 4.959696265665853e-06, |
| "loss": 0.1804, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.28707916287534124, |
| "grad_norm": 1.9040168480447408, |
| "learning_rate": 4.959568358839862e-06, |
| "loss": 0.1258, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2875341219290264, |
| "grad_norm": 1.8463510477056075, |
| "learning_rate": 4.95944025102905e-06, |
| "loss": 0.1414, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.28798908098271153, |
| "grad_norm": 2.3179780847953055, |
| "learning_rate": 4.959311942243888e-06, |
| "loss": 0.2031, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.28844404003639673, |
| "grad_norm": 1.724174452868963, |
| "learning_rate": 4.95918343249486e-06, |
| "loss": 0.1377, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2888989990900819, |
| "grad_norm": 1.7281757474887716, |
| "learning_rate": 4.959054721792469e-06, |
| "loss": 0.2074, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2893539581437671, |
| "grad_norm": 1.749321520269807, |
| "learning_rate": 4.958925810147231e-06, |
| "loss": 0.104, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.2898089171974522, |
| "grad_norm": 1.8727315308914843, |
| "learning_rate": 4.958796697569679e-06, |
| "loss": 0.1325, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.2902638762511374, |
| "grad_norm": 2.800322102970211, |
| "learning_rate": 4.958667384070365e-06, |
| "loss": 0.1583, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.29071883530482256, |
| "grad_norm": 1.7822844611072806, |
| "learning_rate": 4.958537869659855e-06, |
| "loss": 0.2057, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.2911737943585077, |
| "grad_norm": 2.745456907200946, |
| "learning_rate": 4.958408154348734e-06, |
| "loss": 0.1605, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2916287534121929, |
| "grad_norm": 2.233718920040976, |
| "learning_rate": 4.9582782381476e-06, |
| "loss": 0.1996, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.29208371246587805, |
| "grad_norm": 2.2702620107271567, |
| "learning_rate": 4.958148121067071e-06, |
| "loss": 0.2927, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.29253867151956325, |
| "grad_norm": 2.150177934476292, |
| "learning_rate": 4.9580178031177775e-06, |
| "loss": 0.1949, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.2929936305732484, |
| "grad_norm": 1.4333466510228, |
| "learning_rate": 4.9578872843103694e-06, |
| "loss": 0.1481, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.2934485896269336, |
| "grad_norm": 1.8148623461294702, |
| "learning_rate": 4.957756564655513e-06, |
| "loss": 0.1736, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.29390354868061874, |
| "grad_norm": 1.8574102016300988, |
| "learning_rate": 4.957625644163888e-06, |
| "loss": 0.1893, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.2943585077343039, |
| "grad_norm": 2.0598318825039694, |
| "learning_rate": 4.957494522846194e-06, |
| "loss": 0.1511, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.2948134667879891, |
| "grad_norm": 1.8631745332908447, |
| "learning_rate": 4.957363200713146e-06, |
| "loss": 0.2403, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.29526842584167423, |
| "grad_norm": 1.934970676847201, |
| "learning_rate": 4.957231677775475e-06, |
| "loss": 0.1782, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.29572338489535943, |
| "grad_norm": 2.162311103918465, |
| "learning_rate": 4.957099954043928e-06, |
| "loss": 0.1894, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2961783439490446, |
| "grad_norm": 1.3750044807559711, |
| "learning_rate": 4.956968029529269e-06, |
| "loss": 0.1948, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.2966333030027298, |
| "grad_norm": 1.7571318861097756, |
| "learning_rate": 4.956835904242277e-06, |
| "loss": 0.1715, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.2970882620564149, |
| "grad_norm": 1.964585802559125, |
| "learning_rate": 4.9567035781937516e-06, |
| "loss": 0.1103, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.29754322111010006, |
| "grad_norm": 1.9039563589608381, |
| "learning_rate": 4.9565710513945024e-06, |
| "loss": 0.1668, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.29799818016378526, |
| "grad_norm": 1.837562224402912, |
| "learning_rate": 4.956438323855362e-06, |
| "loss": 0.129, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2984531392174704, |
| "grad_norm": 1.7630804326653742, |
| "learning_rate": 4.956305395587174e-06, |
| "loss": 0.1906, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.2989080982711556, |
| "grad_norm": 1.910058814511253, |
| "learning_rate": 4.956172266600802e-06, |
| "loss": 0.124, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.29936305732484075, |
| "grad_norm": 2.2105167684195757, |
| "learning_rate": 4.956038936907125e-06, |
| "loss": 0.1593, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.29981801637852595, |
| "grad_norm": 2.253935685217962, |
| "learning_rate": 4.955905406517036e-06, |
| "loss": 0.1581, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.3002729754322111, |
| "grad_norm": 2.5313373580598424, |
| "learning_rate": 4.95577167544145e-06, |
| "loss": 0.1813, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.30072793448589624, |
| "grad_norm": 2.406722714489674, |
| "learning_rate": 4.955637743691291e-06, |
| "loss": 0.1633, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.30118289353958144, |
| "grad_norm": 2.4238606966439487, |
| "learning_rate": 4.955503611277506e-06, |
| "loss": 0.1917, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.3016378525932666, |
| "grad_norm": 2.6124988273739893, |
| "learning_rate": 4.955369278211055e-06, |
| "loss": 0.2094, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.3020928116469518, |
| "grad_norm": 2.976761995472576, |
| "learning_rate": 4.955234744502914e-06, |
| "loss": 0.1909, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.30254777070063693, |
| "grad_norm": 2.0362637594213053, |
| "learning_rate": 4.955100010164079e-06, |
| "loss": 0.1968, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.30300272975432213, |
| "grad_norm": 1.8717270849356715, |
| "learning_rate": 4.954965075205557e-06, |
| "loss": 0.1612, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.3034576888080073, |
| "grad_norm": 2.4021794148968953, |
| "learning_rate": 4.9548299396383755e-06, |
| "loss": 0.2181, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.3039126478616925, |
| "grad_norm": 2.1388957119580367, |
| "learning_rate": 4.954694603473578e-06, |
| "loss": 0.1692, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.3043676069153776, |
| "grad_norm": 2.1096028848377855, |
| "learning_rate": 4.954559066722222e-06, |
| "loss": 0.204, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.30482256596906276, |
| "grad_norm": 1.9629095047383018, |
| "learning_rate": 4.954423329395385e-06, |
| "loss": 0.1997, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.30527752502274796, |
| "grad_norm": 1.9442418917085225, |
| "learning_rate": 4.954287391504156e-06, |
| "loss": 0.1944, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.3057324840764331, |
| "grad_norm": 2.229272182184504, |
| "learning_rate": 4.9541512530596455e-06, |
| "loss": 0.2029, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.3061874431301183, |
| "grad_norm": 2.080623617831735, |
| "learning_rate": 4.954014914072978e-06, |
| "loss": 0.1881, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.30664240218380345, |
| "grad_norm": 1.3909729404333016, |
| "learning_rate": 4.9538783745552934e-06, |
| "loss": 0.1446, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.30709736123748865, |
| "grad_norm": 2.5204656795127303, |
| "learning_rate": 4.95374163451775e-06, |
| "loss": 0.2251, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3075523202911738, |
| "grad_norm": 2.8855471273631585, |
| "learning_rate": 4.953604693971521e-06, |
| "loss": 0.1832, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.30800727934485894, |
| "grad_norm": 2.415452060739297, |
| "learning_rate": 4.953467552927798e-06, |
| "loss": 0.188, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.30846223839854414, |
| "grad_norm": 3.3704774970598215, |
| "learning_rate": 4.9533302113977845e-06, |
| "loss": 0.2644, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.3089171974522293, |
| "grad_norm": 3.0964762790397233, |
| "learning_rate": 4.9531926693927055e-06, |
| "loss": 0.1891, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.3093721565059145, |
| "grad_norm": 2.3617921935041646, |
| "learning_rate": 4.953054926923801e-06, |
| "loss": 0.1791, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.30982711555959963, |
| "grad_norm": 2.1015907363587836, |
| "learning_rate": 4.952916984002325e-06, |
| "loss": 0.154, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.31028207461328483, |
| "grad_norm": 2.5909443467360944, |
| "learning_rate": 4.95277884063955e-06, |
| "loss": 0.1758, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.31073703366697, |
| "grad_norm": 1.9161503782177982, |
| "learning_rate": 4.952640496846766e-06, |
| "loss": 0.1883, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.3111919927206551, |
| "grad_norm": 2.2723462143890187, |
| "learning_rate": 4.952501952635276e-06, |
| "loss": 0.1813, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.3116469517743403, |
| "grad_norm": 1.5779544920569608, |
| "learning_rate": 4.952363208016402e-06, |
| "loss": 0.183, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.31210191082802546, |
| "grad_norm": 2.3768180703064834, |
| "learning_rate": 4.952224263001482e-06, |
| "loss": 0.139, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.31255686988171066, |
| "grad_norm": 1.7932474239157794, |
| "learning_rate": 4.952085117601868e-06, |
| "loss": 0.1698, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.3130118289353958, |
| "grad_norm": 2.1109045834120157, |
| "learning_rate": 4.951945771828933e-06, |
| "loss": 0.2482, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.313466787989081, |
| "grad_norm": 1.6399625432585407, |
| "learning_rate": 4.951806225694061e-06, |
| "loss": 0.1809, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.31392174704276615, |
| "grad_norm": 2.610023079079643, |
| "learning_rate": 4.951666479208658e-06, |
| "loss": 0.1964, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3143767060964513, |
| "grad_norm": 2.574945774612913, |
| "learning_rate": 4.951526532384141e-06, |
| "loss": 0.1827, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.3148316651501365, |
| "grad_norm": 1.8594925752682625, |
| "learning_rate": 4.951386385231946e-06, |
| "loss": 0.1674, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.31528662420382164, |
| "grad_norm": 1.6516261883969883, |
| "learning_rate": 4.951246037763528e-06, |
| "loss": 0.1342, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.31574158325750684, |
| "grad_norm": 1.8608275979712807, |
| "learning_rate": 4.9511054899903524e-06, |
| "loss": 0.1657, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.316196542311192, |
| "grad_norm": 2.3555359764575545, |
| "learning_rate": 4.950964741923905e-06, |
| "loss": 0.2022, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.3166515013648772, |
| "grad_norm": 1.782390866267192, |
| "learning_rate": 4.950823793575688e-06, |
| "loss": 0.1517, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.31710646041856233, |
| "grad_norm": 2.001725151610439, |
| "learning_rate": 4.950682644957218e-06, |
| "loss": 0.1745, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.3175614194722475, |
| "grad_norm": 2.6801559375906585, |
| "learning_rate": 4.9505412960800295e-06, |
| "loss": 0.2196, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.3180163785259327, |
| "grad_norm": 2.0435969601142583, |
| "learning_rate": 4.950399746955673e-06, |
| "loss": 0.1823, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.3184713375796178, |
| "grad_norm": 3.135001392998494, |
| "learning_rate": 4.950257997595716e-06, |
| "loss": 0.1932, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.318926296633303, |
| "grad_norm": 2.3774677479838484, |
| "learning_rate": 4.950116048011739e-06, |
| "loss": 0.1905, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.31938125568698816, |
| "grad_norm": 1.8516165333723722, |
| "learning_rate": 4.949973898215344e-06, |
| "loss": 0.1503, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.31983621474067336, |
| "grad_norm": 2.343561651154435, |
| "learning_rate": 4.949831548218146e-06, |
| "loss": 0.1441, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.3202911737943585, |
| "grad_norm": 1.8104402427163653, |
| "learning_rate": 4.949688998031777e-06, |
| "loss": 0.1558, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.32074613284804365, |
| "grad_norm": 2.144991489680201, |
| "learning_rate": 4.949546247667886e-06, |
| "loss": 0.1305, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.32120109190172885, |
| "grad_norm": 1.8279214675219737, |
| "learning_rate": 4.949403297138137e-06, |
| "loss": 0.1336, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.321656050955414, |
| "grad_norm": 2.3674168986503767, |
| "learning_rate": 4.949260146454212e-06, |
| "loss": 0.1764, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.3221110100090992, |
| "grad_norm": 1.6483989227538907, |
| "learning_rate": 4.94911679562781e-06, |
| "loss": 0.159, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.32256596906278434, |
| "grad_norm": 2.038187279529794, |
| "learning_rate": 4.948973244670643e-06, |
| "loss": 0.1485, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.32302092811646954, |
| "grad_norm": 2.41476196989692, |
| "learning_rate": 4.948829493594441e-06, |
| "loss": 0.2091, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3234758871701547, |
| "grad_norm": 2.222757795496577, |
| "learning_rate": 4.9486855424109524e-06, |
| "loss": 0.1503, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.32393084622383983, |
| "grad_norm": 1.850862512986181, |
| "learning_rate": 4.948541391131939e-06, |
| "loss": 0.1505, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.32438580527752503, |
| "grad_norm": 2.3940666777003137, |
| "learning_rate": 4.948397039769181e-06, |
| "loss": 0.1578, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.3248407643312102, |
| "grad_norm": 2.0487809609035113, |
| "learning_rate": 4.948252488334474e-06, |
| "loss": 0.1327, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.3252957233848954, |
| "grad_norm": 1.4541195656219779, |
| "learning_rate": 4.948107736839629e-06, |
| "loss": 0.1994, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.3257506824385805, |
| "grad_norm": 1.6302160419859526, |
| "learning_rate": 4.947962785296476e-06, |
| "loss": 0.1665, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.3262056414922657, |
| "grad_norm": 2.761516841692211, |
| "learning_rate": 4.9478176337168594e-06, |
| "loss": 0.1622, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.32666060054595086, |
| "grad_norm": 2.2365611293446865, |
| "learning_rate": 4.9476722821126386e-06, |
| "loss": 0.2191, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.327115559599636, |
| "grad_norm": 2.267629869433733, |
| "learning_rate": 4.9475267304956945e-06, |
| "loss": 0.1608, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.3275705186533212, |
| "grad_norm": 2.8370903035030812, |
| "learning_rate": 4.947380978877917e-06, |
| "loss": 0.2059, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.32802547770700635, |
| "grad_norm": 1.7629045012494435, |
| "learning_rate": 4.947235027271219e-06, |
| "loss": 0.1644, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.32848043676069155, |
| "grad_norm": 1.7514209523720954, |
| "learning_rate": 4.9470888756875265e-06, |
| "loss": 0.1443, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.3289353958143767, |
| "grad_norm": 1.996409560436198, |
| "learning_rate": 4.946942524138782e-06, |
| "loss": 0.1589, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.3293903548680619, |
| "grad_norm": 1.9499597954033492, |
| "learning_rate": 4.946795972636944e-06, |
| "loss": 0.1856, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.32984531392174704, |
| "grad_norm": 1.6935756093459424, |
| "learning_rate": 4.94664922119399e-06, |
| "loss": 0.1866, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.3303002729754322, |
| "grad_norm": 2.2750870343308818, |
| "learning_rate": 4.94650226982191e-06, |
| "loss": 0.1894, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.3307552320291174, |
| "grad_norm": 1.7773678651655342, |
| "learning_rate": 4.9463551185327115e-06, |
| "loss": 0.2623, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.33121019108280253, |
| "grad_norm": 2.3870710697996302, |
| "learning_rate": 4.946207767338422e-06, |
| "loss": 0.1708, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.33166515013648773, |
| "grad_norm": 1.8969974183881673, |
| "learning_rate": 4.9460602162510805e-06, |
| "loss": 0.1758, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.3321201091901729, |
| "grad_norm": 1.9352911073022974, |
| "learning_rate": 4.945912465282744e-06, |
| "loss": 0.1199, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3325750682438581, |
| "grad_norm": 1.8878423547131853, |
| "learning_rate": 4.945764514445487e-06, |
| "loss": 0.2117, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.3330300272975432, |
| "grad_norm": 2.575730274178936, |
| "learning_rate": 4.9456163637513986e-06, |
| "loss": 0.2044, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.33348498635122836, |
| "grad_norm": 2.7338168638267066, |
| "learning_rate": 4.945468013212585e-06, |
| "loss": 0.2238, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.33393994540491356, |
| "grad_norm": 2.1060940314978702, |
| "learning_rate": 4.945319462841169e-06, |
| "loss": 0.1727, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.3343949044585987, |
| "grad_norm": 1.8942361555213085, |
| "learning_rate": 4.94517071264929e-06, |
| "loss": 0.2168, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.3348498635122839, |
| "grad_norm": 2.455108985215525, |
| "learning_rate": 4.945021762649102e-06, |
| "loss": 0.1525, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.33530482256596905, |
| "grad_norm": 1.8066289984722876, |
| "learning_rate": 4.9448726128527776e-06, |
| "loss": 0.2014, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.33575978161965425, |
| "grad_norm": 2.142750327891088, |
| "learning_rate": 4.944723263272504e-06, |
| "loss": 0.2155, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.3362147406733394, |
| "grad_norm": 2.0611633591265814, |
| "learning_rate": 4.944573713920485e-06, |
| "loss": 0.19, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.33666969972702454, |
| "grad_norm": 1.5473212219148849, |
| "learning_rate": 4.944423964808943e-06, |
| "loss": 0.1829, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.33712465878070974, |
| "grad_norm": 1.7792548638263834, |
| "learning_rate": 4.944274015950113e-06, |
| "loss": 0.1563, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.3375796178343949, |
| "grad_norm": 2.27825782486859, |
| "learning_rate": 4.944123867356249e-06, |
| "loss": 0.1462, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.3380345768880801, |
| "grad_norm": 2.544197436295867, |
| "learning_rate": 4.943973519039619e-06, |
| "loss": 0.23, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.33848953594176523, |
| "grad_norm": 2.1742558484011836, |
| "learning_rate": 4.943822971012511e-06, |
| "loss": 0.1382, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.33894449499545043, |
| "grad_norm": 1.986842417086239, |
| "learning_rate": 4.943672223287226e-06, |
| "loss": 0.1751, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.3393994540491356, |
| "grad_norm": 2.0458092345288144, |
| "learning_rate": 4.9435212758760815e-06, |
| "loss": 0.2008, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.3398544131028208, |
| "grad_norm": 1.3986293648043162, |
| "learning_rate": 4.943370128791413e-06, |
| "loss": 0.1209, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.3403093721565059, |
| "grad_norm": 1.7739101505934052, |
| "learning_rate": 4.943218782045574e-06, |
| "loss": 0.1651, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.34076433121019106, |
| "grad_norm": 2.0878587765611867, |
| "learning_rate": 4.943067235650927e-06, |
| "loss": 0.1705, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.34121929026387626, |
| "grad_norm": 1.7446405914839491, |
| "learning_rate": 4.942915489619859e-06, |
| "loss": 0.1604, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3416742493175614, |
| "grad_norm": 2.165396057344333, |
| "learning_rate": 4.9427635439647704e-06, |
| "loss": 0.1923, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.3421292083712466, |
| "grad_norm": 1.7166625815039147, |
| "learning_rate": 4.942611398698075e-06, |
| "loss": 0.145, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.34258416742493175, |
| "grad_norm": 1.3978926196223211, |
| "learning_rate": 4.942459053832208e-06, |
| "loss": 0.1246, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.34303912647861695, |
| "grad_norm": 1.5203589407780953, |
| "learning_rate": 4.942306509379617e-06, |
| "loss": 0.1472, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.3434940855323021, |
| "grad_norm": 1.6513608457469287, |
| "learning_rate": 4.942153765352767e-06, |
| "loss": 0.1408, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.34394904458598724, |
| "grad_norm": 1.8035254782552455, |
| "learning_rate": 4.94200082176414e-06, |
| "loss": 0.1474, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.34440400363967244, |
| "grad_norm": 2.1335404521767414, |
| "learning_rate": 4.941847678626234e-06, |
| "loss": 0.1755, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.3448589626933576, |
| "grad_norm": 1.9408426816261404, |
| "learning_rate": 4.941694335951563e-06, |
| "loss": 0.2154, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.3453139217470428, |
| "grad_norm": 1.749049542240047, |
| "learning_rate": 4.9415407937526575e-06, |
| "loss": 0.1482, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.34576888080072793, |
| "grad_norm": 2.2747218478213598, |
| "learning_rate": 4.9413870520420635e-06, |
| "loss": 0.2213, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.34622383985441313, |
| "grad_norm": 1.9679998520100659, |
| "learning_rate": 4.941233110832346e-06, |
| "loss": 0.1482, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.3466787989080983, |
| "grad_norm": 2.7634133318079135, |
| "learning_rate": 4.941078970136082e-06, |
| "loss": 0.1649, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.3471337579617834, |
| "grad_norm": 1.4323163769051608, |
| "learning_rate": 4.940924629965869e-06, |
| "loss": 0.152, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3475887170154686, |
| "grad_norm": 2.269381697045094, |
| "learning_rate": 4.940770090334319e-06, |
| "loss": 0.1446, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.34804367606915376, |
| "grad_norm": 1.8723783038369444, |
| "learning_rate": 4.940615351254059e-06, |
| "loss": 0.1142, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.34849863512283896, |
| "grad_norm": 1.8076648915776874, |
| "learning_rate": 4.940460412737734e-06, |
| "loss": 0.1944, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.3489535941765241, |
| "grad_norm": 2.080159914413928, |
| "learning_rate": 4.940305274798005e-06, |
| "loss": 0.1582, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.3494085532302093, |
| "grad_norm": 2.330746693235809, |
| "learning_rate": 4.940149937447549e-06, |
| "loss": 0.2007, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.34986351228389445, |
| "grad_norm": 1.536246049438816, |
| "learning_rate": 4.939994400699061e-06, |
| "loss": 0.1408, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3503184713375796, |
| "grad_norm": 2.2894795215614994, |
| "learning_rate": 4.939838664565248e-06, |
| "loss": 0.1837, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3507734303912648, |
| "grad_norm": 1.6850122967374852, |
| "learning_rate": 4.939682729058839e-06, |
| "loss": 0.1289, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.35122838944494994, |
| "grad_norm": 1.446339812351698, |
| "learning_rate": 4.939526594192574e-06, |
| "loss": 0.1329, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.35168334849863514, |
| "grad_norm": 1.776973239663882, |
| "learning_rate": 4.939370259979213e-06, |
| "loss": 0.1178, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.3521383075523203, |
| "grad_norm": 2.818513132709455, |
| "learning_rate": 4.9392137264315295e-06, |
| "loss": 0.2364, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3525932666060055, |
| "grad_norm": 1.6041796316256967, |
| "learning_rate": 4.939056993562316e-06, |
| "loss": 0.2278, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.35304822565969063, |
| "grad_norm": 2.268295214561187, |
| "learning_rate": 4.9389000613843805e-06, |
| "loss": 0.1499, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3535031847133758, |
| "grad_norm": 2.531973358561036, |
| "learning_rate": 4.938742929910546e-06, |
| "loss": 0.1743, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.353958143767061, |
| "grad_norm": 1.3677758044070074, |
| "learning_rate": 4.938585599153652e-06, |
| "loss": 0.1351, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.3544131028207461, |
| "grad_norm": 2.4047975606277947, |
| "learning_rate": 4.938428069126555e-06, |
| "loss": 0.1951, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.3548680618744313, |
| "grad_norm": 1.6598587480853697, |
| "learning_rate": 4.9382703398421285e-06, |
| "loss": 0.1602, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.35532302092811646, |
| "grad_norm": 2.501614606596268, |
| "learning_rate": 4.938112411313261e-06, |
| "loss": 0.193, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.35577797998180166, |
| "grad_norm": 1.7808472248973335, |
| "learning_rate": 4.937954283552858e-06, |
| "loss": 0.1322, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.3562329390354868, |
| "grad_norm": 2.397821173092958, |
| "learning_rate": 4.93779595657384e-06, |
| "loss": 0.1819, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.35668789808917195, |
| "grad_norm": 2.0407668064122495, |
| "learning_rate": 4.937637430389145e-06, |
| "loss": 0.1722, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 1.9792096843409923, |
| "learning_rate": 4.937478705011729e-06, |
| "loss": 0.1349, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.3575978161965423, |
| "grad_norm": 1.5581979975977567, |
| "learning_rate": 4.937319780454559e-06, |
| "loss": 0.1891, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3580527752502275, |
| "grad_norm": 1.3563862115066228, |
| "learning_rate": 4.937160656730625e-06, |
| "loss": 0.1622, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.35850773430391264, |
| "grad_norm": 1.7874560137459294, |
| "learning_rate": 4.9370013338529274e-06, |
| "loss": 0.1606, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.35896269335759784, |
| "grad_norm": 1.695354030268494, |
| "learning_rate": 4.936841811834486e-06, |
| "loss": 0.1725, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.359417652411283, |
| "grad_norm": 1.5018417297722055, |
| "learning_rate": 4.936682090688337e-06, |
| "loss": 0.1568, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.35987261146496813, |
| "grad_norm": 2.3008919876499276, |
| "learning_rate": 4.936522170427531e-06, |
| "loss": 0.1607, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.36032757051865333, |
| "grad_norm": 2.145424436631978, |
| "learning_rate": 4.936362051065136e-06, |
| "loss": 0.136, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3607825295723385, |
| "grad_norm": 2.023227990902717, |
| "learning_rate": 4.936201732614238e-06, |
| "loss": 0.1568, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.3612374886260237, |
| "grad_norm": 1.8119576330565363, |
| "learning_rate": 4.9360412150879355e-06, |
| "loss": 0.1291, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.3616924476797088, |
| "grad_norm": 1.904733745689391, |
| "learning_rate": 4.935880498499346e-06, |
| "loss": 0.1262, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.362147406733394, |
| "grad_norm": 2.1050139123506235, |
| "learning_rate": 4.935719582861604e-06, |
| "loss": 0.2027, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.36260236578707916, |
| "grad_norm": 1.5866289163873395, |
| "learning_rate": 4.935558468187855e-06, |
| "loss": 0.1713, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.3630573248407643, |
| "grad_norm": 2.266843952674795, |
| "learning_rate": 4.935397154491268e-06, |
| "loss": 0.1881, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.3635122838944495, |
| "grad_norm": 1.9774458028018125, |
| "learning_rate": 4.935235641785023e-06, |
| "loss": 0.1837, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.36396724294813465, |
| "grad_norm": 2.1853087729094796, |
| "learning_rate": 4.935073930082319e-06, |
| "loss": 0.176, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.36442220200181985, |
| "grad_norm": 2.525766342273085, |
| "learning_rate": 4.93491201939637e-06, |
| "loss": 0.2015, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.364877161055505, |
| "grad_norm": 2.5055378214905843, |
| "learning_rate": 4.934749909740408e-06, |
| "loss": 0.1961, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3653321201091902, |
| "grad_norm": 2.0645024314881035, |
| "learning_rate": 4.934587601127677e-06, |
| "loss": 0.1644, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.36578707916287534, |
| "grad_norm": 2.0158906472533373, |
| "learning_rate": 4.934425093571442e-06, |
| "loss": 0.1911, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.3662420382165605, |
| "grad_norm": 2.284162498710454, |
| "learning_rate": 4.934262387084984e-06, |
| "loss": 0.2008, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.3666969972702457, |
| "grad_norm": 2.0973583334570547, |
| "learning_rate": 4.934099481681595e-06, |
| "loss": 0.1557, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.36715195632393083, |
| "grad_norm": 2.2021201797945356, |
| "learning_rate": 4.933936377374589e-06, |
| "loss": 0.1524, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.36760691537761603, |
| "grad_norm": 1.806808877742582, |
| "learning_rate": 4.933773074177293e-06, |
| "loss": 0.1738, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.3680618744313012, |
| "grad_norm": 2.2345290767527386, |
| "learning_rate": 4.933609572103053e-06, |
| "loss": 0.1442, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.3685168334849864, |
| "grad_norm": 1.9706491037079354, |
| "learning_rate": 4.933445871165229e-06, |
| "loss": 0.2354, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3689717925386715, |
| "grad_norm": 2.404773980417632, |
| "learning_rate": 4.933281971377197e-06, |
| "loss": 0.1719, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.36942675159235666, |
| "grad_norm": 1.720683846457796, |
| "learning_rate": 4.933117872752352e-06, |
| "loss": 0.1914, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.36988171064604186, |
| "grad_norm": 2.532410934524191, |
| "learning_rate": 4.932953575304102e-06, |
| "loss": 0.2144, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.370336669699727, |
| "grad_norm": 5.0403677379252425, |
| "learning_rate": 4.932789079045873e-06, |
| "loss": 0.2595, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.3707916287534122, |
| "grad_norm": 2.0578633523076437, |
| "learning_rate": 4.932624383991106e-06, |
| "loss": 0.1739, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.37124658780709735, |
| "grad_norm": 1.9986709520957122, |
| "learning_rate": 4.9324594901532605e-06, |
| "loss": 0.1838, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.37170154686078255, |
| "grad_norm": 1.7217394600458333, |
| "learning_rate": 4.93229439754581e-06, |
| "loss": 0.1579, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3721565059144677, |
| "grad_norm": 2.1321573080305813, |
| "learning_rate": 4.932129106182246e-06, |
| "loss": 0.1926, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.37261146496815284, |
| "grad_norm": 2.793277438622436, |
| "learning_rate": 4.931963616076075e-06, |
| "loss": 0.2136, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.37306642402183804, |
| "grad_norm": 1.7394149868487567, |
| "learning_rate": 4.93179792724082e-06, |
| "loss": 0.128, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3735213830755232, |
| "grad_norm": 1.82657006763275, |
| "learning_rate": 4.9316320396900195e-06, |
| "loss": 0.17, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3739763421292084, |
| "grad_norm": 1.823894210494748, |
| "learning_rate": 4.9314659534372305e-06, |
| "loss": 0.1981, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.37443130118289353, |
| "grad_norm": 2.3539272175568775, |
| "learning_rate": 4.931299668496024e-06, |
| "loss": 0.1576, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.37488626023657873, |
| "grad_norm": 2.5070798015414666, |
| "learning_rate": 4.931133184879988e-06, |
| "loss": 0.1886, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.37534121929026387, |
| "grad_norm": 2.008848059538202, |
| "learning_rate": 4.930966502602727e-06, |
| "loss": 0.1605, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.37579617834394907, |
| "grad_norm": 2.407030934613122, |
| "learning_rate": 4.930799621677862e-06, |
| "loss": 0.1802, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.3762511373976342, |
| "grad_norm": 1.8420833153352183, |
| "learning_rate": 4.93063254211903e-06, |
| "loss": 0.1641, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.37670609645131936, |
| "grad_norm": 2.159279850858488, |
| "learning_rate": 4.930465263939882e-06, |
| "loss": 0.1669, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.37716105550500456, |
| "grad_norm": 2.5834296367648477, |
| "learning_rate": 4.9302977871540894e-06, |
| "loss": 0.2047, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.3776160145586897, |
| "grad_norm": 1.6914011736844907, |
| "learning_rate": 4.930130111775336e-06, |
| "loss": 0.1153, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3780709736123749, |
| "grad_norm": 1.9730937065718759, |
| "learning_rate": 4.9299622378173245e-06, |
| "loss": 0.1321, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.37852593266606005, |
| "grad_norm": 4.287590804185868, |
| "learning_rate": 4.929794165293773e-06, |
| "loss": 0.2942, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.37898089171974525, |
| "grad_norm": 1.3332137290220585, |
| "learning_rate": 4.9296258942184145e-06, |
| "loss": 0.1089, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.3794358507734304, |
| "grad_norm": 1.5962116643063975, |
| "learning_rate": 4.929457424605e-06, |
| "loss": 0.1214, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.37989080982711554, |
| "grad_norm": 2.022957256898634, |
| "learning_rate": 4.929288756467296e-06, |
| "loss": 0.1853, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.38034576888080074, |
| "grad_norm": 2.4282570688213863, |
| "learning_rate": 4.929119889819086e-06, |
| "loss": 0.1873, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.3808007279344859, |
| "grad_norm": 2.9395172179458, |
| "learning_rate": 4.928950824674169e-06, |
| "loss": 0.2634, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3812556869881711, |
| "grad_norm": 2.2127049960140335, |
| "learning_rate": 4.928781561046359e-06, |
| "loss": 0.1663, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3817106460418562, |
| "grad_norm": 2.536562760970294, |
| "learning_rate": 4.928612098949488e-06, |
| "loss": 0.2011, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3821656050955414, |
| "grad_norm": 2.1855699037821514, |
| "learning_rate": 4.9284424383974026e-06, |
| "loss": 0.1794, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.38262056414922657, |
| "grad_norm": 1.6332101956979397, |
| "learning_rate": 4.928272579403969e-06, |
| "loss": 0.1279, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3830755232029117, |
| "grad_norm": 1.5663751127122882, |
| "learning_rate": 4.928102521983067e-06, |
| "loss": 0.1985, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3835304822565969, |
| "grad_norm": 2.4747913159024195, |
| "learning_rate": 4.9279322661485906e-06, |
| "loss": 0.244, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.38398544131028206, |
| "grad_norm": 1.9419499604147055, |
| "learning_rate": 4.927761811914455e-06, |
| "loss": 0.1996, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.38444040036396726, |
| "grad_norm": 1.8790570447198083, |
| "learning_rate": 4.927591159294587e-06, |
| "loss": 0.1746, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3848953594176524, |
| "grad_norm": 3.2586686346278992, |
| "learning_rate": 4.927420308302933e-06, |
| "loss": 0.2099, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.3853503184713376, |
| "grad_norm": 1.8912381154957223, |
| "learning_rate": 4.927249258953454e-06, |
| "loss": 0.2159, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.38580527752502275, |
| "grad_norm": 2.5636879906209242, |
| "learning_rate": 4.927078011260126e-06, |
| "loss": 0.2142, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.3862602365787079, |
| "grad_norm": 2.2557014215101794, |
| "learning_rate": 4.926906565236943e-06, |
| "loss": 0.2158, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3867151956323931, |
| "grad_norm": 2.0433651062149076, |
| "learning_rate": 4.926734920897916e-06, |
| "loss": 0.1564, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.38717015468607824, |
| "grad_norm": 1.1448398063326757, |
| "learning_rate": 4.926563078257071e-06, |
| "loss": 0.1274, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.38762511373976344, |
| "grad_norm": 1.5601081736798879, |
| "learning_rate": 4.926391037328448e-06, |
| "loss": 0.1742, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.3880800727934486, |
| "grad_norm": 1.735106713842307, |
| "learning_rate": 4.926218798126108e-06, |
| "loss": 0.17, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3885350318471338, |
| "grad_norm": 1.8524828246659681, |
| "learning_rate": 4.926046360664124e-06, |
| "loss": 0.1359, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.3889899909008189, |
| "grad_norm": 1.8327900649742344, |
| "learning_rate": 4.925873724956588e-06, |
| "loss": 0.1276, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.38944494995450407, |
| "grad_norm": 1.7997603613849789, |
| "learning_rate": 4.9257008910176065e-06, |
| "loss": 0.236, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.38989990900818927, |
| "grad_norm": 2.1973741478380893, |
| "learning_rate": 4.925527858861302e-06, |
| "loss": 0.1935, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3903548680618744, |
| "grad_norm": 2.086365440068575, |
| "learning_rate": 4.925354628501814e-06, |
| "loss": 0.1652, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.3908098271155596, |
| "grad_norm": 1.8116013889379734, |
| "learning_rate": 4.925181199953299e-06, |
| "loss": 0.1612, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.39126478616924476, |
| "grad_norm": 1.9247913507109833, |
| "learning_rate": 4.9250075732299285e-06, |
| "loss": 0.1588, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.39171974522292996, |
| "grad_norm": 2.514293103428901, |
| "learning_rate": 4.92483374834589e-06, |
| "loss": 0.19, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3921747042766151, |
| "grad_norm": 2.0316288184050024, |
| "learning_rate": 4.9246597253153884e-06, |
| "loss": 0.1831, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.39262966333030025, |
| "grad_norm": 1.5754846974100747, |
| "learning_rate": 4.924485504152644e-06, |
| "loss": 0.1466, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.39308462238398545, |
| "grad_norm": 2.1731555902481685, |
| "learning_rate": 4.924311084871892e-06, |
| "loss": 0.1937, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.3935395814376706, |
| "grad_norm": 1.5966819404904389, |
| "learning_rate": 4.924136467487387e-06, |
| "loss": 0.1251, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3939945404913558, |
| "grad_norm": 1.8663994781934827, |
| "learning_rate": 4.923961652013397e-06, |
| "loss": 0.1523, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.39444949954504094, |
| "grad_norm": 2.1002789601399257, |
| "learning_rate": 4.923786638464207e-06, |
| "loss": 0.2129, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.39490445859872614, |
| "grad_norm": 2.081418128383539, |
| "learning_rate": 4.9236114268541196e-06, |
| "loss": 0.1437, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.3953594176524113, |
| "grad_norm": 2.447658119106072, |
| "learning_rate": 4.923436017197451e-06, |
| "loss": 0.201, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.3958143767060964, |
| "grad_norm": 1.7750379508150516, |
| "learning_rate": 4.923260409508535e-06, |
| "loss": 0.1282, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3962693357597816, |
| "grad_norm": 1.6418670453366244, |
| "learning_rate": 4.9230846038017214e-06, |
| "loss": 0.2087, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.39672429481346677, |
| "grad_norm": 1.7770417360691049, |
| "learning_rate": 4.922908600091378e-06, |
| "loss": 0.1372, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.39717925386715197, |
| "grad_norm": 1.690148135895664, |
| "learning_rate": 4.9227323983918835e-06, |
| "loss": 0.1855, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.3976342129208371, |
| "grad_norm": 1.5404851420453596, |
| "learning_rate": 4.922555998717639e-06, |
| "loss": 0.1398, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.3980891719745223, |
| "grad_norm": 2.1706268320484328, |
| "learning_rate": 4.922379401083058e-06, |
| "loss": 0.1486, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.39854413102820746, |
| "grad_norm": 3.0077672507475786, |
| "learning_rate": 4.922202605502573e-06, |
| "loss": 0.2077, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3989990900818926, |
| "grad_norm": 1.5486893349846256, |
| "learning_rate": 4.922025611990629e-06, |
| "loss": 0.1604, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3994540491355778, |
| "grad_norm": 1.8667533652947603, |
| "learning_rate": 4.92184842056169e-06, |
| "loss": 0.1722, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.39990900818926295, |
| "grad_norm": 2.289002791626951, |
| "learning_rate": 4.921671031230235e-06, |
| "loss": 0.1647, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.40036396724294815, |
| "grad_norm": 1.8286186193347604, |
| "learning_rate": 4.921493444010759e-06, |
| "loss": 0.1773, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4008189262966333, |
| "grad_norm": 1.8147441438330003, |
| "learning_rate": 4.921315658917774e-06, |
| "loss": 0.1711, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.4012738853503185, |
| "grad_norm": 2.00913911322474, |
| "learning_rate": 4.921137675965809e-06, |
| "loss": 0.1263, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.40172884440400364, |
| "grad_norm": 1.3862791101345426, |
| "learning_rate": 4.920959495169406e-06, |
| "loss": 0.1465, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.4021838034576888, |
| "grad_norm": 2.4187567327639234, |
| "learning_rate": 4.920781116543126e-06, |
| "loss": 0.2198, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.402638762511374, |
| "grad_norm": 1.6465776945830464, |
| "learning_rate": 4.920602540101546e-06, |
| "loss": 0.1309, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.4030937215650591, |
| "grad_norm": 2.6312019573375682, |
| "learning_rate": 4.920423765859257e-06, |
| "loss": 0.1948, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.4035486806187443, |
| "grad_norm": 1.9940911601496167, |
| "learning_rate": 4.920244793830869e-06, |
| "loss": 0.1657, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.40400363967242947, |
| "grad_norm": 1.9526243984241491, |
| "learning_rate": 4.920065624031006e-06, |
| "loss": 0.1616, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.40445859872611467, |
| "grad_norm": 1.5338098697837441, |
| "learning_rate": 4.919886256474309e-06, |
| "loss": 0.1512, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.4049135577797998, |
| "grad_norm": 2.0468687722376773, |
| "learning_rate": 4.919706691175435e-06, |
| "loss": 0.1701, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.40536851683348496, |
| "grad_norm": 2.200436787943407, |
| "learning_rate": 4.919526928149058e-06, |
| "loss": 0.2293, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.40582347588717016, |
| "grad_norm": 1.8050882174330405, |
| "learning_rate": 4.919346967409867e-06, |
| "loss": 0.1602, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.4062784349408553, |
| "grad_norm": 1.7135594043707498, |
| "learning_rate": 4.919166808972567e-06, |
| "loss": 0.2064, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.4067333939945405, |
| "grad_norm": 2.612056409341394, |
| "learning_rate": 4.918986452851881e-06, |
| "loss": 0.1668, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.40718835304822565, |
| "grad_norm": 2.016673285347467, |
| "learning_rate": 4.918805899062545e-06, |
| "loss": 0.1925, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.40764331210191085, |
| "grad_norm": 1.4000022926360023, |
| "learning_rate": 4.9186251476193146e-06, |
| "loss": 0.1592, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.408098271155596, |
| "grad_norm": 1.60492731991447, |
| "learning_rate": 4.918444198536959e-06, |
| "loss": 0.1731, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.40855323020928114, |
| "grad_norm": 1.673902690478855, |
| "learning_rate": 4.918263051830267e-06, |
| "loss": 0.1228, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.40900818926296634, |
| "grad_norm": 2.6755237129572484, |
| "learning_rate": 4.918081707514037e-06, |
| "loss": 0.1409, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.4094631483166515, |
| "grad_norm": 1.9078463274657658, |
| "learning_rate": 4.917900165603091e-06, |
| "loss": 0.1276, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4099181073703367, |
| "grad_norm": 2.234681815409533, |
| "learning_rate": 4.9177184261122624e-06, |
| "loss": 0.1652, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.4103730664240218, |
| "grad_norm": 2.839831167960225, |
| "learning_rate": 4.917536489056402e-06, |
| "loss": 0.1798, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.410828025477707, |
| "grad_norm": 2.010867770048541, |
| "learning_rate": 4.9173543544503775e-06, |
| "loss": 0.1154, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.41128298453139217, |
| "grad_norm": 2.08218098114304, |
| "learning_rate": 4.917172022309072e-06, |
| "loss": 0.1455, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.41173794358507737, |
| "grad_norm": 1.7302162150410665, |
| "learning_rate": 4.916989492647385e-06, |
| "loss": 0.1193, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.4121929026387625, |
| "grad_norm": 1.5485580925696725, |
| "learning_rate": 4.916806765480231e-06, |
| "loss": 0.0922, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.41264786169244766, |
| "grad_norm": 1.7263185607767098, |
| "learning_rate": 4.9166238408225416e-06, |
| "loss": 0.2167, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.41310282074613286, |
| "grad_norm": 1.9178320379998328, |
| "learning_rate": 4.916440718689267e-06, |
| "loss": 0.1554, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.413557779799818, |
| "grad_norm": 1.8197306174687815, |
| "learning_rate": 4.916257399095369e-06, |
| "loss": 0.1474, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.4140127388535032, |
| "grad_norm": 1.7449499320119561, |
| "learning_rate": 4.916073882055827e-06, |
| "loss": 0.1327, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.41446769790718835, |
| "grad_norm": 2.4422880124371646, |
| "learning_rate": 4.91589016758564e-06, |
| "loss": 0.1937, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.41492265696087355, |
| "grad_norm": 1.6511138034689814, |
| "learning_rate": 4.915706255699817e-06, |
| "loss": 0.1363, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.4153776160145587, |
| "grad_norm": 2.143275165439444, |
| "learning_rate": 4.915522146413389e-06, |
| "loss": 0.2735, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.41583257506824384, |
| "grad_norm": 1.924782534095729, |
| "learning_rate": 4.9153378397413985e-06, |
| "loss": 0.1751, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.41628753412192904, |
| "grad_norm": 1.951438348175618, |
| "learning_rate": 4.915153335698908e-06, |
| "loss": 0.1619, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.4167424931756142, |
| "grad_norm": 2.2127088657857548, |
| "learning_rate": 4.914968634300994e-06, |
| "loss": 0.2147, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.4171974522292994, |
| "grad_norm": 1.6061838128612729, |
| "learning_rate": 4.914783735562748e-06, |
| "loss": 0.1499, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.4176524112829845, |
| "grad_norm": 1.4285312675375041, |
| "learning_rate": 4.914598639499281e-06, |
| "loss": 0.1583, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.4181073703366697, |
| "grad_norm": 1.6360040253886021, |
| "learning_rate": 4.914413346125717e-06, |
| "loss": 0.1066, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.41856232939035487, |
| "grad_norm": 2.343895109900456, |
| "learning_rate": 4.914227855457199e-06, |
| "loss": 0.1823, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.41901728844404, |
| "grad_norm": 2.318188728357057, |
| "learning_rate": 4.914042167508881e-06, |
| "loss": 0.1437, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.4194722474977252, |
| "grad_norm": 2.3202387804341336, |
| "learning_rate": 4.9138562822959416e-06, |
| "loss": 0.1589, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.41992720655141036, |
| "grad_norm": 2.608072279082345, |
| "learning_rate": 4.913670199833566e-06, |
| "loss": 0.1851, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.42038216560509556, |
| "grad_norm": 2.181253773511138, |
| "learning_rate": 4.913483920136961e-06, |
| "loss": 0.1756, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.4208371246587807, |
| "grad_norm": 2.211521150780038, |
| "learning_rate": 4.91329744322135e-06, |
| "loss": 0.1732, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.4212920837124659, |
| "grad_norm": 1.812598878243348, |
| "learning_rate": 4.913110769101971e-06, |
| "loss": 0.166, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.42174704276615105, |
| "grad_norm": 2.205776388483361, |
| "learning_rate": 4.912923897794077e-06, |
| "loss": 0.1771, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.4222020018198362, |
| "grad_norm": 1.423655928174165, |
| "learning_rate": 4.912736829312938e-06, |
| "loss": 0.1489, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.4226569608735214, |
| "grad_norm": 1.831805612119293, |
| "learning_rate": 4.912549563673842e-06, |
| "loss": 0.168, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.42311191992720654, |
| "grad_norm": 1.4699738850406474, |
| "learning_rate": 4.912362100892091e-06, |
| "loss": 0.1674, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.42356687898089174, |
| "grad_norm": 1.9047547244636083, |
| "learning_rate": 4.912174440983002e-06, |
| "loss": 0.1639, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.4240218380345769, |
| "grad_norm": 2.0520314286066843, |
| "learning_rate": 4.911986583961912e-06, |
| "loss": 0.2138, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.4244767970882621, |
| "grad_norm": 2.5542601480975278, |
| "learning_rate": 4.91179852984417e-06, |
| "loss": 0.2276, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.4249317561419472, |
| "grad_norm": 1.5302053494447614, |
| "learning_rate": 4.911610278645144e-06, |
| "loss": 0.1489, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.42538671519563237, |
| "grad_norm": 1.7414787617118297, |
| "learning_rate": 4.911421830380217e-06, |
| "loss": 0.1182, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.42584167424931757, |
| "grad_norm": 1.7429292851594573, |
| "learning_rate": 4.911233185064788e-06, |
| "loss": 0.2064, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.4262966333030027, |
| "grad_norm": 2.3105951171285968, |
| "learning_rate": 4.911044342714272e-06, |
| "loss": 0.1405, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.4267515923566879, |
| "grad_norm": 1.779382452074537, |
| "learning_rate": 4.9108553033440995e-06, |
| "loss": 0.1291, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.42720655141037306, |
| "grad_norm": 1.7957846625134024, |
| "learning_rate": 4.91066606696972e-06, |
| "loss": 0.1647, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.42766151046405826, |
| "grad_norm": 2.3261521372348057, |
| "learning_rate": 4.910476633606597e-06, |
| "loss": 0.1927, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4281164695177434, |
| "grad_norm": 1.9153006743556071, |
| "learning_rate": 4.9102870032702075e-06, |
| "loss": 0.1584, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 1.8033286854373174, |
| "learning_rate": 4.910097175976049e-06, |
| "loss": 0.1825, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.42902638762511375, |
| "grad_norm": 2.8388348591880597, |
| "learning_rate": 4.909907151739634e-06, |
| "loss": 0.2113, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.4294813466787989, |
| "grad_norm": 2.6244899475003813, |
| "learning_rate": 4.909716930576489e-06, |
| "loss": 0.1704, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.4299363057324841, |
| "grad_norm": 2.112585064442849, |
| "learning_rate": 4.909526512502158e-06, |
| "loss": 0.1589, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.43039126478616924, |
| "grad_norm": 2.289068427554651, |
| "learning_rate": 4.9093358975322025e-06, |
| "loss": 0.1714, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.43084622383985444, |
| "grad_norm": 2.5327374827374065, |
| "learning_rate": 4.909145085682198e-06, |
| "loss": 0.2278, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.4313011828935396, |
| "grad_norm": 2.1519696726150315, |
| "learning_rate": 4.908954076967737e-06, |
| "loss": 0.1561, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.4317561419472247, |
| "grad_norm": 2.3965497202736485, |
| "learning_rate": 4.908762871404427e-06, |
| "loss": 0.2721, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.4322111010009099, |
| "grad_norm": 1.7303730946554432, |
| "learning_rate": 4.908571469007893e-06, |
| "loss": 0.1886, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.43266606005459507, |
| "grad_norm": 1.867974683826286, |
| "learning_rate": 4.908379869793776e-06, |
| "loss": 0.1621, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.43312101910828027, |
| "grad_norm": 2.0573077802321134, |
| "learning_rate": 4.908188073777732e-06, |
| "loss": 0.1897, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.4335759781619654, |
| "grad_norm": 1.4532292026282405, |
| "learning_rate": 4.9079960809754334e-06, |
| "loss": 0.1729, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.4340309372156506, |
| "grad_norm": 1.962539816890548, |
| "learning_rate": 4.90780389140257e-06, |
| "loss": 0.1301, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.43448589626933576, |
| "grad_norm": 2.4468234331381677, |
| "learning_rate": 4.907611505074846e-06, |
| "loss": 0.1709, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.4349408553230209, |
| "grad_norm": 2.666497869750462, |
| "learning_rate": 4.907418922007983e-06, |
| "loss": 0.1628, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.4353958143767061, |
| "grad_norm": 2.2137035827801226, |
| "learning_rate": 4.907226142217717e-06, |
| "loss": 0.1353, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.43585077343039125, |
| "grad_norm": 2.572062185697332, |
| "learning_rate": 4.9070331657198015e-06, |
| "loss": 0.1745, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.43630573248407645, |
| "grad_norm": 2.405655176153194, |
| "learning_rate": 4.906839992530006e-06, |
| "loss": 0.2171, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.4367606915377616, |
| "grad_norm": 1.836795075502022, |
| "learning_rate": 4.906646622664115e-06, |
| "loss": 0.168, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.4372156505914468, |
| "grad_norm": 2.166035033805183, |
| "learning_rate": 4.906453056137931e-06, |
| "loss": 0.1223, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.43767060964513194, |
| "grad_norm": 2.072717194766617, |
| "learning_rate": 4.90625929296727e-06, |
| "loss": 0.2248, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.4381255686988171, |
| "grad_norm": 1.8024426189806846, |
| "learning_rate": 4.9060653331679665e-06, |
| "loss": 0.1956, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.4385805277525023, |
| "grad_norm": 1.8368071839220441, |
| "learning_rate": 4.90587117675587e-06, |
| "loss": 0.1601, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.4390354868061874, |
| "grad_norm": 1.6602305730067044, |
| "learning_rate": 4.905676823746846e-06, |
| "loss": 0.1433, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.4394904458598726, |
| "grad_norm": 1.2991365263950634, |
| "learning_rate": 4.9054822741567745e-06, |
| "loss": 0.1361, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.43994540491355777, |
| "grad_norm": 2.1130749414647463, |
| "learning_rate": 4.905287528001555e-06, |
| "loss": 0.145, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.44040036396724297, |
| "grad_norm": 1.8646843859502422, |
| "learning_rate": 4.905092585297102e-06, |
| "loss": 0.1685, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.4408553230209281, |
| "grad_norm": 2.1749982799245693, |
| "learning_rate": 4.904897446059344e-06, |
| "loss": 0.1621, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.44131028207461326, |
| "grad_norm": 2.334038135097662, |
| "learning_rate": 4.9047021103042255e-06, |
| "loss": 0.1486, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.44176524112829846, |
| "grad_norm": 2.600358800525879, |
| "learning_rate": 4.904506578047712e-06, |
| "loss": 0.1603, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.4422202001819836, |
| "grad_norm": 2.0684781990731436, |
| "learning_rate": 4.9043108493057785e-06, |
| "loss": 0.1708, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.4426751592356688, |
| "grad_norm": 2.0255722402037852, |
| "learning_rate": 4.904114924094421e-06, |
| "loss": 0.1608, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.44313011828935395, |
| "grad_norm": 2.1489494601016434, |
| "learning_rate": 4.903918802429648e-06, |
| "loss": 0.1829, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.44358507734303915, |
| "grad_norm": 1.787442464619014, |
| "learning_rate": 4.9037224843274875e-06, |
| "loss": 0.2043, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.4440400363967243, |
| "grad_norm": 2.343300114421743, |
| "learning_rate": 4.903525969803979e-06, |
| "loss": 0.2699, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.44449499545040944, |
| "grad_norm": 1.865479334461903, |
| "learning_rate": 4.903329258875184e-06, |
| "loss": 0.1195, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.44494995450409464, |
| "grad_norm": 1.9494468159837486, |
| "learning_rate": 4.903132351557175e-06, |
| "loss": 0.1465, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.4454049135577798, |
| "grad_norm": 2.502406890844037, |
| "learning_rate": 4.902935247866043e-06, |
| "loss": 0.1378, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.445859872611465, |
| "grad_norm": 2.036041143606274, |
| "learning_rate": 4.9027379478178935e-06, |
| "loss": 0.1483, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4463148316651501, |
| "grad_norm": 1.3077265314607576, |
| "learning_rate": 4.90254045142885e-06, |
| "loss": 0.0969, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.4467697907188353, |
| "grad_norm": 2.0861883133616828, |
| "learning_rate": 4.90234275871505e-06, |
| "loss": 0.1392, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.44722474977252047, |
| "grad_norm": 2.5093809608609274, |
| "learning_rate": 4.9021448696926486e-06, |
| "loss": 0.1743, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.44767970882620567, |
| "grad_norm": 1.575875578739379, |
| "learning_rate": 4.901946784377816e-06, |
| "loss": 0.176, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.4481346678798908, |
| "grad_norm": 1.5356501213932587, |
| "learning_rate": 4.90174850278674e-06, |
| "loss": 0.1484, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.44858962693357596, |
| "grad_norm": 1.823863525681817, |
| "learning_rate": 4.901550024935623e-06, |
| "loss": 0.1854, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.44904458598726116, |
| "grad_norm": 1.3758352509840184, |
| "learning_rate": 4.901351350840683e-06, |
| "loss": 0.1349, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.4494995450409463, |
| "grad_norm": 2.0693941858838762, |
| "learning_rate": 4.901152480518155e-06, |
| "loss": 0.1663, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.4499545040946315, |
| "grad_norm": 1.8873877263165615, |
| "learning_rate": 4.900953413984289e-06, |
| "loss": 0.1692, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.45040946314831665, |
| "grad_norm": 1.4776284855591897, |
| "learning_rate": 4.900754151255353e-06, |
| "loss": 0.1971, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.45086442220200185, |
| "grad_norm": 2.5191235263020912, |
| "learning_rate": 4.9005546923476305e-06, |
| "loss": 0.1998, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.451319381255687, |
| "grad_norm": 1.8842919796768522, |
| "learning_rate": 4.9003550372774185e-06, |
| "loss": 0.1399, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.45177434030937214, |
| "grad_norm": 2.063855552138974, |
| "learning_rate": 4.900155186061033e-06, |
| "loss": 0.1526, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.45222929936305734, |
| "grad_norm": 2.367561517511786, |
| "learning_rate": 4.8999551387148045e-06, |
| "loss": 0.1599, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.4526842584167425, |
| "grad_norm": 1.898969473092516, |
| "learning_rate": 4.89975489525508e-06, |
| "loss": 0.1902, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.4531392174704277, |
| "grad_norm": 1.8129578397632808, |
| "learning_rate": 4.899554455698223e-06, |
| "loss": 0.1693, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.4535941765241128, |
| "grad_norm": 1.8699568695488074, |
| "learning_rate": 4.899353820060612e-06, |
| "loss": 0.1581, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.454049135577798, |
| "grad_norm": 1.7239980533667612, |
| "learning_rate": 4.899152988358643e-06, |
| "loss": 0.2098, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.45450409463148317, |
| "grad_norm": 1.8097885043847937, |
| "learning_rate": 4.898951960608725e-06, |
| "loss": 0.1715, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.4549590536851683, |
| "grad_norm": 1.8523553420273773, |
| "learning_rate": 4.8987507368272865e-06, |
| "loss": 0.16, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4554140127388535, |
| "grad_norm": 2.0000127792736904, |
| "learning_rate": 4.898549317030772e-06, |
| "loss": 0.1632, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.45586897179253866, |
| "grad_norm": 1.862660132529776, |
| "learning_rate": 4.898347701235637e-06, |
| "loss": 0.1465, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.45632393084622386, |
| "grad_norm": 1.7361264176719555, |
| "learning_rate": 4.89814588945836e-06, |
| "loss": 0.1869, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.456778889899909, |
| "grad_norm": 1.978059539176156, |
| "learning_rate": 4.89794388171543e-06, |
| "loss": 0.1659, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.4572338489535942, |
| "grad_norm": 2.2207578653400906, |
| "learning_rate": 4.897741678023356e-06, |
| "loss": 0.1939, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.45768880800727935, |
| "grad_norm": 2.103052599683253, |
| "learning_rate": 4.897539278398659e-06, |
| "loss": 0.1812, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.4581437670609645, |
| "grad_norm": 2.546107708354434, |
| "learning_rate": 4.8973366828578804e-06, |
| "loss": 0.2054, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.4585987261146497, |
| "grad_norm": 1.9562513052044435, |
| "learning_rate": 4.897133891417574e-06, |
| "loss": 0.1693, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.45905368516833484, |
| "grad_norm": 2.5635809078172103, |
| "learning_rate": 4.896930904094311e-06, |
| "loss": 0.1689, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.45950864422202004, |
| "grad_norm": 2.401849938137445, |
| "learning_rate": 4.896727720904679e-06, |
| "loss": 0.1731, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.4599636032757052, |
| "grad_norm": 1.3521913269323886, |
| "learning_rate": 4.896524341865282e-06, |
| "loss": 0.0961, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.4604185623293904, |
| "grad_norm": 1.773432887084503, |
| "learning_rate": 4.896320766992737e-06, |
| "loss": 0.1875, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.4608735213830755, |
| "grad_norm": 1.7325101393470637, |
| "learning_rate": 4.896116996303682e-06, |
| "loss": 0.1534, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.46132848043676067, |
| "grad_norm": 1.8711913127871913, |
| "learning_rate": 4.895913029814766e-06, |
| "loss": 0.1476, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.46178343949044587, |
| "grad_norm": 1.98409281551755, |
| "learning_rate": 4.895708867542658e-06, |
| "loss": 0.2099, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.462238398544131, |
| "grad_norm": 1.835471556122073, |
| "learning_rate": 4.895504509504039e-06, |
| "loss": 0.141, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.4626933575978162, |
| "grad_norm": 1.7126193650485422, |
| "learning_rate": 4.89529995571561e-06, |
| "loss": 0.1569, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.46314831665150136, |
| "grad_norm": 1.5756476134085153, |
| "learning_rate": 4.895095206194086e-06, |
| "loss": 0.1599, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.46360327570518656, |
| "grad_norm": 1.6305833927339777, |
| "learning_rate": 4.894890260956198e-06, |
| "loss": 0.1266, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.4640582347588717, |
| "grad_norm": 2.8915138386415107, |
| "learning_rate": 4.8946851200186925e-06, |
| "loss": 0.1983, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.46451319381255685, |
| "grad_norm": 2.2750148686402873, |
| "learning_rate": 4.894479783398334e-06, |
| "loss": 0.2161, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.46496815286624205, |
| "grad_norm": 1.901328095270706, |
| "learning_rate": 4.8942742511119004e-06, |
| "loss": 0.2033, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4654231119199272, |
| "grad_norm": 3.2947250275495747, |
| "learning_rate": 4.894068523176187e-06, |
| "loss": 0.258, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.4658780709736124, |
| "grad_norm": 1.9323682134416058, |
| "learning_rate": 4.8938625996080056e-06, |
| "loss": 0.1788, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.46633303002729753, |
| "grad_norm": 1.6185621650651296, |
| "learning_rate": 4.893656480424184e-06, |
| "loss": 0.1651, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.46678798908098273, |
| "grad_norm": 2.2508459323489, |
| "learning_rate": 4.893450165641564e-06, |
| "loss": 0.1558, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.4672429481346679, |
| "grad_norm": 1.213648480980067, |
| "learning_rate": 4.893243655277005e-06, |
| "loss": 0.1507, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.467697907188353, |
| "grad_norm": 2.1023746671368513, |
| "learning_rate": 4.893036949347383e-06, |
| "loss": 0.1721, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.4681528662420382, |
| "grad_norm": 2.278948598326534, |
| "learning_rate": 4.892830047869588e-06, |
| "loss": 0.1884, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.46860782529572337, |
| "grad_norm": 2.9197430954616683, |
| "learning_rate": 4.892622950860527e-06, |
| "loss": 0.1741, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.46906278434940857, |
| "grad_norm": 1.2879852782085728, |
| "learning_rate": 4.892415658337123e-06, |
| "loss": 0.1342, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.4695177434030937, |
| "grad_norm": 2.0909020173909973, |
| "learning_rate": 4.892208170316317e-06, |
| "loss": 0.1907, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.4699727024567789, |
| "grad_norm": 2.0408884938878957, |
| "learning_rate": 4.892000486815062e-06, |
| "loss": 0.1553, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.47042766151046406, |
| "grad_norm": 1.8109063186030263, |
| "learning_rate": 4.891792607850328e-06, |
| "loss": 0.154, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.4708826205641492, |
| "grad_norm": 2.2630304525012126, |
| "learning_rate": 4.891584533439104e-06, |
| "loss": 0.2079, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.4713375796178344, |
| "grad_norm": 1.539332632871382, |
| "learning_rate": 4.891376263598393e-06, |
| "loss": 0.1432, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.47179253867151955, |
| "grad_norm": 1.7957849792578133, |
| "learning_rate": 4.891167798345213e-06, |
| "loss": 0.1511, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.47224749772520475, |
| "grad_norm": 2.741729093401805, |
| "learning_rate": 4.890959137696598e-06, |
| "loss": 0.2263, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.4727024567788899, |
| "grad_norm": 1.7348700401664916, |
| "learning_rate": 4.890750281669601e-06, |
| "loss": 0.1298, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.4731574158325751, |
| "grad_norm": 1.7001320886150055, |
| "learning_rate": 4.890541230281287e-06, |
| "loss": 0.1168, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.47361237488626023, |
| "grad_norm": 1.8500860192841622, |
| "learning_rate": 4.8903319835487385e-06, |
| "loss": 0.1644, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.4740673339399454, |
| "grad_norm": 1.8582195164199888, |
| "learning_rate": 4.890122541489056e-06, |
| "loss": 0.2426, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.4745222929936306, |
| "grad_norm": 1.2923171102528221, |
| "learning_rate": 4.889912904119353e-06, |
| "loss": 0.165, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.4749772520473157, |
| "grad_norm": 2.2842684826182778, |
| "learning_rate": 4.88970307145676e-06, |
| "loss": 0.1853, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.4754322111010009, |
| "grad_norm": 1.8277244050495731, |
| "learning_rate": 4.889493043518423e-06, |
| "loss": 0.2139, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.47588717015468607, |
| "grad_norm": 2.021142913969995, |
| "learning_rate": 4.889282820321506e-06, |
| "loss": 0.1312, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.47634212920837127, |
| "grad_norm": 1.8896361944599618, |
| "learning_rate": 4.889072401883187e-06, |
| "loss": 0.224, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.4767970882620564, |
| "grad_norm": 1.552692831396847, |
| "learning_rate": 4.88886178822066e-06, |
| "loss": 0.1772, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.47725204731574156, |
| "grad_norm": 1.8340975280187983, |
| "learning_rate": 4.888650979351136e-06, |
| "loss": 0.1702, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.47770700636942676, |
| "grad_norm": 2.1830448534590547, |
| "learning_rate": 4.888439975291841e-06, |
| "loss": 0.2436, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4781619654231119, |
| "grad_norm": 1.6348401870707816, |
| "learning_rate": 4.888228776060017e-06, |
| "loss": 0.1926, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.4786169244767971, |
| "grad_norm": 1.7078513906709398, |
| "learning_rate": 4.888017381672923e-06, |
| "loss": 0.1601, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.47907188353048225, |
| "grad_norm": 2.240745720528745, |
| "learning_rate": 4.887805792147832e-06, |
| "loss": 0.1766, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.47952684258416745, |
| "grad_norm": 2.428487112277442, |
| "learning_rate": 4.887594007502036e-06, |
| "loss": 0.1789, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.4799818016378526, |
| "grad_norm": 2.1865518873285645, |
| "learning_rate": 4.887382027752838e-06, |
| "loss": 0.199, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.48043676069153773, |
| "grad_norm": 1.898629261883439, |
| "learning_rate": 4.8871698529175636e-06, |
| "loss": 0.1665, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.48089171974522293, |
| "grad_norm": 1.7954561311174488, |
| "learning_rate": 4.886957483013549e-06, |
| "loss": 0.1812, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.4813466787989081, |
| "grad_norm": 1.8221114015246185, |
| "learning_rate": 4.886744918058149e-06, |
| "loss": 0.2063, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.4818016378525933, |
| "grad_norm": 2.7770081846232544, |
| "learning_rate": 4.886532158068732e-06, |
| "loss": 0.2088, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.4822565969062784, |
| "grad_norm": 2.1115268373643477, |
| "learning_rate": 4.886319203062683e-06, |
| "loss": 0.1444, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4827115559599636, |
| "grad_norm": 2.071172717908372, |
| "learning_rate": 4.886106053057408e-06, |
| "loss": 0.1661, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.48316651501364877, |
| "grad_norm": 2.2607152479196104, |
| "learning_rate": 4.88589270807032e-06, |
| "loss": 0.1859, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.48362147406733397, |
| "grad_norm": 1.692360966817902, |
| "learning_rate": 4.885679168118855e-06, |
| "loss": 0.1864, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.4840764331210191, |
| "grad_norm": 1.7710659763891554, |
| "learning_rate": 4.8854654332204635e-06, |
| "loss": 0.1646, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.48453139217470426, |
| "grad_norm": 1.9598218562809384, |
| "learning_rate": 4.885251503392607e-06, |
| "loss": 0.1402, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.48498635122838946, |
| "grad_norm": 2.1204508830789988, |
| "learning_rate": 4.885037378652771e-06, |
| "loss": 0.1891, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.4854413102820746, |
| "grad_norm": 2.3589815655452653, |
| "learning_rate": 4.884823059018451e-06, |
| "loss": 0.1555, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.4858962693357598, |
| "grad_norm": 2.5392202747520245, |
| "learning_rate": 4.88460854450716e-06, |
| "loss": 0.192, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.48635122838944495, |
| "grad_norm": 2.3012454866986833, |
| "learning_rate": 4.884393835136427e-06, |
| "loss": 0.2073, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.48680618744313015, |
| "grad_norm": 1.7363057272250617, |
| "learning_rate": 4.884178930923799e-06, |
| "loss": 0.1909, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4872611464968153, |
| "grad_norm": 2.5682234171797638, |
| "learning_rate": 4.883963831886834e-06, |
| "loss": 0.2505, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.48771610555050043, |
| "grad_norm": 2.1085560059563435, |
| "learning_rate": 4.8837485380431115e-06, |
| "loss": 0.1713, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.48817106460418563, |
| "grad_norm": 1.8882533184752026, |
| "learning_rate": 4.883533049410223e-06, |
| "loss": 0.1602, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.4886260236578708, |
| "grad_norm": 2.9321383026683985, |
| "learning_rate": 4.8833173660057785e-06, |
| "loss": 0.2554, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.489080982711556, |
| "grad_norm": 2.531195131930091, |
| "learning_rate": 4.8831014878474004e-06, |
| "loss": 0.1797, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4895359417652411, |
| "grad_norm": 1.9044052944051695, |
| "learning_rate": 4.882885414952732e-06, |
| "loss": 0.1738, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.4899909008189263, |
| "grad_norm": 1.8646399638677997, |
| "learning_rate": 4.882669147339428e-06, |
| "loss": 0.123, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.49044585987261147, |
| "grad_norm": 1.6244921355768605, |
| "learning_rate": 4.882452685025161e-06, |
| "loss": 0.1207, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.4909008189262966, |
| "grad_norm": 2.6418064094824625, |
| "learning_rate": 4.88223602802762e-06, |
| "loss": 0.1651, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.4913557779799818, |
| "grad_norm": 1.7280688771591737, |
| "learning_rate": 4.882019176364509e-06, |
| "loss": 0.1654, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.49181073703366696, |
| "grad_norm": 1.688117545561323, |
| "learning_rate": 4.881802130053548e-06, |
| "loss": 0.1779, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.49226569608735216, |
| "grad_norm": 1.9343631314892762, |
| "learning_rate": 4.881584889112473e-06, |
| "loss": 0.1378, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.4927206551410373, |
| "grad_norm": 2.0445775883054194, |
| "learning_rate": 4.881367453559036e-06, |
| "loss": 0.1945, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.4931756141947225, |
| "grad_norm": 2.0708720739438835, |
| "learning_rate": 4.881149823411005e-06, |
| "loss": 0.155, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.49363057324840764, |
| "grad_norm": 1.8016295656127952, |
| "learning_rate": 4.880931998686162e-06, |
| "loss": 0.1374, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4940855323020928, |
| "grad_norm": 1.8010911071848295, |
| "learning_rate": 4.880713979402311e-06, |
| "loss": 0.2764, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.494540491355778, |
| "grad_norm": 2.2201593715577945, |
| "learning_rate": 4.880495765577263e-06, |
| "loss": 0.1785, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.49499545040946313, |
| "grad_norm": 2.5150440926935183, |
| "learning_rate": 4.880277357228852e-06, |
| "loss": 0.1415, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.49545040946314833, |
| "grad_norm": 1.4882801876169178, |
| "learning_rate": 4.880058754374923e-06, |
| "loss": 0.1528, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.4959053685168335, |
| "grad_norm": 1.9307317316728292, |
| "learning_rate": 4.879839957033343e-06, |
| "loss": 0.1661, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.4963603275705187, |
| "grad_norm": 1.6645987589280862, |
| "learning_rate": 4.879620965221987e-06, |
| "loss": 0.1058, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.4968152866242038, |
| "grad_norm": 1.1436431770468727, |
| "learning_rate": 4.879401778958755e-06, |
| "loss": 0.0867, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.49727024567788897, |
| "grad_norm": 2.072303030104995, |
| "learning_rate": 4.8791823982615525e-06, |
| "loss": 0.1454, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.49772520473157417, |
| "grad_norm": 1.4026343543836923, |
| "learning_rate": 4.878962823148308e-06, |
| "loss": 0.1176, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.4981801637852593, |
| "grad_norm": 2.4971931111745795, |
| "learning_rate": 4.878743053636968e-06, |
| "loss": 0.2058, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.4986351228389445, |
| "grad_norm": 1.9096703970153857, |
| "learning_rate": 4.878523089745485e-06, |
| "loss": 0.2389, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.49909008189262966, |
| "grad_norm": 1.7150797344948416, |
| "learning_rate": 4.878302931491837e-06, |
| "loss": 0.1408, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.49954504094631486, |
| "grad_norm": 1.8467538410779647, |
| "learning_rate": 4.8780825788940145e-06, |
| "loss": 0.1212, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.6653067013861202, |
| "learning_rate": 4.877862031970023e-06, |
| "loss": 0.165, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.5004549590536852, |
| "grad_norm": 2.7780746831303866, |
| "learning_rate": 4.8776412907378845e-06, |
| "loss": 0.1959, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5009099181073703, |
| "grad_norm": 2.735253037622924, |
| "learning_rate": 4.877420355215637e-06, |
| "loss": 0.1853, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.5013648771610555, |
| "grad_norm": 2.2801170489693474, |
| "learning_rate": 4.877199225421334e-06, |
| "loss": 0.223, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.5018198362147407, |
| "grad_norm": 1.5121408545649673, |
| "learning_rate": 4.8769779013730454e-06, |
| "loss": 0.1766, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.5022747952684259, |
| "grad_norm": 1.2736560565952975, |
| "learning_rate": 4.876756383088858e-06, |
| "loss": 0.1147, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.502729754322111, |
| "grad_norm": 2.234019869097899, |
| "learning_rate": 4.876534670586872e-06, |
| "loss": 0.1353, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.5031847133757962, |
| "grad_norm": 2.231499299533909, |
| "learning_rate": 4.8763127638852045e-06, |
| "loss": 0.1542, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.5036396724294814, |
| "grad_norm": 1.8302676611966564, |
| "learning_rate": 4.87609066300199e-06, |
| "loss": 0.1494, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.5040946314831665, |
| "grad_norm": 2.4154877250923157, |
| "learning_rate": 4.875868367955376e-06, |
| "loss": 0.1937, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.5045495905368517, |
| "grad_norm": 2.752908247632549, |
| "learning_rate": 4.87564587876353e-06, |
| "loss": 0.2127, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.5050045495905369, |
| "grad_norm": 1.718053996922888, |
| "learning_rate": 4.87542319544463e-06, |
| "loss": 0.1702, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5054595086442221, |
| "grad_norm": 1.5702103077744012, |
| "learning_rate": 4.875200318016873e-06, |
| "loss": 0.1566, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.5059144676979072, |
| "grad_norm": 2.0381911393844825, |
| "learning_rate": 4.8749772464984736e-06, |
| "loss": 0.2017, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.5063694267515924, |
| "grad_norm": 1.8176309130216741, |
| "learning_rate": 4.874753980907658e-06, |
| "loss": 0.1864, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.5068243858052776, |
| "grad_norm": 2.1308929915187753, |
| "learning_rate": 4.8745305212626714e-06, |
| "loss": 0.1726, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.5072793448589626, |
| "grad_norm": 1.8139775978694637, |
| "learning_rate": 4.874306867581775e-06, |
| "loss": 0.1761, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.5077343039126478, |
| "grad_norm": 1.7183373875600083, |
| "learning_rate": 4.874083019883242e-06, |
| "loss": 0.1333, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.508189262966333, |
| "grad_norm": 1.8665339958095688, |
| "learning_rate": 4.873858978185367e-06, |
| "loss": 0.1932, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.5086442220200182, |
| "grad_norm": 2.352764145779797, |
| "learning_rate": 4.8736347425064565e-06, |
| "loss": 0.2031, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.5090991810737033, |
| "grad_norm": 2.678329346866304, |
| "learning_rate": 4.873410312864833e-06, |
| "loss": 0.214, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.5095541401273885, |
| "grad_norm": 2.350844354697721, |
| "learning_rate": 4.8731856892788384e-06, |
| "loss": 0.2144, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5100090991810737, |
| "grad_norm": 1.9729175722269603, |
| "learning_rate": 4.872960871766826e-06, |
| "loss": 0.2081, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.5104640582347588, |
| "grad_norm": 1.4433063314456696, |
| "learning_rate": 4.8727358603471675e-06, |
| "loss": 0.1703, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.510919017288444, |
| "grad_norm": 2.5283375453779704, |
| "learning_rate": 4.872510655038249e-06, |
| "loss": 0.1536, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.5113739763421292, |
| "grad_norm": 1.3858205152408392, |
| "learning_rate": 4.872285255858476e-06, |
| "loss": 0.1458, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.5118289353958144, |
| "grad_norm": 2.0487135879281024, |
| "learning_rate": 4.872059662826263e-06, |
| "loss": 0.1661, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5122838944494995, |
| "grad_norm": 1.9472322837633822, |
| "learning_rate": 4.8718338759600465e-06, |
| "loss": 0.1786, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.5127388535031847, |
| "grad_norm": 1.6310032173739817, |
| "learning_rate": 4.871607895278278e-06, |
| "loss": 0.1626, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.5131938125568699, |
| "grad_norm": 1.985456014356635, |
| "learning_rate": 4.871381720799421e-06, |
| "loss": 0.1702, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.513648771610555, |
| "grad_norm": 2.402370971493488, |
| "learning_rate": 4.8711553525419595e-06, |
| "loss": 0.194, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.5141037306642402, |
| "grad_norm": 1.5072231886887353, |
| "learning_rate": 4.87092879052439e-06, |
| "loss": 0.1573, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5145586897179254, |
| "grad_norm": 1.449165092170168, |
| "learning_rate": 4.8707020347652275e-06, |
| "loss": 0.1246, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.5150136487716106, |
| "grad_norm": 1.8608730682993475, |
| "learning_rate": 4.870475085283001e-06, |
| "loss": 0.1831, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.5154686078252957, |
| "grad_norm": 1.9183857631670505, |
| "learning_rate": 4.870247942096254e-06, |
| "loss": 0.1638, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.5159235668789809, |
| "grad_norm": 1.6563135044982633, |
| "learning_rate": 4.870020605223551e-06, |
| "loss": 0.1421, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.5163785259326661, |
| "grad_norm": 1.7995838783709266, |
| "learning_rate": 4.869793074683466e-06, |
| "loss": 0.147, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.5168334849863512, |
| "grad_norm": 1.9548719996118153, |
| "learning_rate": 4.8695653504945925e-06, |
| "loss": 0.1575, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.5172884440400364, |
| "grad_norm": 1.7522375644775081, |
| "learning_rate": 4.8693374326755405e-06, |
| "loss": 0.1495, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.5177434030937216, |
| "grad_norm": 1.3062327753186558, |
| "learning_rate": 4.869109321244932e-06, |
| "loss": 0.116, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.5181983621474068, |
| "grad_norm": 1.8868647769132803, |
| "learning_rate": 4.86888101622141e-06, |
| "loss": 0.1794, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.5186533212010919, |
| "grad_norm": 1.8158313749710562, |
| "learning_rate": 4.868652517623629e-06, |
| "loss": 0.1391, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5191082802547771, |
| "grad_norm": 1.8111217984491637, |
| "learning_rate": 4.86842382547026e-06, |
| "loss": 0.1494, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.5195632393084623, |
| "grad_norm": 2.8090775733585835, |
| "learning_rate": 4.868194939779992e-06, |
| "loss": 0.1896, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.5200181983621474, |
| "grad_norm": 1.9497550190765165, |
| "learning_rate": 4.867965860571529e-06, |
| "loss": 0.1552, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.5204731574158326, |
| "grad_norm": 1.990627059765444, |
| "learning_rate": 4.867736587863589e-06, |
| "loss": 0.2094, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.5209281164695178, |
| "grad_norm": 2.247771495871837, |
| "learning_rate": 4.867507121674907e-06, |
| "loss": 0.2391, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.521383075523203, |
| "grad_norm": 2.120187054464733, |
| "learning_rate": 4.867277462024235e-06, |
| "loss": 0.1775, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.521838034576888, |
| "grad_norm": 1.7774801845384391, |
| "learning_rate": 4.8670476089303395e-06, |
| "loss": 0.2129, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.5222929936305732, |
| "grad_norm": 1.5308807746672268, |
| "learning_rate": 4.866817562412003e-06, |
| "loss": 0.2109, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.5227479526842584, |
| "grad_norm": 1.219763540490379, |
| "learning_rate": 4.866587322488024e-06, |
| "loss": 0.1529, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.5232029117379435, |
| "grad_norm": 1.63359106412129, |
| "learning_rate": 4.866356889177216e-06, |
| "loss": 0.1663, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5236578707916287, |
| "grad_norm": 1.4722036099751108, |
| "learning_rate": 4.866126262498409e-06, |
| "loss": 0.1727, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.5241128298453139, |
| "grad_norm": 2.4915301409486172, |
| "learning_rate": 4.865895442470449e-06, |
| "loss": 0.1966, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.5245677888989991, |
| "grad_norm": 1.5523199921916622, |
| "learning_rate": 4.865664429112199e-06, |
| "loss": 0.1451, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.5250227479526842, |
| "grad_norm": 2.0323520596343627, |
| "learning_rate": 4.8654332224425345e-06, |
| "loss": 0.1504, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.5254777070063694, |
| "grad_norm": 2.4530093356672094, |
| "learning_rate": 4.865201822480349e-06, |
| "loss": 0.1872, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.5259326660600546, |
| "grad_norm": 1.3735368464159743, |
| "learning_rate": 4.864970229244552e-06, |
| "loss": 0.111, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.5263876251137397, |
| "grad_norm": 1.824736780190326, |
| "learning_rate": 4.864738442754068e-06, |
| "loss": 0.135, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.5268425841674249, |
| "grad_norm": 1.9990020682765113, |
| "learning_rate": 4.864506463027837e-06, |
| "loss": 0.1745, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.5272975432211101, |
| "grad_norm": 1.4799527599994446, |
| "learning_rate": 4.864274290084816e-06, |
| "loss": 0.167, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.5277525022747953, |
| "grad_norm": 2.4687809077301295, |
| "learning_rate": 4.864041923943978e-06, |
| "loss": 0.1732, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5282074613284804, |
| "grad_norm": 2.641693873435684, |
| "learning_rate": 4.863809364624309e-06, |
| "loss": 0.2128, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.5286624203821656, |
| "grad_norm": 1.5492373372050023, |
| "learning_rate": 4.863576612144814e-06, |
| "loss": 0.1592, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.5291173794358508, |
| "grad_norm": 2.3572852462486313, |
| "learning_rate": 4.863343666524512e-06, |
| "loss": 0.2061, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.5295723384895359, |
| "grad_norm": 1.8838845870200471, |
| "learning_rate": 4.863110527782437e-06, |
| "loss": 0.1798, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.5300272975432211, |
| "grad_norm": 2.304263001470561, |
| "learning_rate": 4.8628771959376435e-06, |
| "loss": 0.1556, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.5304822565969063, |
| "grad_norm": 1.7674923531547297, |
| "learning_rate": 4.862643671009195e-06, |
| "loss": 0.1333, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.5309372156505915, |
| "grad_norm": 1.393097189340672, |
| "learning_rate": 4.862409953016175e-06, |
| "loss": 0.155, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.5313921747042766, |
| "grad_norm": 1.74325807786759, |
| "learning_rate": 4.862176041977683e-06, |
| "loss": 0.1656, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.5318471337579618, |
| "grad_norm": 1.572029172895186, |
| "learning_rate": 4.861941937912832e-06, |
| "loss": 0.131, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.532302092811647, |
| "grad_norm": 2.008491262720168, |
| "learning_rate": 4.861707640840752e-06, |
| "loss": 0.1548, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5327570518653321, |
| "grad_norm": 1.482082349852649, |
| "learning_rate": 4.861473150780589e-06, |
| "loss": 0.1628, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.5332120109190173, |
| "grad_norm": 1.6791945913602067, |
| "learning_rate": 4.8612384677515054e-06, |
| "loss": 0.1785, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.5336669699727025, |
| "grad_norm": 2.20432127668894, |
| "learning_rate": 4.861003591772677e-06, |
| "loss": 0.1716, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.5341219290263877, |
| "grad_norm": 1.9304068948412856, |
| "learning_rate": 4.860768522863297e-06, |
| "loss": 0.1503, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.5345768880800728, |
| "grad_norm": 1.5238718585240933, |
| "learning_rate": 4.860533261042574e-06, |
| "loss": 0.1539, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.535031847133758, |
| "grad_norm": 1.2432245247502896, |
| "learning_rate": 4.8602978063297336e-06, |
| "loss": 0.1721, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.5354868061874432, |
| "grad_norm": 1.8986627233525826, |
| "learning_rate": 4.8600621587440155e-06, |
| "loss": 0.1717, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.5359417652411284, |
| "grad_norm": 1.6746020896303164, |
| "learning_rate": 4.859826318304676e-06, |
| "loss": 0.198, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.5363967242948134, |
| "grad_norm": 1.0811516795291998, |
| "learning_rate": 4.859590285030986e-06, |
| "loss": 0.1441, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.5368516833484986, |
| "grad_norm": 1.3182569447840091, |
| "learning_rate": 4.859354058942234e-06, |
| "loss": 0.143, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5373066424021838, |
| "grad_norm": 1.5442971076277365, |
| "learning_rate": 4.859117640057723e-06, |
| "loss": 0.1708, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.5377616014558689, |
| "grad_norm": 2.2346125174953744, |
| "learning_rate": 4.858881028396773e-06, |
| "loss": 0.2581, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.5382165605095541, |
| "grad_norm": 2.16866059231189, |
| "learning_rate": 4.8586442239787165e-06, |
| "loss": 0.1566, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.5386715195632393, |
| "grad_norm": 2.2940342617095357, |
| "learning_rate": 4.858407226822906e-06, |
| "loss": 0.2362, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.5391264786169245, |
| "grad_norm": 1.722886466945642, |
| "learning_rate": 4.858170036948707e-06, |
| "loss": 0.1643, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.5395814376706096, |
| "grad_norm": 1.8036922634291395, |
| "learning_rate": 4.857932654375503e-06, |
| "loss": 0.1399, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.5400363967242948, |
| "grad_norm": 2.4595201733911995, |
| "learning_rate": 4.857695079122691e-06, |
| "loss": 0.2806, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.54049135577798, |
| "grad_norm": 1.5611995597597812, |
| "learning_rate": 4.857457311209683e-06, |
| "loss": 0.1436, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.5409463148316651, |
| "grad_norm": 2.155441619580459, |
| "learning_rate": 4.857219350655911e-06, |
| "loss": 0.1502, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.5414012738853503, |
| "grad_norm": 1.7590257643884393, |
| "learning_rate": 4.856981197480818e-06, |
| "loss": 0.1832, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5418562329390355, |
| "grad_norm": 1.5219476359124613, |
| "learning_rate": 4.856742851703866e-06, |
| "loss": 0.1478, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.5423111919927207, |
| "grad_norm": 1.9739406001713575, |
| "learning_rate": 4.856504313344531e-06, |
| "loss": 0.2435, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.5427661510464058, |
| "grad_norm": 2.084318032784521, |
| "learning_rate": 4.8562655824223055e-06, |
| "loss": 0.1409, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.543221110100091, |
| "grad_norm": 1.1509311969673588, |
| "learning_rate": 4.856026658956697e-06, |
| "loss": 0.1281, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.5436760691537762, |
| "grad_norm": 1.14005541818581, |
| "learning_rate": 4.8557875429672295e-06, |
| "loss": 0.1438, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.5441310282074613, |
| "grad_norm": 1.6453379692427774, |
| "learning_rate": 4.855548234473444e-06, |
| "loss": 0.1898, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.5445859872611465, |
| "grad_norm": 3.715053618797708, |
| "learning_rate": 4.8553087334948935e-06, |
| "loss": 0.1884, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.5450409463148317, |
| "grad_norm": 1.9604960579417277, |
| "learning_rate": 4.855069040051149e-06, |
| "loss": 0.1668, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.5454959053685169, |
| "grad_norm": 2.008712099431151, |
| "learning_rate": 4.854829154161799e-06, |
| "loss": 0.2458, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.545950864422202, |
| "grad_norm": 1.670617885602165, |
| "learning_rate": 4.854589075846445e-06, |
| "loss": 0.195, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5464058234758872, |
| "grad_norm": 1.3262735122543114, |
| "learning_rate": 4.854348805124704e-06, |
| "loss": 0.1564, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.5468607825295724, |
| "grad_norm": 1.9039774091054742, |
| "learning_rate": 4.85410834201621e-06, |
| "loss": 0.1379, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.5473157415832575, |
| "grad_norm": 2.3929812156260364, |
| "learning_rate": 4.8538676865406155e-06, |
| "loss": 0.2412, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.5477707006369427, |
| "grad_norm": 1.551384727017807, |
| "learning_rate": 4.853626838717582e-06, |
| "loss": 0.117, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.5482256596906279, |
| "grad_norm": 5.414582318339853, |
| "learning_rate": 4.853385798566793e-06, |
| "loss": 0.1437, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.5486806187443131, |
| "grad_norm": 1.6881825100786558, |
| "learning_rate": 4.8531445661079444e-06, |
| "loss": 0.1232, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.5491355777979982, |
| "grad_norm": 1.6096306897948298, |
| "learning_rate": 4.852903141360749e-06, |
| "loss": 0.161, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.5495905368516834, |
| "grad_norm": 1.7692527628598336, |
| "learning_rate": 4.852661524344933e-06, |
| "loss": 0.1217, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.5500454959053686, |
| "grad_norm": 2.162642212991987, |
| "learning_rate": 4.852419715080244e-06, |
| "loss": 0.1986, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.5505004549590536, |
| "grad_norm": 1.4975052036096086, |
| "learning_rate": 4.852177713586437e-06, |
| "loss": 0.1435, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5509554140127388, |
| "grad_norm": 1.5907183445636404, |
| "learning_rate": 4.85193551988329e-06, |
| "loss": 0.1642, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.551410373066424, |
| "grad_norm": 1.9999573598736464, |
| "learning_rate": 4.851693133990594e-06, |
| "loss": 0.1807, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.5518653321201092, |
| "grad_norm": 2.294525710441773, |
| "learning_rate": 4.851450555928155e-06, |
| "loss": 0.1624, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.5523202911737943, |
| "grad_norm": 2.233884971616304, |
| "learning_rate": 4.851207785715797e-06, |
| "loss": 0.2324, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.5527752502274795, |
| "grad_norm": 2.0057194457772924, |
| "learning_rate": 4.850964823373355e-06, |
| "loss": 0.2105, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.5532302092811647, |
| "grad_norm": 1.9893088992044121, |
| "learning_rate": 4.850721668920685e-06, |
| "loss": 0.1784, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.5536851683348498, |
| "grad_norm": 1.811776169286512, |
| "learning_rate": 4.850478322377657e-06, |
| "loss": 0.1716, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.554140127388535, |
| "grad_norm": 2.4345407872833134, |
| "learning_rate": 4.8502347837641536e-06, |
| "loss": 0.2649, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.5545950864422202, |
| "grad_norm": 1.4197781095132433, |
| "learning_rate": 4.8499910531000776e-06, |
| "loss": 0.1473, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.5550500454959054, |
| "grad_norm": 2.7980447769263637, |
| "learning_rate": 4.849747130405346e-06, |
| "loss": 0.2153, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5555050045495905, |
| "grad_norm": 1.6352047446815658, |
| "learning_rate": 4.849503015699889e-06, |
| "loss": 0.1485, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.5559599636032757, |
| "grad_norm": 2.1831084601819066, |
| "learning_rate": 4.849258709003657e-06, |
| "loss": 0.1818, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.5564149226569609, |
| "grad_norm": 1.541290763289794, |
| "learning_rate": 4.849014210336612e-06, |
| "loss": 0.1947, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.556869881710646, |
| "grad_norm": 2.2775888091930723, |
| "learning_rate": 4.848769519718734e-06, |
| "loss": 0.2152, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.5573248407643312, |
| "grad_norm": 2.473887631559974, |
| "learning_rate": 4.848524637170018e-06, |
| "loss": 0.1588, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.5577797998180164, |
| "grad_norm": 1.7255823206927379, |
| "learning_rate": 4.848279562710474e-06, |
| "loss": 0.2174, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.5582347588717016, |
| "grad_norm": 1.8250707498997563, |
| "learning_rate": 4.848034296360129e-06, |
| "loss": 0.1404, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.5586897179253867, |
| "grad_norm": 1.3973858443687242, |
| "learning_rate": 4.847788838139025e-06, |
| "loss": 0.1598, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.5591446769790719, |
| "grad_norm": 1.6880241463364833, |
| "learning_rate": 4.847543188067219e-06, |
| "loss": 0.1361, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.5595996360327571, |
| "grad_norm": 1.6583472347876314, |
| "learning_rate": 4.847297346164786e-06, |
| "loss": 0.1681, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5600545950864422, |
| "grad_norm": 1.5526904315702266, |
| "learning_rate": 4.8470513124518134e-06, |
| "loss": 0.1704, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.5605095541401274, |
| "grad_norm": 2.9080178304839333, |
| "learning_rate": 4.8468050869484075e-06, |
| "loss": 0.2189, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.5609645131938126, |
| "grad_norm": 2.272625265359496, |
| "learning_rate": 4.846558669674688e-06, |
| "loss": 0.1796, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.5614194722474978, |
| "grad_norm": 2.1487306294232997, |
| "learning_rate": 4.8463120606507904e-06, |
| "loss": 0.1853, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.5618744313011829, |
| "grad_norm": 2.013831962718606, |
| "learning_rate": 4.846065259896867e-06, |
| "loss": 0.1844, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.5623293903548681, |
| "grad_norm": 1.8287089471640992, |
| "learning_rate": 4.845818267433086e-06, |
| "loss": 0.1784, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.5627843494085533, |
| "grad_norm": 1.800058629818333, |
| "learning_rate": 4.845571083279629e-06, |
| "loss": 0.1552, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.5632393084622384, |
| "grad_norm": 1.2446217689129786, |
| "learning_rate": 4.845323707456696e-06, |
| "loss": 0.1685, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.5636942675159236, |
| "grad_norm": 2.6424245053307787, |
| "learning_rate": 4.845076139984502e-06, |
| "loss": 0.2754, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.5641492265696088, |
| "grad_norm": 1.9189782085118383, |
| "learning_rate": 4.844828380883274e-06, |
| "loss": 0.1686, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.564604185623294, |
| "grad_norm": 1.2992527617302185, |
| "learning_rate": 4.844580430173261e-06, |
| "loss": 0.1576, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.565059144676979, |
| "grad_norm": 1.771767593474412, |
| "learning_rate": 4.8443322878747236e-06, |
| "loss": 0.1201, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.5655141037306642, |
| "grad_norm": 1.3113844210494432, |
| "learning_rate": 4.844083954007938e-06, |
| "loss": 0.1933, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.5659690627843494, |
| "grad_norm": 1.780274550683715, |
| "learning_rate": 4.843835428593198e-06, |
| "loss": 0.2449, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.5664240218380345, |
| "grad_norm": 2.0286348942605734, |
| "learning_rate": 4.84358671165081e-06, |
| "loss": 0.2206, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.5668789808917197, |
| "grad_norm": 1.9183674174882497, |
| "learning_rate": 4.843337803201102e-06, |
| "loss": 0.1932, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.5673339399454049, |
| "grad_norm": 1.8589987750417598, |
| "learning_rate": 4.8430887032644094e-06, |
| "loss": 0.2063, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.5677888989990901, |
| "grad_norm": 1.8997293354336255, |
| "learning_rate": 4.842839411861089e-06, |
| "loss": 0.15, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.5682438580527752, |
| "grad_norm": 1.5956283554174595, |
| "learning_rate": 4.842589929011513e-06, |
| "loss": 0.1249, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.5686988171064604, |
| "grad_norm": 1.7264729567079007, |
| "learning_rate": 4.8423402547360665e-06, |
| "loss": 0.1731, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5691537761601456, |
| "grad_norm": 1.9220135807111425, |
| "learning_rate": 4.842090389055153e-06, |
| "loss": 0.1143, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.5696087352138307, |
| "grad_norm": 1.7921638992770812, |
| "learning_rate": 4.841840331989189e-06, |
| "loss": 0.1976, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.5700636942675159, |
| "grad_norm": 2.000993623816501, |
| "learning_rate": 4.841590083558608e-06, |
| "loss": 0.1768, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.5705186533212011, |
| "grad_norm": 2.4830094815396304, |
| "learning_rate": 4.841339643783861e-06, |
| "loss": 0.2043, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.5709736123748863, |
| "grad_norm": 1.5989796561168585, |
| "learning_rate": 4.841089012685412e-06, |
| "loss": 0.1778, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 1.8137268898691017, |
| "learning_rate": 4.840838190283741e-06, |
| "loss": 0.1692, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.5718835304822566, |
| "grad_norm": 1.7559519711217326, |
| "learning_rate": 4.8405871765993435e-06, |
| "loss": 0.0939, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.5723384895359418, |
| "grad_norm": 1.7192722836354088, |
| "learning_rate": 4.840335971652732e-06, |
| "loss": 0.1255, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.5727934485896269, |
| "grad_norm": 1.9835814338763256, |
| "learning_rate": 4.840084575464434e-06, |
| "loss": 0.1945, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.5732484076433121, |
| "grad_norm": 1.8517843659588205, |
| "learning_rate": 4.839832988054992e-06, |
| "loss": 0.187, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5737033666969973, |
| "grad_norm": 1.8951856802928044, |
| "learning_rate": 4.839581209444966e-06, |
| "loss": 0.1196, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.5741583257506825, |
| "grad_norm": 2.3401876182004386, |
| "learning_rate": 4.839329239654927e-06, |
| "loss": 0.2252, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.5746132848043676, |
| "grad_norm": 2.1924333176646145, |
| "learning_rate": 4.839077078705468e-06, |
| "loss": 0.137, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.5750682438580528, |
| "grad_norm": 1.6673068426763284, |
| "learning_rate": 4.838824726617194e-06, |
| "loss": 0.157, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.575523202911738, |
| "grad_norm": 1.7250800520215972, |
| "learning_rate": 4.838572183410725e-06, |
| "loss": 0.1808, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.5759781619654231, |
| "grad_norm": 1.6457142786345031, |
| "learning_rate": 4.838319449106697e-06, |
| "loss": 0.1635, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.5764331210191083, |
| "grad_norm": 1.5575525689618337, |
| "learning_rate": 4.838066523725764e-06, |
| "loss": 0.1127, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.5768880800727935, |
| "grad_norm": 2.5767156490698833, |
| "learning_rate": 4.837813407288594e-06, |
| "loss": 0.1798, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.5773430391264787, |
| "grad_norm": 1.9108956938528818, |
| "learning_rate": 4.837560099815869e-06, |
| "loss": 0.202, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.5777979981801638, |
| "grad_norm": 1.956778308687979, |
| "learning_rate": 4.837306601328289e-06, |
| "loss": 0.1806, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.578252957233849, |
| "grad_norm": 1.775478489276246, |
| "learning_rate": 4.837052911846569e-06, |
| "loss": 0.1695, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.5787079162875342, |
| "grad_norm": 1.787242091669647, |
| "learning_rate": 4.836799031391439e-06, |
| "loss": 0.1745, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.5791628753412192, |
| "grad_norm": 1.0591727928255608, |
| "learning_rate": 4.836544959983645e-06, |
| "loss": 0.1343, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.5796178343949044, |
| "grad_norm": 1.5740206900027498, |
| "learning_rate": 4.8362906976439485e-06, |
| "loss": 0.1635, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.5800727934485896, |
| "grad_norm": 1.5937545527814416, |
| "learning_rate": 4.836036244393127e-06, |
| "loss": 0.1581, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.5805277525022748, |
| "grad_norm": 1.813708807716678, |
| "learning_rate": 4.835781600251973e-06, |
| "loss": 0.2269, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.5809827115559599, |
| "grad_norm": 2.0796570235313836, |
| "learning_rate": 4.835526765241295e-06, |
| "loss": 0.1924, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.5814376706096451, |
| "grad_norm": 1.6083810261665097, |
| "learning_rate": 4.835271739381917e-06, |
| "loss": 0.1541, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.5818926296633303, |
| "grad_norm": 1.365537997124497, |
| "learning_rate": 4.835016522694678e-06, |
| "loss": 0.136, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.5823475887170154, |
| "grad_norm": 1.8893838729814614, |
| "learning_rate": 4.834761115200434e-06, |
| "loss": 0.2207, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5828025477707006, |
| "grad_norm": 1.4870021241117473, |
| "learning_rate": 4.834505516920055e-06, |
| "loss": 0.1879, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.5832575068243858, |
| "grad_norm": 1.4165326048713465, |
| "learning_rate": 4.834249727874428e-06, |
| "loss": 0.1263, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.583712465878071, |
| "grad_norm": 1.8197657860371343, |
| "learning_rate": 4.833993748084455e-06, |
| "loss": 0.1727, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.5841674249317561, |
| "grad_norm": 1.715508493394312, |
| "learning_rate": 4.833737577571052e-06, |
| "loss": 0.1497, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.5846223839854413, |
| "grad_norm": 2.0061239985491555, |
| "learning_rate": 4.833481216355153e-06, |
| "loss": 0.1646, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.5850773430391265, |
| "grad_norm": 4.355130184989222, |
| "learning_rate": 4.833224664457709e-06, |
| "loss": 0.2076, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.5855323020928116, |
| "grad_norm": 3.155573393148588, |
| "learning_rate": 4.83296792189968e-06, |
| "loss": 0.2413, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.5859872611464968, |
| "grad_norm": 1.656224319251134, |
| "learning_rate": 4.83271098870205e-06, |
| "loss": 0.1237, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.586442220200182, |
| "grad_norm": 1.5671815338330013, |
| "learning_rate": 4.832453864885811e-06, |
| "loss": 0.1461, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.5868971792538672, |
| "grad_norm": 1.4490558461440097, |
| "learning_rate": 4.832196550471976e-06, |
| "loss": 0.1719, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5873521383075523, |
| "grad_norm": 1.4391467760040138, |
| "learning_rate": 4.831939045481571e-06, |
| "loss": 0.1598, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.5878070973612375, |
| "grad_norm": 1.853086020668375, |
| "learning_rate": 4.8316813499356375e-06, |
| "loss": 0.1654, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.5882620564149227, |
| "grad_norm": 1.6999807809193854, |
| "learning_rate": 4.831423463855235e-06, |
| "loss": 0.1516, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.5887170154686078, |
| "grad_norm": 2.070573438132845, |
| "learning_rate": 4.8311653872614345e-06, |
| "loss": 0.1161, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.589171974522293, |
| "grad_norm": 1.6686744603097172, |
| "learning_rate": 4.830907120175327e-06, |
| "loss": 0.1584, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.5896269335759782, |
| "grad_norm": 2.089342697132724, |
| "learning_rate": 4.830648662618015e-06, |
| "loss": 0.2365, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.5900818926296634, |
| "grad_norm": 1.5894012047277333, |
| "learning_rate": 4.83039001461062e-06, |
| "loss": 0.1097, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.5905368516833485, |
| "grad_norm": 1.8782696857030252, |
| "learning_rate": 4.830131176174276e-06, |
| "loss": 0.151, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.5909918107370337, |
| "grad_norm": 1.958971362169023, |
| "learning_rate": 4.829872147330136e-06, |
| "loss": 0.1841, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.5914467697907189, |
| "grad_norm": 1.3360501731813752, |
| "learning_rate": 4.829612928099366e-06, |
| "loss": 0.1457, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.591901728844404, |
| "grad_norm": 1.638219511935524, |
| "learning_rate": 4.829353518503147e-06, |
| "loss": 0.1583, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.5923566878980892, |
| "grad_norm": 2.0096056545692025, |
| "learning_rate": 4.829093918562678e-06, |
| "loss": 0.1491, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.5928116469517744, |
| "grad_norm": 1.7893380227892468, |
| "learning_rate": 4.828834128299173e-06, |
| "loss": 0.1592, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.5932666060054596, |
| "grad_norm": 1.5760903095424181, |
| "learning_rate": 4.828574147733859e-06, |
| "loss": 0.1646, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.5937215650591446, |
| "grad_norm": 1.6385972545017617, |
| "learning_rate": 4.828313976887982e-06, |
| "loss": 0.1228, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.5941765241128298, |
| "grad_norm": 1.7350084151113443, |
| "learning_rate": 4.8280536157828e-06, |
| "loss": 0.1532, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.594631483166515, |
| "grad_norm": 2.1711615974874223, |
| "learning_rate": 4.827793064439592e-06, |
| "loss": 0.1551, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.5950864422202001, |
| "grad_norm": 2.5688116012952125, |
| "learning_rate": 4.8275323228796455e-06, |
| "loss": 0.18, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.5955414012738853, |
| "grad_norm": 1.534845536955317, |
| "learning_rate": 4.8272713911242695e-06, |
| "loss": 0.121, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.5959963603275705, |
| "grad_norm": 1.9028349069881882, |
| "learning_rate": 4.827010269194785e-06, |
| "loss": 0.1228, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5964513193812557, |
| "grad_norm": 2.1051164199599, |
| "learning_rate": 4.8267489571125295e-06, |
| "loss": 0.1465, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.5969062784349408, |
| "grad_norm": 2.9999435749849073, |
| "learning_rate": 4.826487454898857e-06, |
| "loss": 0.2635, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.597361237488626, |
| "grad_norm": 1.880715290875366, |
| "learning_rate": 4.826225762575136e-06, |
| "loss": 0.194, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.5978161965423112, |
| "grad_norm": 1.6843651365954362, |
| "learning_rate": 4.825963880162752e-06, |
| "loss": 0.1792, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.5982711555959963, |
| "grad_norm": 1.606704753365435, |
| "learning_rate": 4.825701807683102e-06, |
| "loss": 0.1399, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5987261146496815, |
| "grad_norm": 1.8783582719750365, |
| "learning_rate": 4.825439545157603e-06, |
| "loss": 0.1743, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.5991810737033667, |
| "grad_norm": 2.39834669557369, |
| "learning_rate": 4.825177092607687e-06, |
| "loss": 0.2576, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.5996360327570519, |
| "grad_norm": 1.5809346444064956, |
| "learning_rate": 4.8249144500547995e-06, |
| "loss": 0.1266, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.600090991810737, |
| "grad_norm": 1.6731917139944308, |
| "learning_rate": 4.824651617520402e-06, |
| "loss": 0.1722, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.6005459508644222, |
| "grad_norm": 1.9934684665371283, |
| "learning_rate": 4.824388595025972e-06, |
| "loss": 0.1863, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.6010009099181074, |
| "grad_norm": 1.7396149145777957, |
| "learning_rate": 4.824125382593003e-06, |
| "loss": 0.1582, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.6014558689717925, |
| "grad_norm": 1.7746494679795604, |
| "learning_rate": 4.823861980243003e-06, |
| "loss": 0.1485, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.6019108280254777, |
| "grad_norm": 1.8309083669399964, |
| "learning_rate": 4.823598387997497e-06, |
| "loss": 0.1495, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.6023657870791629, |
| "grad_norm": 1.9534496331991582, |
| "learning_rate": 4.823334605878024e-06, |
| "loss": 0.1462, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.6028207461328481, |
| "grad_norm": 2.1011605763315138, |
| "learning_rate": 4.82307063390614e-06, |
| "loss": 0.1853, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.6032757051865332, |
| "grad_norm": 2.5503968401256465, |
| "learning_rate": 4.822806472103413e-06, |
| "loss": 0.2297, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.6037306642402184, |
| "grad_norm": 1.4853028085158964, |
| "learning_rate": 4.822542120491431e-06, |
| "loss": 0.1692, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.6041856232939036, |
| "grad_norm": 1.5826380640650177, |
| "learning_rate": 4.822277579091796e-06, |
| "loss": 0.1845, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.6046405823475887, |
| "grad_norm": 1.7941875470339128, |
| "learning_rate": 4.822012847926125e-06, |
| "loss": 0.1723, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.6050955414012739, |
| "grad_norm": 1.6317178871077942, |
| "learning_rate": 4.821747927016049e-06, |
| "loss": 0.1309, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6055505004549591, |
| "grad_norm": 1.5814757694833934, |
| "learning_rate": 4.821482816383219e-06, |
| "loss": 0.1565, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.6060054595086443, |
| "grad_norm": 1.5304957435111453, |
| "learning_rate": 4.821217516049296e-06, |
| "loss": 0.1373, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.6064604185623294, |
| "grad_norm": 1.5147254102931988, |
| "learning_rate": 4.82095202603596e-06, |
| "loss": 0.1431, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.6069153776160146, |
| "grad_norm": 1.5663436015338144, |
| "learning_rate": 4.820686346364906e-06, |
| "loss": 0.156, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.6073703366696998, |
| "grad_norm": 2.053796214560493, |
| "learning_rate": 4.820420477057843e-06, |
| "loss": 0.1874, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.607825295723385, |
| "grad_norm": 1.95739593906374, |
| "learning_rate": 4.820154418136498e-06, |
| "loss": 0.1526, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.60828025477707, |
| "grad_norm": 1.8483495445052411, |
| "learning_rate": 4.819888169622612e-06, |
| "loss": 0.2036, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.6087352138307552, |
| "grad_norm": 1.9503495611822523, |
| "learning_rate": 4.819621731537942e-06, |
| "loss": 0.2066, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.6091901728844404, |
| "grad_norm": 1.5722530391175293, |
| "learning_rate": 4.819355103904259e-06, |
| "loss": 0.1419, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.6096451319381255, |
| "grad_norm": 1.8367765104613556, |
| "learning_rate": 4.81908828674335e-06, |
| "loss": 0.1775, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6101000909918107, |
| "grad_norm": 1.6359632675531957, |
| "learning_rate": 4.81882128007702e-06, |
| "loss": 0.1361, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.6105550500454959, |
| "grad_norm": 1.6249604007945537, |
| "learning_rate": 4.818554083927086e-06, |
| "loss": 0.1501, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.6110100090991811, |
| "grad_norm": 1.507444149214357, |
| "learning_rate": 4.818286698315383e-06, |
| "loss": 0.1318, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.6114649681528662, |
| "grad_norm": 1.714948580415853, |
| "learning_rate": 4.818019123263761e-06, |
| "loss": 0.1576, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.6119199272065514, |
| "grad_norm": 1.6310779918465994, |
| "learning_rate": 4.817751358794084e-06, |
| "loss": 0.1505, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.6123748862602366, |
| "grad_norm": 1.9516095925204497, |
| "learning_rate": 4.8174834049282325e-06, |
| "loss": 0.1513, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.6128298453139217, |
| "grad_norm": 1.6535718997078614, |
| "learning_rate": 4.817215261688104e-06, |
| "loss": 0.1509, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.6132848043676069, |
| "grad_norm": 1.7050249250163263, |
| "learning_rate": 4.816946929095607e-06, |
| "loss": 0.143, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.6137397634212921, |
| "grad_norm": 1.9555072177299098, |
| "learning_rate": 4.816678407172671e-06, |
| "loss": 0.1702, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.6141947224749773, |
| "grad_norm": 1.6603270300616475, |
| "learning_rate": 4.816409695941238e-06, |
| "loss": 0.1525, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6146496815286624, |
| "grad_norm": 2.052319098264881, |
| "learning_rate": 4.816140795423265e-06, |
| "loss": 0.1553, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.6151046405823476, |
| "grad_norm": 2.316846556963137, |
| "learning_rate": 4.8158717056407255e-06, |
| "loss": 0.2204, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.6155595996360328, |
| "grad_norm": 2.268897705180763, |
| "learning_rate": 4.815602426615609e-06, |
| "loss": 0.172, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.6160145586897179, |
| "grad_norm": 2.277033042904883, |
| "learning_rate": 4.815332958369919e-06, |
| "loss": 0.1952, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.6164695177434031, |
| "grad_norm": 2.203261308039804, |
| "learning_rate": 4.815063300925677e-06, |
| "loss": 0.1778, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.6169244767970883, |
| "grad_norm": 1.5542993423497844, |
| "learning_rate": 4.814793454304915e-06, |
| "loss": 0.1831, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.6173794358507735, |
| "grad_norm": 1.3687836885728237, |
| "learning_rate": 4.814523418529686e-06, |
| "loss": 0.1438, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.6178343949044586, |
| "grad_norm": 1.803336916930759, |
| "learning_rate": 4.814253193622056e-06, |
| "loss": 0.1426, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.6182893539581438, |
| "grad_norm": 1.521636702652137, |
| "learning_rate": 4.813982779604106e-06, |
| "loss": 0.1214, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.618744313011829, |
| "grad_norm": 1.5404670484043497, |
| "learning_rate": 4.813712176497933e-06, |
| "loss": 0.1366, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.6191992720655141, |
| "grad_norm": 1.689965450022471, |
| "learning_rate": 4.813441384325649e-06, |
| "loss": 0.1346, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.6196542311191993, |
| "grad_norm": 1.7814370136900919, |
| "learning_rate": 4.813170403109383e-06, |
| "loss": 0.1444, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.6201091901728845, |
| "grad_norm": 1.72215098605925, |
| "learning_rate": 4.8128992328712774e-06, |
| "loss": 0.1127, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.6205641492265697, |
| "grad_norm": 1.504745997390183, |
| "learning_rate": 4.812627873633492e-06, |
| "loss": 0.149, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.6210191082802548, |
| "grad_norm": 1.6905090686600799, |
| "learning_rate": 4.8123563254182e-06, |
| "loss": 0.1457, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.62147406733394, |
| "grad_norm": 2.1753494024731683, |
| "learning_rate": 4.8120845882475924e-06, |
| "loss": 0.1995, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.6219290263876252, |
| "grad_norm": 2.793385404562888, |
| "learning_rate": 4.8118126621438734e-06, |
| "loss": 0.2318, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.6223839854413102, |
| "grad_norm": 1.7667899225260022, |
| "learning_rate": 4.811540547129263e-06, |
| "loss": 0.2251, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.6228389444949954, |
| "grad_norm": 2.1679573967859787, |
| "learning_rate": 4.811268243225999e-06, |
| "loss": 0.1784, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.6232939035486806, |
| "grad_norm": 2.4497161330069424, |
| "learning_rate": 4.810995750456331e-06, |
| "loss": 0.1795, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.6237488626023658, |
| "grad_norm": 1.9632525184445888, |
| "learning_rate": 4.810723068842526e-06, |
| "loss": 0.1757, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.6242038216560509, |
| "grad_norm": 1.78757490589868, |
| "learning_rate": 4.810450198406867e-06, |
| "loss": 0.1994, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.6246587807097361, |
| "grad_norm": 2.5975000715086907, |
| "learning_rate": 4.810177139171653e-06, |
| "loss": 0.2177, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.6251137397634213, |
| "grad_norm": 1.8861807982376269, |
| "learning_rate": 4.809903891159195e-06, |
| "loss": 0.1318, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.6255686988171064, |
| "grad_norm": 2.474014583254649, |
| "learning_rate": 4.809630454391822e-06, |
| "loss": 0.165, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.6260236578707916, |
| "grad_norm": 1.8420180765220768, |
| "learning_rate": 4.80935682889188e-06, |
| "loss": 0.1997, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.6264786169244768, |
| "grad_norm": 1.8871529282732857, |
| "learning_rate": 4.809083014681726e-06, |
| "loss": 0.239, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.626933575978162, |
| "grad_norm": 1.776688876661572, |
| "learning_rate": 4.808809011783735e-06, |
| "loss": 0.1876, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.6273885350318471, |
| "grad_norm": 1.806661163792066, |
| "learning_rate": 4.808534820220299e-06, |
| "loss": 0.148, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.6278434940855323, |
| "grad_norm": 2.0820743721382007, |
| "learning_rate": 4.8082604400138226e-06, |
| "loss": 0.2015, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6282984531392175, |
| "grad_norm": 1.5614420996583043, |
| "learning_rate": 4.807985871186726e-06, |
| "loss": 0.1277, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.6287534121929026, |
| "grad_norm": 1.5694923603817514, |
| "learning_rate": 4.8077111137614484e-06, |
| "loss": 0.1345, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.6292083712465878, |
| "grad_norm": 2.2610976098352116, |
| "learning_rate": 4.8074361677604394e-06, |
| "loss": 0.1732, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.629663330300273, |
| "grad_norm": 2.0760282221755704, |
| "learning_rate": 4.807161033206168e-06, |
| "loss": 0.1936, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.6301182893539582, |
| "grad_norm": 1.805894786082926, |
| "learning_rate": 4.806885710121114e-06, |
| "loss": 0.1536, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.6305732484076433, |
| "grad_norm": 1.9667669224198192, |
| "learning_rate": 4.806610198527779e-06, |
| "loss": 0.1729, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.6310282074613285, |
| "grad_norm": 1.9797354051419906, |
| "learning_rate": 4.8063344984486755e-06, |
| "loss": 0.2071, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.6314831665150137, |
| "grad_norm": 2.5955252782084224, |
| "learning_rate": 4.806058609906331e-06, |
| "loss": 0.181, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.6319381255686988, |
| "grad_norm": 2.707367730234045, |
| "learning_rate": 4.805782532923292e-06, |
| "loss": 0.2299, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.632393084622384, |
| "grad_norm": 1.579292631208614, |
| "learning_rate": 4.805506267522116e-06, |
| "loss": 0.2235, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6328480436760692, |
| "grad_norm": 3.332380342069127, |
| "learning_rate": 4.80522981372538e-06, |
| "loss": 0.2485, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.6333030027297544, |
| "grad_norm": 1.597247684736274, |
| "learning_rate": 4.804953171555674e-06, |
| "loss": 0.1511, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.6337579617834395, |
| "grad_norm": 2.050037449702685, |
| "learning_rate": 4.8046763410356046e-06, |
| "loss": 0.1732, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.6342129208371247, |
| "grad_norm": 1.6703199484658815, |
| "learning_rate": 4.804399322187791e-06, |
| "loss": 0.1832, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.6346678798908099, |
| "grad_norm": 2.4171080690553155, |
| "learning_rate": 4.8041221150348725e-06, |
| "loss": 0.2519, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.635122838944495, |
| "grad_norm": 1.7415236452607812, |
| "learning_rate": 4.8038447195995e-06, |
| "loss": 0.1942, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.6355777979981801, |
| "grad_norm": 2.0585293521798, |
| "learning_rate": 4.80356713590434e-06, |
| "loss": 0.1806, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.6360327570518653, |
| "grad_norm": 1.6543360161164664, |
| "learning_rate": 4.803289363972078e-06, |
| "loss": 0.1953, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.6364877161055505, |
| "grad_norm": 1.952726003661859, |
| "learning_rate": 4.8030114038254094e-06, |
| "loss": 0.164, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.6369426751592356, |
| "grad_norm": 1.6177022530921434, |
| "learning_rate": 4.80273325548705e-06, |
| "loss": 0.1798, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6373976342129208, |
| "grad_norm": 1.9292090840839082, |
| "learning_rate": 4.802454918979728e-06, |
| "loss": 0.1652, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.637852593266606, |
| "grad_norm": 1.9210595574243916, |
| "learning_rate": 4.802176394326187e-06, |
| "loss": 0.2007, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.6383075523202911, |
| "grad_norm": 1.464054312422107, |
| "learning_rate": 4.801897681549188e-06, |
| "loss": 0.129, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.6387625113739763, |
| "grad_norm": 1.9150864430756966, |
| "learning_rate": 4.801618780671506e-06, |
| "loss": 0.1634, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.6392174704276615, |
| "grad_norm": 1.4873483060535149, |
| "learning_rate": 4.801339691715932e-06, |
| "loss": 0.1463, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.6396724294813467, |
| "grad_norm": 2.3690804594133623, |
| "learning_rate": 4.8010604147052695e-06, |
| "loss": 0.1606, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.6401273885350318, |
| "grad_norm": 2.3100068394442497, |
| "learning_rate": 4.800780949662343e-06, |
| "loss": 0.1904, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.640582347588717, |
| "grad_norm": 1.5363867596702172, |
| "learning_rate": 4.800501296609986e-06, |
| "loss": 0.1053, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.6410373066424022, |
| "grad_norm": 1.606538550331431, |
| "learning_rate": 4.800221455571053e-06, |
| "loss": 0.1397, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.6414922656960873, |
| "grad_norm": 1.611596105149799, |
| "learning_rate": 4.7999414265684105e-06, |
| "loss": 0.1303, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6419472247497725, |
| "grad_norm": 1.6262064168900117, |
| "learning_rate": 4.79966120962494e-06, |
| "loss": 0.1564, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.6424021838034577, |
| "grad_norm": 2.015359106142208, |
| "learning_rate": 4.799380804763542e-06, |
| "loss": 0.1619, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 2.0480276409863465, |
| "learning_rate": 4.799100212007128e-06, |
| "loss": 0.1711, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.643312101910828, |
| "grad_norm": 1.9220142745677993, |
| "learning_rate": 4.7988194313786275e-06, |
| "loss": 0.1496, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.6437670609645132, |
| "grad_norm": 1.5592119110073082, |
| "learning_rate": 4.798538462900984e-06, |
| "loss": 0.1563, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.6442220200181984, |
| "grad_norm": 2.7928579618942764, |
| "learning_rate": 4.798257306597157e-06, |
| "loss": 0.2031, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.6446769790718835, |
| "grad_norm": 1.579272373938799, |
| "learning_rate": 4.797975962490122e-06, |
| "loss": 0.1501, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.6451319381255687, |
| "grad_norm": 1.5556034741269746, |
| "learning_rate": 4.797694430602869e-06, |
| "loss": 0.1125, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.6455868971792539, |
| "grad_norm": 2.4067503053827273, |
| "learning_rate": 4.797412710958405e-06, |
| "loss": 0.2154, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.6460418562329391, |
| "grad_norm": 2.143935212981359, |
| "learning_rate": 4.797130803579747e-06, |
| "loss": 0.1694, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6464968152866242, |
| "grad_norm": 2.6240019391696667, |
| "learning_rate": 4.796848708489935e-06, |
| "loss": 0.2811, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.6469517743403094, |
| "grad_norm": 1.5174877651602559, |
| "learning_rate": 4.796566425712018e-06, |
| "loss": 0.1435, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.6474067333939946, |
| "grad_norm": 1.6834754436981423, |
| "learning_rate": 4.796283955269065e-06, |
| "loss": 0.1816, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.6478616924476797, |
| "grad_norm": 1.5804322468618368, |
| "learning_rate": 4.796001297184156e-06, |
| "loss": 0.1471, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.6483166515013649, |
| "grad_norm": 1.8327883828431184, |
| "learning_rate": 4.79571845148039e-06, |
| "loss": 0.2011, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.6487716105550501, |
| "grad_norm": 1.4039853389905468, |
| "learning_rate": 4.795435418180879e-06, |
| "loss": 0.1074, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.6492265696087353, |
| "grad_norm": 1.664983557085843, |
| "learning_rate": 4.795152197308753e-06, |
| "loss": 0.148, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.6496815286624203, |
| "grad_norm": 1.6844695222093484, |
| "learning_rate": 4.794868788887154e-06, |
| "loss": 0.1207, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.6501364877161055, |
| "grad_norm": 1.3430612047901953, |
| "learning_rate": 4.79458519293924e-06, |
| "loss": 0.1437, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.6505914467697907, |
| "grad_norm": 1.6637985127807216, |
| "learning_rate": 4.794301409488187e-06, |
| "loss": 0.1478, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.6510464058234758, |
| "grad_norm": 1.385729637043462, |
| "learning_rate": 4.7940174385571835e-06, |
| "loss": 0.1627, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.651501364877161, |
| "grad_norm": 2.0471057598981632, |
| "learning_rate": 4.793733280169435e-06, |
| "loss": 0.2172, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.6519563239308462, |
| "grad_norm": 2.804939948704313, |
| "learning_rate": 4.7934489343481614e-06, |
| "loss": 0.2366, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.6524112829845314, |
| "grad_norm": 2.1472377723290568, |
| "learning_rate": 4.7931644011165975e-06, |
| "loss": 0.1418, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.6528662420382165, |
| "grad_norm": 1.9918480481257164, |
| "learning_rate": 4.792879680497995e-06, |
| "loss": 0.186, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.6533212010919017, |
| "grad_norm": 2.5064644756915655, |
| "learning_rate": 4.79259477251562e-06, |
| "loss": 0.2048, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.6537761601455869, |
| "grad_norm": 2.3512727211776263, |
| "learning_rate": 4.792309677192753e-06, |
| "loss": 0.2052, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.654231119199272, |
| "grad_norm": 1.9202855097301381, |
| "learning_rate": 4.79202439455269e-06, |
| "loss": 0.1458, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.6546860782529572, |
| "grad_norm": 1.4271813740118833, |
| "learning_rate": 4.791738924618745e-06, |
| "loss": 0.1211, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.6551410373066424, |
| "grad_norm": 2.032712581115854, |
| "learning_rate": 4.791453267414245e-06, |
| "loss": 0.1836, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6555959963603276, |
| "grad_norm": 1.858326597247768, |
| "learning_rate": 4.7911674229625316e-06, |
| "loss": 0.1539, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.6560509554140127, |
| "grad_norm": 1.9149985878919944, |
| "learning_rate": 4.790881391286963e-06, |
| "loss": 0.1492, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.6565059144676979, |
| "grad_norm": 2.224611827457958, |
| "learning_rate": 4.790595172410914e-06, |
| "loss": 0.1771, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.6569608735213831, |
| "grad_norm": 2.2710831934815423, |
| "learning_rate": 4.79030876635777e-06, |
| "loss": 0.1816, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.6574158325750682, |
| "grad_norm": 1.686396567912197, |
| "learning_rate": 4.790022173150938e-06, |
| "loss": 0.1715, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.6578707916287534, |
| "grad_norm": 1.6844379519791872, |
| "learning_rate": 4.789735392813835e-06, |
| "loss": 0.1612, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.6583257506824386, |
| "grad_norm": 1.9308684762069341, |
| "learning_rate": 4.789448425369896e-06, |
| "loss": 0.1943, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.6587807097361238, |
| "grad_norm": 1.7813876642605184, |
| "learning_rate": 4.789161270842571e-06, |
| "loss": 0.133, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.6592356687898089, |
| "grad_norm": 1.7016656003147437, |
| "learning_rate": 4.7888739292553235e-06, |
| "loss": 0.1787, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.6596906278434941, |
| "grad_norm": 1.788996418731665, |
| "learning_rate": 4.788586400631636e-06, |
| "loss": 0.2144, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6601455868971793, |
| "grad_norm": 1.1868611743252886, |
| "learning_rate": 4.788298684995003e-06, |
| "loss": 0.1411, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.6606005459508644, |
| "grad_norm": 1.3784782394299329, |
| "learning_rate": 4.7880107823689355e-06, |
| "loss": 0.1394, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.6610555050045496, |
| "grad_norm": 2.38570648853941, |
| "learning_rate": 4.787722692776958e-06, |
| "loss": 0.2177, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.6615104640582348, |
| "grad_norm": 1.885827372966156, |
| "learning_rate": 4.787434416242615e-06, |
| "loss": 0.1932, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.66196542311192, |
| "grad_norm": 2.0741165529803305, |
| "learning_rate": 4.787145952789461e-06, |
| "loss": 0.1916, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.6624203821656051, |
| "grad_norm": 2.2824023726624216, |
| "learning_rate": 4.786857302441069e-06, |
| "loss": 0.154, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.6628753412192903, |
| "grad_norm": 1.9364048955005693, |
| "learning_rate": 4.786568465221025e-06, |
| "loss": 0.1456, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.6633303002729755, |
| "grad_norm": 2.085706626351343, |
| "learning_rate": 4.7862794411529315e-06, |
| "loss": 0.2085, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.6637852593266605, |
| "grad_norm": 1.614288560024189, |
| "learning_rate": 4.7859902302604075e-06, |
| "loss": 0.174, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.6642402183803457, |
| "grad_norm": 2.5891987139037305, |
| "learning_rate": 4.785700832567085e-06, |
| "loss": 0.2207, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.664695177434031, |
| "grad_norm": 1.60390922794205, |
| "learning_rate": 4.785411248096613e-06, |
| "loss": 0.1694, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.6651501364877161, |
| "grad_norm": 1.9008758556011767, |
| "learning_rate": 4.785121476872654e-06, |
| "loss": 0.1917, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.6656050955414012, |
| "grad_norm": 1.8830534414569509, |
| "learning_rate": 4.784831518918888e-06, |
| "loss": 0.1738, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.6660600545950864, |
| "grad_norm": 1.7207750442706227, |
| "learning_rate": 4.784541374259008e-06, |
| "loss": 0.15, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.6665150136487716, |
| "grad_norm": 1.875368507153303, |
| "learning_rate": 4.7842510429167244e-06, |
| "loss": 0.1785, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.6669699727024567, |
| "grad_norm": 1.423039570984651, |
| "learning_rate": 4.783960524915761e-06, |
| "loss": 0.1618, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.6674249317561419, |
| "grad_norm": 3.369804205318982, |
| "learning_rate": 4.783669820279858e-06, |
| "loss": 0.2151, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.6678798908098271, |
| "grad_norm": 1.7236530224714224, |
| "learning_rate": 4.783378929032769e-06, |
| "loss": 0.1449, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.6683348498635123, |
| "grad_norm": 1.897670469007501, |
| "learning_rate": 4.783087851198267e-06, |
| "loss": 0.1565, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.6687898089171974, |
| "grad_norm": 2.120484944530229, |
| "learning_rate": 4.7827965868001356e-06, |
| "loss": 0.146, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.6692447679708826, |
| "grad_norm": 1.5164080428619426, |
| "learning_rate": 4.782505135862176e-06, |
| "loss": 0.1948, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.6696997270245678, |
| "grad_norm": 1.7069357913374903, |
| "learning_rate": 4.782213498408205e-06, |
| "loss": 0.1592, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.6701546860782529, |
| "grad_norm": 1.809748302750509, |
| "learning_rate": 4.781921674462053e-06, |
| "loss": 0.1314, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.6706096451319381, |
| "grad_norm": 2.1951569204558927, |
| "learning_rate": 4.781629664047566e-06, |
| "loss": 0.1845, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.6710646041856233, |
| "grad_norm": 1.3071594737849044, |
| "learning_rate": 4.781337467188607e-06, |
| "loss": 0.1436, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.6715195632393085, |
| "grad_norm": 1.945295439800649, |
| "learning_rate": 4.781045083909053e-06, |
| "loss": 0.1855, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.6719745222929936, |
| "grad_norm": 2.1383665971380053, |
| "learning_rate": 4.780752514232796e-06, |
| "loss": 0.1746, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.6724294813466788, |
| "grad_norm": 1.9493775213300697, |
| "learning_rate": 4.780459758183743e-06, |
| "loss": 0.136, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.672884440400364, |
| "grad_norm": 1.5588501717449852, |
| "learning_rate": 4.780166815785817e-06, |
| "loss": 0.1564, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.6733393994540491, |
| "grad_norm": 1.9111191141451183, |
| "learning_rate": 4.7798736870629554e-06, |
| "loss": 0.1722, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.6737943585077343, |
| "grad_norm": 1.7396374086258946, |
| "learning_rate": 4.779580372039113e-06, |
| "loss": 0.1569, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.6742493175614195, |
| "grad_norm": 2.2814229407003563, |
| "learning_rate": 4.779286870738256e-06, |
| "loss": 0.1576, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.6747042766151047, |
| "grad_norm": 2.543619017373989, |
| "learning_rate": 4.778993183184371e-06, |
| "loss": 0.1743, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.6751592356687898, |
| "grad_norm": 2.003249738108025, |
| "learning_rate": 4.778699309401453e-06, |
| "loss": 0.2083, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.675614194722475, |
| "grad_norm": 1.7140899951572492, |
| "learning_rate": 4.7784052494135195e-06, |
| "loss": 0.1649, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.6760691537761602, |
| "grad_norm": 1.6177440846188005, |
| "learning_rate": 4.778111003244596e-06, |
| "loss": 0.1706, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.6765241128298453, |
| "grad_norm": 1.3540158476274282, |
| "learning_rate": 4.777816570918731e-06, |
| "loss": 0.1474, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.6769790718835305, |
| "grad_norm": 1.8863006900369008, |
| "learning_rate": 4.777521952459982e-06, |
| "loss": 0.1995, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.6774340309372157, |
| "grad_norm": 2.2667108941921073, |
| "learning_rate": 4.777227147892424e-06, |
| "loss": 0.1855, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.6778889899909009, |
| "grad_norm": 1.9407891934102777, |
| "learning_rate": 4.776932157240147e-06, |
| "loss": 0.1503, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.678343949044586, |
| "grad_norm": 2.102459646475576, |
| "learning_rate": 4.776636980527257e-06, |
| "loss": 0.1388, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.6787989080982711, |
| "grad_norm": 2.08408986696494, |
| "learning_rate": 4.776341617777874e-06, |
| "loss": 0.1933, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.6792538671519563, |
| "grad_norm": 1.5090681867773854, |
| "learning_rate": 4.776046069016135e-06, |
| "loss": 0.1617, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.6797088262056415, |
| "grad_norm": 2.463007954699752, |
| "learning_rate": 4.775750334266188e-06, |
| "loss": 0.2267, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.6801637852593266, |
| "grad_norm": 1.0819737688059052, |
| "learning_rate": 4.775454413552202e-06, |
| "loss": 0.1047, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.6806187443130118, |
| "grad_norm": 2.180583587749644, |
| "learning_rate": 4.775158306898358e-06, |
| "loss": 0.1147, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.681073703366697, |
| "grad_norm": 1.4888818210097596, |
| "learning_rate": 4.774862014328849e-06, |
| "loss": 0.1531, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.6815286624203821, |
| "grad_norm": 1.4821796970713637, |
| "learning_rate": 4.774565535867892e-06, |
| "loss": 0.163, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.6819836214740673, |
| "grad_norm": 1.9349751384396998, |
| "learning_rate": 4.77426887153971e-06, |
| "loss": 0.1602, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.6824385805277525, |
| "grad_norm": 2.068635944499767, |
| "learning_rate": 4.773972021368546e-06, |
| "loss": 0.1934, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6828935395814377, |
| "grad_norm": 1.9557854149934149, |
| "learning_rate": 4.773674985378658e-06, |
| "loss": 0.2143, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.6833484986351228, |
| "grad_norm": 2.6563423898144936, |
| "learning_rate": 4.773377763594319e-06, |
| "loss": 0.1837, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.683803457688808, |
| "grad_norm": 2.4819107124862856, |
| "learning_rate": 4.773080356039814e-06, |
| "loss": 0.1975, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.6842584167424932, |
| "grad_norm": 1.7036233463379575, |
| "learning_rate": 4.772782762739448e-06, |
| "loss": 0.1848, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.6847133757961783, |
| "grad_norm": 1.9141994818014876, |
| "learning_rate": 4.772484983717539e-06, |
| "loss": 0.2006, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.6851683348498635, |
| "grad_norm": 2.4521735191952114, |
| "learning_rate": 4.77218701899842e-06, |
| "loss": 0.2101, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.6856232939035487, |
| "grad_norm": 2.0961682322351174, |
| "learning_rate": 4.771888868606438e-06, |
| "loss": 0.2065, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.6860782529572339, |
| "grad_norm": 1.6218330474990592, |
| "learning_rate": 4.771590532565957e-06, |
| "loss": 0.1255, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.686533212010919, |
| "grad_norm": 1.9721609486698313, |
| "learning_rate": 4.771292010901357e-06, |
| "loss": 0.1303, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.6869881710646042, |
| "grad_norm": 2.121063258188487, |
| "learning_rate": 4.77099330363703e-06, |
| "loss": 0.149, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.6874431301182894, |
| "grad_norm": 1.4516172378682393, |
| "learning_rate": 4.770694410797387e-06, |
| "loss": 0.1318, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.6878980891719745, |
| "grad_norm": 1.6701384225121902, |
| "learning_rate": 4.770395332406851e-06, |
| "loss": 0.1459, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.6883530482256597, |
| "grad_norm": 1.6796065018549693, |
| "learning_rate": 4.770096068489861e-06, |
| "loss": 0.1599, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.6888080072793449, |
| "grad_norm": 1.235533430237688, |
| "learning_rate": 4.769796619070872e-06, |
| "loss": 0.1519, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.6892629663330301, |
| "grad_norm": 1.3347747968404207, |
| "learning_rate": 4.769496984174353e-06, |
| "loss": 0.1064, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.6897179253867152, |
| "grad_norm": 1.5781140890537728, |
| "learning_rate": 4.769197163824791e-06, |
| "loss": 0.1435, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.6901728844404004, |
| "grad_norm": 2.213137403753888, |
| "learning_rate": 4.768897158046683e-06, |
| "loss": 0.1866, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.6906278434940856, |
| "grad_norm": 1.5778012312077723, |
| "learning_rate": 4.768596966864546e-06, |
| "loss": 0.1604, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.6910828025477707, |
| "grad_norm": 1.652969574663111, |
| "learning_rate": 4.76829659030291e-06, |
| "loss": 0.1869, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.6915377616014559, |
| "grad_norm": 1.5361209471256771, |
| "learning_rate": 4.767996028386319e-06, |
| "loss": 0.1457, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.6919927206551411, |
| "grad_norm": 2.936222163725796, |
| "learning_rate": 4.767695281139336e-06, |
| "loss": 0.1881, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.6924476797088263, |
| "grad_norm": 2.3134771803324905, |
| "learning_rate": 4.767394348586535e-06, |
| "loss": 0.1599, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.6929026387625113, |
| "grad_norm": 2.4498437084815428, |
| "learning_rate": 4.767093230752507e-06, |
| "loss": 0.2138, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.6933575978161965, |
| "grad_norm": 1.5332362659492962, |
| "learning_rate": 4.766791927661859e-06, |
| "loss": 0.151, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.6938125568698817, |
| "grad_norm": 1.7915535564744174, |
| "learning_rate": 4.766490439339211e-06, |
| "loss": 0.1318, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.6942675159235668, |
| "grad_norm": 1.6447847233863087, |
| "learning_rate": 4.7661887658092e-06, |
| "loss": 0.162, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.694722474977252, |
| "grad_norm": 2.9781233092582866, |
| "learning_rate": 4.765886907096477e-06, |
| "loss": 0.2619, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.6951774340309372, |
| "grad_norm": 1.7140676149721272, |
| "learning_rate": 4.7655848632257084e-06, |
| "loss": 0.1425, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.6956323930846224, |
| "grad_norm": 2.4534906180849116, |
| "learning_rate": 4.7652826342215764e-06, |
| "loss": 0.236, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.6960873521383075, |
| "grad_norm": 1.6478858265647598, |
| "learning_rate": 4.764980220108777e-06, |
| "loss": 0.1955, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6965423111919927, |
| "grad_norm": 2.306316562409567, |
| "learning_rate": 4.764677620912022e-06, |
| "loss": 0.2079, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.6969972702456779, |
| "grad_norm": 1.644994735808915, |
| "learning_rate": 4.764374836656041e-06, |
| "loss": 0.1442, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.697452229299363, |
| "grad_norm": 1.4036507182888944, |
| "learning_rate": 4.764071867365571e-06, |
| "loss": 0.1638, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.6979071883530482, |
| "grad_norm": 1.5164218367626467, |
| "learning_rate": 4.763768713065375e-06, |
| "loss": 0.156, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.6983621474067334, |
| "grad_norm": 1.7701773803690557, |
| "learning_rate": 4.763465373780223e-06, |
| "loss": 0.1145, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.6988171064604186, |
| "grad_norm": 2.076859289782232, |
| "learning_rate": 4.763161849534902e-06, |
| "loss": 0.1561, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.6992720655141037, |
| "grad_norm": 1.6167208008101008, |
| "learning_rate": 4.762858140354214e-06, |
| "loss": 0.1621, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.6997270245677889, |
| "grad_norm": 1.4746209465407152, |
| "learning_rate": 4.7625542462629785e-06, |
| "loss": 0.1768, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.7001819836214741, |
| "grad_norm": 1.4200002114989836, |
| "learning_rate": 4.762250167286027e-06, |
| "loss": 0.0995, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.7006369426751592, |
| "grad_norm": 2.080064440715621, |
| "learning_rate": 4.761945903448209e-06, |
| "loss": 0.2274, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.7010919017288444, |
| "grad_norm": 1.346792584477521, |
| "learning_rate": 4.761641454774386e-06, |
| "loss": 0.1219, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.7015468607825296, |
| "grad_norm": 2.36691492405669, |
| "learning_rate": 4.761336821289436e-06, |
| "loss": 0.2965, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.7020018198362148, |
| "grad_norm": 1.773901757295841, |
| "learning_rate": 4.761032003018254e-06, |
| "loss": 0.163, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.7024567788898999, |
| "grad_norm": 1.6774939072873407, |
| "learning_rate": 4.760726999985748e-06, |
| "loss": 0.1315, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.7029117379435851, |
| "grad_norm": 1.6552217973496692, |
| "learning_rate": 4.7604218122168406e-06, |
| "loss": 0.1298, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.7033666969972703, |
| "grad_norm": 1.91830208867601, |
| "learning_rate": 4.760116439736471e-06, |
| "loss": 0.2525, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.7038216560509554, |
| "grad_norm": 1.564874376143588, |
| "learning_rate": 4.759810882569591e-06, |
| "loss": 0.1863, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.7042766151046406, |
| "grad_norm": 1.4864041422513101, |
| "learning_rate": 4.759505140741172e-06, |
| "loss": 0.1063, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.7047315741583258, |
| "grad_norm": 2.549801333631036, |
| "learning_rate": 4.759199214276196e-06, |
| "loss": 0.2505, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.705186533212011, |
| "grad_norm": 1.5401594920414479, |
| "learning_rate": 4.758893103199665e-06, |
| "loss": 0.1624, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.7056414922656961, |
| "grad_norm": 1.6343764429957106, |
| "learning_rate": 4.758586807536588e-06, |
| "loss": 0.1545, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.7060964513193813, |
| "grad_norm": 1.6039711645022867, |
| "learning_rate": 4.758280327311998e-06, |
| "loss": 0.1134, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.7065514103730665, |
| "grad_norm": 2.2883990951010063, |
| "learning_rate": 4.757973662550938e-06, |
| "loss": 0.1899, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.7070063694267515, |
| "grad_norm": 1.7249554511478242, |
| "learning_rate": 4.757666813278466e-06, |
| "loss": 0.1725, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.7074613284804367, |
| "grad_norm": 2.041262841608907, |
| "learning_rate": 4.757359779519659e-06, |
| "loss": 0.2481, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.707916287534122, |
| "grad_norm": 1.7815243564082959, |
| "learning_rate": 4.757052561299604e-06, |
| "loss": 0.2166, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.7083712465878071, |
| "grad_norm": 1.5514238648411727, |
| "learning_rate": 4.756745158643407e-06, |
| "loss": 0.224, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.7088262056414922, |
| "grad_norm": 1.8608039671832461, |
| "learning_rate": 4.7564375715761865e-06, |
| "loss": 0.2223, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.7092811646951774, |
| "grad_norm": 1.6157629653628103, |
| "learning_rate": 4.756129800123078e-06, |
| "loss": 0.1293, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.7097361237488626, |
| "grad_norm": 1.4596213449886457, |
| "learning_rate": 4.755821844309232e-06, |
| "loss": 0.1805, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7101910828025477, |
| "grad_norm": 1.7295068196827752, |
| "learning_rate": 4.75551370415981e-06, |
| "loss": 0.1599, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.7106460418562329, |
| "grad_norm": 2.0606393433385612, |
| "learning_rate": 4.755205379699996e-06, |
| "loss": 0.1941, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.7111010009099181, |
| "grad_norm": 2.0979325727754294, |
| "learning_rate": 4.75489687095498e-06, |
| "loss": 0.1913, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.7115559599636033, |
| "grad_norm": 2.2303398669678076, |
| "learning_rate": 4.754588177949977e-06, |
| "loss": 0.1478, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.7120109190172884, |
| "grad_norm": 2.093261606281437, |
| "learning_rate": 4.7542793007102086e-06, |
| "loss": 0.1815, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.7124658780709736, |
| "grad_norm": 1.4472751266274675, |
| "learning_rate": 4.7539702392609165e-06, |
| "loss": 0.1697, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.7129208371246588, |
| "grad_norm": 2.0281126718428077, |
| "learning_rate": 4.753660993627356e-06, |
| "loss": 0.0948, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.7133757961783439, |
| "grad_norm": 1.5189147438423232, |
| "learning_rate": 4.753351563834795e-06, |
| "loss": 0.1727, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.7138307552320291, |
| "grad_norm": 1.7409543127807352, |
| "learning_rate": 4.753041949908521e-06, |
| "loss": 0.1642, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 2.194503112395564, |
| "learning_rate": 4.752732151873834e-06, |
| "loss": 0.2196, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7147406733393995, |
| "grad_norm": 1.697163266188786, |
| "learning_rate": 4.752422169756048e-06, |
| "loss": 0.1672, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.7151956323930846, |
| "grad_norm": 1.8134253244717562, |
| "learning_rate": 4.752112003580495e-06, |
| "loss": 0.1603, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.7156505914467698, |
| "grad_norm": 2.3783985389961915, |
| "learning_rate": 4.751801653372518e-06, |
| "loss": 0.1731, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.716105550500455, |
| "grad_norm": 2.5039159852054795, |
| "learning_rate": 4.751491119157481e-06, |
| "loss": 0.1865, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.7165605095541401, |
| "grad_norm": 1.619599621691377, |
| "learning_rate": 4.751180400960756e-06, |
| "loss": 0.1746, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7170154686078253, |
| "grad_norm": 1.65152231646464, |
| "learning_rate": 4.7508694988077355e-06, |
| "loss": 0.1515, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.7174704276615105, |
| "grad_norm": 2.465040491157821, |
| "learning_rate": 4.750558412723824e-06, |
| "loss": 0.1966, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.7179253867151957, |
| "grad_norm": 2.2789812780893364, |
| "learning_rate": 4.750247142734442e-06, |
| "loss": 0.1599, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.7183803457688808, |
| "grad_norm": 1.7581577660091943, |
| "learning_rate": 4.749935688865026e-06, |
| "loss": 0.141, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.718835304822566, |
| "grad_norm": 2.1794165158833914, |
| "learning_rate": 4.749624051141026e-06, |
| "loss": 0.1088, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7192902638762512, |
| "grad_norm": 1.443223743964179, |
| "learning_rate": 4.7493122295879076e-06, |
| "loss": 0.1189, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.7197452229299363, |
| "grad_norm": 2.35745890496679, |
| "learning_rate": 4.7490002242311525e-06, |
| "loss": 0.2129, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.7202001819836215, |
| "grad_norm": 1.5523835122804504, |
| "learning_rate": 4.748688035096255e-06, |
| "loss": 0.2081, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.7206551410373067, |
| "grad_norm": 2.4968010568360692, |
| "learning_rate": 4.748375662208726e-06, |
| "loss": 0.1759, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.7211101000909919, |
| "grad_norm": 1.9165363158958804, |
| "learning_rate": 4.748063105594092e-06, |
| "loss": 0.2267, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.721565059144677, |
| "grad_norm": 1.7864622532435137, |
| "learning_rate": 4.747750365277892e-06, |
| "loss": 0.1648, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.7220200181983621, |
| "grad_norm": 1.8532777769110087, |
| "learning_rate": 4.747437441285684e-06, |
| "loss": 0.1501, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.7224749772520473, |
| "grad_norm": 1.7539173333380942, |
| "learning_rate": 4.747124333643038e-06, |
| "loss": 0.1883, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.7229299363057324, |
| "grad_norm": 1.7153189766040051, |
| "learning_rate": 4.746811042375538e-06, |
| "loss": 0.1308, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.7233848953594176, |
| "grad_norm": 1.5162583630812903, |
| "learning_rate": 4.746497567508787e-06, |
| "loss": 0.1571, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7238398544131028, |
| "grad_norm": 1.5546810521185177, |
| "learning_rate": 4.7461839090684e-06, |
| "loss": 0.1694, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.724294813466788, |
| "grad_norm": 2.0021940033485404, |
| "learning_rate": 4.745870067080007e-06, |
| "loss": 0.171, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.7247497725204731, |
| "grad_norm": 2.221217513727709, |
| "learning_rate": 4.7455560415692545e-06, |
| "loss": 0.231, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.7252047315741583, |
| "grad_norm": 2.222153805045267, |
| "learning_rate": 4.745241832561803e-06, |
| "loss": 0.1446, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.7256596906278435, |
| "grad_norm": 1.784667663061202, |
| "learning_rate": 4.744927440083329e-06, |
| "loss": 0.1646, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.7261146496815286, |
| "grad_norm": 1.7626687045318659, |
| "learning_rate": 4.744612864159522e-06, |
| "loss": 0.1685, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.7265696087352138, |
| "grad_norm": 1.9909235520315078, |
| "learning_rate": 4.7442981048160895e-06, |
| "loss": 0.1854, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.727024567788899, |
| "grad_norm": 2.4131359111724464, |
| "learning_rate": 4.74398316207875e-06, |
| "loss": 0.1784, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.7274795268425842, |
| "grad_norm": 2.3390737079991215, |
| "learning_rate": 4.74366803597324e-06, |
| "loss": 0.28, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.7279344858962693, |
| "grad_norm": 1.5176778250654925, |
| "learning_rate": 4.743352726525311e-06, |
| "loss": 0.1119, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7283894449499545, |
| "grad_norm": 1.612075524542219, |
| "learning_rate": 4.743037233760728e-06, |
| "loss": 0.1548, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.7288444040036397, |
| "grad_norm": 2.082336981370237, |
| "learning_rate": 4.742721557705271e-06, |
| "loss": 0.1907, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.7292993630573248, |
| "grad_norm": 1.8874163681919673, |
| "learning_rate": 4.7424056983847374e-06, |
| "loss": 0.1872, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.72975432211101, |
| "grad_norm": 1.9161874420851024, |
| "learning_rate": 4.7420896558249366e-06, |
| "loss": 0.1199, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.7302092811646952, |
| "grad_norm": 1.9339794473206677, |
| "learning_rate": 4.741773430051694e-06, |
| "loss": 0.1467, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.7306642402183804, |
| "grad_norm": 1.5901851811892251, |
| "learning_rate": 4.74145702109085e-06, |
| "loss": 0.1094, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.7311191992720655, |
| "grad_norm": 2.678117310973907, |
| "learning_rate": 4.741140428968261e-06, |
| "loss": 0.2545, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.7315741583257507, |
| "grad_norm": 1.4456239768846677, |
| "learning_rate": 4.740823653709797e-06, |
| "loss": 0.101, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.7320291173794359, |
| "grad_norm": 1.5614448809750465, |
| "learning_rate": 4.740506695341343e-06, |
| "loss": 0.135, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.732484076433121, |
| "grad_norm": 1.9409375225046157, |
| "learning_rate": 4.740189553888801e-06, |
| "loss": 0.2674, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7329390354868062, |
| "grad_norm": 1.757285590607046, |
| "learning_rate": 4.739872229378085e-06, |
| "loss": 0.1358, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.7333939945404914, |
| "grad_norm": 1.7119351957596494, |
| "learning_rate": 4.739554721835125e-06, |
| "loss": 0.1405, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.7338489535941766, |
| "grad_norm": 1.5407585285384973, |
| "learning_rate": 4.739237031285867e-06, |
| "loss": 0.1789, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.7343039126478617, |
| "grad_norm": 1.8412394540639878, |
| "learning_rate": 4.738919157756272e-06, |
| "loss": 0.1741, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.7347588717015469, |
| "grad_norm": 1.9093990086684758, |
| "learning_rate": 4.738601101272313e-06, |
| "loss": 0.1972, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.7352138307552321, |
| "grad_norm": 1.6531050134000445, |
| "learning_rate": 4.738282861859983e-06, |
| "loss": 0.1828, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.7356687898089171, |
| "grad_norm": 1.6958094821678005, |
| "learning_rate": 4.737964439545284e-06, |
| "loss": 0.1623, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.7361237488626023, |
| "grad_norm": 1.9487516983862898, |
| "learning_rate": 4.737645834354238e-06, |
| "loss": 0.1761, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.7365787079162875, |
| "grad_norm": 1.5339742875273046, |
| "learning_rate": 4.737327046312879e-06, |
| "loss": 0.1188, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.7370336669699727, |
| "grad_norm": 1.8259875586922627, |
| "learning_rate": 4.737008075447259e-06, |
| "loss": 0.13, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7374886260236578, |
| "grad_norm": 2.112705655098723, |
| "learning_rate": 4.73668892178344e-06, |
| "loss": 0.162, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.737943585077343, |
| "grad_norm": 2.1191881288248755, |
| "learning_rate": 4.736369585347503e-06, |
| "loss": 0.1882, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.7383985441310282, |
| "grad_norm": 2.42511490554677, |
| "learning_rate": 4.736050066165544e-06, |
| "loss": 0.168, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.7388535031847133, |
| "grad_norm": 2.5180747249974678, |
| "learning_rate": 4.735730364263671e-06, |
| "loss": 0.2462, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.7393084622383985, |
| "grad_norm": 1.899152814897376, |
| "learning_rate": 4.735410479668009e-06, |
| "loss": 0.1649, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.7397634212920837, |
| "grad_norm": 2.5891320586414506, |
| "learning_rate": 4.735090412404697e-06, |
| "loss": 0.2112, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.7402183803457689, |
| "grad_norm": 1.6256945799338343, |
| "learning_rate": 4.734770162499891e-06, |
| "loss": 0.0995, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.740673339399454, |
| "grad_norm": 2.115890838067561, |
| "learning_rate": 4.734449729979759e-06, |
| "loss": 0.1863, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.7411282984531392, |
| "grad_norm": 1.8207130234699649, |
| "learning_rate": 4.734129114870486e-06, |
| "loss": 0.1621, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.7415832575068244, |
| "grad_norm": 2.419448299752305, |
| "learning_rate": 4.733808317198271e-06, |
| "loss": 0.1682, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7420382165605095, |
| "grad_norm": 1.864719563201482, |
| "learning_rate": 4.733487336989327e-06, |
| "loss": 0.1534, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.7424931756141947, |
| "grad_norm": 2.480364363656269, |
| "learning_rate": 4.733166174269886e-06, |
| "loss": 0.186, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.7429481346678799, |
| "grad_norm": 2.0606766178805116, |
| "learning_rate": 4.732844829066189e-06, |
| "loss": 0.2189, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.7434030937215651, |
| "grad_norm": 2.162055464706376, |
| "learning_rate": 4.732523301404497e-06, |
| "loss": 0.1969, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.7438580527752502, |
| "grad_norm": 2.12376584678073, |
| "learning_rate": 4.732201591311082e-06, |
| "loss": 0.2101, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.7443130118289354, |
| "grad_norm": 1.5079389097876976, |
| "learning_rate": 4.731879698812233e-06, |
| "loss": 0.1802, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.7447679708826206, |
| "grad_norm": 1.744034863658637, |
| "learning_rate": 4.731557623934255e-06, |
| "loss": 0.1398, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.7452229299363057, |
| "grad_norm": 2.7848754471064043, |
| "learning_rate": 4.7312353667034645e-06, |
| "loss": 0.2499, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.7456778889899909, |
| "grad_norm": 2.58334353852049, |
| "learning_rate": 4.730912927146197e-06, |
| "loss": 0.2203, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.7461328480436761, |
| "grad_norm": 2.0325933883862066, |
| "learning_rate": 4.7305903052888e-06, |
| "loss": 0.1563, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7465878070973613, |
| "grad_norm": 2.3443549071357057, |
| "learning_rate": 4.730267501157636e-06, |
| "loss": 0.1896, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.7470427661510464, |
| "grad_norm": 2.003548520587404, |
| "learning_rate": 4.729944514779084e-06, |
| "loss": 0.1705, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.7474977252047316, |
| "grad_norm": 1.3567793569480755, |
| "learning_rate": 4.729621346179536e-06, |
| "loss": 0.1429, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.7479526842584168, |
| "grad_norm": 1.9172209433761784, |
| "learning_rate": 4.7292979953854e-06, |
| "loss": 0.1224, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.7484076433121019, |
| "grad_norm": 1.7854487682262081, |
| "learning_rate": 4.7289744624231004e-06, |
| "loss": 0.1753, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.7488626023657871, |
| "grad_norm": 2.0357381373480377, |
| "learning_rate": 4.728650747319073e-06, |
| "loss": 0.1844, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.7493175614194723, |
| "grad_norm": 2.295347780668863, |
| "learning_rate": 4.728326850099771e-06, |
| "loss": 0.1949, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.7497725204731575, |
| "grad_norm": 2.2592022682113564, |
| "learning_rate": 4.728002770791663e-06, |
| "loss": 0.1641, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.7502274795268425, |
| "grad_norm": 1.8794487431290805, |
| "learning_rate": 4.727678509421229e-06, |
| "loss": 0.1672, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.7506824385805277, |
| "grad_norm": 1.471409298797821, |
| "learning_rate": 4.727354066014968e-06, |
| "loss": 0.1251, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7511373976342129, |
| "grad_norm": 1.2272497564159228, |
| "learning_rate": 4.727029440599391e-06, |
| "loss": 0.1165, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.7515923566878981, |
| "grad_norm": 1.7826119947445478, |
| "learning_rate": 4.726704633201025e-06, |
| "loss": 0.1367, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.7520473157415832, |
| "grad_norm": 1.5654538387161951, |
| "learning_rate": 4.726379643846412e-06, |
| "loss": 0.1622, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.7525022747952684, |
| "grad_norm": 2.0792625449816255, |
| "learning_rate": 4.726054472562109e-06, |
| "loss": 0.1741, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.7529572338489536, |
| "grad_norm": 1.5223527837461277, |
| "learning_rate": 4.725729119374687e-06, |
| "loss": 0.1198, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.7534121929026387, |
| "grad_norm": 1.5290108835892176, |
| "learning_rate": 4.725403584310734e-06, |
| "loss": 0.1026, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.7538671519563239, |
| "grad_norm": 2.155319535005024, |
| "learning_rate": 4.725077867396849e-06, |
| "loss": 0.1652, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.7543221110100091, |
| "grad_norm": 1.565904420652083, |
| "learning_rate": 4.724751968659648e-06, |
| "loss": 0.1628, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.7547770700636943, |
| "grad_norm": 2.9773420234850345, |
| "learning_rate": 4.724425888125764e-06, |
| "loss": 0.2409, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.7552320291173794, |
| "grad_norm": 2.3428961739867304, |
| "learning_rate": 4.724099625821842e-06, |
| "loss": 0.2216, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7556869881710646, |
| "grad_norm": 1.7855741504776685, |
| "learning_rate": 4.723773181774543e-06, |
| "loss": 0.1468, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.7561419472247498, |
| "grad_norm": 1.96972618488323, |
| "learning_rate": 4.723446556010542e-06, |
| "loss": 0.1981, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.7565969062784349, |
| "grad_norm": 1.6758348642722924, |
| "learning_rate": 4.7231197485565275e-06, |
| "loss": 0.169, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.7570518653321201, |
| "grad_norm": 1.3954523503838552, |
| "learning_rate": 4.722792759439209e-06, |
| "loss": 0.1224, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.7575068243858053, |
| "grad_norm": 2.060909913997174, |
| "learning_rate": 4.722465588685302e-06, |
| "loss": 0.2087, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.7579617834394905, |
| "grad_norm": 1.5474467660765128, |
| "learning_rate": 4.722138236321545e-06, |
| "loss": 0.1013, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.7584167424931756, |
| "grad_norm": 2.430153104930812, |
| "learning_rate": 4.721810702374687e-06, |
| "loss": 0.1439, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.7588717015468608, |
| "grad_norm": 1.7773306327385723, |
| "learning_rate": 4.721482986871491e-06, |
| "loss": 0.1485, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.759326660600546, |
| "grad_norm": 2.927464615752266, |
| "learning_rate": 4.721155089838738e-06, |
| "loss": 0.1962, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.7597816196542311, |
| "grad_norm": 1.9730589581225906, |
| "learning_rate": 4.720827011303222e-06, |
| "loss": 0.1503, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.7602365787079163, |
| "grad_norm": 1.953497394359563, |
| "learning_rate": 4.720498751291751e-06, |
| "loss": 0.182, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.7606915377616015, |
| "grad_norm": 1.7839379977035983, |
| "learning_rate": 4.72017030983115e-06, |
| "loss": 0.2198, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.7611464968152867, |
| "grad_norm": 1.7993088459777005, |
| "learning_rate": 4.7198416869482575e-06, |
| "loss": 0.1696, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.7616014558689718, |
| "grad_norm": 1.892794250792964, |
| "learning_rate": 4.719512882669926e-06, |
| "loss": 0.1776, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.762056414922657, |
| "grad_norm": 2.0202484520052035, |
| "learning_rate": 4.719183897023027e-06, |
| "loss": 0.1673, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.7625113739763422, |
| "grad_norm": 2.2601047076044414, |
| "learning_rate": 4.718854730034441e-06, |
| "loss": 0.2183, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.7629663330300273, |
| "grad_norm": 1.8760309869672118, |
| "learning_rate": 4.718525381731066e-06, |
| "loss": 0.1476, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.7634212920837125, |
| "grad_norm": 1.5663417379599454, |
| "learning_rate": 4.718195852139816e-06, |
| "loss": 0.2014, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.7638762511373977, |
| "grad_norm": 2.338496392531513, |
| "learning_rate": 4.717866141287618e-06, |
| "loss": 0.2422, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.7643312101910829, |
| "grad_norm": 1.9053967868206603, |
| "learning_rate": 4.717536249201416e-06, |
| "loss": 0.1953, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7647861692447679, |
| "grad_norm": 1.831121224420973, |
| "learning_rate": 4.7172061759081646e-06, |
| "loss": 0.1626, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.7652411282984531, |
| "grad_norm": 2.234380631915828, |
| "learning_rate": 4.716875921434838e-06, |
| "loss": 0.1754, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.7656960873521383, |
| "grad_norm": 1.9990356821604962, |
| "learning_rate": 4.716545485808421e-06, |
| "loss": 0.1613, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.7661510464058234, |
| "grad_norm": 1.956500719133962, |
| "learning_rate": 4.716214869055918e-06, |
| "loss": 0.1747, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.7666060054595086, |
| "grad_norm": 1.7944596997359672, |
| "learning_rate": 4.715884071204344e-06, |
| "loss": 0.116, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.7670609645131938, |
| "grad_norm": 1.93926106516618, |
| "learning_rate": 4.715553092280731e-06, |
| "loss": 0.2121, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.767515923566879, |
| "grad_norm": 2.4656357214922626, |
| "learning_rate": 4.7152219323121246e-06, |
| "loss": 0.1772, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.7679708826205641, |
| "grad_norm": 2.2402320393494253, |
| "learning_rate": 4.714890591325586e-06, |
| "loss": 0.2021, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.7684258416742493, |
| "grad_norm": 1.7903156076682725, |
| "learning_rate": 4.714559069348189e-06, |
| "loss": 0.1825, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.7688808007279345, |
| "grad_norm": 1.6420985192646667, |
| "learning_rate": 4.714227366407027e-06, |
| "loss": 0.1475, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7693357597816196, |
| "grad_norm": 2.2750484746487936, |
| "learning_rate": 4.7138954825292035e-06, |
| "loss": 0.1492, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.7697907188353048, |
| "grad_norm": 1.992613507205851, |
| "learning_rate": 4.71356341774184e-06, |
| "loss": 0.2004, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.77024567788899, |
| "grad_norm": 1.8507536466532999, |
| "learning_rate": 4.713231172072069e-06, |
| "loss": 0.1665, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.7707006369426752, |
| "grad_norm": 2.074124213121433, |
| "learning_rate": 4.712898745547043e-06, |
| "loss": 0.1901, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.7711555959963603, |
| "grad_norm": 2.2217772464991628, |
| "learning_rate": 4.712566138193923e-06, |
| "loss": 0.2007, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.7716105550500455, |
| "grad_norm": 2.1110958043430936, |
| "learning_rate": 4.712233350039892e-06, |
| "loss": 0.1711, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.7720655141037307, |
| "grad_norm": 1.7733407712061509, |
| "learning_rate": 4.711900381112141e-06, |
| "loss": 0.1401, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.7725204731574158, |
| "grad_norm": 1.9082417250906683, |
| "learning_rate": 4.71156723143788e-06, |
| "loss": 0.1707, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.772975432211101, |
| "grad_norm": 1.8677365381806925, |
| "learning_rate": 4.711233901044332e-06, |
| "loss": 0.1868, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.7734303912647862, |
| "grad_norm": 2.0411961738002464, |
| "learning_rate": 4.710900389958735e-06, |
| "loss": 0.1744, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7738853503184714, |
| "grad_norm": 2.1935749697701, |
| "learning_rate": 4.710566698208343e-06, |
| "loss": 0.2385, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.7743403093721565, |
| "grad_norm": 1.7404480081781704, |
| "learning_rate": 4.710232825820424e-06, |
| "loss": 0.1499, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.7747952684258417, |
| "grad_norm": 1.477154965489664, |
| "learning_rate": 4.709898772822258e-06, |
| "loss": 0.1207, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.7752502274795269, |
| "grad_norm": 1.7903520569742504, |
| "learning_rate": 4.709564539241145e-06, |
| "loss": 0.1257, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.775705186533212, |
| "grad_norm": 1.509438293191361, |
| "learning_rate": 4.709230125104396e-06, |
| "loss": 0.1333, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.7761601455868972, |
| "grad_norm": 1.352600254451033, |
| "learning_rate": 4.708895530439339e-06, |
| "loss": 0.1297, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.7766151046405824, |
| "grad_norm": 2.80931496450313, |
| "learning_rate": 4.708560755273313e-06, |
| "loss": 0.1572, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.7770700636942676, |
| "grad_norm": 2.614552054035137, |
| "learning_rate": 4.7082257996336765e-06, |
| "loss": 0.2392, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.7775250227479527, |
| "grad_norm": 1.3897711262928594, |
| "learning_rate": 4.707890663547801e-06, |
| "loss": 0.1898, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.7779799818016379, |
| "grad_norm": 1.3068004754745945, |
| "learning_rate": 4.7075553470430695e-06, |
| "loss": 0.1541, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.778434940855323, |
| "grad_norm": 2.0471283874239337, |
| "learning_rate": 4.707219850146885e-06, |
| "loss": 0.189, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.7788898999090081, |
| "grad_norm": 1.406237335222361, |
| "learning_rate": 4.706884172886662e-06, |
| "loss": 0.1534, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.7793448589626933, |
| "grad_norm": 1.430209112364991, |
| "learning_rate": 4.706548315289831e-06, |
| "loss": 0.1505, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.7797998180163785, |
| "grad_norm": 1.9880980157191188, |
| "learning_rate": 4.706212277383836e-06, |
| "loss": 0.1455, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.7802547770700637, |
| "grad_norm": 1.9444624934450598, |
| "learning_rate": 4.705876059196136e-06, |
| "loss": 0.1919, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.7807097361237488, |
| "grad_norm": 1.845006648683808, |
| "learning_rate": 4.705539660754208e-06, |
| "loss": 0.1379, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.781164695177434, |
| "grad_norm": 1.7044046674717437, |
| "learning_rate": 4.705203082085538e-06, |
| "loss": 0.1323, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.7816196542311192, |
| "grad_norm": 1.7912067195327883, |
| "learning_rate": 4.70486632321763e-06, |
| "loss": 0.2117, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.7820746132848043, |
| "grad_norm": 1.9320743936658202, |
| "learning_rate": 4.7045293841780034e-06, |
| "loss": 0.1375, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.7825295723384895, |
| "grad_norm": 1.7315009532080885, |
| "learning_rate": 4.704192264994193e-06, |
| "loss": 0.1162, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.7829845313921747, |
| "grad_norm": 1.6176947094849203, |
| "learning_rate": 4.703854965693743e-06, |
| "loss": 0.1318, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.7834394904458599, |
| "grad_norm": 2.40560948473341, |
| "learning_rate": 4.703517486304218e-06, |
| "loss": 0.1747, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.783894449499545, |
| "grad_norm": 1.6675266396651778, |
| "learning_rate": 4.703179826853195e-06, |
| "loss": 0.1853, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.7843494085532302, |
| "grad_norm": 1.8036543539560768, |
| "learning_rate": 4.702841987368265e-06, |
| "loss": 0.1358, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.7848043676069154, |
| "grad_norm": 2.164797051503019, |
| "learning_rate": 4.702503967877038e-06, |
| "loss": 0.1531, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.7852593266606005, |
| "grad_norm": 1.6083401375044635, |
| "learning_rate": 4.702165768407132e-06, |
| "loss": 0.1984, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 1.9227015105668148, |
| "learning_rate": 4.701827388986185e-06, |
| "loss": 0.1962, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.7861692447679709, |
| "grad_norm": 2.234973410496376, |
| "learning_rate": 4.701488829641845e-06, |
| "loss": 0.1313, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.7866242038216561, |
| "grad_norm": 1.4707235359776172, |
| "learning_rate": 4.701150090401782e-06, |
| "loss": 0.1384, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.7870791628753412, |
| "grad_norm": 1.4795549767962248, |
| "learning_rate": 4.700811171293673e-06, |
| "loss": 0.1192, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.7875341219290264, |
| "grad_norm": 1.4765672888773027, |
| "learning_rate": 4.700472072345214e-06, |
| "loss": 0.1445, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.7879890809827116, |
| "grad_norm": 1.7959851809677527, |
| "learning_rate": 4.700132793584113e-06, |
| "loss": 0.176, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.7884440400363967, |
| "grad_norm": 2.0011742977871365, |
| "learning_rate": 4.699793335038098e-06, |
| "loss": 0.2073, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.7888989990900819, |
| "grad_norm": 1.5877933891450462, |
| "learning_rate": 4.699453696734905e-06, |
| "loss": 0.1163, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.7893539581437671, |
| "grad_norm": 1.994398441190682, |
| "learning_rate": 4.699113878702288e-06, |
| "loss": 0.1997, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.7898089171974523, |
| "grad_norm": 1.6186509072614172, |
| "learning_rate": 4.698773880968017e-06, |
| "loss": 0.1359, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.7902638762511374, |
| "grad_norm": 1.3756660961296079, |
| "learning_rate": 4.698433703559874e-06, |
| "loss": 0.1717, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.7907188353048226, |
| "grad_norm": 1.4461545675657563, |
| "learning_rate": 4.698093346505656e-06, |
| "loss": 0.1381, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.7911737943585078, |
| "grad_norm": 1.975346854852977, |
| "learning_rate": 4.697752809833177e-06, |
| "loss": 0.1651, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.7916287534121929, |
| "grad_norm": 2.098203427770575, |
| "learning_rate": 4.697412093570263e-06, |
| "loss": 0.1966, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.792083712465878, |
| "grad_norm": 1.7884148647415081, |
| "learning_rate": 4.697071197744756e-06, |
| "loss": 0.1603, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.7925386715195633, |
| "grad_norm": 2.20000836754146, |
| "learning_rate": 4.6967301223845115e-06, |
| "loss": 0.168, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.7929936305732485, |
| "grad_norm": 1.469643335454165, |
| "learning_rate": 4.696388867517403e-06, |
| "loss": 0.1574, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.7934485896269335, |
| "grad_norm": 1.7067059652811334, |
| "learning_rate": 4.696047433171316e-06, |
| "loss": 0.098, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.7939035486806187, |
| "grad_norm": 2.0780505106943896, |
| "learning_rate": 4.695705819374149e-06, |
| "loss": 0.178, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.7943585077343039, |
| "grad_norm": 1.8450097546428101, |
| "learning_rate": 4.695364026153818e-06, |
| "loss": 0.1637, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.794813466787989, |
| "grad_norm": 1.4237762817404553, |
| "learning_rate": 4.695022053538253e-06, |
| "loss": 0.1416, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.7952684258416742, |
| "grad_norm": 2.485744457764155, |
| "learning_rate": 4.694679901555398e-06, |
| "loss": 0.2207, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.7957233848953594, |
| "grad_norm": 2.5149587392089128, |
| "learning_rate": 4.694337570233213e-06, |
| "loss": 0.1485, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.7961783439490446, |
| "grad_norm": 2.0342522616249736, |
| "learning_rate": 4.693995059599672e-06, |
| "loss": 0.2071, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7966333030027297, |
| "grad_norm": 1.7181022322257762, |
| "learning_rate": 4.693652369682762e-06, |
| "loss": 0.2112, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.7970882620564149, |
| "grad_norm": 1.843190625559269, |
| "learning_rate": 4.693309500510487e-06, |
| "loss": 0.1632, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.7975432211101001, |
| "grad_norm": 2.7841529899485917, |
| "learning_rate": 4.692966452110864e-06, |
| "loss": 0.1534, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.7979981801637852, |
| "grad_norm": 1.5395427013532956, |
| "learning_rate": 4.6926232245119265e-06, |
| "loss": 0.2195, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.7984531392174704, |
| "grad_norm": 2.5074996998585335, |
| "learning_rate": 4.69227981774172e-06, |
| "loss": 0.1856, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.7989080982711556, |
| "grad_norm": 2.449264992514986, |
| "learning_rate": 4.691936231828308e-06, |
| "loss": 0.1779, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.7993630573248408, |
| "grad_norm": 2.481345422810722, |
| "learning_rate": 4.691592466799766e-06, |
| "loss": 0.1889, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.7998180163785259, |
| "grad_norm": 1.637751747233988, |
| "learning_rate": 4.691248522684184e-06, |
| "loss": 0.1349, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.8002729754322111, |
| "grad_norm": 1.6804430027452057, |
| "learning_rate": 4.690904399509668e-06, |
| "loss": 0.1435, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.8007279344858963, |
| "grad_norm": 2.742847873433655, |
| "learning_rate": 4.69056009730434e-06, |
| "loss": 0.2232, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.8011828935395814, |
| "grad_norm": 2.40569741832729, |
| "learning_rate": 4.690215616096332e-06, |
| "loss": 0.1711, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.8016378525932666, |
| "grad_norm": 2.4832090753479834, |
| "learning_rate": 4.689870955913796e-06, |
| "loss": 0.1587, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.8020928116469518, |
| "grad_norm": 2.0194488171697063, |
| "learning_rate": 4.689526116784894e-06, |
| "loss": 0.167, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.802547770700637, |
| "grad_norm": 3.338733219322262, |
| "learning_rate": 4.689181098737805e-06, |
| "loss": 0.2404, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.8030027297543221, |
| "grad_norm": 2.150659967515375, |
| "learning_rate": 4.6888359018007235e-06, |
| "loss": 0.1288, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.8034576888080073, |
| "grad_norm": 1.9131033030180753, |
| "learning_rate": 4.6884905260018565e-06, |
| "loss": 0.1638, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.8039126478616925, |
| "grad_norm": 1.7799343855450172, |
| "learning_rate": 4.688144971369427e-06, |
| "loss": 0.2032, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.8043676069153776, |
| "grad_norm": 1.9191485121544656, |
| "learning_rate": 4.687799237931673e-06, |
| "loss": 0.1597, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.8048225659690628, |
| "grad_norm": 1.5130848101685814, |
| "learning_rate": 4.687453325716844e-06, |
| "loss": 0.1572, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.805277525022748, |
| "grad_norm": 2.380748372992281, |
| "learning_rate": 4.687107234753208e-06, |
| "loss": 0.1617, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.8057324840764332, |
| "grad_norm": 2.7874285940928067, |
| "learning_rate": 4.686760965069046e-06, |
| "loss": 0.1679, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.8061874431301183, |
| "grad_norm": 1.9146816786227654, |
| "learning_rate": 4.686414516692653e-06, |
| "loss": 0.2267, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.8066424021838035, |
| "grad_norm": 1.6656788150165645, |
| "learning_rate": 4.68606788965234e-06, |
| "loss": 0.1608, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.8070973612374887, |
| "grad_norm": 2.859758352959496, |
| "learning_rate": 4.68572108397643e-06, |
| "loss": 0.2065, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.8075523202911737, |
| "grad_norm": 1.7922493594029372, |
| "learning_rate": 4.6853740996932645e-06, |
| "loss": 0.1331, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.8080072793448589, |
| "grad_norm": 1.9382561831132192, |
| "learning_rate": 4.685026936831196e-06, |
| "loss": 0.1693, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.8084622383985441, |
| "grad_norm": 2.2029297725133237, |
| "learning_rate": 4.684679595418595e-06, |
| "loss": 0.1988, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.8089171974522293, |
| "grad_norm": 1.6643742621321755, |
| "learning_rate": 4.684332075483843e-06, |
| "loss": 0.1776, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.8093721565059144, |
| "grad_norm": 1.928150435175855, |
| "learning_rate": 4.6839843770553374e-06, |
| "loss": 0.2135, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.8098271155595996, |
| "grad_norm": 1.5299034058186116, |
| "learning_rate": 4.683636500161491e-06, |
| "loss": 0.1287, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8102820746132848, |
| "grad_norm": 1.7105211102821978, |
| "learning_rate": 4.683288444830732e-06, |
| "loss": 0.1858, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.8107370336669699, |
| "grad_norm": 2.065121875110959, |
| "learning_rate": 4.6829402110915015e-06, |
| "loss": 0.1573, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.8111919927206551, |
| "grad_norm": 1.7915836692891514, |
| "learning_rate": 4.682591798972253e-06, |
| "loss": 0.163, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.8116469517743403, |
| "grad_norm": 1.9011358499015634, |
| "learning_rate": 4.682243208501461e-06, |
| "loss": 0.1565, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.8121019108280255, |
| "grad_norm": 1.8705464277674988, |
| "learning_rate": 4.681894439707609e-06, |
| "loss": 0.1532, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.8125568698817106, |
| "grad_norm": 1.5282025887885624, |
| "learning_rate": 4.681545492619195e-06, |
| "loss": 0.1212, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.8130118289353958, |
| "grad_norm": 2.4618870744714823, |
| "learning_rate": 4.681196367264736e-06, |
| "loss": 0.1737, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.813466787989081, |
| "grad_norm": 1.5010216528583702, |
| "learning_rate": 4.680847063672761e-06, |
| "loss": 0.1349, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.8139217470427661, |
| "grad_norm": 1.577176673126615, |
| "learning_rate": 4.680497581871811e-06, |
| "loss": 0.1736, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.8143767060964513, |
| "grad_norm": 2.2216456467027603, |
| "learning_rate": 4.680147921890447e-06, |
| "loss": 0.1589, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8148316651501365, |
| "grad_norm": 2.2828861135151377, |
| "learning_rate": 4.67979808375724e-06, |
| "loss": 0.1864, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.8152866242038217, |
| "grad_norm": 2.411410847612128, |
| "learning_rate": 4.679448067500777e-06, |
| "loss": 0.1704, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.8157415832575068, |
| "grad_norm": 2.6745924756823274, |
| "learning_rate": 4.67909787314966e-06, |
| "loss": 0.1855, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.816196542311192, |
| "grad_norm": 1.7549666443082432, |
| "learning_rate": 4.678747500732505e-06, |
| "loss": 0.2204, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.8166515013648772, |
| "grad_norm": 2.4603767836599086, |
| "learning_rate": 4.6783969502779455e-06, |
| "loss": 0.1805, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.8171064604185623, |
| "grad_norm": 1.5762472297440564, |
| "learning_rate": 4.6780462218146236e-06, |
| "loss": 0.1393, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.8175614194722475, |
| "grad_norm": 1.6619849736476204, |
| "learning_rate": 4.6776953153712005e-06, |
| "loss": 0.2041, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.8180163785259327, |
| "grad_norm": 1.7094043878723117, |
| "learning_rate": 4.67734423097635e-06, |
| "loss": 0.1603, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.8184713375796179, |
| "grad_norm": 1.2928545358221282, |
| "learning_rate": 4.676992968658762e-06, |
| "loss": 0.1517, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.818926296633303, |
| "grad_norm": 1.4763652797153222, |
| "learning_rate": 4.67664152844714e-06, |
| "loss": 0.0939, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8193812556869882, |
| "grad_norm": 2.260551569771672, |
| "learning_rate": 4.676289910370202e-06, |
| "loss": 0.1902, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.8198362147406734, |
| "grad_norm": 2.047407982208326, |
| "learning_rate": 4.675938114456682e-06, |
| "loss": 0.1767, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.8202911737943585, |
| "grad_norm": 1.5430069759954768, |
| "learning_rate": 4.675586140735323e-06, |
| "loss": 0.1955, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.8207461328480437, |
| "grad_norm": 2.2295561077574404, |
| "learning_rate": 4.675233989234891e-06, |
| "loss": 0.211, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.8212010919017289, |
| "grad_norm": 1.639085591327469, |
| "learning_rate": 4.67488165998416e-06, |
| "loss": 0.1163, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.821656050955414, |
| "grad_norm": 1.8522836776109448, |
| "learning_rate": 4.674529153011922e-06, |
| "loss": 0.1879, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.8221110100090991, |
| "grad_norm": 2.1812381655305653, |
| "learning_rate": 4.674176468346982e-06, |
| "loss": 0.1773, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.8225659690627843, |
| "grad_norm": 1.9367383257783326, |
| "learning_rate": 4.673823606018158e-06, |
| "loss": 0.2019, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.8230209281164695, |
| "grad_norm": 1.8576560873873327, |
| "learning_rate": 4.673470566054288e-06, |
| "loss": 0.1492, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.8234758871701547, |
| "grad_norm": 1.9497069876088635, |
| "learning_rate": 4.673117348484217e-06, |
| "loss": 0.1745, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8239308462238398, |
| "grad_norm": 1.4193615554141685, |
| "learning_rate": 4.672763953336811e-06, |
| "loss": 0.1463, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.824385805277525, |
| "grad_norm": 2.8057971610463928, |
| "learning_rate": 4.672410380640946e-06, |
| "loss": 0.2285, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.8248407643312102, |
| "grad_norm": 1.8069198589432123, |
| "learning_rate": 4.672056630425516e-06, |
| "loss": 0.1228, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.8252957233848953, |
| "grad_norm": 1.3408435512517318, |
| "learning_rate": 4.671702702719426e-06, |
| "loss": 0.1436, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.8257506824385805, |
| "grad_norm": 2.0862527734688197, |
| "learning_rate": 4.671348597551599e-06, |
| "loss": 0.2169, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.8262056414922657, |
| "grad_norm": 2.132580252859084, |
| "learning_rate": 4.670994314950971e-06, |
| "loss": 0.2017, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.8266606005459509, |
| "grad_norm": 2.5991268132353853, |
| "learning_rate": 4.6706398549464905e-06, |
| "loss": 0.2089, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.827115559599636, |
| "grad_norm": 2.3181044896129275, |
| "learning_rate": 4.670285217567124e-06, |
| "loss": 0.1531, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.8275705186533212, |
| "grad_norm": 1.7235570690460182, |
| "learning_rate": 4.6699304028418516e-06, |
| "loss": 0.1933, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.8280254777070064, |
| "grad_norm": 3.3016739021057884, |
| "learning_rate": 4.669575410799665e-06, |
| "loss": 0.2017, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8284804367606915, |
| "grad_norm": 1.3897879186817867, |
| "learning_rate": 4.669220241469573e-06, |
| "loss": 0.1393, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.8289353958143767, |
| "grad_norm": 1.7530097372349214, |
| "learning_rate": 4.668864894880599e-06, |
| "loss": 0.2163, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.8293903548680619, |
| "grad_norm": 2.7080878088048337, |
| "learning_rate": 4.668509371061781e-06, |
| "loss": 0.2166, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.8298453139217471, |
| "grad_norm": 1.9706360861102925, |
| "learning_rate": 4.668153670042171e-06, |
| "loss": 0.2253, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.8303002729754322, |
| "grad_norm": 1.830442854507149, |
| "learning_rate": 4.667797791850833e-06, |
| "loss": 0.1526, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.8307552320291174, |
| "grad_norm": 1.7672909680061333, |
| "learning_rate": 4.6674417365168495e-06, |
| "loss": 0.156, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.8312101910828026, |
| "grad_norm": 1.627604242773907, |
| "learning_rate": 4.667085504069315e-06, |
| "loss": 0.1965, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.8316651501364877, |
| "grad_norm": 1.6049507259721845, |
| "learning_rate": 4.66672909453734e-06, |
| "loss": 0.1678, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.8321201091901729, |
| "grad_norm": 2.31296929571475, |
| "learning_rate": 4.6663725079500485e-06, |
| "loss": 0.2126, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.8325750682438581, |
| "grad_norm": 1.7717243650944572, |
| "learning_rate": 4.666015744336578e-06, |
| "loss": 0.1333, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8330300272975433, |
| "grad_norm": 1.9376666152516604, |
| "learning_rate": 4.665658803726083e-06, |
| "loss": 0.161, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.8334849863512284, |
| "grad_norm": 1.9363986365280477, |
| "learning_rate": 4.6653016861477315e-06, |
| "loss": 0.1736, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.8339399454049136, |
| "grad_norm": 1.0684481587552732, |
| "learning_rate": 4.664944391630704e-06, |
| "loss": 0.1187, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.8343949044585988, |
| "grad_norm": 1.9806679260858633, |
| "learning_rate": 4.664586920204197e-06, |
| "loss": 0.1945, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.8348498635122839, |
| "grad_norm": 2.002852794100086, |
| "learning_rate": 4.664229271897422e-06, |
| "loss": 0.1449, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.835304822565969, |
| "grad_norm": 1.416210291100934, |
| "learning_rate": 4.663871446739606e-06, |
| "loss": 0.2015, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.8357597816196543, |
| "grad_norm": 1.8546810159993223, |
| "learning_rate": 4.663513444759986e-06, |
| "loss": 0.1461, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.8362147406733395, |
| "grad_norm": 2.054627126988846, |
| "learning_rate": 4.663155265987818e-06, |
| "loss": 0.1779, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.8366696997270245, |
| "grad_norm": 1.8928121217305771, |
| "learning_rate": 4.66279691045237e-06, |
| "loss": 0.1843, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.8371246587807097, |
| "grad_norm": 2.3586323101492552, |
| "learning_rate": 4.662438378182927e-06, |
| "loss": 0.2396, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8375796178343949, |
| "grad_norm": 1.8299500333063181, |
| "learning_rate": 4.662079669208783e-06, |
| "loss": 0.1645, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.83803457688808, |
| "grad_norm": 2.3480837865967215, |
| "learning_rate": 4.661720783559254e-06, |
| "loss": 0.1788, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.8384895359417652, |
| "grad_norm": 1.883623814508302, |
| "learning_rate": 4.661361721263664e-06, |
| "loss": 0.1624, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.8389444949954504, |
| "grad_norm": 2.160772908461247, |
| "learning_rate": 4.661002482351355e-06, |
| "loss": 0.1908, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.8393994540491356, |
| "grad_norm": 2.187162279477086, |
| "learning_rate": 4.660643066851682e-06, |
| "loss": 0.1808, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.8398544131028207, |
| "grad_norm": 1.8531325552871911, |
| "learning_rate": 4.6602834747940155e-06, |
| "loss": 0.1914, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.8403093721565059, |
| "grad_norm": 1.4831331336407363, |
| "learning_rate": 4.6599237062077385e-06, |
| "loss": 0.142, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.8407643312101911, |
| "grad_norm": 1.931468706645427, |
| "learning_rate": 4.65956376112225e-06, |
| "loss": 0.2224, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.8412192902638762, |
| "grad_norm": 1.3694171323558038, |
| "learning_rate": 4.659203639566965e-06, |
| "loss": 0.1375, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.8416742493175614, |
| "grad_norm": 1.936812425945626, |
| "learning_rate": 4.658843341571308e-06, |
| "loss": 0.1342, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8421292083712466, |
| "grad_norm": 1.6211061965620477, |
| "learning_rate": 4.6584828671647235e-06, |
| "loss": 0.1241, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.8425841674249318, |
| "grad_norm": 1.1366286902231244, |
| "learning_rate": 4.658122216376666e-06, |
| "loss": 0.1378, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.8430391264786169, |
| "grad_norm": 1.6359146658906643, |
| "learning_rate": 4.657761389236607e-06, |
| "loss": 0.2118, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.8434940855323021, |
| "grad_norm": 2.5329878550243734, |
| "learning_rate": 4.657400385774032e-06, |
| "loss": 0.2193, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.8439490445859873, |
| "grad_norm": 2.5278755724681425, |
| "learning_rate": 4.65703920601844e-06, |
| "loss": 0.2768, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.8444040036396724, |
| "grad_norm": 1.590463345818293, |
| "learning_rate": 4.656677849999345e-06, |
| "loss": 0.139, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.8448589626933576, |
| "grad_norm": 2.5309928033982154, |
| "learning_rate": 4.656316317746275e-06, |
| "loss": 0.1896, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.8453139217470428, |
| "grad_norm": 1.9131067732573241, |
| "learning_rate": 4.655954609288775e-06, |
| "loss": 0.1584, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.845768880800728, |
| "grad_norm": 1.676858006295649, |
| "learning_rate": 4.655592724656399e-06, |
| "loss": 0.1413, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.8462238398544131, |
| "grad_norm": 1.2591774278723207, |
| "learning_rate": 4.655230663878721e-06, |
| "loss": 0.106, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8466787989080983, |
| "grad_norm": 1.7932854876030564, |
| "learning_rate": 4.654868426985326e-06, |
| "loss": 0.1417, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.8471337579617835, |
| "grad_norm": 1.6149020601443298, |
| "learning_rate": 4.654506014005814e-06, |
| "loss": 0.1632, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.8475887170154686, |
| "grad_norm": 2.4429847082643734, |
| "learning_rate": 4.6541434249698e-06, |
| "loss": 0.1726, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.8480436760691538, |
| "grad_norm": 1.958537494840022, |
| "learning_rate": 4.6537806599069144e-06, |
| "loss": 0.1918, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.848498635122839, |
| "grad_norm": 2.0524656641640573, |
| "learning_rate": 4.653417718846799e-06, |
| "loss": 0.1824, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.8489535941765242, |
| "grad_norm": 2.471476245561928, |
| "learning_rate": 4.6530546018191126e-06, |
| "loss": 0.1833, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.8494085532302093, |
| "grad_norm": 1.792641798980951, |
| "learning_rate": 4.652691308853526e-06, |
| "loss": 0.1409, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.8498635122838945, |
| "grad_norm": 1.663452952464092, |
| "learning_rate": 4.652327839979729e-06, |
| "loss": 0.1707, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.8503184713375797, |
| "grad_norm": 2.201926398578509, |
| "learning_rate": 4.651964195227419e-06, |
| "loss": 0.1399, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.8507734303912647, |
| "grad_norm": 1.8923698849228574, |
| "learning_rate": 4.651600374626315e-06, |
| "loss": 0.1381, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8512283894449499, |
| "grad_norm": 1.4952325363772294, |
| "learning_rate": 4.651236378206144e-06, |
| "loss": 0.1521, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.8516833484986351, |
| "grad_norm": 1.6339894998223452, |
| "learning_rate": 4.650872205996651e-06, |
| "loss": 0.1813, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.8521383075523203, |
| "grad_norm": 2.105965789292229, |
| "learning_rate": 4.650507858027595e-06, |
| "loss": 0.1482, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.8525932666060054, |
| "grad_norm": 1.9949585656638686, |
| "learning_rate": 4.6501433343287475e-06, |
| "loss": 0.1851, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.8530482256596906, |
| "grad_norm": 2.4070104220391326, |
| "learning_rate": 4.6497786349298975e-06, |
| "loss": 0.1662, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.8535031847133758, |
| "grad_norm": 1.7461886999738794, |
| "learning_rate": 4.649413759860846e-06, |
| "loss": 0.1254, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.8539581437670609, |
| "grad_norm": 2.2792475326190016, |
| "learning_rate": 4.649048709151408e-06, |
| "loss": 0.2312, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.8544131028207461, |
| "grad_norm": 1.3426843322261688, |
| "learning_rate": 4.648683482831415e-06, |
| "loss": 0.1172, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.8548680618744313, |
| "grad_norm": 2.382337203322208, |
| "learning_rate": 4.648318080930711e-06, |
| "loss": 0.2074, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.8553230209281165, |
| "grad_norm": 1.712854915430822, |
| "learning_rate": 4.647952503479154e-06, |
| "loss": 0.1704, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.8557779799818016, |
| "grad_norm": 1.8081149874596518, |
| "learning_rate": 4.6475867505066195e-06, |
| "loss": 0.1917, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.8562329390354868, |
| "grad_norm": 1.9751613678879965, |
| "learning_rate": 4.647220822042995e-06, |
| "loss": 0.1735, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.856687898089172, |
| "grad_norm": 2.1327662706521906, |
| "learning_rate": 4.64685471811818e-06, |
| "loss": 0.1449, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 2.064198155606807, |
| "learning_rate": 4.646488438762094e-06, |
| "loss": 0.23, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.8575978161965423, |
| "grad_norm": 1.506998926934666, |
| "learning_rate": 4.646121984004666e-06, |
| "loss": 0.165, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.8580527752502275, |
| "grad_norm": 1.8322392109933523, |
| "learning_rate": 4.64575535387584e-06, |
| "loss": 0.2264, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.8585077343039127, |
| "grad_norm": 2.0388479228852048, |
| "learning_rate": 4.645388548405578e-06, |
| "loss": 0.2175, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.8589626933575978, |
| "grad_norm": 2.097249131206244, |
| "learning_rate": 4.645021567623852e-06, |
| "loss": 0.2196, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.859417652411283, |
| "grad_norm": 1.5275188180484371, |
| "learning_rate": 4.644654411560651e-06, |
| "loss": 0.1417, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.8598726114649682, |
| "grad_norm": 1.8944498906948435, |
| "learning_rate": 4.644287080245975e-06, |
| "loss": 0.1795, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8603275705186533, |
| "grad_norm": 1.983029598334522, |
| "learning_rate": 4.643919573709843e-06, |
| "loss": 0.1986, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.8607825295723385, |
| "grad_norm": 1.6266032809421398, |
| "learning_rate": 4.6435518919822854e-06, |
| "loss": 0.207, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.8612374886260237, |
| "grad_norm": 2.19323813493903, |
| "learning_rate": 4.643184035093348e-06, |
| "loss": 0.1393, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.8616924476797089, |
| "grad_norm": 1.8257509692409855, |
| "learning_rate": 4.642816003073089e-06, |
| "loss": 0.1634, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.862147406733394, |
| "grad_norm": 1.2900997861243053, |
| "learning_rate": 4.6424477959515836e-06, |
| "loss": 0.1654, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.8626023657870792, |
| "grad_norm": 1.540771601167976, |
| "learning_rate": 4.642079413758919e-06, |
| "loss": 0.1518, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.8630573248407644, |
| "grad_norm": 1.899942137953783, |
| "learning_rate": 4.641710856525199e-06, |
| "loss": 0.1821, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.8635122838944495, |
| "grad_norm": 1.4129439458546442, |
| "learning_rate": 4.641342124280539e-06, |
| "loss": 0.1716, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.8639672429481347, |
| "grad_norm": 2.3313958542346995, |
| "learning_rate": 4.6409732170550705e-06, |
| "loss": 0.1687, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.8644222020018199, |
| "grad_norm": 1.4646430511341277, |
| "learning_rate": 4.64060413487894e-06, |
| "loss": 0.1321, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.864877161055505, |
| "grad_norm": 1.6578645771032987, |
| "learning_rate": 4.640234877782306e-06, |
| "loss": 0.1339, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.8653321201091901, |
| "grad_norm": 2.115428055628657, |
| "learning_rate": 4.639865445795344e-06, |
| "loss": 0.155, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.8657870791628753, |
| "grad_norm": 1.4246658971760144, |
| "learning_rate": 4.63949583894824e-06, |
| "loss": 0.1211, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.8662420382165605, |
| "grad_norm": 1.8915165798317974, |
| "learning_rate": 4.639126057271199e-06, |
| "loss": 0.1943, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.8666969972702456, |
| "grad_norm": 1.4359286477489568, |
| "learning_rate": 4.6387561007944355e-06, |
| "loss": 0.1927, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.8671519563239308, |
| "grad_norm": 1.7402908671263166, |
| "learning_rate": 4.638385969548183e-06, |
| "loss": 0.197, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.867606915377616, |
| "grad_norm": 1.6362731205557584, |
| "learning_rate": 4.638015663562686e-06, |
| "loss": 0.1383, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.8680618744313012, |
| "grad_norm": 2.4913116222464837, |
| "learning_rate": 4.637645182868204e-06, |
| "loss": 0.2, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.8685168334849863, |
| "grad_norm": 1.254842356705368, |
| "learning_rate": 4.637274527495011e-06, |
| "loss": 0.121, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.8689717925386715, |
| "grad_norm": 1.5120107885745528, |
| "learning_rate": 4.6369036974733955e-06, |
| "loss": 0.1464, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.8694267515923567, |
| "grad_norm": 1.392142485713207, |
| "learning_rate": 4.63653269283366e-06, |
| "loss": 0.1325, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.8698817106460418, |
| "grad_norm": 1.6362489180779098, |
| "learning_rate": 4.636161513606122e-06, |
| "loss": 0.1887, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.870336669699727, |
| "grad_norm": 1.7061998927826107, |
| "learning_rate": 4.6357901598211105e-06, |
| "loss": 0.1559, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.8707916287534122, |
| "grad_norm": 1.7490187306928824, |
| "learning_rate": 4.635418631508974e-06, |
| "loss": 0.1504, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.8712465878070974, |
| "grad_norm": 1.7459918799385958, |
| "learning_rate": 4.635046928700069e-06, |
| "loss": 0.1737, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.8717015468607825, |
| "grad_norm": 2.128565340614342, |
| "learning_rate": 4.634675051424771e-06, |
| "loss": 0.1843, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.8721565059144677, |
| "grad_norm": 1.5616930523249197, |
| "learning_rate": 4.634302999713468e-06, |
| "loss": 0.1004, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.8726114649681529, |
| "grad_norm": 1.886440296737102, |
| "learning_rate": 4.633930773596563e-06, |
| "loss": 0.2085, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.873066424021838, |
| "grad_norm": 1.6874199025414718, |
| "learning_rate": 4.633558373104472e-06, |
| "loss": 0.1965, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.8735213830755232, |
| "grad_norm": 1.4248884120885352, |
| "learning_rate": 4.633185798267625e-06, |
| "loss": 0.1814, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.8739763421292084, |
| "grad_norm": 2.0576525781987107, |
| "learning_rate": 4.632813049116467e-06, |
| "loss": 0.2251, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.8744313011828936, |
| "grad_norm": 2.422851032077204, |
| "learning_rate": 4.63244012568146e-06, |
| "loss": 0.1949, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.8748862602365787, |
| "grad_norm": 2.1417664356799087, |
| "learning_rate": 4.632067027993076e-06, |
| "loss": 0.1548, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.8753412192902639, |
| "grad_norm": 1.4407274073506169, |
| "learning_rate": 4.631693756081802e-06, |
| "loss": 0.1252, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.8757961783439491, |
| "grad_norm": 1.6004631673541039, |
| "learning_rate": 4.631320309978141e-06, |
| "loss": 0.1876, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.8762511373976342, |
| "grad_norm": 1.7251546761372085, |
| "learning_rate": 4.630946689712609e-06, |
| "loss": 0.1624, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.8767060964513194, |
| "grad_norm": 1.7738030549432209, |
| "learning_rate": 4.630572895315737e-06, |
| "loss": 0.1748, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.8771610555050046, |
| "grad_norm": 1.0086649768907636, |
| "learning_rate": 4.63019892681807e-06, |
| "loss": 0.1032, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.8776160145586898, |
| "grad_norm": 1.2701304891541718, |
| "learning_rate": 4.629824784250166e-06, |
| "loss": 0.1192, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.8780709736123748, |
| "grad_norm": 1.6784044296991356, |
| "learning_rate": 4.629450467642599e-06, |
| "loss": 0.1265, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.87852593266606, |
| "grad_norm": 1.976065902819502, |
| "learning_rate": 4.629075977025957e-06, |
| "loss": 0.1681, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.8789808917197452, |
| "grad_norm": 1.6213814808866245, |
| "learning_rate": 4.62870131243084e-06, |
| "loss": 0.1493, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.8794358507734303, |
| "grad_norm": 1.9807101332336867, |
| "learning_rate": 4.628326473887865e-06, |
| "loss": 0.1095, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.8798908098271155, |
| "grad_norm": 1.3613443516857038, |
| "learning_rate": 4.627951461427663e-06, |
| "loss": 0.0886, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.8803457688808007, |
| "grad_norm": 2.294295361155117, |
| "learning_rate": 4.627576275080876e-06, |
| "loss": 0.1782, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.8808007279344859, |
| "grad_norm": 1.465162455531879, |
| "learning_rate": 4.627200914878165e-06, |
| "loss": 0.1689, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.881255686988171, |
| "grad_norm": 1.9852567754309711, |
| "learning_rate": 4.6268253808502005e-06, |
| "loss": 0.1953, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.8817106460418562, |
| "grad_norm": 1.3259365892059651, |
| "learning_rate": 4.626449673027671e-06, |
| "loss": 0.1186, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.8821656050955414, |
| "grad_norm": 2.311627846572585, |
| "learning_rate": 4.626073791441278e-06, |
| "loss": 0.175, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.8826205641492265, |
| "grad_norm": 1.403685443623727, |
| "learning_rate": 4.625697736121735e-06, |
| "loss": 0.1632, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.8830755232029117, |
| "grad_norm": 1.8370812337880758, |
| "learning_rate": 4.6253215070997735e-06, |
| "loss": 0.1805, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.8835304822565969, |
| "grad_norm": 1.7617734494239499, |
| "learning_rate": 4.624945104406135e-06, |
| "loss": 0.1484, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.8839854413102821, |
| "grad_norm": 1.2929099916167694, |
| "learning_rate": 4.624568528071579e-06, |
| "loss": 0.1109, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.8844404003639672, |
| "grad_norm": 1.6991526267122765, |
| "learning_rate": 4.624191778126879e-06, |
| "loss": 0.1833, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.8848953594176524, |
| "grad_norm": 1.947027254377722, |
| "learning_rate": 4.623814854602818e-06, |
| "loss": 0.2251, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.8853503184713376, |
| "grad_norm": 1.7473125338322357, |
| "learning_rate": 4.623437757530198e-06, |
| "loss": 0.1144, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.8858052775250227, |
| "grad_norm": 1.56986818124434, |
| "learning_rate": 4.623060486939835e-06, |
| "loss": 0.1507, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.8862602365787079, |
| "grad_norm": 2.2731317429688995, |
| "learning_rate": 4.622683042862556e-06, |
| "loss": 0.1854, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.8867151956323931, |
| "grad_norm": 1.5668080033034493, |
| "learning_rate": 4.622305425329205e-06, |
| "loss": 0.1093, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.8871701546860783, |
| "grad_norm": 1.4666243413929643, |
| "learning_rate": 4.621927634370638e-06, |
| "loss": 0.1179, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.8876251137397634, |
| "grad_norm": 2.142207445885291, |
| "learning_rate": 4.621549670017727e-06, |
| "loss": 0.2196, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.8880800727934486, |
| "grad_norm": 1.9929367654553447, |
| "learning_rate": 4.6211715323013595e-06, |
| "loss": 0.1926, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.8885350318471338, |
| "grad_norm": 1.8377495474805912, |
| "learning_rate": 4.6207932212524325e-06, |
| "loss": 0.1879, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.8889899909008189, |
| "grad_norm": 1.8025632169370749, |
| "learning_rate": 4.620414736901861e-06, |
| "loss": 0.1627, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.8894449499545041, |
| "grad_norm": 1.7867128092311804, |
| "learning_rate": 4.620036079280573e-06, |
| "loss": 0.2169, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.8898999090081893, |
| "grad_norm": 2.4571527122530776, |
| "learning_rate": 4.619657248419511e-06, |
| "loss": 0.2337, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.8903548680618745, |
| "grad_norm": 1.5424608043537418, |
| "learning_rate": 4.61927824434963e-06, |
| "loss": 0.134, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.8908098271155596, |
| "grad_norm": 1.8248865805885555, |
| "learning_rate": 4.6188990671019015e-06, |
| "loss": 0.1473, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.8912647861692448, |
| "grad_norm": 1.2825883167116863, |
| "learning_rate": 4.618519716707311e-06, |
| "loss": 0.1377, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.89171974522293, |
| "grad_norm": 1.9837251078508047, |
| "learning_rate": 4.618140193196856e-06, |
| "loss": 0.1736, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.892174704276615, |
| "grad_norm": 1.604956750795707, |
| "learning_rate": 4.61776049660155e-06, |
| "loss": 0.1711, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.8926296633303002, |
| "grad_norm": 1.5703167687380166, |
| "learning_rate": 4.61738062695242e-06, |
| "loss": 0.1519, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.8930846223839854, |
| "grad_norm": 2.2186984451911638, |
| "learning_rate": 4.617000584280506e-06, |
| "loss": 0.1443, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.8935395814376706, |
| "grad_norm": 1.906102770647992, |
| "learning_rate": 4.616620368616866e-06, |
| "loss": 0.1878, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.8939945404913557, |
| "grad_norm": 2.0871942985325167, |
| "learning_rate": 4.616239979992568e-06, |
| "loss": 0.2384, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.8944494995450409, |
| "grad_norm": 1.6638677246444422, |
| "learning_rate": 4.615859418438695e-06, |
| "loss": 0.1792, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.8949044585987261, |
| "grad_norm": 1.387205154257509, |
| "learning_rate": 4.615478683986345e-06, |
| "loss": 0.144, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.8953594176524113, |
| "grad_norm": 1.8836562093395437, |
| "learning_rate": 4.6150977766666315e-06, |
| "loss": 0.2174, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.8958143767060964, |
| "grad_norm": 1.9229400987313323, |
| "learning_rate": 4.614716696510679e-06, |
| "loss": 0.2241, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.8962693357597816, |
| "grad_norm": 1.8744101552937114, |
| "learning_rate": 4.614335443549628e-06, |
| "loss": 0.1402, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.8967242948134668, |
| "grad_norm": 1.7357579966910537, |
| "learning_rate": 4.613954017814633e-06, |
| "loss": 0.1286, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.8971792538671519, |
| "grad_norm": 1.8840478367784224, |
| "learning_rate": 4.613572419336862e-06, |
| "loss": 0.1342, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.8976342129208371, |
| "grad_norm": 1.5927521655138008, |
| "learning_rate": 4.613190648147497e-06, |
| "loss": 0.1513, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.8980891719745223, |
| "grad_norm": 2.065610545817281, |
| "learning_rate": 4.612808704277736e-06, |
| "loss": 0.2084, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.8985441310282075, |
| "grad_norm": 1.5284731538672136, |
| "learning_rate": 4.612426587758789e-06, |
| "loss": 0.188, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.8989990900818926, |
| "grad_norm": 2.023375971468293, |
| "learning_rate": 4.612044298621881e-06, |
| "loss": 0.1344, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.8994540491355778, |
| "grad_norm": 1.9534402095489405, |
| "learning_rate": 4.611661836898252e-06, |
| "loss": 0.1738, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.899909008189263, |
| "grad_norm": 1.9156260955002997, |
| "learning_rate": 4.611279202619151e-06, |
| "loss": 0.1668, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.9003639672429481, |
| "grad_norm": 1.9526723286463348, |
| "learning_rate": 4.61089639581585e-06, |
| "loss": 0.1669, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.9008189262966333, |
| "grad_norm": 1.9056078059584818, |
| "learning_rate": 4.610513416519628e-06, |
| "loss": 0.1507, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.9012738853503185, |
| "grad_norm": 1.5105931587228634, |
| "learning_rate": 4.6101302647617806e-06, |
| "loss": 0.1488, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.9017288444040037, |
| "grad_norm": 2.0835062062044347, |
| "learning_rate": 4.609746940573617e-06, |
| "loss": 0.1324, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.9021838034576888, |
| "grad_norm": 1.9577939305337912, |
| "learning_rate": 4.609363443986461e-06, |
| "loss": 0.1636, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.902638762511374, |
| "grad_norm": 1.7800989438629395, |
| "learning_rate": 4.60897977503165e-06, |
| "loss": 0.1754, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.9030937215650592, |
| "grad_norm": 2.1110656440447544, |
| "learning_rate": 4.608595933740536e-06, |
| "loss": 0.2122, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.9035486806187443, |
| "grad_norm": 1.286237936407134, |
| "learning_rate": 4.608211920144485e-06, |
| "loss": 0.202, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.9040036396724295, |
| "grad_norm": 2.2604741864786178, |
| "learning_rate": 4.607827734274876e-06, |
| "loss": 0.1669, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.9044585987261147, |
| "grad_norm": 1.7607840905259224, |
| "learning_rate": 4.607443376163104e-06, |
| "loss": 0.1375, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.9049135577797999, |
| "grad_norm": 1.7402029650347348, |
| "learning_rate": 4.607058845840576e-06, |
| "loss": 0.1431, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.905368516833485, |
| "grad_norm": 1.666160268732321, |
| "learning_rate": 4.606674143338714e-06, |
| "loss": 0.1485, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.9058234758871702, |
| "grad_norm": 2.0612124207721654, |
| "learning_rate": 4.606289268688955e-06, |
| "loss": 0.1419, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.9062784349408554, |
| "grad_norm": 1.9143034406546822, |
| "learning_rate": 4.605904221922749e-06, |
| "loss": 0.1842, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.9067333939945404, |
| "grad_norm": 2.410587966058405, |
| "learning_rate": 4.6055190030715605e-06, |
| "loss": 0.1858, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.9071883530482256, |
| "grad_norm": 1.4389936850061738, |
| "learning_rate": 4.605133612166868e-06, |
| "loss": 0.1387, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.9076433121019108, |
| "grad_norm": 1.546723165322591, |
| "learning_rate": 4.604748049240162e-06, |
| "loss": 0.1353, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.908098271155596, |
| "grad_norm": 1.510897129777589, |
| "learning_rate": 4.604362314322951e-06, |
| "loss": 0.1322, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.9085532302092811, |
| "grad_norm": 2.3885439589368147, |
| "learning_rate": 4.603976407446756e-06, |
| "loss": 0.1656, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.9090081892629663, |
| "grad_norm": 1.193637078798613, |
| "learning_rate": 4.603590328643108e-06, |
| "loss": 0.1057, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.9094631483166515, |
| "grad_norm": 1.910033395843472, |
| "learning_rate": 4.60320407794356e-06, |
| "loss": 0.1519, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.9099181073703366, |
| "grad_norm": 1.6867999496406765, |
| "learning_rate": 4.602817655379672e-06, |
| "loss": 0.1776, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9103730664240218, |
| "grad_norm": 1.7117660414525686, |
| "learning_rate": 4.602431060983022e-06, |
| "loss": 0.1451, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.910828025477707, |
| "grad_norm": 1.4990428536514322, |
| "learning_rate": 4.6020442947852e-06, |
| "loss": 0.1409, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.9112829845313922, |
| "grad_norm": 1.446262498955875, |
| "learning_rate": 4.6016573568178105e-06, |
| "loss": 0.1135, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.9117379435850773, |
| "grad_norm": 1.6571232403743137, |
| "learning_rate": 4.601270247112473e-06, |
| "loss": 0.2404, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.9121929026387625, |
| "grad_norm": 2.0064329107593646, |
| "learning_rate": 4.60088296570082e-06, |
| "loss": 0.1905, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.9126478616924477, |
| "grad_norm": 1.4125062029338067, |
| "learning_rate": 4.600495512614499e-06, |
| "loss": 0.1117, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.9131028207461328, |
| "grad_norm": 1.8059848267053757, |
| "learning_rate": 4.60010788788517e-06, |
| "loss": 0.2289, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.913557779799818, |
| "grad_norm": 1.8237596303340968, |
| "learning_rate": 4.5997200915445095e-06, |
| "loss": 0.1983, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.9140127388535032, |
| "grad_norm": 1.6824481144619179, |
| "learning_rate": 4.599332123624204e-06, |
| "loss": 0.1361, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.9144676979071884, |
| "grad_norm": 1.5469841434239995, |
| "learning_rate": 4.598943984155959e-06, |
| "loss": 0.1561, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9149226569608735, |
| "grad_norm": 1.1721008124510859, |
| "learning_rate": 4.598555673171489e-06, |
| "loss": 0.0997, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.9153776160145587, |
| "grad_norm": 1.367389738430673, |
| "learning_rate": 4.5981671907025275e-06, |
| "loss": 0.124, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.9158325750682439, |
| "grad_norm": 1.9852471647698953, |
| "learning_rate": 4.597778536780818e-06, |
| "loss": 0.1746, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.916287534121929, |
| "grad_norm": 2.1379896488178405, |
| "learning_rate": 4.597389711438121e-06, |
| "loss": 0.2387, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.9167424931756142, |
| "grad_norm": 1.4433682072802856, |
| "learning_rate": 4.597000714706207e-06, |
| "loss": 0.1261, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.9171974522292994, |
| "grad_norm": 1.92195373557543, |
| "learning_rate": 4.596611546616865e-06, |
| "loss": 0.1982, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.9176524112829846, |
| "grad_norm": 1.9323067168518875, |
| "learning_rate": 4.596222207201896e-06, |
| "loss": 0.1767, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.9181073703366697, |
| "grad_norm": 1.7925696405315172, |
| "learning_rate": 4.595832696493115e-06, |
| "loss": 0.1692, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.9185623293903549, |
| "grad_norm": 1.6896362560345692, |
| "learning_rate": 4.59544301452235e-06, |
| "loss": 0.1527, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.9190172884440401, |
| "grad_norm": 2.6520358388003307, |
| "learning_rate": 4.595053161321444e-06, |
| "loss": 0.2183, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9194722474977252, |
| "grad_norm": 1.8502691763569332, |
| "learning_rate": 4.594663136922256e-06, |
| "loss": 0.2027, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.9199272065514104, |
| "grad_norm": 1.66876391954138, |
| "learning_rate": 4.594272941356655e-06, |
| "loss": 0.1592, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.9203821656050956, |
| "grad_norm": 2.000282499671209, |
| "learning_rate": 4.593882574656528e-06, |
| "loss": 0.1899, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.9208371246587808, |
| "grad_norm": 2.1057167872680864, |
| "learning_rate": 4.5934920368537724e-06, |
| "loss": 0.1649, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.9212920837124658, |
| "grad_norm": 2.3421388058050603, |
| "learning_rate": 4.593101327980301e-06, |
| "loss": 0.1953, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.921747042766151, |
| "grad_norm": 1.4619166894313524, |
| "learning_rate": 4.592710448068043e-06, |
| "loss": 0.1645, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.9222020018198362, |
| "grad_norm": 2.1135622970646457, |
| "learning_rate": 4.592319397148936e-06, |
| "loss": 0.1391, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.9226569608735213, |
| "grad_norm": 1.2948388707877838, |
| "learning_rate": 4.5919281752549386e-06, |
| "loss": 0.1465, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.9231119199272065, |
| "grad_norm": 2.587913347360957, |
| "learning_rate": 4.5915367824180165e-06, |
| "loss": 0.2171, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.9235668789808917, |
| "grad_norm": 1.2685293245744347, |
| "learning_rate": 4.591145218670154e-06, |
| "loss": 0.1127, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9240218380345769, |
| "grad_norm": 1.99832008478398, |
| "learning_rate": 4.590753484043348e-06, |
| "loss": 0.1795, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.924476797088262, |
| "grad_norm": 1.9341588389439468, |
| "learning_rate": 4.590361578569609e-06, |
| "loss": 0.1625, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.9249317561419472, |
| "grad_norm": 1.906987896729889, |
| "learning_rate": 4.589969502280962e-06, |
| "loss": 0.1292, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.9253867151956324, |
| "grad_norm": 1.3759296704205837, |
| "learning_rate": 4.589577255209445e-06, |
| "loss": 0.1618, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.9258416742493175, |
| "grad_norm": 1.7824080215785223, |
| "learning_rate": 4.589184837387112e-06, |
| "loss": 0.1571, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.9262966333030027, |
| "grad_norm": 1.969233090292503, |
| "learning_rate": 4.588792248846028e-06, |
| "loss": 0.1565, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.9267515923566879, |
| "grad_norm": 2.0350441155725982, |
| "learning_rate": 4.588399489618274e-06, |
| "loss": 0.2092, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.9272065514103731, |
| "grad_norm": 1.3739303279350978, |
| "learning_rate": 4.588006559735945e-06, |
| "loss": 0.1144, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.9276615104640582, |
| "grad_norm": 1.8231719010868002, |
| "learning_rate": 4.587613459231149e-06, |
| "loss": 0.19, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.9281164695177434, |
| "grad_norm": 1.7222249399366698, |
| "learning_rate": 4.5872201881360105e-06, |
| "loss": 0.1818, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 1.9962016913755094, |
| "learning_rate": 4.586826746482662e-06, |
| "loss": 0.1858, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.9290263876251137, |
| "grad_norm": 1.581565012958607, |
| "learning_rate": 4.586433134303257e-06, |
| "loss": 0.1388, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.9294813466787989, |
| "grad_norm": 2.2212237230761342, |
| "learning_rate": 4.586039351629959e-06, |
| "loss": 0.1627, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.9299363057324841, |
| "grad_norm": 2.4442840318574954, |
| "learning_rate": 4.585645398494944e-06, |
| "loss": 0.1421, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.9303912647861693, |
| "grad_norm": 1.63124630524275, |
| "learning_rate": 4.585251274930406e-06, |
| "loss": 0.1553, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.9308462238398544, |
| "grad_norm": 1.9068361286149722, |
| "learning_rate": 4.584856980968552e-06, |
| "loss": 0.195, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.9313011828935396, |
| "grad_norm": 1.8750052649788462, |
| "learning_rate": 4.584462516641599e-06, |
| "loss": 0.1843, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.9317561419472248, |
| "grad_norm": 1.8692305314343534, |
| "learning_rate": 4.584067881981784e-06, |
| "loss": 0.1607, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.9322111010009099, |
| "grad_norm": 1.7454178600595318, |
| "learning_rate": 4.583673077021352e-06, |
| "loss": 0.1166, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.9326660600545951, |
| "grad_norm": 1.7370379964519336, |
| "learning_rate": 4.583278101792567e-06, |
| "loss": 0.1658, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.9331210191082803, |
| "grad_norm": 1.6957581344539345, |
| "learning_rate": 4.582882956327704e-06, |
| "loss": 0.1394, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.9335759781619655, |
| "grad_norm": 1.8052091804015933, |
| "learning_rate": 4.58248764065905e-06, |
| "loss": 0.1571, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.9340309372156506, |
| "grad_norm": 1.5675006184278855, |
| "learning_rate": 4.582092154818912e-06, |
| "loss": 0.145, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.9344858962693358, |
| "grad_norm": 1.6024320375744705, |
| "learning_rate": 4.581696498839605e-06, |
| "loss": 0.2042, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.934940855323021, |
| "grad_norm": 1.8058483639041405, |
| "learning_rate": 4.581300672753462e-06, |
| "loss": 0.1661, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.935395814376706, |
| "grad_norm": 1.9556770558432066, |
| "learning_rate": 4.580904676592826e-06, |
| "loss": 0.1767, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.9358507734303912, |
| "grad_norm": 1.5186464139909968, |
| "learning_rate": 4.580508510390057e-06, |
| "loss": 0.1131, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.9363057324840764, |
| "grad_norm": 1.5844512517498417, |
| "learning_rate": 4.580112174177529e-06, |
| "loss": 0.1815, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.9367606915377616, |
| "grad_norm": 1.382066796659836, |
| "learning_rate": 4.5797156679876274e-06, |
| "loss": 0.1073, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.9372156505914467, |
| "grad_norm": 2.7590592902292332, |
| "learning_rate": 4.5793189918527524e-06, |
| "loss": 0.3083, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9376706096451319, |
| "grad_norm": 2.097729619621905, |
| "learning_rate": 4.5789221458053205e-06, |
| "loss": 0.1572, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.9381255686988171, |
| "grad_norm": 2.269383743265302, |
| "learning_rate": 4.578525129877759e-06, |
| "loss": 0.2157, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.9385805277525022, |
| "grad_norm": 1.704369436738576, |
| "learning_rate": 4.5781279441025105e-06, |
| "loss": 0.1746, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.9390354868061874, |
| "grad_norm": 1.961199267422335, |
| "learning_rate": 4.577730588512031e-06, |
| "loss": 0.1794, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.9394904458598726, |
| "grad_norm": 2.0070527773957663, |
| "learning_rate": 4.577333063138791e-06, |
| "loss": 0.1744, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.9399454049135578, |
| "grad_norm": 1.4918844273699323, |
| "learning_rate": 4.576935368015274e-06, |
| "loss": 0.1614, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.9404003639672429, |
| "grad_norm": 1.957075251939811, |
| "learning_rate": 4.576537503173978e-06, |
| "loss": 0.2007, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.9408553230209281, |
| "grad_norm": 2.1344327287579916, |
| "learning_rate": 4.576139468647415e-06, |
| "loss": 0.1953, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.9413102820746133, |
| "grad_norm": 2.052141999542276, |
| "learning_rate": 4.575741264468111e-06, |
| "loss": 0.1247, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.9417652411282984, |
| "grad_norm": 1.9687685313144003, |
| "learning_rate": 4.575342890668603e-06, |
| "loss": 0.1941, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.9422202001819836, |
| "grad_norm": 2.1906738543597695, |
| "learning_rate": 4.574944347281448e-06, |
| "loss": 0.2436, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.9426751592356688, |
| "grad_norm": 2.0326378397322253, |
| "learning_rate": 4.5745456343392114e-06, |
| "loss": 0.1916, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.943130118289354, |
| "grad_norm": 1.9398275581691273, |
| "learning_rate": 4.574146751874473e-06, |
| "loss": 0.2243, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.9435850773430391, |
| "grad_norm": 1.583576444036144, |
| "learning_rate": 4.57374769991983e-06, |
| "loss": 0.1335, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.9440400363967243, |
| "grad_norm": 1.49493272878593, |
| "learning_rate": 4.573348478507888e-06, |
| "loss": 0.132, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.9444949954504095, |
| "grad_norm": 2.191087505295727, |
| "learning_rate": 4.5729490876712725e-06, |
| "loss": 0.2728, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.9449499545040946, |
| "grad_norm": 1.5696743668055735, |
| "learning_rate": 4.572549527442619e-06, |
| "loss": 0.1167, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.9454049135577798, |
| "grad_norm": 1.4703104600885406, |
| "learning_rate": 4.572149797854578e-06, |
| "loss": 0.1481, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.945859872611465, |
| "grad_norm": 1.3375471658633535, |
| "learning_rate": 4.571749898939813e-06, |
| "loss": 0.1448, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.9463148316651502, |
| "grad_norm": 1.1353706299658501, |
| "learning_rate": 4.5713498307310024e-06, |
| "loss": 0.1095, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.9467697907188353, |
| "grad_norm": 1.170226192835475, |
| "learning_rate": 4.570949593260837e-06, |
| "loss": 0.1025, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.9472247497725205, |
| "grad_norm": 1.611590656998796, |
| "learning_rate": 4.570549186562024e-06, |
| "loss": 0.1648, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.9476797088262057, |
| "grad_norm": 1.9894469425244659, |
| "learning_rate": 4.570148610667281e-06, |
| "loss": 0.2171, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.9481346678798908, |
| "grad_norm": 2.6290643290299403, |
| "learning_rate": 4.569747865609343e-06, |
| "loss": 0.2035, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.948589626933576, |
| "grad_norm": 1.9997278123807103, |
| "learning_rate": 4.569346951420957e-06, |
| "loss": 0.219, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.9490445859872612, |
| "grad_norm": 2.3647369288676465, |
| "learning_rate": 4.568945868134882e-06, |
| "loss": 0.1821, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.9494995450409464, |
| "grad_norm": 1.4361032491832602, |
| "learning_rate": 4.568544615783894e-06, |
| "loss": 0.174, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.9499545040946314, |
| "grad_norm": 2.4948435319990794, |
| "learning_rate": 4.568143194400782e-06, |
| "loss": 0.162, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.9504094631483166, |
| "grad_norm": 2.3391791745125823, |
| "learning_rate": 4.567741604018348e-06, |
| "loss": 0.1731, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.9508644222020018, |
| "grad_norm": 1.9417130047261684, |
| "learning_rate": 4.567339844669407e-06, |
| "loss": 0.2115, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.9513193812556869, |
| "grad_norm": 1.341309783614821, |
| "learning_rate": 4.566937916386791e-06, |
| "loss": 0.1207, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.9517743403093721, |
| "grad_norm": 1.8063160975644432, |
| "learning_rate": 4.566535819203342e-06, |
| "loss": 0.1484, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.9522292993630573, |
| "grad_norm": 1.4064547804406506, |
| "learning_rate": 4.566133553151918e-06, |
| "loss": 0.1696, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.9526842584167425, |
| "grad_norm": 1.5123792301862293, |
| "learning_rate": 4.565731118265392e-06, |
| "loss": 0.1513, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.9531392174704276, |
| "grad_norm": 2.6660242675499974, |
| "learning_rate": 4.5653285145766465e-06, |
| "loss": 0.1967, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.9535941765241128, |
| "grad_norm": 1.3182075171271719, |
| "learning_rate": 4.564925742118583e-06, |
| "loss": 0.1647, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.954049135577798, |
| "grad_norm": 2.0246143369138583, |
| "learning_rate": 4.564522800924111e-06, |
| "loss": 0.1933, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.9545040946314831, |
| "grad_norm": 1.5229871866624265, |
| "learning_rate": 4.56411969102616e-06, |
| "loss": 0.1262, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.9549590536851683, |
| "grad_norm": 1.6259281484911337, |
| "learning_rate": 4.5637164124576695e-06, |
| "loss": 0.22, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.9554140127388535, |
| "grad_norm": 2.2924228140977534, |
| "learning_rate": 4.563312965251594e-06, |
| "loss": 0.1788, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9558689717925387, |
| "grad_norm": 2.145017083065323, |
| "learning_rate": 4.562909349440899e-06, |
| "loss": 0.1997, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.9563239308462238, |
| "grad_norm": 1.4998751606083633, |
| "learning_rate": 4.5625055650585695e-06, |
| "loss": 0.1268, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.956778889899909, |
| "grad_norm": 2.212976295267469, |
| "learning_rate": 4.562101612137599e-06, |
| "loss": 0.1717, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.9572338489535942, |
| "grad_norm": 1.679438029199367, |
| "learning_rate": 4.561697490710998e-06, |
| "loss": 0.1072, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.9576888080072793, |
| "grad_norm": 2.079365510674891, |
| "learning_rate": 4.561293200811787e-06, |
| "loss": 0.1746, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.9581437670609645, |
| "grad_norm": 1.686198495026396, |
| "learning_rate": 4.560888742473005e-06, |
| "loss": 0.1561, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.9585987261146497, |
| "grad_norm": 1.6637740262678333, |
| "learning_rate": 4.560484115727703e-06, |
| "loss": 0.202, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.9590536851683349, |
| "grad_norm": 1.3363367490497915, |
| "learning_rate": 4.560079320608942e-06, |
| "loss": 0.1505, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.95950864422202, |
| "grad_norm": 1.3524224143962482, |
| "learning_rate": 4.5596743571498035e-06, |
| "loss": 0.1556, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.9599636032757052, |
| "grad_norm": 2.051012825316942, |
| "learning_rate": 4.5592692253833775e-06, |
| "loss": 0.1557, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.9604185623293904, |
| "grad_norm": 1.8725405774246842, |
| "learning_rate": 4.5588639253427705e-06, |
| "loss": 0.1361, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.9608735213830755, |
| "grad_norm": 1.6129721682768872, |
| "learning_rate": 4.558458457061101e-06, |
| "loss": 0.1604, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.9613284804367607, |
| "grad_norm": 2.4257644594708654, |
| "learning_rate": 4.5580528205715024e-06, |
| "loss": 0.1728, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.9617834394904459, |
| "grad_norm": 2.2020262494310714, |
| "learning_rate": 4.557647015907121e-06, |
| "loss": 0.1982, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.9622383985441311, |
| "grad_norm": 1.3942660783602792, |
| "learning_rate": 4.557241043101118e-06, |
| "loss": 0.1263, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.9626933575978162, |
| "grad_norm": 1.6927990416728342, |
| "learning_rate": 4.556834902186667e-06, |
| "loss": 0.2537, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.9631483166515014, |
| "grad_norm": 2.0785259665220646, |
| "learning_rate": 4.556428593196956e-06, |
| "loss": 0.1927, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.9636032757051866, |
| "grad_norm": 1.7131650413165849, |
| "learning_rate": 4.556022116165189e-06, |
| "loss": 0.2146, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.9640582347588716, |
| "grad_norm": 1.7560312461053569, |
| "learning_rate": 4.555615471124578e-06, |
| "loss": 0.1429, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.9645131938125568, |
| "grad_norm": 1.4424071339171873, |
| "learning_rate": 4.555208658108354e-06, |
| "loss": 0.1017, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.964968152866242, |
| "grad_norm": 2.366476482520588, |
| "learning_rate": 4.55480167714976e-06, |
| "loss": 0.1701, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.9654231119199272, |
| "grad_norm": 1.3193271811867113, |
| "learning_rate": 4.554394528282052e-06, |
| "loss": 0.1608, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.9658780709736123, |
| "grad_norm": 1.6112197038225973, |
| "learning_rate": 4.553987211538501e-06, |
| "loss": 0.1663, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.9663330300272975, |
| "grad_norm": 2.2120821423419477, |
| "learning_rate": 4.5535797269523906e-06, |
| "loss": 0.1761, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.9667879890809827, |
| "grad_norm": 1.9459325657347053, |
| "learning_rate": 4.55317207455702e-06, |
| "loss": 0.1648, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.9672429481346679, |
| "grad_norm": 1.2258892841488513, |
| "learning_rate": 4.552764254385697e-06, |
| "loss": 0.113, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.967697907188353, |
| "grad_norm": 1.7595258140929935, |
| "learning_rate": 4.552356266471751e-06, |
| "loss": 0.1773, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.9681528662420382, |
| "grad_norm": 1.9664757298212556, |
| "learning_rate": 4.55194811084852e-06, |
| "loss": 0.165, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.9686078252957234, |
| "grad_norm": 2.222530250938157, |
| "learning_rate": 4.551539787549354e-06, |
| "loss": 0.2096, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.9690627843494085, |
| "grad_norm": 1.3774868751004326, |
| "learning_rate": 4.551131296607623e-06, |
| "loss": 0.1089, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.9695177434030937, |
| "grad_norm": 1.8067013761642468, |
| "learning_rate": 4.550722638056703e-06, |
| "loss": 0.1323, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.9699727024567789, |
| "grad_norm": 2.24991176799243, |
| "learning_rate": 4.550313811929993e-06, |
| "loss": 0.1334, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.9704276615104641, |
| "grad_norm": 2.72004150671695, |
| "learning_rate": 4.549904818260895e-06, |
| "loss": 0.1775, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.9708826205641492, |
| "grad_norm": 2.342721771224346, |
| "learning_rate": 4.549495657082834e-06, |
| "loss": 0.191, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.9713375796178344, |
| "grad_norm": 2.2728812324499534, |
| "learning_rate": 4.549086328429242e-06, |
| "loss": 0.1425, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.9717925386715196, |
| "grad_norm": 1.453499597882781, |
| "learning_rate": 4.548676832333569e-06, |
| "loss": 0.1316, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.9722474977252047, |
| "grad_norm": 2.01603990428807, |
| "learning_rate": 4.548267168829279e-06, |
| "loss": 0.1307, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.9727024567788899, |
| "grad_norm": 1.6605060275137966, |
| "learning_rate": 4.547857337949844e-06, |
| "loss": 0.1399, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.9731574158325751, |
| "grad_norm": 1.5535531332266466, |
| "learning_rate": 4.5474473397287556e-06, |
| "loss": 0.1321, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.9736123748862603, |
| "grad_norm": 1.5373238474360202, |
| "learning_rate": 4.547037174199517e-06, |
| "loss": 0.1343, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.9740673339399454, |
| "grad_norm": 1.8078338860297858, |
| "learning_rate": 4.546626841395645e-06, |
| "loss": 0.1635, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.9745222929936306, |
| "grad_norm": 2.3652157653146326, |
| "learning_rate": 4.54621634135067e-06, |
| "loss": 0.1574, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.9749772520473158, |
| "grad_norm": 1.582720512511224, |
| "learning_rate": 4.545805674098136e-06, |
| "loss": 0.1834, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.9754322111010009, |
| "grad_norm": 1.603799084987541, |
| "learning_rate": 4.545394839671601e-06, |
| "loss": 0.1464, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.9758871701546861, |
| "grad_norm": 2.2937187508235612, |
| "learning_rate": 4.544983838104637e-06, |
| "loss": 0.1689, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.9763421292083713, |
| "grad_norm": 1.5827694703198016, |
| "learning_rate": 4.544572669430828e-06, |
| "loss": 0.1974, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.9767970882620565, |
| "grad_norm": 1.5229863728993667, |
| "learning_rate": 4.544161333683775e-06, |
| "loss": 0.1347, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.9772520473157416, |
| "grad_norm": 1.7227170284858135, |
| "learning_rate": 4.543749830897088e-06, |
| "loss": 0.2186, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.9777070063694268, |
| "grad_norm": 1.9401788313572834, |
| "learning_rate": 4.543338161104395e-06, |
| "loss": 0.1674, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.978161965423112, |
| "grad_norm": 1.4440321556413929, |
| "learning_rate": 4.542926324339335e-06, |
| "loss": 0.1518, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.978616924476797, |
| "grad_norm": 1.5863469206535143, |
| "learning_rate": 4.542514320635561e-06, |
| "loss": 0.1548, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.9790718835304822, |
| "grad_norm": 1.7952124026440508, |
| "learning_rate": 4.542102150026741e-06, |
| "loss": 0.2011, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.9795268425841674, |
| "grad_norm": 1.2781168765483073, |
| "learning_rate": 4.541689812546556e-06, |
| "loss": 0.1708, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.9799818016378526, |
| "grad_norm": 2.275201017608769, |
| "learning_rate": 4.541277308228698e-06, |
| "loss": 0.2655, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.9804367606915377, |
| "grad_norm": 1.6797512508176873, |
| "learning_rate": 4.540864637106879e-06, |
| "loss": 0.1526, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.9808917197452229, |
| "grad_norm": 1.7795439392430585, |
| "learning_rate": 4.540451799214817e-06, |
| "loss": 0.1561, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.9813466787989081, |
| "grad_norm": 2.2915523451786766, |
| "learning_rate": 4.540038794586248e-06, |
| "loss": 0.1603, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.9818016378525932, |
| "grad_norm": 2.2274131509949537, |
| "learning_rate": 4.539625623254923e-06, |
| "loss": 0.1423, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.9822565969062784, |
| "grad_norm": 1.3978925866840657, |
| "learning_rate": 4.539212285254601e-06, |
| "loss": 0.1708, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.9827115559599636, |
| "grad_norm": 1.7857894009279391, |
| "learning_rate": 4.5387987806190615e-06, |
| "loss": 0.1893, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.9831665150136488, |
| "grad_norm": 1.518791485457489, |
| "learning_rate": 4.538385109382093e-06, |
| "loss": 0.1709, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.9836214740673339, |
| "grad_norm": 1.3743190231639797, |
| "learning_rate": 4.537971271577498e-06, |
| "loss": 0.1746, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.9840764331210191, |
| "grad_norm": 1.1750088863525163, |
| "learning_rate": 4.537557267239093e-06, |
| "loss": 0.108, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.9845313921747043, |
| "grad_norm": 1.2225308832618265, |
| "learning_rate": 4.537143096400712e-06, |
| "loss": 0.1061, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.9849863512283894, |
| "grad_norm": 2.1247362714767415, |
| "learning_rate": 4.536728759096195e-06, |
| "loss": 0.179, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.9854413102820746, |
| "grad_norm": 1.808580318181682, |
| "learning_rate": 4.536314255359402e-06, |
| "loss": 0.1335, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.9858962693357598, |
| "grad_norm": 1.6790298431680175, |
| "learning_rate": 4.535899585224204e-06, |
| "loss": 0.1493, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.986351228389445, |
| "grad_norm": 3.0332484593824245, |
| "learning_rate": 4.535484748724486e-06, |
| "loss": 0.2063, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.9868061874431301, |
| "grad_norm": 1.6421323451507468, |
| "learning_rate": 4.535069745894147e-06, |
| "loss": 0.1673, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.9872611464968153, |
| "grad_norm": 1.9282204111223042, |
| "learning_rate": 4.534654576767098e-06, |
| "loss": 0.1428, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.9877161055505005, |
| "grad_norm": 1.4541197485662065, |
| "learning_rate": 4.534239241377266e-06, |
| "loss": 0.1901, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.9881710646041856, |
| "grad_norm": 3.2268329342995554, |
| "learning_rate": 4.5338237397585895e-06, |
| "loss": 0.2441, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.9886260236578708, |
| "grad_norm": 2.4649363175751646, |
| "learning_rate": 4.533408071945021e-06, |
| "loss": 0.1763, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.989080982711556, |
| "grad_norm": 1.8464040284824113, |
| "learning_rate": 4.532992237970528e-06, |
| "loss": 0.1646, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.9895359417652412, |
| "grad_norm": 2.115464473457186, |
| "learning_rate": 4.532576237869091e-06, |
| "loss": 0.1468, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.9899909008189263, |
| "grad_norm": 1.6765582325152246, |
| "learning_rate": 4.5321600716747025e-06, |
| "loss": 0.1377, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.9904458598726115, |
| "grad_norm": 1.8413627666297776, |
| "learning_rate": 4.531743739421369e-06, |
| "loss": 0.181, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.9909008189262967, |
| "grad_norm": 1.7110916137165555, |
| "learning_rate": 4.531327241143114e-06, |
| "loss": 0.1418, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.9913557779799818, |
| "grad_norm": 2.3165603295554726, |
| "learning_rate": 4.530910576873969e-06, |
| "loss": 0.1666, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.991810737033667, |
| "grad_norm": 2.0264888702689254, |
| "learning_rate": 4.530493746647984e-06, |
| "loss": 0.1653, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.9922656960873522, |
| "grad_norm": 3.7082736074441227, |
| "learning_rate": 4.530076750499219e-06, |
| "loss": 0.1955, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.9927206551410374, |
| "grad_norm": 1.4980795502080217, |
| "learning_rate": 4.52965958846175e-06, |
| "loss": 0.1763, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.9931756141947224, |
| "grad_norm": 1.328886576986546, |
| "learning_rate": 4.529242260569665e-06, |
| "loss": 0.135, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.9936305732484076, |
| "grad_norm": 2.4602783485410478, |
| "learning_rate": 4.528824766857067e-06, |
| "loss": 0.225, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.9940855323020928, |
| "grad_norm": 2.656745825690249, |
| "learning_rate": 4.5284071073580715e-06, |
| "loss": 0.1623, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.9945404913557779, |
| "grad_norm": 2.191300990353365, |
| "learning_rate": 4.527989282106807e-06, |
| "loss": 0.145, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.9949954504094631, |
| "grad_norm": 2.3096174225453043, |
| "learning_rate": 4.527571291137416e-06, |
| "loss": 0.2047, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.9954504094631483, |
| "grad_norm": 2.2206355508554374, |
| "learning_rate": 4.527153134484056e-06, |
| "loss": 0.1978, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.9959053685168335, |
| "grad_norm": 1.5575737643430931, |
| "learning_rate": 4.5267348121808965e-06, |
| "loss": 0.1083, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.9963603275705186, |
| "grad_norm": 1.1842592978237663, |
| "learning_rate": 4.526316324262121e-06, |
| "loss": 0.1418, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.9968152866242038, |
| "grad_norm": 2.066729296311549, |
| "learning_rate": 4.525897670761926e-06, |
| "loss": 0.1555, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.997270245677889, |
| "grad_norm": 1.8945946795231638, |
| "learning_rate": 4.525478851714522e-06, |
| "loss": 0.1602, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.9977252047315741, |
| "grad_norm": 2.288603637382534, |
| "learning_rate": 4.525059867154133e-06, |
| "loss": 0.1728, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.9981801637852593, |
| "grad_norm": 1.548625455808381, |
| "learning_rate": 4.5246407171149975e-06, |
| "loss": 0.1535, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.9986351228389445, |
| "grad_norm": 1.7795058207338135, |
| "learning_rate": 4.5242214016313655e-06, |
| "loss": 0.1937, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.9990900818926297, |
| "grad_norm": 1.8173123394415125, |
| "learning_rate": 4.523801920737501e-06, |
| "loss": 0.1855, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.9995450409463148, |
| "grad_norm": 1.5328423318772029, |
| "learning_rate": 4.523382274467684e-06, |
| "loss": 0.1734, |
| "step": 2197 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.6888871167302404, |
| "learning_rate": 4.522962462856206e-06, |
| "loss": 0.1061, |
| "step": 2198 |
| }, |
| { |
| "epoch": 1.000454959053685, |
| "grad_norm": 1.0169999119479456, |
| "learning_rate": 4.522542485937369e-06, |
| "loss": 0.051, |
| "step": 2199 |
| }, |
| { |
| "epoch": 1.0009099181073704, |
| "grad_norm": 1.6609923808472133, |
| "learning_rate": 4.522122343745495e-06, |
| "loss": 0.0982, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.0013648771610555, |
| "grad_norm": 1.2283700830083324, |
| "learning_rate": 4.521702036314915e-06, |
| "loss": 0.068, |
| "step": 2201 |
| }, |
| { |
| "epoch": 1.0018198362147406, |
| "grad_norm": 1.220074312624483, |
| "learning_rate": 4.521281563679973e-06, |
| "loss": 0.0629, |
| "step": 2202 |
| }, |
| { |
| "epoch": 1.0022747952684259, |
| "grad_norm": 1.4941719880778739, |
| "learning_rate": 4.5208609258750314e-06, |
| "loss": 0.0755, |
| "step": 2203 |
| }, |
| { |
| "epoch": 1.002729754322111, |
| "grad_norm": 1.1143728511252875, |
| "learning_rate": 4.52044012293446e-06, |
| "loss": 0.0587, |
| "step": 2204 |
| }, |
| { |
| "epoch": 1.0031847133757963, |
| "grad_norm": 1.5319847923881116, |
| "learning_rate": 4.520019154892646e-06, |
| "loss": 0.0851, |
| "step": 2205 |
| }, |
| { |
| "epoch": 1.0036396724294814, |
| "grad_norm": 1.2636498680398078, |
| "learning_rate": 4.519598021783989e-06, |
| "loss": 0.0993, |
| "step": 2206 |
| }, |
| { |
| "epoch": 1.0040946314831665, |
| "grad_norm": 1.5487488091959216, |
| "learning_rate": 4.519176723642903e-06, |
| "loss": 0.113, |
| "step": 2207 |
| }, |
| { |
| "epoch": 1.0045495905368518, |
| "grad_norm": 1.5557166129958784, |
| "learning_rate": 4.518755260503813e-06, |
| "loss": 0.0788, |
| "step": 2208 |
| }, |
| { |
| "epoch": 1.0050045495905369, |
| "grad_norm": 1.2818157097100387, |
| "learning_rate": 4.51833363240116e-06, |
| "loss": 0.0743, |
| "step": 2209 |
| }, |
| { |
| "epoch": 1.005459508644222, |
| "grad_norm": 1.200932009259888, |
| "learning_rate": 4.517911839369398e-06, |
| "loss": 0.0811, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.0059144676979073, |
| "grad_norm": 1.4486327355662423, |
| "learning_rate": 4.517489881442993e-06, |
| "loss": 0.062, |
| "step": 2211 |
| }, |
| { |
| "epoch": 1.0063694267515924, |
| "grad_norm": 1.3527098955371344, |
| "learning_rate": 4.517067758656424e-06, |
| "loss": 0.0627, |
| "step": 2212 |
| }, |
| { |
| "epoch": 1.0068243858052774, |
| "grad_norm": 1.4047497974003487, |
| "learning_rate": 4.516645471044188e-06, |
| "loss": 0.0651, |
| "step": 2213 |
| }, |
| { |
| "epoch": 1.0072793448589628, |
| "grad_norm": 1.4164244968906639, |
| "learning_rate": 4.516223018640791e-06, |
| "loss": 0.0714, |
| "step": 2214 |
| }, |
| { |
| "epoch": 1.0077343039126478, |
| "grad_norm": 1.5809882117425458, |
| "learning_rate": 4.515800401480754e-06, |
| "loss": 0.0989, |
| "step": 2215 |
| }, |
| { |
| "epoch": 1.008189262966333, |
| "grad_norm": 1.6844068994280326, |
| "learning_rate": 4.515377619598612e-06, |
| "loss": 0.1007, |
| "step": 2216 |
| }, |
| { |
| "epoch": 1.0086442220200182, |
| "grad_norm": 1.5732620970585767, |
| "learning_rate": 4.514954673028913e-06, |
| "loss": 0.0765, |
| "step": 2217 |
| }, |
| { |
| "epoch": 1.0090991810737033, |
| "grad_norm": 1.3651454362527589, |
| "learning_rate": 4.5145315618062155e-06, |
| "loss": 0.0817, |
| "step": 2218 |
| }, |
| { |
| "epoch": 1.0095541401273886, |
| "grad_norm": 1.7849697070364972, |
| "learning_rate": 4.514108285965098e-06, |
| "loss": 0.0946, |
| "step": 2219 |
| }, |
| { |
| "epoch": 1.0100090991810737, |
| "grad_norm": 1.4164875410963866, |
| "learning_rate": 4.513684845540146e-06, |
| "loss": 0.067, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.0104640582347588, |
| "grad_norm": 1.7807110987231174, |
| "learning_rate": 4.5132612405659625e-06, |
| "loss": 0.1131, |
| "step": 2221 |
| }, |
| { |
| "epoch": 1.0109190172884441, |
| "grad_norm": 1.6962102867596296, |
| "learning_rate": 4.5128374710771625e-06, |
| "loss": 0.1001, |
| "step": 2222 |
| }, |
| { |
| "epoch": 1.0113739763421292, |
| "grad_norm": 1.9807611103838136, |
| "learning_rate": 4.512413537108374e-06, |
| "loss": 0.1216, |
| "step": 2223 |
| }, |
| { |
| "epoch": 1.0118289353958143, |
| "grad_norm": 2.2071849786855195, |
| "learning_rate": 4.511989438694239e-06, |
| "loss": 0.0758, |
| "step": 2224 |
| }, |
| { |
| "epoch": 1.0122838944494996, |
| "grad_norm": 1.41006582199038, |
| "learning_rate": 4.511565175869415e-06, |
| "loss": 0.0676, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.0127388535031847, |
| "grad_norm": 1.5005194178509522, |
| "learning_rate": 4.511140748668566e-06, |
| "loss": 0.0845, |
| "step": 2226 |
| }, |
| { |
| "epoch": 1.0131938125568698, |
| "grad_norm": 1.2291494575864939, |
| "learning_rate": 4.510716157126379e-06, |
| "loss": 0.0611, |
| "step": 2227 |
| }, |
| { |
| "epoch": 1.013648771610555, |
| "grad_norm": 2.4795116846611975, |
| "learning_rate": 4.510291401277548e-06, |
| "loss": 0.0983, |
| "step": 2228 |
| }, |
| { |
| "epoch": 1.0141037306642402, |
| "grad_norm": 2.657277286309681, |
| "learning_rate": 4.509866481156781e-06, |
| "loss": 0.1101, |
| "step": 2229 |
| }, |
| { |
| "epoch": 1.0145586897179253, |
| "grad_norm": 1.8196308245882602, |
| "learning_rate": 4.509441396798802e-06, |
| "loss": 0.0998, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.0150136487716106, |
| "grad_norm": 1.9314931582074881, |
| "learning_rate": 4.5090161482383475e-06, |
| "loss": 0.0936, |
| "step": 2231 |
| }, |
| { |
| "epoch": 1.0154686078252957, |
| "grad_norm": 1.2746342487726179, |
| "learning_rate": 4.508590735510166e-06, |
| "loss": 0.0676, |
| "step": 2232 |
| }, |
| { |
| "epoch": 1.015923566878981, |
| "grad_norm": 1.8859048739802027, |
| "learning_rate": 4.508165158649019e-06, |
| "loss": 0.0811, |
| "step": 2233 |
| }, |
| { |
| "epoch": 1.016378525932666, |
| "grad_norm": 1.6756178231136896, |
| "learning_rate": 4.507739417689685e-06, |
| "loss": 0.0747, |
| "step": 2234 |
| }, |
| { |
| "epoch": 1.0168334849863512, |
| "grad_norm": 1.3984270258928366, |
| "learning_rate": 4.507313512666953e-06, |
| "loss": 0.075, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.0172884440400365, |
| "grad_norm": 1.5242107845200688, |
| "learning_rate": 4.506887443615625e-06, |
| "loss": 0.0823, |
| "step": 2236 |
| }, |
| { |
| "epoch": 1.0177434030937216, |
| "grad_norm": 1.5995342787535922, |
| "learning_rate": 4.506461210570518e-06, |
| "loss": 0.0971, |
| "step": 2237 |
| }, |
| { |
| "epoch": 1.0181983621474067, |
| "grad_norm": 1.1425078029916038, |
| "learning_rate": 4.506034813566462e-06, |
| "loss": 0.1233, |
| "step": 2238 |
| }, |
| { |
| "epoch": 1.018653321201092, |
| "grad_norm": 1.4187790734010148, |
| "learning_rate": 4.505608252638301e-06, |
| "loss": 0.0934, |
| "step": 2239 |
| }, |
| { |
| "epoch": 1.019108280254777, |
| "grad_norm": 1.9848336082848856, |
| "learning_rate": 4.50518152782089e-06, |
| "loss": 0.1203, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.0195632393084622, |
| "grad_norm": 1.2043374157232327, |
| "learning_rate": 4.504754639149101e-06, |
| "loss": 0.0709, |
| "step": 2241 |
| }, |
| { |
| "epoch": 1.0200181983621475, |
| "grad_norm": 1.36618996999929, |
| "learning_rate": 4.504327586657814e-06, |
| "loss": 0.0647, |
| "step": 2242 |
| }, |
| { |
| "epoch": 1.0204731574158326, |
| "grad_norm": 1.563535065138085, |
| "learning_rate": 4.50390037038193e-06, |
| "loss": 0.0833, |
| "step": 2243 |
| }, |
| { |
| "epoch": 1.0209281164695176, |
| "grad_norm": 1.5296584792807861, |
| "learning_rate": 4.503472990356357e-06, |
| "loss": 0.0946, |
| "step": 2244 |
| }, |
| { |
| "epoch": 1.021383075523203, |
| "grad_norm": 1.512634883619265, |
| "learning_rate": 4.503045446616018e-06, |
| "loss": 0.0715, |
| "step": 2245 |
| }, |
| { |
| "epoch": 1.021838034576888, |
| "grad_norm": 1.3010427168043244, |
| "learning_rate": 4.502617739195852e-06, |
| "loss": 0.0873, |
| "step": 2246 |
| }, |
| { |
| "epoch": 1.0222929936305734, |
| "grad_norm": 1.387157397416425, |
| "learning_rate": 4.502189868130807e-06, |
| "loss": 0.0763, |
| "step": 2247 |
| }, |
| { |
| "epoch": 1.0227479526842584, |
| "grad_norm": 1.828795187833686, |
| "learning_rate": 4.501761833455849e-06, |
| "loss": 0.1319, |
| "step": 2248 |
| }, |
| { |
| "epoch": 1.0232029117379435, |
| "grad_norm": 1.3445669290205065, |
| "learning_rate": 4.501333635205952e-06, |
| "loss": 0.068, |
| "step": 2249 |
| }, |
| { |
| "epoch": 1.0236578707916288, |
| "grad_norm": 1.5610944674651466, |
| "learning_rate": 4.5009052734161095e-06, |
| "loss": 0.0739, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.024112829845314, |
| "grad_norm": 1.2525841076083186, |
| "learning_rate": 4.500476748121324e-06, |
| "loss": 0.1094, |
| "step": 2251 |
| }, |
| { |
| "epoch": 1.024567788898999, |
| "grad_norm": 1.5118810013113924, |
| "learning_rate": 4.500048059356613e-06, |
| "loss": 0.1041, |
| "step": 2252 |
| }, |
| { |
| "epoch": 1.0250227479526843, |
| "grad_norm": 1.318153460904525, |
| "learning_rate": 4.499619207157007e-06, |
| "loss": 0.0851, |
| "step": 2253 |
| }, |
| { |
| "epoch": 1.0254777070063694, |
| "grad_norm": 1.3005012388734132, |
| "learning_rate": 4.499190191557549e-06, |
| "loss": 0.1007, |
| "step": 2254 |
| }, |
| { |
| "epoch": 1.0259326660600545, |
| "grad_norm": 1.7684251321269342, |
| "learning_rate": 4.498761012593296e-06, |
| "loss": 0.1144, |
| "step": 2255 |
| }, |
| { |
| "epoch": 1.0263876251137398, |
| "grad_norm": 1.2065670700113398, |
| "learning_rate": 4.498331670299321e-06, |
| "loss": 0.1344, |
| "step": 2256 |
| }, |
| { |
| "epoch": 1.026842584167425, |
| "grad_norm": 1.6857989870574055, |
| "learning_rate": 4.497902164710704e-06, |
| "loss": 0.0642, |
| "step": 2257 |
| }, |
| { |
| "epoch": 1.02729754322111, |
| "grad_norm": 1.6473004600696095, |
| "learning_rate": 4.497472495862547e-06, |
| "loss": 0.0981, |
| "step": 2258 |
| }, |
| { |
| "epoch": 1.0277525022747953, |
| "grad_norm": 1.3689985527437365, |
| "learning_rate": 4.497042663789957e-06, |
| "loss": 0.0813, |
| "step": 2259 |
| }, |
| { |
| "epoch": 1.0282074613284804, |
| "grad_norm": 1.6484955662328646, |
| "learning_rate": 4.496612668528059e-06, |
| "loss": 0.1318, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.0286624203821657, |
| "grad_norm": 1.2301308018690613, |
| "learning_rate": 4.496182510111991e-06, |
| "loss": 0.1323, |
| "step": 2261 |
| }, |
| { |
| "epoch": 1.0291173794358508, |
| "grad_norm": 1.3974663767006335, |
| "learning_rate": 4.495752188576902e-06, |
| "loss": 0.1113, |
| "step": 2262 |
| }, |
| { |
| "epoch": 1.0295723384895359, |
| "grad_norm": 1.9572449646613161, |
| "learning_rate": 4.4953217039579574e-06, |
| "loss": 0.1108, |
| "step": 2263 |
| }, |
| { |
| "epoch": 1.0300272975432212, |
| "grad_norm": 1.5604560381918156, |
| "learning_rate": 4.494891056290335e-06, |
| "loss": 0.126, |
| "step": 2264 |
| }, |
| { |
| "epoch": 1.0304822565969063, |
| "grad_norm": 1.7509136256359128, |
| "learning_rate": 4.494460245609223e-06, |
| "loss": 0.0767, |
| "step": 2265 |
| }, |
| { |
| "epoch": 1.0309372156505914, |
| "grad_norm": 1.5345571279100725, |
| "learning_rate": 4.494029271949827e-06, |
| "loss": 0.1008, |
| "step": 2266 |
| }, |
| { |
| "epoch": 1.0313921747042767, |
| "grad_norm": 1.0263814664645543, |
| "learning_rate": 4.493598135347363e-06, |
| "loss": 0.0931, |
| "step": 2267 |
| }, |
| { |
| "epoch": 1.0318471337579618, |
| "grad_norm": 2.0480255592331584, |
| "learning_rate": 4.493166835837064e-06, |
| "loss": 0.0681, |
| "step": 2268 |
| }, |
| { |
| "epoch": 1.0323020928116469, |
| "grad_norm": 1.8761109395251792, |
| "learning_rate": 4.492735373454171e-06, |
| "loss": 0.1086, |
| "step": 2269 |
| }, |
| { |
| "epoch": 1.0327570518653322, |
| "grad_norm": 1.897488467663145, |
| "learning_rate": 4.492303748233943e-06, |
| "loss": 0.1267, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.0332120109190173, |
| "grad_norm": 1.7630394900644286, |
| "learning_rate": 4.49187196021165e-06, |
| "loss": 0.148, |
| "step": 2271 |
| }, |
| { |
| "epoch": 1.0336669699727024, |
| "grad_norm": 1.557460432820476, |
| "learning_rate": 4.491440009422575e-06, |
| "loss": 0.0822, |
| "step": 2272 |
| }, |
| { |
| "epoch": 1.0341219290263877, |
| "grad_norm": 2.2035963282826474, |
| "learning_rate": 4.491007895902016e-06, |
| "loss": 0.1237, |
| "step": 2273 |
| }, |
| { |
| "epoch": 1.0345768880800728, |
| "grad_norm": 1.7055574933768018, |
| "learning_rate": 4.490575619685283e-06, |
| "loss": 0.101, |
| "step": 2274 |
| }, |
| { |
| "epoch": 1.035031847133758, |
| "grad_norm": 2.3176332211637103, |
| "learning_rate": 4.4901431808077e-06, |
| "loss": 0.0965, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.0354868061874432, |
| "grad_norm": 1.9372753009751453, |
| "learning_rate": 4.489710579304603e-06, |
| "loss": 0.1356, |
| "step": 2276 |
| }, |
| { |
| "epoch": 1.0359417652411282, |
| "grad_norm": 1.3110102653721396, |
| "learning_rate": 4.489277815211343e-06, |
| "loss": 0.0544, |
| "step": 2277 |
| }, |
| { |
| "epoch": 1.0363967242948136, |
| "grad_norm": 1.4905691930121885, |
| "learning_rate": 4.488844888563284e-06, |
| "loss": 0.1552, |
| "step": 2278 |
| }, |
| { |
| "epoch": 1.0368516833484986, |
| "grad_norm": 1.2129187548833384, |
| "learning_rate": 4.488411799395802e-06, |
| "loss": 0.0635, |
| "step": 2279 |
| }, |
| { |
| "epoch": 1.0373066424021837, |
| "grad_norm": 1.7307605999371245, |
| "learning_rate": 4.487978547744287e-06, |
| "loss": 0.0718, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.037761601455869, |
| "grad_norm": 4.002919733780402, |
| "learning_rate": 4.487545133644143e-06, |
| "loss": 0.0918, |
| "step": 2281 |
| }, |
| { |
| "epoch": 1.0382165605095541, |
| "grad_norm": 1.434451235166591, |
| "learning_rate": 4.487111557130787e-06, |
| "loss": 0.1087, |
| "step": 2282 |
| }, |
| { |
| "epoch": 1.0386715195632392, |
| "grad_norm": 1.6326264823457393, |
| "learning_rate": 4.486677818239647e-06, |
| "loss": 0.0943, |
| "step": 2283 |
| }, |
| { |
| "epoch": 1.0391264786169245, |
| "grad_norm": 1.6173934297359729, |
| "learning_rate": 4.486243917006169e-06, |
| "loss": 0.0825, |
| "step": 2284 |
| }, |
| { |
| "epoch": 1.0395814376706096, |
| "grad_norm": 1.330454351983684, |
| "learning_rate": 4.485809853465807e-06, |
| "loss": 0.0505, |
| "step": 2285 |
| }, |
| { |
| "epoch": 1.0400363967242947, |
| "grad_norm": 1.3258755084207146, |
| "learning_rate": 4.4853756276540315e-06, |
| "loss": 0.0877, |
| "step": 2286 |
| }, |
| { |
| "epoch": 1.04049135577798, |
| "grad_norm": 1.4601501745351109, |
| "learning_rate": 4.484941239606326e-06, |
| "loss": 0.0861, |
| "step": 2287 |
| }, |
| { |
| "epoch": 1.040946314831665, |
| "grad_norm": 1.978079069134469, |
| "learning_rate": 4.484506689358186e-06, |
| "loss": 0.1226, |
| "step": 2288 |
| }, |
| { |
| "epoch": 1.0414012738853504, |
| "grad_norm": 1.3962311543656398, |
| "learning_rate": 4.484071976945121e-06, |
| "loss": 0.0687, |
| "step": 2289 |
| }, |
| { |
| "epoch": 1.0418562329390355, |
| "grad_norm": 1.2605481862079213, |
| "learning_rate": 4.483637102402655e-06, |
| "loss": 0.1035, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.0423111919927206, |
| "grad_norm": 1.3191554559607057, |
| "learning_rate": 4.4832020657663224e-06, |
| "loss": 0.0789, |
| "step": 2291 |
| }, |
| { |
| "epoch": 1.042766151046406, |
| "grad_norm": 1.7983136808453735, |
| "learning_rate": 4.482766867071673e-06, |
| "loss": 0.068, |
| "step": 2292 |
| }, |
| { |
| "epoch": 1.043221110100091, |
| "grad_norm": 1.3901753138130788, |
| "learning_rate": 4.482331506354269e-06, |
| "loss": 0.1017, |
| "step": 2293 |
| }, |
| { |
| "epoch": 1.043676069153776, |
| "grad_norm": 1.581469571449512, |
| "learning_rate": 4.4818959836496876e-06, |
| "loss": 0.0639, |
| "step": 2294 |
| }, |
| { |
| "epoch": 1.0441310282074614, |
| "grad_norm": 1.269815942746802, |
| "learning_rate": 4.481460298993515e-06, |
| "loss": 0.0625, |
| "step": 2295 |
| }, |
| { |
| "epoch": 1.0445859872611465, |
| "grad_norm": 1.3773026873827707, |
| "learning_rate": 4.481024452421357e-06, |
| "loss": 0.0815, |
| "step": 2296 |
| }, |
| { |
| "epoch": 1.0450409463148316, |
| "grad_norm": 1.4926712499107542, |
| "learning_rate": 4.480588443968825e-06, |
| "loss": 0.0651, |
| "step": 2297 |
| }, |
| { |
| "epoch": 1.0454959053685169, |
| "grad_norm": 1.3393174273757424, |
| "learning_rate": 4.4801522736715505e-06, |
| "loss": 0.0853, |
| "step": 2298 |
| }, |
| { |
| "epoch": 1.045950864422202, |
| "grad_norm": 1.5129017760803518, |
| "learning_rate": 4.479715941565174e-06, |
| "loss": 0.054, |
| "step": 2299 |
| }, |
| { |
| "epoch": 1.046405823475887, |
| "grad_norm": 2.0616493840890255, |
| "learning_rate": 4.4792794476853514e-06, |
| "loss": 0.0808, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0468607825295724, |
| "grad_norm": 1.5861310389241974, |
| "learning_rate": 4.47884279206775e-06, |
| "loss": 0.0927, |
| "step": 2301 |
| }, |
| { |
| "epoch": 1.0473157415832575, |
| "grad_norm": 0.928390801162424, |
| "learning_rate": 4.478405974748054e-06, |
| "loss": 0.0722, |
| "step": 2302 |
| }, |
| { |
| "epoch": 1.0477707006369428, |
| "grad_norm": 1.5458094332092187, |
| "learning_rate": 4.477968995761954e-06, |
| "loss": 0.0867, |
| "step": 2303 |
| }, |
| { |
| "epoch": 1.0482256596906279, |
| "grad_norm": 1.5404011995876956, |
| "learning_rate": 4.477531855145161e-06, |
| "loss": 0.0902, |
| "step": 2304 |
| }, |
| { |
| "epoch": 1.048680618744313, |
| "grad_norm": 1.3434412855749513, |
| "learning_rate": 4.477094552933395e-06, |
| "loss": 0.0655, |
| "step": 2305 |
| }, |
| { |
| "epoch": 1.0491355777979983, |
| "grad_norm": 1.083100442302988, |
| "learning_rate": 4.476657089162391e-06, |
| "loss": 0.066, |
| "step": 2306 |
| }, |
| { |
| "epoch": 1.0495905368516834, |
| "grad_norm": 1.3871586676322527, |
| "learning_rate": 4.476219463867897e-06, |
| "loss": 0.1087, |
| "step": 2307 |
| }, |
| { |
| "epoch": 1.0500454959053684, |
| "grad_norm": 1.7852029642214748, |
| "learning_rate": 4.475781677085671e-06, |
| "loss": 0.0916, |
| "step": 2308 |
| }, |
| { |
| "epoch": 1.0505004549590538, |
| "grad_norm": 1.4206975802030928, |
| "learning_rate": 4.4753437288514904e-06, |
| "loss": 0.0664, |
| "step": 2309 |
| }, |
| { |
| "epoch": 1.0509554140127388, |
| "grad_norm": 1.464232148884979, |
| "learning_rate": 4.47490561920114e-06, |
| "loss": 0.098, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.051410373066424, |
| "grad_norm": 1.7389093637922037, |
| "learning_rate": 4.474467348170421e-06, |
| "loss": 0.0926, |
| "step": 2311 |
| }, |
| { |
| "epoch": 1.0518653321201092, |
| "grad_norm": 1.6567765919211275, |
| "learning_rate": 4.474028915795148e-06, |
| "loss": 0.1079, |
| "step": 2312 |
| }, |
| { |
| "epoch": 1.0523202911737943, |
| "grad_norm": 0.8043045141598315, |
| "learning_rate": 4.473590322111145e-06, |
| "loss": 0.0639, |
| "step": 2313 |
| }, |
| { |
| "epoch": 1.0527752502274794, |
| "grad_norm": 1.535130658359192, |
| "learning_rate": 4.473151567154255e-06, |
| "loss": 0.0806, |
| "step": 2314 |
| }, |
| { |
| "epoch": 1.0532302092811647, |
| "grad_norm": 1.2136793848488039, |
| "learning_rate": 4.472712650960328e-06, |
| "loss": 0.0732, |
| "step": 2315 |
| }, |
| { |
| "epoch": 1.0536851683348498, |
| "grad_norm": 1.4191160149688276, |
| "learning_rate": 4.472273573565234e-06, |
| "loss": 0.1603, |
| "step": 2316 |
| }, |
| { |
| "epoch": 1.0541401273885351, |
| "grad_norm": 1.812354142724077, |
| "learning_rate": 4.471834335004849e-06, |
| "loss": 0.1629, |
| "step": 2317 |
| }, |
| { |
| "epoch": 1.0545950864422202, |
| "grad_norm": 1.1853207063745665, |
| "learning_rate": 4.471394935315067e-06, |
| "loss": 0.0429, |
| "step": 2318 |
| }, |
| { |
| "epoch": 1.0550500454959053, |
| "grad_norm": 1.7435537882257561, |
| "learning_rate": 4.470955374531794e-06, |
| "loss": 0.1269, |
| "step": 2319 |
| }, |
| { |
| "epoch": 1.0555050045495906, |
| "grad_norm": 1.7557827405058806, |
| "learning_rate": 4.470515652690947e-06, |
| "loss": 0.065, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.0559599636032757, |
| "grad_norm": 1.413841453700311, |
| "learning_rate": 4.470075769828461e-06, |
| "loss": 0.0972, |
| "step": 2321 |
| }, |
| { |
| "epoch": 1.0564149226569608, |
| "grad_norm": 2.027164177434821, |
| "learning_rate": 4.46963572598028e-06, |
| "loss": 0.1036, |
| "step": 2322 |
| }, |
| { |
| "epoch": 1.056869881710646, |
| "grad_norm": 1.3937151595286825, |
| "learning_rate": 4.469195521182362e-06, |
| "loss": 0.0962, |
| "step": 2323 |
| }, |
| { |
| "epoch": 1.0573248407643312, |
| "grad_norm": 1.6401213468826432, |
| "learning_rate": 4.468755155470679e-06, |
| "loss": 0.0932, |
| "step": 2324 |
| }, |
| { |
| "epoch": 1.0577797998180163, |
| "grad_norm": 2.338885175215576, |
| "learning_rate": 4.468314628881214e-06, |
| "loss": 0.0962, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.0582347588717016, |
| "grad_norm": 1.4115973810191336, |
| "learning_rate": 4.467873941449969e-06, |
| "loss": 0.1021, |
| "step": 2326 |
| }, |
| { |
| "epoch": 1.0586897179253867, |
| "grad_norm": 1.982422405584423, |
| "learning_rate": 4.46743309321295e-06, |
| "loss": 0.1079, |
| "step": 2327 |
| }, |
| { |
| "epoch": 1.0591446769790718, |
| "grad_norm": 1.7740653248101632, |
| "learning_rate": 4.466992084206185e-06, |
| "loss": 0.1169, |
| "step": 2328 |
| }, |
| { |
| "epoch": 1.059599636032757, |
| "grad_norm": 1.116268548969285, |
| "learning_rate": 4.466550914465709e-06, |
| "loss": 0.0657, |
| "step": 2329 |
| }, |
| { |
| "epoch": 1.0600545950864422, |
| "grad_norm": 1.8360092943419488, |
| "learning_rate": 4.466109584027573e-06, |
| "loss": 0.127, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.0605095541401275, |
| "grad_norm": 1.3810676537742754, |
| "learning_rate": 4.465668092927841e-06, |
| "loss": 0.0856, |
| "step": 2331 |
| }, |
| { |
| "epoch": 1.0609645131938126, |
| "grad_norm": 2.185972325771388, |
| "learning_rate": 4.465226441202589e-06, |
| "loss": 0.0851, |
| "step": 2332 |
| }, |
| { |
| "epoch": 1.0614194722474977, |
| "grad_norm": 1.3875472079527142, |
| "learning_rate": 4.464784628887908e-06, |
| "loss": 0.0792, |
| "step": 2333 |
| }, |
| { |
| "epoch": 1.061874431301183, |
| "grad_norm": 1.2775951274791801, |
| "learning_rate": 4.4643426560199e-06, |
| "loss": 0.104, |
| "step": 2334 |
| }, |
| { |
| "epoch": 1.062329390354868, |
| "grad_norm": 1.5319736940172268, |
| "learning_rate": 4.46390052263468e-06, |
| "loss": 0.1104, |
| "step": 2335 |
| }, |
| { |
| "epoch": 1.0627843494085532, |
| "grad_norm": 1.812780273198809, |
| "learning_rate": 4.463458228768378e-06, |
| "loss": 0.0949, |
| "step": 2336 |
| }, |
| { |
| "epoch": 1.0632393084622385, |
| "grad_norm": 1.5756060982683149, |
| "learning_rate": 4.463015774457137e-06, |
| "loss": 0.082, |
| "step": 2337 |
| }, |
| { |
| "epoch": 1.0636942675159236, |
| "grad_norm": 2.6744844011663917, |
| "learning_rate": 4.462573159737113e-06, |
| "loss": 0.1212, |
| "step": 2338 |
| }, |
| { |
| "epoch": 1.0641492265696086, |
| "grad_norm": 1.2563398274616853, |
| "learning_rate": 4.462130384644472e-06, |
| "loss": 0.0768, |
| "step": 2339 |
| }, |
| { |
| "epoch": 1.064604185623294, |
| "grad_norm": 1.8057420294279858, |
| "learning_rate": 4.461687449215397e-06, |
| "loss": 0.1099, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.065059144676979, |
| "grad_norm": 1.6208315079433049, |
| "learning_rate": 4.4612443534860826e-06, |
| "loss": 0.1144, |
| "step": 2341 |
| }, |
| { |
| "epoch": 1.0655141037306644, |
| "grad_norm": 1.9711864344243992, |
| "learning_rate": 4.460801097492737e-06, |
| "loss": 0.0856, |
| "step": 2342 |
| }, |
| { |
| "epoch": 1.0659690627843494, |
| "grad_norm": 1.3323713152755212, |
| "learning_rate": 4.460357681271579e-06, |
| "loss": 0.0715, |
| "step": 2343 |
| }, |
| { |
| "epoch": 1.0664240218380345, |
| "grad_norm": 1.6353594143577714, |
| "learning_rate": 4.4599141048588454e-06, |
| "loss": 0.111, |
| "step": 2344 |
| }, |
| { |
| "epoch": 1.0668789808917198, |
| "grad_norm": 1.921680218643112, |
| "learning_rate": 4.4594703682907825e-06, |
| "loss": 0.1084, |
| "step": 2345 |
| }, |
| { |
| "epoch": 1.067333939945405, |
| "grad_norm": 1.6583549389810224, |
| "learning_rate": 4.459026471603649e-06, |
| "loss": 0.1051, |
| "step": 2346 |
| }, |
| { |
| "epoch": 1.06778889899909, |
| "grad_norm": 1.7686266077660249, |
| "learning_rate": 4.45858241483372e-06, |
| "loss": 0.1108, |
| "step": 2347 |
| }, |
| { |
| "epoch": 1.0682438580527753, |
| "grad_norm": 1.2657212497494363, |
| "learning_rate": 4.458138198017281e-06, |
| "loss": 0.0775, |
| "step": 2348 |
| }, |
| { |
| "epoch": 1.0686988171064604, |
| "grad_norm": 1.294854322669401, |
| "learning_rate": 4.457693821190631e-06, |
| "loss": 0.0991, |
| "step": 2349 |
| }, |
| { |
| "epoch": 1.0691537761601455, |
| "grad_norm": 1.6787540486710895, |
| "learning_rate": 4.4572492843900815e-06, |
| "loss": 0.1061, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.0696087352138308, |
| "grad_norm": 1.2916611688046353, |
| "learning_rate": 4.456804587651961e-06, |
| "loss": 0.0997, |
| "step": 2351 |
| }, |
| { |
| "epoch": 1.070063694267516, |
| "grad_norm": 1.1797535857178234, |
| "learning_rate": 4.456359731012606e-06, |
| "loss": 0.1019, |
| "step": 2352 |
| }, |
| { |
| "epoch": 1.070518653321201, |
| "grad_norm": 1.4074451049825587, |
| "learning_rate": 4.455914714508369e-06, |
| "loss": 0.0639, |
| "step": 2353 |
| }, |
| { |
| "epoch": 1.0709736123748863, |
| "grad_norm": 0.7791870489522308, |
| "learning_rate": 4.455469538175614e-06, |
| "loss": 0.0293, |
| "step": 2354 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 1.3432260603887558, |
| "learning_rate": 4.455024202050719e-06, |
| "loss": 0.086, |
| "step": 2355 |
| }, |
| { |
| "epoch": 1.0718835304822565, |
| "grad_norm": 1.4625155799519551, |
| "learning_rate": 4.454578706170075e-06, |
| "loss": 0.0726, |
| "step": 2356 |
| }, |
| { |
| "epoch": 1.0723384895359418, |
| "grad_norm": 1.9522119831099414, |
| "learning_rate": 4.454133050570087e-06, |
| "loss": 0.0687, |
| "step": 2357 |
| }, |
| { |
| "epoch": 1.0727934485896269, |
| "grad_norm": 1.561587548295498, |
| "learning_rate": 4.453687235287169e-06, |
| "loss": 0.133, |
| "step": 2358 |
| }, |
| { |
| "epoch": 1.0732484076433122, |
| "grad_norm": 1.2057828723386872, |
| "learning_rate": 4.453241260357754e-06, |
| "loss": 0.0913, |
| "step": 2359 |
| }, |
| { |
| "epoch": 1.0737033666969973, |
| "grad_norm": 1.666054721084408, |
| "learning_rate": 4.452795125818283e-06, |
| "loss": 0.0971, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.0741583257506824, |
| "grad_norm": 1.758685408172953, |
| "learning_rate": 4.4523488317052146e-06, |
| "loss": 0.1075, |
| "step": 2361 |
| }, |
| { |
| "epoch": 1.0746132848043677, |
| "grad_norm": 1.105397570856634, |
| "learning_rate": 4.451902378055015e-06, |
| "loss": 0.0573, |
| "step": 2362 |
| }, |
| { |
| "epoch": 1.0750682438580528, |
| "grad_norm": 1.192901271256021, |
| "learning_rate": 4.451455764904169e-06, |
| "loss": 0.0809, |
| "step": 2363 |
| }, |
| { |
| "epoch": 1.0755232029117379, |
| "grad_norm": 1.819087657943071, |
| "learning_rate": 4.45100899228917e-06, |
| "loss": 0.0997, |
| "step": 2364 |
| }, |
| { |
| "epoch": 1.0759781619654232, |
| "grad_norm": 1.3969388862666674, |
| "learning_rate": 4.4505620602465275e-06, |
| "loss": 0.0601, |
| "step": 2365 |
| }, |
| { |
| "epoch": 1.0764331210191083, |
| "grad_norm": 2.1004515911969937, |
| "learning_rate": 4.450114968812761e-06, |
| "loss": 0.1059, |
| "step": 2366 |
| }, |
| { |
| "epoch": 1.0768880800727934, |
| "grad_norm": 1.3898874863369548, |
| "learning_rate": 4.449667718024406e-06, |
| "loss": 0.1217, |
| "step": 2367 |
| }, |
| { |
| "epoch": 1.0773430391264787, |
| "grad_norm": 1.624148028385408, |
| "learning_rate": 4.449220307918011e-06, |
| "loss": 0.1426, |
| "step": 2368 |
| }, |
| { |
| "epoch": 1.0777979981801638, |
| "grad_norm": 1.3957158550214264, |
| "learning_rate": 4.448772738530134e-06, |
| "loss": 0.065, |
| "step": 2369 |
| }, |
| { |
| "epoch": 1.078252957233849, |
| "grad_norm": 1.2170939851594698, |
| "learning_rate": 4.44832500989735e-06, |
| "loss": 0.0431, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.0787079162875342, |
| "grad_norm": 1.4145038782998978, |
| "learning_rate": 4.447877122056243e-06, |
| "loss": 0.0672, |
| "step": 2371 |
| }, |
| { |
| "epoch": 1.0791628753412192, |
| "grad_norm": 1.6983412550072923, |
| "learning_rate": 4.447429075043416e-06, |
| "loss": 0.0645, |
| "step": 2372 |
| }, |
| { |
| "epoch": 1.0796178343949046, |
| "grad_norm": 1.9437215682706028, |
| "learning_rate": 4.4469808688954786e-06, |
| "loss": 0.0798, |
| "step": 2373 |
| }, |
| { |
| "epoch": 1.0800727934485896, |
| "grad_norm": 1.3885506691120681, |
| "learning_rate": 4.446532503649058e-06, |
| "loss": 0.1103, |
| "step": 2374 |
| }, |
| { |
| "epoch": 1.0805277525022747, |
| "grad_norm": 1.3760694731918508, |
| "learning_rate": 4.44608397934079e-06, |
| "loss": 0.0658, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.08098271155596, |
| "grad_norm": 1.4014742842676748, |
| "learning_rate": 4.445635296007329e-06, |
| "loss": 0.0777, |
| "step": 2376 |
| }, |
| { |
| "epoch": 1.0814376706096451, |
| "grad_norm": 1.5083231204611136, |
| "learning_rate": 4.445186453685339e-06, |
| "loss": 0.0765, |
| "step": 2377 |
| }, |
| { |
| "epoch": 1.0818926296633302, |
| "grad_norm": 2.31100453638565, |
| "learning_rate": 4.444737452411494e-06, |
| "loss": 0.1285, |
| "step": 2378 |
| }, |
| { |
| "epoch": 1.0823475887170155, |
| "grad_norm": 2.400477978408628, |
| "learning_rate": 4.444288292222488e-06, |
| "loss": 0.1032, |
| "step": 2379 |
| }, |
| { |
| "epoch": 1.0828025477707006, |
| "grad_norm": 1.2288090886103258, |
| "learning_rate": 4.443838973155023e-06, |
| "loss": 0.0732, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.0832575068243857, |
| "grad_norm": 1.7401608518222071, |
| "learning_rate": 4.443389495245816e-06, |
| "loss": 0.1038, |
| "step": 2381 |
| }, |
| { |
| "epoch": 1.083712465878071, |
| "grad_norm": 1.0676718989217244, |
| "learning_rate": 4.442939858531594e-06, |
| "loss": 0.0977, |
| "step": 2382 |
| }, |
| { |
| "epoch": 1.084167424931756, |
| "grad_norm": 2.16417029576833, |
| "learning_rate": 4.442490063049103e-06, |
| "loss": 0.1247, |
| "step": 2383 |
| }, |
| { |
| "epoch": 1.0846223839854412, |
| "grad_norm": 1.7397604358649068, |
| "learning_rate": 4.442040108835095e-06, |
| "loss": 0.0734, |
| "step": 2384 |
| }, |
| { |
| "epoch": 1.0850773430391265, |
| "grad_norm": 1.3344372550818824, |
| "learning_rate": 4.44158999592634e-06, |
| "loss": 0.0738, |
| "step": 2385 |
| }, |
| { |
| "epoch": 1.0855323020928116, |
| "grad_norm": 1.464102086807412, |
| "learning_rate": 4.441139724359617e-06, |
| "loss": 0.069, |
| "step": 2386 |
| }, |
| { |
| "epoch": 1.085987261146497, |
| "grad_norm": 1.2702083100987853, |
| "learning_rate": 4.440689294171724e-06, |
| "loss": 0.0731, |
| "step": 2387 |
| }, |
| { |
| "epoch": 1.086442220200182, |
| "grad_norm": 1.7208341236115763, |
| "learning_rate": 4.440238705399465e-06, |
| "loss": 0.0894, |
| "step": 2388 |
| }, |
| { |
| "epoch": 1.086897179253867, |
| "grad_norm": 1.717461266806642, |
| "learning_rate": 4.439787958079662e-06, |
| "loss": 0.0913, |
| "step": 2389 |
| }, |
| { |
| "epoch": 1.0873521383075524, |
| "grad_norm": 1.5936201417077822, |
| "learning_rate": 4.439337052249146e-06, |
| "loss": 0.0853, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.0878070973612375, |
| "grad_norm": 1.5280204524637513, |
| "learning_rate": 4.4388859879447645e-06, |
| "loss": 0.0725, |
| "step": 2391 |
| }, |
| { |
| "epoch": 1.0882620564149226, |
| "grad_norm": 1.7709159752994665, |
| "learning_rate": 4.438434765203376e-06, |
| "loss": 0.1374, |
| "step": 2392 |
| }, |
| { |
| "epoch": 1.0887170154686079, |
| "grad_norm": 1.7267099736271705, |
| "learning_rate": 4.4379833840618524e-06, |
| "loss": 0.1174, |
| "step": 2393 |
| }, |
| { |
| "epoch": 1.089171974522293, |
| "grad_norm": 1.4910726524631923, |
| "learning_rate": 4.4375318445570785e-06, |
| "loss": 0.0655, |
| "step": 2394 |
| }, |
| { |
| "epoch": 1.089626933575978, |
| "grad_norm": 1.8163886098625441, |
| "learning_rate": 4.437080146725951e-06, |
| "loss": 0.0546, |
| "step": 2395 |
| }, |
| { |
| "epoch": 1.0900818926296634, |
| "grad_norm": 1.2219692369480206, |
| "learning_rate": 4.436628290605384e-06, |
| "loss": 0.0672, |
| "step": 2396 |
| }, |
| { |
| "epoch": 1.0905368516833485, |
| "grad_norm": 1.6116626987809923, |
| "learning_rate": 4.436176276232297e-06, |
| "loss": 0.1028, |
| "step": 2397 |
| }, |
| { |
| "epoch": 1.0909918107370338, |
| "grad_norm": 2.3052452656431255, |
| "learning_rate": 4.4357241036436294e-06, |
| "loss": 0.0939, |
| "step": 2398 |
| }, |
| { |
| "epoch": 1.0914467697907189, |
| "grad_norm": 0.9223535743607304, |
| "learning_rate": 4.435271772876329e-06, |
| "loss": 0.0689, |
| "step": 2399 |
| }, |
| { |
| "epoch": 1.091901728844404, |
| "grad_norm": 1.531866494757431, |
| "learning_rate": 4.434819283967359e-06, |
| "loss": 0.1145, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.0923566878980893, |
| "grad_norm": 2.012408668977357, |
| "learning_rate": 4.434366636953695e-06, |
| "loss": 0.0655, |
| "step": 2401 |
| }, |
| { |
| "epoch": 1.0928116469517744, |
| "grad_norm": 1.4296585397558859, |
| "learning_rate": 4.433913831872324e-06, |
| "loss": 0.0663, |
| "step": 2402 |
| }, |
| { |
| "epoch": 1.0932666060054594, |
| "grad_norm": 1.5463695757532308, |
| "learning_rate": 4.43346086876025e-06, |
| "loss": 0.1785, |
| "step": 2403 |
| }, |
| { |
| "epoch": 1.0937215650591448, |
| "grad_norm": 2.2667173046164253, |
| "learning_rate": 4.433007747654484e-06, |
| "loss": 0.0963, |
| "step": 2404 |
| }, |
| { |
| "epoch": 1.0941765241128298, |
| "grad_norm": 1.7874869125348338, |
| "learning_rate": 4.432554468592054e-06, |
| "loss": 0.1245, |
| "step": 2405 |
| }, |
| { |
| "epoch": 1.094631483166515, |
| "grad_norm": 2.0669862144476387, |
| "learning_rate": 4.432101031610001e-06, |
| "loss": 0.1237, |
| "step": 2406 |
| }, |
| { |
| "epoch": 1.0950864422202002, |
| "grad_norm": 1.6979511768981763, |
| "learning_rate": 4.431647436745376e-06, |
| "loss": 0.0888, |
| "step": 2407 |
| }, |
| { |
| "epoch": 1.0955414012738853, |
| "grad_norm": 1.9257787054792377, |
| "learning_rate": 4.431193684035246e-06, |
| "loss": 0.0816, |
| "step": 2408 |
| }, |
| { |
| "epoch": 1.0959963603275704, |
| "grad_norm": 1.541493056259052, |
| "learning_rate": 4.43073977351669e-06, |
| "loss": 0.0766, |
| "step": 2409 |
| }, |
| { |
| "epoch": 1.0964513193812557, |
| "grad_norm": 2.051380197110344, |
| "learning_rate": 4.430285705226799e-06, |
| "loss": 0.0692, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.0969062784349408, |
| "grad_norm": 1.574334878171295, |
| "learning_rate": 4.429831479202676e-06, |
| "loss": 0.0867, |
| "step": 2411 |
| }, |
| { |
| "epoch": 1.097361237488626, |
| "grad_norm": 1.297944277206769, |
| "learning_rate": 4.429377095481441e-06, |
| "loss": 0.0729, |
| "step": 2412 |
| }, |
| { |
| "epoch": 1.0978161965423112, |
| "grad_norm": 1.4644868521714023, |
| "learning_rate": 4.428922554100221e-06, |
| "loss": 0.1372, |
| "step": 2413 |
| }, |
| { |
| "epoch": 1.0982711555959963, |
| "grad_norm": 1.1220705548281613, |
| "learning_rate": 4.428467855096163e-06, |
| "loss": 0.0775, |
| "step": 2414 |
| }, |
| { |
| "epoch": 1.0987261146496816, |
| "grad_norm": 2.3884661536435043, |
| "learning_rate": 4.428012998506419e-06, |
| "loss": 0.0783, |
| "step": 2415 |
| }, |
| { |
| "epoch": 1.0991810737033667, |
| "grad_norm": 1.3934936655417303, |
| "learning_rate": 4.42755798436816e-06, |
| "loss": 0.0993, |
| "step": 2416 |
| }, |
| { |
| "epoch": 1.0996360327570518, |
| "grad_norm": 1.7787119321180418, |
| "learning_rate": 4.427102812718568e-06, |
| "loss": 0.0923, |
| "step": 2417 |
| }, |
| { |
| "epoch": 1.100090991810737, |
| "grad_norm": 2.0287950182704018, |
| "learning_rate": 4.426647483594836e-06, |
| "loss": 0.1214, |
| "step": 2418 |
| }, |
| { |
| "epoch": 1.1005459508644222, |
| "grad_norm": 1.2227878126042278, |
| "learning_rate": 4.4261919970341724e-06, |
| "loss": 0.109, |
| "step": 2419 |
| }, |
| { |
| "epoch": 1.1010009099181073, |
| "grad_norm": 1.4547250907863465, |
| "learning_rate": 4.425736353073798e-06, |
| "loss": 0.0639, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.1014558689717926, |
| "grad_norm": 1.361745944169816, |
| "learning_rate": 4.425280551750945e-06, |
| "loss": 0.0779, |
| "step": 2421 |
| }, |
| { |
| "epoch": 1.1019108280254777, |
| "grad_norm": 1.4312448198815029, |
| "learning_rate": 4.42482459310286e-06, |
| "loss": 0.097, |
| "step": 2422 |
| }, |
| { |
| "epoch": 1.1023657870791628, |
| "grad_norm": 1.5917118093221942, |
| "learning_rate": 4.424368477166801e-06, |
| "loss": 0.0981, |
| "step": 2423 |
| }, |
| { |
| "epoch": 1.102820746132848, |
| "grad_norm": 1.4650250955165152, |
| "learning_rate": 4.423912203980041e-06, |
| "loss": 0.114, |
| "step": 2424 |
| }, |
| { |
| "epoch": 1.1032757051865332, |
| "grad_norm": 1.6849750447492673, |
| "learning_rate": 4.423455773579865e-06, |
| "loss": 0.072, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.1037306642402185, |
| "grad_norm": 1.678029572619772, |
| "learning_rate": 4.422999186003568e-06, |
| "loss": 0.0943, |
| "step": 2426 |
| }, |
| { |
| "epoch": 1.1041856232939036, |
| "grad_norm": 1.1098076423379506, |
| "learning_rate": 4.422542441288462e-06, |
| "loss": 0.0731, |
| "step": 2427 |
| }, |
| { |
| "epoch": 1.1046405823475887, |
| "grad_norm": 1.4743567185549873, |
| "learning_rate": 4.42208553947187e-06, |
| "loss": 0.109, |
| "step": 2428 |
| }, |
| { |
| "epoch": 1.105095541401274, |
| "grad_norm": 1.3759474671598095, |
| "learning_rate": 4.4216284805911275e-06, |
| "loss": 0.0924, |
| "step": 2429 |
| }, |
| { |
| "epoch": 1.105550500454959, |
| "grad_norm": 2.0527322032275794, |
| "learning_rate": 4.421171264683584e-06, |
| "loss": 0.106, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.1060054595086442, |
| "grad_norm": 1.664729158421169, |
| "learning_rate": 4.4207138917866e-06, |
| "loss": 0.1339, |
| "step": 2431 |
| }, |
| { |
| "epoch": 1.1064604185623295, |
| "grad_norm": 1.8178200019923791, |
| "learning_rate": 4.420256361937551e-06, |
| "loss": 0.093, |
| "step": 2432 |
| }, |
| { |
| "epoch": 1.1069153776160146, |
| "grad_norm": 1.1183446921626512, |
| "learning_rate": 4.419798675173824e-06, |
| "loss": 0.0646, |
| "step": 2433 |
| }, |
| { |
| "epoch": 1.1073703366696996, |
| "grad_norm": 1.3726858689513264, |
| "learning_rate": 4.419340831532819e-06, |
| "loss": 0.0813, |
| "step": 2434 |
| }, |
| { |
| "epoch": 1.107825295723385, |
| "grad_norm": 1.3403945446236318, |
| "learning_rate": 4.418882831051949e-06, |
| "loss": 0.0754, |
| "step": 2435 |
| }, |
| { |
| "epoch": 1.10828025477707, |
| "grad_norm": 1.6141383424379385, |
| "learning_rate": 4.418424673768639e-06, |
| "loss": 0.0661, |
| "step": 2436 |
| }, |
| { |
| "epoch": 1.1087352138307551, |
| "grad_norm": 1.0940032242798146, |
| "learning_rate": 4.417966359720329e-06, |
| "loss": 0.0318, |
| "step": 2437 |
| }, |
| { |
| "epoch": 1.1091901728844404, |
| "grad_norm": 1.3623311010378927, |
| "learning_rate": 4.417507888944469e-06, |
| "loss": 0.0637, |
| "step": 2438 |
| }, |
| { |
| "epoch": 1.1096451319381255, |
| "grad_norm": 2.141865035990428, |
| "learning_rate": 4.417049261478525e-06, |
| "loss": 0.1037, |
| "step": 2439 |
| }, |
| { |
| "epoch": 1.1101000909918108, |
| "grad_norm": 1.420497893607898, |
| "learning_rate": 4.416590477359971e-06, |
| "loss": 0.0564, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.110555050045496, |
| "grad_norm": 1.2732829960352239, |
| "learning_rate": 4.416131536626299e-06, |
| "loss": 0.1076, |
| "step": 2441 |
| }, |
| { |
| "epoch": 1.111010009099181, |
| "grad_norm": 1.4336397689648444, |
| "learning_rate": 4.415672439315011e-06, |
| "loss": 0.1066, |
| "step": 2442 |
| }, |
| { |
| "epoch": 1.1114649681528663, |
| "grad_norm": 1.0286658142783538, |
| "learning_rate": 4.415213185463623e-06, |
| "loss": 0.0992, |
| "step": 2443 |
| }, |
| { |
| "epoch": 1.1119199272065514, |
| "grad_norm": 1.5137672717842037, |
| "learning_rate": 4.414753775109661e-06, |
| "loss": 0.0474, |
| "step": 2444 |
| }, |
| { |
| "epoch": 1.1123748862602365, |
| "grad_norm": 1.7400780554313313, |
| "learning_rate": 4.414294208290669e-06, |
| "loss": 0.1138, |
| "step": 2445 |
| }, |
| { |
| "epoch": 1.1128298453139218, |
| "grad_norm": 1.644624340954533, |
| "learning_rate": 4.413834485044199e-06, |
| "loss": 0.08, |
| "step": 2446 |
| }, |
| { |
| "epoch": 1.113284804367607, |
| "grad_norm": 1.4630415788998294, |
| "learning_rate": 4.413374605407817e-06, |
| "loss": 0.0523, |
| "step": 2447 |
| }, |
| { |
| "epoch": 1.113739763421292, |
| "grad_norm": 1.8356228780285462, |
| "learning_rate": 4.412914569419103e-06, |
| "loss": 0.0811, |
| "step": 2448 |
| }, |
| { |
| "epoch": 1.1141947224749773, |
| "grad_norm": 1.324899907458732, |
| "learning_rate": 4.412454377115649e-06, |
| "loss": 0.0888, |
| "step": 2449 |
| }, |
| { |
| "epoch": 1.1146496815286624, |
| "grad_norm": 1.4895058777507912, |
| "learning_rate": 4.411994028535061e-06, |
| "loss": 0.1094, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.1151046405823477, |
| "grad_norm": 1.6376764275236961, |
| "learning_rate": 4.411533523714954e-06, |
| "loss": 0.0661, |
| "step": 2451 |
| }, |
| { |
| "epoch": 1.1155595996360328, |
| "grad_norm": 1.3175933666660855, |
| "learning_rate": 4.41107286269296e-06, |
| "loss": 0.0832, |
| "step": 2452 |
| }, |
| { |
| "epoch": 1.1160145586897179, |
| "grad_norm": 1.4664317140231247, |
| "learning_rate": 4.410612045506722e-06, |
| "loss": 0.1019, |
| "step": 2453 |
| }, |
| { |
| "epoch": 1.1164695177434032, |
| "grad_norm": 1.697124490095177, |
| "learning_rate": 4.410151072193897e-06, |
| "loss": 0.1164, |
| "step": 2454 |
| }, |
| { |
| "epoch": 1.1169244767970883, |
| "grad_norm": 1.520297101782584, |
| "learning_rate": 4.409689942792152e-06, |
| "loss": 0.0824, |
| "step": 2455 |
| }, |
| { |
| "epoch": 1.1173794358507734, |
| "grad_norm": 1.693914191969565, |
| "learning_rate": 4.409228657339168e-06, |
| "loss": 0.13, |
| "step": 2456 |
| }, |
| { |
| "epoch": 1.1178343949044587, |
| "grad_norm": 2.024825308244833, |
| "learning_rate": 4.4087672158726415e-06, |
| "loss": 0.0874, |
| "step": 2457 |
| }, |
| { |
| "epoch": 1.1182893539581438, |
| "grad_norm": 1.6218817682748383, |
| "learning_rate": 4.408305618430277e-06, |
| "loss": 0.0877, |
| "step": 2458 |
| }, |
| { |
| "epoch": 1.1187443130118289, |
| "grad_norm": 2.1554598427149054, |
| "learning_rate": 4.407843865049797e-06, |
| "loss": 0.0932, |
| "step": 2459 |
| }, |
| { |
| "epoch": 1.1191992720655142, |
| "grad_norm": 1.711228616600094, |
| "learning_rate": 4.40738195576893e-06, |
| "loss": 0.064, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.1196542311191993, |
| "grad_norm": 1.8471856875898178, |
| "learning_rate": 4.406919890625424e-06, |
| "loss": 0.0987, |
| "step": 2461 |
| }, |
| { |
| "epoch": 1.1201091901728844, |
| "grad_norm": 1.1003500159856345, |
| "learning_rate": 4.406457669657036e-06, |
| "loss": 0.0759, |
| "step": 2462 |
| }, |
| { |
| "epoch": 1.1205641492265697, |
| "grad_norm": 2.109594577114758, |
| "learning_rate": 4.405995292901537e-06, |
| "loss": 0.0942, |
| "step": 2463 |
| }, |
| { |
| "epoch": 1.1210191082802548, |
| "grad_norm": 1.8182386073569805, |
| "learning_rate": 4.40553276039671e-06, |
| "loss": 0.1389, |
| "step": 2464 |
| }, |
| { |
| "epoch": 1.1214740673339398, |
| "grad_norm": 1.4379586293025806, |
| "learning_rate": 4.4050700721803505e-06, |
| "loss": 0.099, |
| "step": 2465 |
| }, |
| { |
| "epoch": 1.1219290263876252, |
| "grad_norm": 1.4425166537042247, |
| "learning_rate": 4.404607228290269e-06, |
| "loss": 0.0861, |
| "step": 2466 |
| }, |
| { |
| "epoch": 1.1223839854413102, |
| "grad_norm": 1.4093172987847846, |
| "learning_rate": 4.404144228764285e-06, |
| "loss": 0.0621, |
| "step": 2467 |
| }, |
| { |
| "epoch": 1.1228389444949956, |
| "grad_norm": 1.8641838091648237, |
| "learning_rate": 4.403681073640235e-06, |
| "loss": 0.1364, |
| "step": 2468 |
| }, |
| { |
| "epoch": 1.1232939035486806, |
| "grad_norm": 1.4149844792642807, |
| "learning_rate": 4.403217762955963e-06, |
| "loss": 0.0738, |
| "step": 2469 |
| }, |
| { |
| "epoch": 1.1237488626023657, |
| "grad_norm": 1.167003064546788, |
| "learning_rate": 4.402754296749331e-06, |
| "loss": 0.1399, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.124203821656051, |
| "grad_norm": 1.3706100775947843, |
| "learning_rate": 4.402290675058211e-06, |
| "loss": 0.0743, |
| "step": 2471 |
| }, |
| { |
| "epoch": 1.1246587807097361, |
| "grad_norm": 1.3145920684357588, |
| "learning_rate": 4.401826897920487e-06, |
| "loss": 0.1099, |
| "step": 2472 |
| }, |
| { |
| "epoch": 1.1251137397634212, |
| "grad_norm": 1.5982593223467985, |
| "learning_rate": 4.4013629653740575e-06, |
| "loss": 0.0645, |
| "step": 2473 |
| }, |
| { |
| "epoch": 1.1255686988171065, |
| "grad_norm": 1.652131477085118, |
| "learning_rate": 4.400898877456833e-06, |
| "loss": 0.1091, |
| "step": 2474 |
| }, |
| { |
| "epoch": 1.1260236578707916, |
| "grad_norm": 1.1449819643243202, |
| "learning_rate": 4.400434634206737e-06, |
| "loss": 0.068, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.1264786169244767, |
| "grad_norm": 1.144310552102497, |
| "learning_rate": 4.399970235661705e-06, |
| "loss": 0.0685, |
| "step": 2476 |
| }, |
| { |
| "epoch": 1.126933575978162, |
| "grad_norm": 1.2448262081573807, |
| "learning_rate": 4.399505681859685e-06, |
| "loss": 0.0932, |
| "step": 2477 |
| }, |
| { |
| "epoch": 1.127388535031847, |
| "grad_norm": 1.1408663298803172, |
| "learning_rate": 4.399040972838639e-06, |
| "loss": 0.0423, |
| "step": 2478 |
| }, |
| { |
| "epoch": 1.1278434940855324, |
| "grad_norm": 1.699409897859247, |
| "learning_rate": 4.398576108636541e-06, |
| "loss": 0.0787, |
| "step": 2479 |
| }, |
| { |
| "epoch": 1.1282984531392175, |
| "grad_norm": 1.7864933002408017, |
| "learning_rate": 4.398111089291378e-06, |
| "loss": 0.0892, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.1287534121929026, |
| "grad_norm": 2.14798840196358, |
| "learning_rate": 4.3976459148411464e-06, |
| "loss": 0.1009, |
| "step": 2481 |
| }, |
| { |
| "epoch": 1.129208371246588, |
| "grad_norm": 1.5385879391737598, |
| "learning_rate": 4.3971805853238616e-06, |
| "loss": 0.081, |
| "step": 2482 |
| }, |
| { |
| "epoch": 1.129663330300273, |
| "grad_norm": 2.531930467512664, |
| "learning_rate": 4.396715100777547e-06, |
| "loss": 0.0686, |
| "step": 2483 |
| }, |
| { |
| "epoch": 1.130118289353958, |
| "grad_norm": 1.8968573987064818, |
| "learning_rate": 4.39624946124024e-06, |
| "loss": 0.1027, |
| "step": 2484 |
| }, |
| { |
| "epoch": 1.1305732484076434, |
| "grad_norm": 1.5129833288445977, |
| "learning_rate": 4.39578366674999e-06, |
| "loss": 0.072, |
| "step": 2485 |
| }, |
| { |
| "epoch": 1.1310282074613285, |
| "grad_norm": 1.4623536249588729, |
| "learning_rate": 4.395317717344861e-06, |
| "loss": 0.0924, |
| "step": 2486 |
| }, |
| { |
| "epoch": 1.1314831665150136, |
| "grad_norm": 1.9901397225611637, |
| "learning_rate": 4.394851613062927e-06, |
| "loss": 0.0852, |
| "step": 2487 |
| }, |
| { |
| "epoch": 1.1319381255686989, |
| "grad_norm": 1.3624251358159498, |
| "learning_rate": 4.394385353942275e-06, |
| "loss": 0.0543, |
| "step": 2488 |
| }, |
| { |
| "epoch": 1.132393084622384, |
| "grad_norm": 2.097016286942742, |
| "learning_rate": 4.393918940021008e-06, |
| "loss": 0.1261, |
| "step": 2489 |
| }, |
| { |
| "epoch": 1.132848043676069, |
| "grad_norm": 1.7568839339292304, |
| "learning_rate": 4.393452371337238e-06, |
| "loss": 0.0754, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.1333030027297544, |
| "grad_norm": 1.4870006844681243, |
| "learning_rate": 4.39298564792909e-06, |
| "loss": 0.0765, |
| "step": 2491 |
| }, |
| { |
| "epoch": 1.1337579617834395, |
| "grad_norm": 2.3747689669640204, |
| "learning_rate": 4.392518769834705e-06, |
| "loss": 0.1088, |
| "step": 2492 |
| }, |
| { |
| "epoch": 1.1342129208371245, |
| "grad_norm": 1.8391194648070115, |
| "learning_rate": 4.392051737092231e-06, |
| "loss": 0.1038, |
| "step": 2493 |
| }, |
| { |
| "epoch": 1.1346678798908099, |
| "grad_norm": 1.3181948862231594, |
| "learning_rate": 4.391584549739834e-06, |
| "loss": 0.0953, |
| "step": 2494 |
| }, |
| { |
| "epoch": 1.135122838944495, |
| "grad_norm": 1.768253423337537, |
| "learning_rate": 4.391117207815691e-06, |
| "loss": 0.0861, |
| "step": 2495 |
| }, |
| { |
| "epoch": 1.1355777979981803, |
| "grad_norm": 1.7733681614801209, |
| "learning_rate": 4.3906497113579895e-06, |
| "loss": 0.0869, |
| "step": 2496 |
| }, |
| { |
| "epoch": 1.1360327570518653, |
| "grad_norm": 1.7107321819304122, |
| "learning_rate": 4.390182060404931e-06, |
| "loss": 0.0522, |
| "step": 2497 |
| }, |
| { |
| "epoch": 1.1364877161055504, |
| "grad_norm": 1.434552421646011, |
| "learning_rate": 4.389714254994732e-06, |
| "loss": 0.0846, |
| "step": 2498 |
| }, |
| { |
| "epoch": 1.1369426751592357, |
| "grad_norm": 1.5226850377251067, |
| "learning_rate": 4.389246295165617e-06, |
| "loss": 0.083, |
| "step": 2499 |
| }, |
| { |
| "epoch": 1.1373976342129208, |
| "grad_norm": 1.1587798025261624, |
| "learning_rate": 4.388778180955826e-06, |
| "loss": 0.0715, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.137852593266606, |
| "grad_norm": 2.2145425207872735, |
| "learning_rate": 4.388309912403612e-06, |
| "loss": 0.126, |
| "step": 2501 |
| }, |
| { |
| "epoch": 1.1383075523202912, |
| "grad_norm": 1.860918476304708, |
| "learning_rate": 4.38784148954724e-06, |
| "loss": 0.0825, |
| "step": 2502 |
| }, |
| { |
| "epoch": 1.1387625113739763, |
| "grad_norm": 1.5494754816427427, |
| "learning_rate": 4.387372912424987e-06, |
| "loss": 0.0664, |
| "step": 2503 |
| }, |
| { |
| "epoch": 1.1392174704276614, |
| "grad_norm": 1.4756280948745337, |
| "learning_rate": 4.386904181075142e-06, |
| "loss": 0.1292, |
| "step": 2504 |
| }, |
| { |
| "epoch": 1.1396724294813467, |
| "grad_norm": 1.4970335285969478, |
| "learning_rate": 4.386435295536008e-06, |
| "loss": 0.0617, |
| "step": 2505 |
| }, |
| { |
| "epoch": 1.1401273885350318, |
| "grad_norm": 1.3926364015804897, |
| "learning_rate": 4.385966255845902e-06, |
| "loss": 0.0978, |
| "step": 2506 |
| }, |
| { |
| "epoch": 1.1405823475887171, |
| "grad_norm": 1.392316755067547, |
| "learning_rate": 4.38549706204315e-06, |
| "loss": 0.1051, |
| "step": 2507 |
| }, |
| { |
| "epoch": 1.1410373066424022, |
| "grad_norm": 1.337875750299131, |
| "learning_rate": 4.385027714166094e-06, |
| "loss": 0.0818, |
| "step": 2508 |
| }, |
| { |
| "epoch": 1.1414922656960873, |
| "grad_norm": 1.7636561267412383, |
| "learning_rate": 4.384558212253084e-06, |
| "loss": 0.058, |
| "step": 2509 |
| }, |
| { |
| "epoch": 1.1419472247497726, |
| "grad_norm": 1.4667430941313127, |
| "learning_rate": 4.384088556342488e-06, |
| "loss": 0.0757, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.1424021838034577, |
| "grad_norm": 1.4237110238919748, |
| "learning_rate": 4.383618746472686e-06, |
| "loss": 0.0769, |
| "step": 2511 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 1.5730790632789893, |
| "learning_rate": 4.383148782682064e-06, |
| "loss": 0.0653, |
| "step": 2512 |
| }, |
| { |
| "epoch": 1.143312101910828, |
| "grad_norm": 1.4241196656590642, |
| "learning_rate": 4.382678665009028e-06, |
| "loss": 0.1399, |
| "step": 2513 |
| }, |
| { |
| "epoch": 1.1437670609645132, |
| "grad_norm": 1.343619807338348, |
| "learning_rate": 4.382208393491994e-06, |
| "loss": 0.1179, |
| "step": 2514 |
| }, |
| { |
| "epoch": 1.1442220200181983, |
| "grad_norm": 1.5009441966445611, |
| "learning_rate": 4.381737968169389e-06, |
| "loss": 0.0771, |
| "step": 2515 |
| }, |
| { |
| "epoch": 1.1446769790718836, |
| "grad_norm": 1.986426705123048, |
| "learning_rate": 4.381267389079657e-06, |
| "loss": 0.0701, |
| "step": 2516 |
| }, |
| { |
| "epoch": 1.1451319381255687, |
| "grad_norm": 1.55910702321473, |
| "learning_rate": 4.380796656261248e-06, |
| "loss": 0.0972, |
| "step": 2517 |
| }, |
| { |
| "epoch": 1.1455868971792538, |
| "grad_norm": 1.3317020576259018, |
| "learning_rate": 4.38032576975263e-06, |
| "loss": 0.0611, |
| "step": 2518 |
| }, |
| { |
| "epoch": 1.146041856232939, |
| "grad_norm": 1.2157043472122377, |
| "learning_rate": 4.3798547295922825e-06, |
| "loss": 0.0699, |
| "step": 2519 |
| }, |
| { |
| "epoch": 1.1464968152866242, |
| "grad_norm": 2.724328439334893, |
| "learning_rate": 4.3793835358186955e-06, |
| "loss": 0.0797, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.1469517743403093, |
| "grad_norm": 1.7128126611421937, |
| "learning_rate": 4.378912188470374e-06, |
| "loss": 0.1045, |
| "step": 2521 |
| }, |
| { |
| "epoch": 1.1474067333939946, |
| "grad_norm": 1.4469267749443473, |
| "learning_rate": 4.378440687585832e-06, |
| "loss": 0.0924, |
| "step": 2522 |
| }, |
| { |
| "epoch": 1.1478616924476797, |
| "grad_norm": 1.8130770437623378, |
| "learning_rate": 4.3779690332036005e-06, |
| "loss": 0.1218, |
| "step": 2523 |
| }, |
| { |
| "epoch": 1.148316651501365, |
| "grad_norm": 1.7468548582501024, |
| "learning_rate": 4.3774972253622205e-06, |
| "loss": 0.1111, |
| "step": 2524 |
| }, |
| { |
| "epoch": 1.14877161055505, |
| "grad_norm": 1.4797480492586725, |
| "learning_rate": 4.377025264100246e-06, |
| "loss": 0.0854, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.1492265696087351, |
| "grad_norm": 1.7116967965378072, |
| "learning_rate": 4.376553149456244e-06, |
| "loss": 0.0594, |
| "step": 2526 |
| }, |
| { |
| "epoch": 1.1496815286624205, |
| "grad_norm": 1.643705257307874, |
| "learning_rate": 4.376080881468793e-06, |
| "loss": 0.0696, |
| "step": 2527 |
| }, |
| { |
| "epoch": 1.1501364877161055, |
| "grad_norm": 1.1326114868014416, |
| "learning_rate": 4.375608460176483e-06, |
| "loss": 0.0705, |
| "step": 2528 |
| }, |
| { |
| "epoch": 1.1505914467697906, |
| "grad_norm": 1.7031789207462111, |
| "learning_rate": 4.375135885617922e-06, |
| "loss": 0.0812, |
| "step": 2529 |
| }, |
| { |
| "epoch": 1.151046405823476, |
| "grad_norm": 1.41010135204267, |
| "learning_rate": 4.3746631578317236e-06, |
| "loss": 0.086, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.151501364877161, |
| "grad_norm": 1.6943016984534656, |
| "learning_rate": 4.374190276856517e-06, |
| "loss": 0.0754, |
| "step": 2531 |
| }, |
| { |
| "epoch": 1.1519563239308463, |
| "grad_norm": 2.0617449393261165, |
| "learning_rate": 4.373717242730946e-06, |
| "loss": 0.09, |
| "step": 2532 |
| }, |
| { |
| "epoch": 1.1524112829845314, |
| "grad_norm": 1.7367594980944636, |
| "learning_rate": 4.373244055493663e-06, |
| "loss": 0.0623, |
| "step": 2533 |
| }, |
| { |
| "epoch": 1.1528662420382165, |
| "grad_norm": 1.9342760133428794, |
| "learning_rate": 4.372770715183336e-06, |
| "loss": 0.1147, |
| "step": 2534 |
| }, |
| { |
| "epoch": 1.1533212010919018, |
| "grad_norm": 2.0637174188437255, |
| "learning_rate": 4.372297221838642e-06, |
| "loss": 0.1456, |
| "step": 2535 |
| }, |
| { |
| "epoch": 1.153776160145587, |
| "grad_norm": 1.640815829478928, |
| "learning_rate": 4.3718235754982755e-06, |
| "loss": 0.1097, |
| "step": 2536 |
| }, |
| { |
| "epoch": 1.154231119199272, |
| "grad_norm": 1.4969972221702579, |
| "learning_rate": 4.371349776200939e-06, |
| "loss": 0.1089, |
| "step": 2537 |
| }, |
| { |
| "epoch": 1.1546860782529573, |
| "grad_norm": 1.7453973329666645, |
| "learning_rate": 4.37087582398535e-06, |
| "loss": 0.081, |
| "step": 2538 |
| }, |
| { |
| "epoch": 1.1551410373066424, |
| "grad_norm": 1.3301344902434764, |
| "learning_rate": 4.370401718890237e-06, |
| "loss": 0.0839, |
| "step": 2539 |
| }, |
| { |
| "epoch": 1.1555959963603275, |
| "grad_norm": 1.3726509501801365, |
| "learning_rate": 4.369927460954342e-06, |
| "loss": 0.0757, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.1560509554140128, |
| "grad_norm": 1.7575525897527056, |
| "learning_rate": 4.36945305021642e-06, |
| "loss": 0.0984, |
| "step": 2541 |
| }, |
| { |
| "epoch": 1.156505914467698, |
| "grad_norm": 1.0573468860101436, |
| "learning_rate": 4.368978486715237e-06, |
| "loss": 0.0858, |
| "step": 2542 |
| }, |
| { |
| "epoch": 1.156960873521383, |
| "grad_norm": 1.2811400584279555, |
| "learning_rate": 4.368503770489573e-06, |
| "loss": 0.0956, |
| "step": 2543 |
| }, |
| { |
| "epoch": 1.1574158325750683, |
| "grad_norm": 1.3937719698326214, |
| "learning_rate": 4.368028901578218e-06, |
| "loss": 0.0721, |
| "step": 2544 |
| }, |
| { |
| "epoch": 1.1578707916287534, |
| "grad_norm": 1.3592341439150106, |
| "learning_rate": 4.367553880019977e-06, |
| "loss": 0.072, |
| "step": 2545 |
| }, |
| { |
| "epoch": 1.1583257506824385, |
| "grad_norm": 1.6455271567667071, |
| "learning_rate": 4.367078705853667e-06, |
| "loss": 0.0688, |
| "step": 2546 |
| }, |
| { |
| "epoch": 1.1587807097361238, |
| "grad_norm": 1.6810345974728753, |
| "learning_rate": 4.366603379118117e-06, |
| "loss": 0.1038, |
| "step": 2547 |
| }, |
| { |
| "epoch": 1.1592356687898089, |
| "grad_norm": 1.4578278036788574, |
| "learning_rate": 4.366127899852169e-06, |
| "loss": 0.0865, |
| "step": 2548 |
| }, |
| { |
| "epoch": 1.159690627843494, |
| "grad_norm": 1.3103780377545284, |
| "learning_rate": 4.365652268094675e-06, |
| "loss": 0.0674, |
| "step": 2549 |
| }, |
| { |
| "epoch": 1.1601455868971793, |
| "grad_norm": 1.7957120553998775, |
| "learning_rate": 4.365176483884504e-06, |
| "loss": 0.1312, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.1606005459508644, |
| "grad_norm": 1.6492238946584739, |
| "learning_rate": 4.364700547260533e-06, |
| "loss": 0.0907, |
| "step": 2551 |
| }, |
| { |
| "epoch": 1.1610555050045497, |
| "grad_norm": 1.3864243311454894, |
| "learning_rate": 4.3642244582616545e-06, |
| "loss": 0.0977, |
| "step": 2552 |
| }, |
| { |
| "epoch": 1.1615104640582348, |
| "grad_norm": 1.5321223648985156, |
| "learning_rate": 4.363748216926772e-06, |
| "loss": 0.0975, |
| "step": 2553 |
| }, |
| { |
| "epoch": 1.1619654231119199, |
| "grad_norm": 1.428088888774431, |
| "learning_rate": 4.363271823294802e-06, |
| "loss": 0.1138, |
| "step": 2554 |
| }, |
| { |
| "epoch": 1.1624203821656052, |
| "grad_norm": 1.9030961957887997, |
| "learning_rate": 4.362795277404673e-06, |
| "loss": 0.1121, |
| "step": 2555 |
| }, |
| { |
| "epoch": 1.1628753412192903, |
| "grad_norm": 1.1462755051031488, |
| "learning_rate": 4.362318579295326e-06, |
| "loss": 0.0467, |
| "step": 2556 |
| }, |
| { |
| "epoch": 1.1633303002729753, |
| "grad_norm": 1.4980767963568005, |
| "learning_rate": 4.361841729005715e-06, |
| "loss": 0.1018, |
| "step": 2557 |
| }, |
| { |
| "epoch": 1.1637852593266607, |
| "grad_norm": 2.2145503141446614, |
| "learning_rate": 4.361364726574806e-06, |
| "loss": 0.0853, |
| "step": 2558 |
| }, |
| { |
| "epoch": 1.1642402183803457, |
| "grad_norm": 1.1989117424823872, |
| "learning_rate": 4.360887572041578e-06, |
| "loss": 0.0868, |
| "step": 2559 |
| }, |
| { |
| "epoch": 1.164695177434031, |
| "grad_norm": 1.9066512245156881, |
| "learning_rate": 4.36041026544502e-06, |
| "loss": 0.1471, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.1651501364877161, |
| "grad_norm": 1.428837377276699, |
| "learning_rate": 4.359932806824138e-06, |
| "loss": 0.0718, |
| "step": 2561 |
| }, |
| { |
| "epoch": 1.1656050955414012, |
| "grad_norm": 1.417125208635274, |
| "learning_rate": 4.359455196217946e-06, |
| "loss": 0.0614, |
| "step": 2562 |
| }, |
| { |
| "epoch": 1.1660600545950865, |
| "grad_norm": 1.6663939403921464, |
| "learning_rate": 4.358977433665471e-06, |
| "loss": 0.0586, |
| "step": 2563 |
| }, |
| { |
| "epoch": 1.1665150136487716, |
| "grad_norm": 1.3921354785427886, |
| "learning_rate": 4.3584995192057565e-06, |
| "loss": 0.0691, |
| "step": 2564 |
| }, |
| { |
| "epoch": 1.1669699727024567, |
| "grad_norm": 1.1683109281081594, |
| "learning_rate": 4.358021452877854e-06, |
| "loss": 0.0952, |
| "step": 2565 |
| }, |
| { |
| "epoch": 1.167424931756142, |
| "grad_norm": 1.5985810446894706, |
| "learning_rate": 4.357543234720829e-06, |
| "loss": 0.0771, |
| "step": 2566 |
| }, |
| { |
| "epoch": 1.1678798908098271, |
| "grad_norm": 1.726758001874974, |
| "learning_rate": 4.357064864773761e-06, |
| "loss": 0.0852, |
| "step": 2567 |
| }, |
| { |
| "epoch": 1.1683348498635122, |
| "grad_norm": 1.376146728666042, |
| "learning_rate": 4.3565863430757375e-06, |
| "loss": 0.0816, |
| "step": 2568 |
| }, |
| { |
| "epoch": 1.1687898089171975, |
| "grad_norm": 1.266164839412077, |
| "learning_rate": 4.356107669665862e-06, |
| "loss": 0.095, |
| "step": 2569 |
| }, |
| { |
| "epoch": 1.1692447679708826, |
| "grad_norm": 1.7363433482517434, |
| "learning_rate": 4.355628844583249e-06, |
| "loss": 0.1348, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.1696997270245677, |
| "grad_norm": 1.5900315387927095, |
| "learning_rate": 4.355149867867029e-06, |
| "loss": 0.0785, |
| "step": 2571 |
| }, |
| { |
| "epoch": 1.170154686078253, |
| "grad_norm": 1.7031570854225535, |
| "learning_rate": 4.354670739556338e-06, |
| "loss": 0.0903, |
| "step": 2572 |
| }, |
| { |
| "epoch": 1.170609645131938, |
| "grad_norm": 1.553459320102983, |
| "learning_rate": 4.35419145969033e-06, |
| "loss": 0.0808, |
| "step": 2573 |
| }, |
| { |
| "epoch": 1.1710646041856232, |
| "grad_norm": 1.624748274996521, |
| "learning_rate": 4.35371202830817e-06, |
| "loss": 0.0946, |
| "step": 2574 |
| }, |
| { |
| "epoch": 1.1715195632393085, |
| "grad_norm": 1.998220943026382, |
| "learning_rate": 4.353232445449034e-06, |
| "loss": 0.1007, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.1719745222929936, |
| "grad_norm": 1.3879277679859046, |
| "learning_rate": 4.352752711152112e-06, |
| "loss": 0.0752, |
| "step": 2576 |
| }, |
| { |
| "epoch": 1.1724294813466787, |
| "grad_norm": 2.043253151446217, |
| "learning_rate": 4.352272825456605e-06, |
| "loss": 0.1392, |
| "step": 2577 |
| }, |
| { |
| "epoch": 1.172884440400364, |
| "grad_norm": 1.4430794602564747, |
| "learning_rate": 4.3517927884017275e-06, |
| "loss": 0.1071, |
| "step": 2578 |
| }, |
| { |
| "epoch": 1.173339399454049, |
| "grad_norm": 1.3026567584819855, |
| "learning_rate": 4.351312600026706e-06, |
| "loss": 0.0907, |
| "step": 2579 |
| }, |
| { |
| "epoch": 1.1737943585077344, |
| "grad_norm": 1.4101005705511307, |
| "learning_rate": 4.350832260370779e-06, |
| "loss": 0.1012, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.1742493175614195, |
| "grad_norm": 1.3419121345653944, |
| "learning_rate": 4.350351769473198e-06, |
| "loss": 0.0696, |
| "step": 2581 |
| }, |
| { |
| "epoch": 1.1747042766151046, |
| "grad_norm": 1.350413613603601, |
| "learning_rate": 4.349871127373226e-06, |
| "loss": 0.0917, |
| "step": 2582 |
| }, |
| { |
| "epoch": 1.1751592356687899, |
| "grad_norm": 1.5328058199569599, |
| "learning_rate": 4.349390334110141e-06, |
| "loss": 0.1113, |
| "step": 2583 |
| }, |
| { |
| "epoch": 1.175614194722475, |
| "grad_norm": 1.1093873947356732, |
| "learning_rate": 4.348909389723228e-06, |
| "loss": 0.0659, |
| "step": 2584 |
| }, |
| { |
| "epoch": 1.17606915377616, |
| "grad_norm": 1.6756868000210596, |
| "learning_rate": 4.348428294251791e-06, |
| "loss": 0.0998, |
| "step": 2585 |
| }, |
| { |
| "epoch": 1.1765241128298454, |
| "grad_norm": 1.4020895191217355, |
| "learning_rate": 4.34794704773514e-06, |
| "loss": 0.0756, |
| "step": 2586 |
| }, |
| { |
| "epoch": 1.1769790718835305, |
| "grad_norm": 1.619901575556969, |
| "learning_rate": 4.347465650212602e-06, |
| "loss": 0.1049, |
| "step": 2587 |
| }, |
| { |
| "epoch": 1.1774340309372158, |
| "grad_norm": 1.2820911146358447, |
| "learning_rate": 4.346984101723513e-06, |
| "loss": 0.099, |
| "step": 2588 |
| }, |
| { |
| "epoch": 1.1778889899909009, |
| "grad_norm": 1.5114352969050147, |
| "learning_rate": 4.3465024023072255e-06, |
| "loss": 0.1257, |
| "step": 2589 |
| }, |
| { |
| "epoch": 1.178343949044586, |
| "grad_norm": 1.3539463988206946, |
| "learning_rate": 4.3460205520031006e-06, |
| "loss": 0.0593, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.1787989080982713, |
| "grad_norm": 1.951842216649359, |
| "learning_rate": 4.345538550850512e-06, |
| "loss": 0.1236, |
| "step": 2591 |
| }, |
| { |
| "epoch": 1.1792538671519563, |
| "grad_norm": 1.8285849146657949, |
| "learning_rate": 4.345056398888847e-06, |
| "loss": 0.0928, |
| "step": 2592 |
| }, |
| { |
| "epoch": 1.1797088262056414, |
| "grad_norm": 1.5041066242121004, |
| "learning_rate": 4.3445740961575066e-06, |
| "loss": 0.0687, |
| "step": 2593 |
| }, |
| { |
| "epoch": 1.1801637852593267, |
| "grad_norm": 1.6575747108346124, |
| "learning_rate": 4.3440916426959e-06, |
| "loss": 0.0904, |
| "step": 2594 |
| }, |
| { |
| "epoch": 1.1806187443130118, |
| "grad_norm": 1.3214979838016756, |
| "learning_rate": 4.343609038543452e-06, |
| "loss": 0.0899, |
| "step": 2595 |
| }, |
| { |
| "epoch": 1.181073703366697, |
| "grad_norm": 1.4859231565076656, |
| "learning_rate": 4.3431262837396e-06, |
| "loss": 0.0978, |
| "step": 2596 |
| }, |
| { |
| "epoch": 1.1815286624203822, |
| "grad_norm": 1.6150637319977543, |
| "learning_rate": 4.342643378323791e-06, |
| "loss": 0.0842, |
| "step": 2597 |
| }, |
| { |
| "epoch": 1.1819836214740673, |
| "grad_norm": 1.413038987453138, |
| "learning_rate": 4.342160322335487e-06, |
| "loss": 0.0654, |
| "step": 2598 |
| }, |
| { |
| "epoch": 1.1824385805277524, |
| "grad_norm": 2.182860548460036, |
| "learning_rate": 4.34167711581416e-06, |
| "loss": 0.0841, |
| "step": 2599 |
| }, |
| { |
| "epoch": 1.1828935395814377, |
| "grad_norm": 1.275297167024451, |
| "learning_rate": 4.3411937587992955e-06, |
| "loss": 0.0722, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.1833484986351228, |
| "grad_norm": 1.1799530738898074, |
| "learning_rate": 4.340710251330393e-06, |
| "loss": 0.0662, |
| "step": 2601 |
| }, |
| { |
| "epoch": 1.183803457688808, |
| "grad_norm": 1.872220715095368, |
| "learning_rate": 4.34022659344696e-06, |
| "loss": 0.1292, |
| "step": 2602 |
| }, |
| { |
| "epoch": 1.1842584167424932, |
| "grad_norm": 1.6772862778704278, |
| "learning_rate": 4.339742785188521e-06, |
| "loss": 0.0966, |
| "step": 2603 |
| }, |
| { |
| "epoch": 1.1847133757961783, |
| "grad_norm": 1.6082753483614305, |
| "learning_rate": 4.339258826594611e-06, |
| "loss": 0.0582, |
| "step": 2604 |
| }, |
| { |
| "epoch": 1.1851683348498634, |
| "grad_norm": 1.6117792608004555, |
| "learning_rate": 4.338774717704774e-06, |
| "loss": 0.0643, |
| "step": 2605 |
| }, |
| { |
| "epoch": 1.1856232939035487, |
| "grad_norm": 1.7422517232972539, |
| "learning_rate": 4.338290458558572e-06, |
| "loss": 0.1766, |
| "step": 2606 |
| }, |
| { |
| "epoch": 1.1860782529572338, |
| "grad_norm": 2.1476781837506818, |
| "learning_rate": 4.3378060491955744e-06, |
| "loss": 0.1463, |
| "step": 2607 |
| }, |
| { |
| "epoch": 1.186533212010919, |
| "grad_norm": 1.8922581543540133, |
| "learning_rate": 4.337321489655366e-06, |
| "loss": 0.1528, |
| "step": 2608 |
| }, |
| { |
| "epoch": 1.1869881710646042, |
| "grad_norm": 1.7516502810489014, |
| "learning_rate": 4.336836779977543e-06, |
| "loss": 0.1038, |
| "step": 2609 |
| }, |
| { |
| "epoch": 1.1874431301182893, |
| "grad_norm": 1.4511814170214454, |
| "learning_rate": 4.336351920201714e-06, |
| "loss": 0.1005, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.1878980891719746, |
| "grad_norm": 1.5620930461894496, |
| "learning_rate": 4.335866910367498e-06, |
| "loss": 0.0492, |
| "step": 2611 |
| }, |
| { |
| "epoch": 1.1883530482256597, |
| "grad_norm": 2.7082970498760117, |
| "learning_rate": 4.3353817505145294e-06, |
| "loss": 0.0909, |
| "step": 2612 |
| }, |
| { |
| "epoch": 1.1888080072793448, |
| "grad_norm": 1.5743219982804768, |
| "learning_rate": 4.334896440682452e-06, |
| "loss": 0.077, |
| "step": 2613 |
| }, |
| { |
| "epoch": 1.18926296633303, |
| "grad_norm": 1.3966339148129352, |
| "learning_rate": 4.334410980910924e-06, |
| "loss": 0.1218, |
| "step": 2614 |
| }, |
| { |
| "epoch": 1.1897179253867152, |
| "grad_norm": 1.4856452151376027, |
| "learning_rate": 4.333925371239615e-06, |
| "loss": 0.1035, |
| "step": 2615 |
| }, |
| { |
| "epoch": 1.1901728844404005, |
| "grad_norm": 1.6127438575709883, |
| "learning_rate": 4.3334396117082065e-06, |
| "loss": 0.1052, |
| "step": 2616 |
| }, |
| { |
| "epoch": 1.1906278434940856, |
| "grad_norm": 1.7288330036362787, |
| "learning_rate": 4.332953702356393e-06, |
| "loss": 0.1607, |
| "step": 2617 |
| }, |
| { |
| "epoch": 1.1910828025477707, |
| "grad_norm": 1.2779780017213267, |
| "learning_rate": 4.33246764322388e-06, |
| "loss": 0.0664, |
| "step": 2618 |
| }, |
| { |
| "epoch": 1.191537761601456, |
| "grad_norm": 1.843632743904082, |
| "learning_rate": 4.331981434350387e-06, |
| "loss": 0.1535, |
| "step": 2619 |
| }, |
| { |
| "epoch": 1.191992720655141, |
| "grad_norm": 1.3210812550635276, |
| "learning_rate": 4.331495075775644e-06, |
| "loss": 0.1404, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.1924476797088261, |
| "grad_norm": 1.3878492439329282, |
| "learning_rate": 4.331008567539395e-06, |
| "loss": 0.0747, |
| "step": 2621 |
| }, |
| { |
| "epoch": 1.1929026387625115, |
| "grad_norm": 1.3357463507965919, |
| "learning_rate": 4.330521909681394e-06, |
| "loss": 0.0766, |
| "step": 2622 |
| }, |
| { |
| "epoch": 1.1933575978161965, |
| "grad_norm": 1.6211605147229922, |
| "learning_rate": 4.330035102241409e-06, |
| "loss": 0.1197, |
| "step": 2623 |
| }, |
| { |
| "epoch": 1.1938125568698816, |
| "grad_norm": 1.496864935979414, |
| "learning_rate": 4.32954814525922e-06, |
| "loss": 0.0701, |
| "step": 2624 |
| }, |
| { |
| "epoch": 1.194267515923567, |
| "grad_norm": 1.3041113510202, |
| "learning_rate": 4.329061038774619e-06, |
| "loss": 0.071, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.194722474977252, |
| "grad_norm": 1.3390637893903103, |
| "learning_rate": 4.32857378282741e-06, |
| "loss": 0.0951, |
| "step": 2626 |
| }, |
| { |
| "epoch": 1.1951774340309371, |
| "grad_norm": 1.3209742325562313, |
| "learning_rate": 4.328086377457409e-06, |
| "loss": 0.0844, |
| "step": 2627 |
| }, |
| { |
| "epoch": 1.1956323930846224, |
| "grad_norm": 1.8118172786335158, |
| "learning_rate": 4.327598822704444e-06, |
| "loss": 0.1175, |
| "step": 2628 |
| }, |
| { |
| "epoch": 1.1960873521383075, |
| "grad_norm": 1.6299368669430234, |
| "learning_rate": 4.327111118608357e-06, |
| "loss": 0.1467, |
| "step": 2629 |
| }, |
| { |
| "epoch": 1.1965423111919926, |
| "grad_norm": 1.5688063002459107, |
| "learning_rate": 4.326623265209001e-06, |
| "loss": 0.0803, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.196997270245678, |
| "grad_norm": 1.6465294755773725, |
| "learning_rate": 4.326135262546241e-06, |
| "loss": 0.0705, |
| "step": 2631 |
| }, |
| { |
| "epoch": 1.197452229299363, |
| "grad_norm": 1.6238105525738482, |
| "learning_rate": 4.325647110659954e-06, |
| "loss": 0.1254, |
| "step": 2632 |
| }, |
| { |
| "epoch": 1.197907188353048, |
| "grad_norm": 1.7891444626148267, |
| "learning_rate": 4.325158809590028e-06, |
| "loss": 0.0718, |
| "step": 2633 |
| }, |
| { |
| "epoch": 1.1983621474067334, |
| "grad_norm": 1.047556103709193, |
| "learning_rate": 4.324670359376368e-06, |
| "loss": 0.0548, |
| "step": 2634 |
| }, |
| { |
| "epoch": 1.1988171064604185, |
| "grad_norm": 1.4266407858751808, |
| "learning_rate": 4.3241817600588865e-06, |
| "loss": 0.0799, |
| "step": 2635 |
| }, |
| { |
| "epoch": 1.1992720655141038, |
| "grad_norm": 1.0758052671422083, |
| "learning_rate": 4.3236930116775086e-06, |
| "loss": 0.0469, |
| "step": 2636 |
| }, |
| { |
| "epoch": 1.199727024567789, |
| "grad_norm": 1.8000162783707994, |
| "learning_rate": 4.323204114272174e-06, |
| "loss": 0.1349, |
| "step": 2637 |
| }, |
| { |
| "epoch": 1.200181983621474, |
| "grad_norm": 2.2216878566032836, |
| "learning_rate": 4.3227150678828335e-06, |
| "loss": 0.1198, |
| "step": 2638 |
| }, |
| { |
| "epoch": 1.2006369426751593, |
| "grad_norm": 1.674728333776232, |
| "learning_rate": 4.322225872549448e-06, |
| "loss": 0.1025, |
| "step": 2639 |
| }, |
| { |
| "epoch": 1.2010919017288444, |
| "grad_norm": 1.689368542839076, |
| "learning_rate": 4.321736528311994e-06, |
| "loss": 0.1048, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.2015468607825295, |
| "grad_norm": 1.4354075881450123, |
| "learning_rate": 4.321247035210456e-06, |
| "loss": 0.0692, |
| "step": 2641 |
| }, |
| { |
| "epoch": 1.2020018198362148, |
| "grad_norm": 1.6563738642729477, |
| "learning_rate": 4.320757393284837e-06, |
| "loss": 0.0767, |
| "step": 2642 |
| }, |
| { |
| "epoch": 1.2024567788898999, |
| "grad_norm": 1.379611923602435, |
| "learning_rate": 4.3202676025751455e-06, |
| "loss": 0.0591, |
| "step": 2643 |
| }, |
| { |
| "epoch": 1.2029117379435852, |
| "grad_norm": 1.6479290456698004, |
| "learning_rate": 4.319777663121406e-06, |
| "loss": 0.0961, |
| "step": 2644 |
| }, |
| { |
| "epoch": 1.2033666969972703, |
| "grad_norm": 1.9415821059711678, |
| "learning_rate": 4.319287574963653e-06, |
| "loss": 0.1624, |
| "step": 2645 |
| }, |
| { |
| "epoch": 1.2038216560509554, |
| "grad_norm": 1.5187755572188995, |
| "learning_rate": 4.318797338141936e-06, |
| "loss": 0.0799, |
| "step": 2646 |
| }, |
| { |
| "epoch": 1.2042766151046407, |
| "grad_norm": 1.2261158559841066, |
| "learning_rate": 4.318306952696314e-06, |
| "loss": 0.0789, |
| "step": 2647 |
| }, |
| { |
| "epoch": 1.2047315741583258, |
| "grad_norm": 1.5350997195388667, |
| "learning_rate": 4.317816418666859e-06, |
| "loss": 0.0648, |
| "step": 2648 |
| }, |
| { |
| "epoch": 1.2051865332120109, |
| "grad_norm": 2.0282859482323135, |
| "learning_rate": 4.317325736093656e-06, |
| "loss": 0.1003, |
| "step": 2649 |
| }, |
| { |
| "epoch": 1.2056414922656962, |
| "grad_norm": 1.099438335437198, |
| "learning_rate": 4.316834905016801e-06, |
| "loss": 0.0749, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.2060964513193813, |
| "grad_norm": 1.6955258737212886, |
| "learning_rate": 4.3163439254764015e-06, |
| "loss": 0.0799, |
| "step": 2651 |
| }, |
| { |
| "epoch": 1.2065514103730663, |
| "grad_norm": 1.4782312844645842, |
| "learning_rate": 4.31585279751258e-06, |
| "loss": 0.0812, |
| "step": 2652 |
| }, |
| { |
| "epoch": 1.2070063694267517, |
| "grad_norm": 0.962225205333111, |
| "learning_rate": 4.315361521165467e-06, |
| "loss": 0.0421, |
| "step": 2653 |
| }, |
| { |
| "epoch": 1.2074613284804367, |
| "grad_norm": 1.475944438171979, |
| "learning_rate": 4.314870096475209e-06, |
| "loss": 0.0797, |
| "step": 2654 |
| }, |
| { |
| "epoch": 1.2079162875341218, |
| "grad_norm": 1.9568750202890988, |
| "learning_rate": 4.3143785234819624e-06, |
| "loss": 0.1064, |
| "step": 2655 |
| }, |
| { |
| "epoch": 1.2083712465878071, |
| "grad_norm": 1.2968330567546162, |
| "learning_rate": 4.3138868022258974e-06, |
| "loss": 0.0541, |
| "step": 2656 |
| }, |
| { |
| "epoch": 1.2088262056414922, |
| "grad_norm": 1.3512605939635933, |
| "learning_rate": 4.313394932747194e-06, |
| "loss": 0.084, |
| "step": 2657 |
| }, |
| { |
| "epoch": 1.2092811646951773, |
| "grad_norm": 1.2788458917599885, |
| "learning_rate": 4.312902915086045e-06, |
| "loss": 0.078, |
| "step": 2658 |
| }, |
| { |
| "epoch": 1.2097361237488626, |
| "grad_norm": 1.2087340265742859, |
| "learning_rate": 4.312410749282658e-06, |
| "loss": 0.083, |
| "step": 2659 |
| }, |
| { |
| "epoch": 1.2101910828025477, |
| "grad_norm": 1.51675138627556, |
| "learning_rate": 4.311918435377248e-06, |
| "loss": 0.098, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.210646041856233, |
| "grad_norm": 1.767606141999641, |
| "learning_rate": 4.311425973410047e-06, |
| "loss": 0.1403, |
| "step": 2661 |
| }, |
| { |
| "epoch": 1.2111010009099181, |
| "grad_norm": 1.8607859425213837, |
| "learning_rate": 4.310933363421296e-06, |
| "loss": 0.1002, |
| "step": 2662 |
| }, |
| { |
| "epoch": 1.2115559599636032, |
| "grad_norm": 2.188295719120762, |
| "learning_rate": 4.310440605451248e-06, |
| "loss": 0.1062, |
| "step": 2663 |
| }, |
| { |
| "epoch": 1.2120109190172885, |
| "grad_norm": 1.6007893169355347, |
| "learning_rate": 4.30994769954017e-06, |
| "loss": 0.0855, |
| "step": 2664 |
| }, |
| { |
| "epoch": 1.2124658780709736, |
| "grad_norm": 1.7264264512353125, |
| "learning_rate": 4.30945464572834e-06, |
| "loss": 0.1561, |
| "step": 2665 |
| }, |
| { |
| "epoch": 1.2129208371246587, |
| "grad_norm": 1.4708066988612976, |
| "learning_rate": 4.3089614440560465e-06, |
| "loss": 0.0607, |
| "step": 2666 |
| }, |
| { |
| "epoch": 1.213375796178344, |
| "grad_norm": 1.5600890024513265, |
| "learning_rate": 4.3084680945635946e-06, |
| "loss": 0.1364, |
| "step": 2667 |
| }, |
| { |
| "epoch": 1.213830755232029, |
| "grad_norm": 1.876498244558624, |
| "learning_rate": 4.307974597291296e-06, |
| "loss": 0.1076, |
| "step": 2668 |
| }, |
| { |
| "epoch": 1.2142857142857142, |
| "grad_norm": 1.37065103914952, |
| "learning_rate": 4.307480952279478e-06, |
| "loss": 0.0523, |
| "step": 2669 |
| }, |
| { |
| "epoch": 1.2147406733393995, |
| "grad_norm": 1.4444820040999051, |
| "learning_rate": 4.3069871595684795e-06, |
| "loss": 0.0739, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.2151956323930846, |
| "grad_norm": 1.5069719193608038, |
| "learning_rate": 4.30649321919865e-06, |
| "loss": 0.0911, |
| "step": 2671 |
| }, |
| { |
| "epoch": 1.21565059144677, |
| "grad_norm": 1.2934622383879057, |
| "learning_rate": 4.305999131210353e-06, |
| "loss": 0.0837, |
| "step": 2672 |
| }, |
| { |
| "epoch": 1.216105550500455, |
| "grad_norm": 1.5853581830621495, |
| "learning_rate": 4.305504895643963e-06, |
| "loss": 0.0833, |
| "step": 2673 |
| }, |
| { |
| "epoch": 1.21656050955414, |
| "grad_norm": 1.3709517382273528, |
| "learning_rate": 4.305010512539867e-06, |
| "loss": 0.1159, |
| "step": 2674 |
| }, |
| { |
| "epoch": 1.2170154686078254, |
| "grad_norm": 1.4168456459509742, |
| "learning_rate": 4.304515981938462e-06, |
| "loss": 0.0606, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.2174704276615105, |
| "grad_norm": 1.5616363029677887, |
| "learning_rate": 4.304021303880161e-06, |
| "loss": 0.0996, |
| "step": 2676 |
| }, |
| { |
| "epoch": 1.2179253867151956, |
| "grad_norm": 1.708179628273713, |
| "learning_rate": 4.303526478405386e-06, |
| "loss": 0.1065, |
| "step": 2677 |
| }, |
| { |
| "epoch": 1.2183803457688809, |
| "grad_norm": 2.116672264038859, |
| "learning_rate": 4.3030315055545715e-06, |
| "loss": 0.128, |
| "step": 2678 |
| }, |
| { |
| "epoch": 1.218835304822566, |
| "grad_norm": 1.6986733358840764, |
| "learning_rate": 4.302536385368165e-06, |
| "loss": 0.082, |
| "step": 2679 |
| }, |
| { |
| "epoch": 1.219290263876251, |
| "grad_norm": 1.6851973141425958, |
| "learning_rate": 4.3020411178866246e-06, |
| "loss": 0.0666, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.2197452229299364, |
| "grad_norm": 1.3268862435295075, |
| "learning_rate": 4.3015457031504226e-06, |
| "loss": 0.0615, |
| "step": 2681 |
| }, |
| { |
| "epoch": 1.2202001819836215, |
| "grad_norm": 2.894618285414545, |
| "learning_rate": 4.301050141200041e-06, |
| "loss": 0.1161, |
| "step": 2682 |
| }, |
| { |
| "epoch": 1.2206551410373065, |
| "grad_norm": 1.8518976016980668, |
| "learning_rate": 4.300554432075975e-06, |
| "loss": 0.0677, |
| "step": 2683 |
| }, |
| { |
| "epoch": 1.2211101000909919, |
| "grad_norm": 1.9252846318661894, |
| "learning_rate": 4.300058575818733e-06, |
| "loss": 0.1195, |
| "step": 2684 |
| }, |
| { |
| "epoch": 1.221565059144677, |
| "grad_norm": 1.7916218908549502, |
| "learning_rate": 4.299562572468833e-06, |
| "loss": 0.1264, |
| "step": 2685 |
| }, |
| { |
| "epoch": 1.222020018198362, |
| "grad_norm": 1.3194566331820348, |
| "learning_rate": 4.299066422066807e-06, |
| "loss": 0.044, |
| "step": 2686 |
| }, |
| { |
| "epoch": 1.2224749772520473, |
| "grad_norm": 1.702059632495899, |
| "learning_rate": 4.2985701246531965e-06, |
| "loss": 0.1094, |
| "step": 2687 |
| }, |
| { |
| "epoch": 1.2229299363057324, |
| "grad_norm": 1.3985606136942172, |
| "learning_rate": 4.2980736802685575e-06, |
| "loss": 0.0476, |
| "step": 2688 |
| }, |
| { |
| "epoch": 1.2233848953594177, |
| "grad_norm": 1.8905242980121515, |
| "learning_rate": 4.297577088953458e-06, |
| "loss": 0.0676, |
| "step": 2689 |
| }, |
| { |
| "epoch": 1.2238398544131028, |
| "grad_norm": 0.8842330436141602, |
| "learning_rate": 4.2970803507484756e-06, |
| "loss": 0.0528, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.224294813466788, |
| "grad_norm": 1.5087671057266334, |
| "learning_rate": 4.296583465694204e-06, |
| "loss": 0.0781, |
| "step": 2691 |
| }, |
| { |
| "epoch": 1.2247497725204732, |
| "grad_norm": 2.1139760440967112, |
| "learning_rate": 4.296086433831244e-06, |
| "loss": 0.0995, |
| "step": 2692 |
| }, |
| { |
| "epoch": 1.2252047315741583, |
| "grad_norm": 1.3607345905968589, |
| "learning_rate": 4.295589255200212e-06, |
| "loss": 0.0842, |
| "step": 2693 |
| }, |
| { |
| "epoch": 1.2256596906278434, |
| "grad_norm": 1.7864471189286306, |
| "learning_rate": 4.295091929841734e-06, |
| "loss": 0.0839, |
| "step": 2694 |
| }, |
| { |
| "epoch": 1.2261146496815287, |
| "grad_norm": 1.4725627389737213, |
| "learning_rate": 4.2945944577964516e-06, |
| "loss": 0.1817, |
| "step": 2695 |
| }, |
| { |
| "epoch": 1.2265696087352138, |
| "grad_norm": 1.1876699089763878, |
| "learning_rate": 4.294096839105013e-06, |
| "loss": 0.0614, |
| "step": 2696 |
| }, |
| { |
| "epoch": 1.2270245677888991, |
| "grad_norm": 1.4225833533824312, |
| "learning_rate": 4.293599073808083e-06, |
| "loss": 0.0796, |
| "step": 2697 |
| }, |
| { |
| "epoch": 1.2274795268425842, |
| "grad_norm": 1.3288722678195426, |
| "learning_rate": 4.293101161946337e-06, |
| "loss": 0.0555, |
| "step": 2698 |
| }, |
| { |
| "epoch": 1.2279344858962693, |
| "grad_norm": 1.2424148095147949, |
| "learning_rate": 4.292603103560462e-06, |
| "loss": 0.0488, |
| "step": 2699 |
| }, |
| { |
| "epoch": 1.2283894449499546, |
| "grad_norm": 1.2746073892843495, |
| "learning_rate": 4.292104898691157e-06, |
| "loss": 0.0965, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.2288444040036397, |
| "grad_norm": 1.9553417584027957, |
| "learning_rate": 4.291606547379131e-06, |
| "loss": 0.0863, |
| "step": 2701 |
| }, |
| { |
| "epoch": 1.2292993630573248, |
| "grad_norm": 1.6292687158685326, |
| "learning_rate": 4.291108049665109e-06, |
| "loss": 0.1039, |
| "step": 2702 |
| }, |
| { |
| "epoch": 1.22975432211101, |
| "grad_norm": 1.6141920925692421, |
| "learning_rate": 4.290609405589827e-06, |
| "loss": 0.0702, |
| "step": 2703 |
| }, |
| { |
| "epoch": 1.2302092811646952, |
| "grad_norm": 1.568358524006938, |
| "learning_rate": 4.29011061519403e-06, |
| "loss": 0.1305, |
| "step": 2704 |
| }, |
| { |
| "epoch": 1.2306642402183803, |
| "grad_norm": 1.5832578242534308, |
| "learning_rate": 4.289611678518478e-06, |
| "loss": 0.0943, |
| "step": 2705 |
| }, |
| { |
| "epoch": 1.2311191992720656, |
| "grad_norm": 1.7204606734278, |
| "learning_rate": 4.289112595603941e-06, |
| "loss": 0.1271, |
| "step": 2706 |
| }, |
| { |
| "epoch": 1.2315741583257507, |
| "grad_norm": 1.878311333320497, |
| "learning_rate": 4.288613366491202e-06, |
| "loss": 0.0753, |
| "step": 2707 |
| }, |
| { |
| "epoch": 1.2320291173794358, |
| "grad_norm": 1.6190494499887427, |
| "learning_rate": 4.288113991221057e-06, |
| "loss": 0.0815, |
| "step": 2708 |
| }, |
| { |
| "epoch": 1.232484076433121, |
| "grad_norm": 1.4265449920467896, |
| "learning_rate": 4.2876144698343115e-06, |
| "loss": 0.0905, |
| "step": 2709 |
| }, |
| { |
| "epoch": 1.2329390354868062, |
| "grad_norm": 1.5792299252383166, |
| "learning_rate": 4.287114802371783e-06, |
| "loss": 0.0933, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.2333939945404913, |
| "grad_norm": 1.5541962345380622, |
| "learning_rate": 4.286614988874304e-06, |
| "loss": 0.1018, |
| "step": 2711 |
| }, |
| { |
| "epoch": 1.2338489535941766, |
| "grad_norm": 1.4933850317503654, |
| "learning_rate": 4.286115029382717e-06, |
| "loss": 0.1448, |
| "step": 2712 |
| }, |
| { |
| "epoch": 1.2343039126478617, |
| "grad_norm": 1.778907316114548, |
| "learning_rate": 4.285614923937876e-06, |
| "loss": 0.1101, |
| "step": 2713 |
| }, |
| { |
| "epoch": 1.2347588717015467, |
| "grad_norm": 1.3970757565526302, |
| "learning_rate": 4.285114672580647e-06, |
| "loss": 0.0862, |
| "step": 2714 |
| }, |
| { |
| "epoch": 1.235213830755232, |
| "grad_norm": 1.9653421473113715, |
| "learning_rate": 4.284614275351907e-06, |
| "loss": 0.1155, |
| "step": 2715 |
| }, |
| { |
| "epoch": 1.2356687898089171, |
| "grad_norm": 1.4818183158109117, |
| "learning_rate": 4.2841137322925495e-06, |
| "loss": 0.1109, |
| "step": 2716 |
| }, |
| { |
| "epoch": 1.2361237488626025, |
| "grad_norm": 1.395827472007909, |
| "learning_rate": 4.283613043443474e-06, |
| "loss": 0.0615, |
| "step": 2717 |
| }, |
| { |
| "epoch": 1.2365787079162875, |
| "grad_norm": 1.2600494580099084, |
| "learning_rate": 4.2831122088455955e-06, |
| "loss": 0.0588, |
| "step": 2718 |
| }, |
| { |
| "epoch": 1.2370336669699726, |
| "grad_norm": 1.731274261725021, |
| "learning_rate": 4.2826112285398395e-06, |
| "loss": 0.1502, |
| "step": 2719 |
| }, |
| { |
| "epoch": 1.237488626023658, |
| "grad_norm": 1.0227517272317024, |
| "learning_rate": 4.282110102567145e-06, |
| "loss": 0.0517, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.237943585077343, |
| "grad_norm": 1.3776885997310226, |
| "learning_rate": 4.28160883096846e-06, |
| "loss": 0.0663, |
| "step": 2721 |
| }, |
| { |
| "epoch": 1.2383985441310281, |
| "grad_norm": 1.2572442124919356, |
| "learning_rate": 4.281107413784747e-06, |
| "loss": 0.067, |
| "step": 2722 |
| }, |
| { |
| "epoch": 1.2388535031847134, |
| "grad_norm": 1.2741809908905852, |
| "learning_rate": 4.28060585105698e-06, |
| "loss": 0.1001, |
| "step": 2723 |
| }, |
| { |
| "epoch": 1.2393084622383985, |
| "grad_norm": 1.6333661735440708, |
| "learning_rate": 4.280104142826143e-06, |
| "loss": 0.0787, |
| "step": 2724 |
| }, |
| { |
| "epoch": 1.2397634212920838, |
| "grad_norm": 2.1072595872871984, |
| "learning_rate": 4.2796022891332355e-06, |
| "loss": 0.1632, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.240218380345769, |
| "grad_norm": 2.029930265466161, |
| "learning_rate": 4.279100290019265e-06, |
| "loss": 0.0732, |
| "step": 2726 |
| }, |
| { |
| "epoch": 1.240673339399454, |
| "grad_norm": 1.3800193403031813, |
| "learning_rate": 4.278598145525253e-06, |
| "loss": 0.1215, |
| "step": 2727 |
| }, |
| { |
| "epoch": 1.2411282984531393, |
| "grad_norm": 2.1334796621942074, |
| "learning_rate": 4.278095855692233e-06, |
| "loss": 0.1028, |
| "step": 2728 |
| }, |
| { |
| "epoch": 1.2415832575068244, |
| "grad_norm": 1.9037023983095858, |
| "learning_rate": 4.277593420561249e-06, |
| "loss": 0.0583, |
| "step": 2729 |
| }, |
| { |
| "epoch": 1.2420382165605095, |
| "grad_norm": 1.5266711911694233, |
| "learning_rate": 4.277090840173359e-06, |
| "loss": 0.0727, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.2424931756141948, |
| "grad_norm": 1.779852269680275, |
| "learning_rate": 4.276588114569631e-06, |
| "loss": 0.1165, |
| "step": 2731 |
| }, |
| { |
| "epoch": 1.24294813466788, |
| "grad_norm": 1.1686354520981554, |
| "learning_rate": 4.2760852437911436e-06, |
| "loss": 0.0696, |
| "step": 2732 |
| }, |
| { |
| "epoch": 1.243403093721565, |
| "grad_norm": 1.6281358508365982, |
| "learning_rate": 4.2755822278789926e-06, |
| "loss": 0.0748, |
| "step": 2733 |
| }, |
| { |
| "epoch": 1.2438580527752503, |
| "grad_norm": 1.9348550299278917, |
| "learning_rate": 4.2750790668742795e-06, |
| "loss": 0.0771, |
| "step": 2734 |
| }, |
| { |
| "epoch": 1.2443130118289354, |
| "grad_norm": 1.6843775010519313, |
| "learning_rate": 4.274575760818122e-06, |
| "loss": 0.1291, |
| "step": 2735 |
| }, |
| { |
| "epoch": 1.2447679708826205, |
| "grad_norm": 1.7400214741336621, |
| "learning_rate": 4.274072309751646e-06, |
| "loss": 0.0736, |
| "step": 2736 |
| }, |
| { |
| "epoch": 1.2452229299363058, |
| "grad_norm": 1.3279822498973282, |
| "learning_rate": 4.273568713715993e-06, |
| "loss": 0.105, |
| "step": 2737 |
| }, |
| { |
| "epoch": 1.2456778889899909, |
| "grad_norm": 1.4181047264694318, |
| "learning_rate": 4.2730649727523145e-06, |
| "loss": 0.1044, |
| "step": 2738 |
| }, |
| { |
| "epoch": 1.246132848043676, |
| "grad_norm": 1.5420933585436614, |
| "learning_rate": 4.272561086901773e-06, |
| "loss": 0.0742, |
| "step": 2739 |
| }, |
| { |
| "epoch": 1.2465878070973613, |
| "grad_norm": 2.0627213117577616, |
| "learning_rate": 4.272057056205544e-06, |
| "loss": 0.1002, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.2470427661510464, |
| "grad_norm": 1.6373337151018261, |
| "learning_rate": 4.271552880704815e-06, |
| "loss": 0.0786, |
| "step": 2741 |
| }, |
| { |
| "epoch": 1.2474977252047315, |
| "grad_norm": 1.4066801307959027, |
| "learning_rate": 4.271048560440786e-06, |
| "loss": 0.0951, |
| "step": 2742 |
| }, |
| { |
| "epoch": 1.2479526842584168, |
| "grad_norm": 1.4840597932593944, |
| "learning_rate": 4.2705440954546665e-06, |
| "loss": 0.1449, |
| "step": 2743 |
| }, |
| { |
| "epoch": 1.2484076433121019, |
| "grad_norm": 1.4874386819240102, |
| "learning_rate": 4.270039485787678e-06, |
| "loss": 0.0979, |
| "step": 2744 |
| }, |
| { |
| "epoch": 1.2488626023657872, |
| "grad_norm": 1.4996547701951468, |
| "learning_rate": 4.269534731481057e-06, |
| "loss": 0.1153, |
| "step": 2745 |
| }, |
| { |
| "epoch": 1.2493175614194723, |
| "grad_norm": 1.748368630407863, |
| "learning_rate": 4.269029832576048e-06, |
| "loss": 0.0701, |
| "step": 2746 |
| }, |
| { |
| "epoch": 1.2497725204731573, |
| "grad_norm": 1.2272157062443403, |
| "learning_rate": 4.2685247891139114e-06, |
| "loss": 0.0742, |
| "step": 2747 |
| }, |
| { |
| "epoch": 1.2502274795268427, |
| "grad_norm": 1.2535267297683748, |
| "learning_rate": 4.268019601135914e-06, |
| "loss": 0.0663, |
| "step": 2748 |
| }, |
| { |
| "epoch": 1.2506824385805277, |
| "grad_norm": 2.2232595843640954, |
| "learning_rate": 4.26751426868334e-06, |
| "loss": 0.0552, |
| "step": 2749 |
| }, |
| { |
| "epoch": 1.251137397634213, |
| "grad_norm": 1.6413257670602424, |
| "learning_rate": 4.2670087917974826e-06, |
| "loss": 0.0953, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.2515923566878981, |
| "grad_norm": 2.525956129850652, |
| "learning_rate": 4.266503170519645e-06, |
| "loss": 0.1019, |
| "step": 2751 |
| }, |
| { |
| "epoch": 1.2520473157415832, |
| "grad_norm": 1.7532088817176623, |
| "learning_rate": 4.265997404891147e-06, |
| "loss": 0.0962, |
| "step": 2752 |
| }, |
| { |
| "epoch": 1.2525022747952685, |
| "grad_norm": 1.7385955199194223, |
| "learning_rate": 4.265491494953316e-06, |
| "loss": 0.0829, |
| "step": 2753 |
| }, |
| { |
| "epoch": 1.2529572338489536, |
| "grad_norm": 1.5355610337039685, |
| "learning_rate": 4.2649854407474925e-06, |
| "loss": 0.1359, |
| "step": 2754 |
| }, |
| { |
| "epoch": 1.2534121929026387, |
| "grad_norm": 1.28022022581084, |
| "learning_rate": 4.26447924231503e-06, |
| "loss": 0.0558, |
| "step": 2755 |
| }, |
| { |
| "epoch": 1.253867151956324, |
| "grad_norm": 1.3880085094165089, |
| "learning_rate": 4.263972899697292e-06, |
| "loss": 0.0976, |
| "step": 2756 |
| }, |
| { |
| "epoch": 1.2543221110100091, |
| "grad_norm": 1.274974064159807, |
| "learning_rate": 4.263466412935654e-06, |
| "loss": 0.1164, |
| "step": 2757 |
| }, |
| { |
| "epoch": 1.2547770700636942, |
| "grad_norm": 1.3582086906964457, |
| "learning_rate": 4.262959782071505e-06, |
| "loss": 0.0524, |
| "step": 2758 |
| }, |
| { |
| "epoch": 1.2552320291173795, |
| "grad_norm": 1.8565157639016567, |
| "learning_rate": 4.262453007146244e-06, |
| "loss": 0.1207, |
| "step": 2759 |
| }, |
| { |
| "epoch": 1.2556869881710646, |
| "grad_norm": 1.1179278766341727, |
| "learning_rate": 4.261946088201282e-06, |
| "loss": 0.0628, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.2561419472247497, |
| "grad_norm": 1.3815222535677334, |
| "learning_rate": 4.261439025278044e-06, |
| "loss": 0.0783, |
| "step": 2761 |
| }, |
| { |
| "epoch": 1.256596906278435, |
| "grad_norm": 1.6096595755674274, |
| "learning_rate": 4.260931818417962e-06, |
| "loss": 0.0655, |
| "step": 2762 |
| }, |
| { |
| "epoch": 1.25705186533212, |
| "grad_norm": 1.4310899801227122, |
| "learning_rate": 4.260424467662484e-06, |
| "loss": 0.0794, |
| "step": 2763 |
| }, |
| { |
| "epoch": 1.2575068243858052, |
| "grad_norm": 1.3830505652727263, |
| "learning_rate": 4.259916973053069e-06, |
| "loss": 0.126, |
| "step": 2764 |
| }, |
| { |
| "epoch": 1.2579617834394905, |
| "grad_norm": 1.2593848254260958, |
| "learning_rate": 4.2594093346311865e-06, |
| "loss": 0.0952, |
| "step": 2765 |
| }, |
| { |
| "epoch": 1.2584167424931756, |
| "grad_norm": 1.7618010142299456, |
| "learning_rate": 4.258901552438319e-06, |
| "loss": 0.1159, |
| "step": 2766 |
| }, |
| { |
| "epoch": 1.2588717015468607, |
| "grad_norm": 1.4438782108606985, |
| "learning_rate": 4.25839362651596e-06, |
| "loss": 0.0862, |
| "step": 2767 |
| }, |
| { |
| "epoch": 1.259326660600546, |
| "grad_norm": 1.960220687441142, |
| "learning_rate": 4.257885556905613e-06, |
| "loss": 0.0847, |
| "step": 2768 |
| }, |
| { |
| "epoch": 1.259781619654231, |
| "grad_norm": 1.588478187298156, |
| "learning_rate": 4.257377343648799e-06, |
| "loss": 0.0798, |
| "step": 2769 |
| }, |
| { |
| "epoch": 1.2602365787079162, |
| "grad_norm": 1.3801501508630765, |
| "learning_rate": 4.256868986787044e-06, |
| "loss": 0.0942, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.2606915377616015, |
| "grad_norm": 1.429324437514992, |
| "learning_rate": 4.256360486361889e-06, |
| "loss": 0.0588, |
| "step": 2771 |
| }, |
| { |
| "epoch": 1.2611464968152866, |
| "grad_norm": 1.6843373956104633, |
| "learning_rate": 4.255851842414887e-06, |
| "loss": 0.0655, |
| "step": 2772 |
| }, |
| { |
| "epoch": 1.2616014558689717, |
| "grad_norm": 1.8180982857396182, |
| "learning_rate": 4.255343054987601e-06, |
| "loss": 0.1242, |
| "step": 2773 |
| }, |
| { |
| "epoch": 1.262056414922657, |
| "grad_norm": 1.417537186445061, |
| "learning_rate": 4.2548341241216085e-06, |
| "loss": 0.0584, |
| "step": 2774 |
| }, |
| { |
| "epoch": 1.262511373976342, |
| "grad_norm": 1.8094891195148863, |
| "learning_rate": 4.254325049858496e-06, |
| "loss": 0.104, |
| "step": 2775 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 10990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 555, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 24592138002432.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |