| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 1730, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002890173410404624, |
| "grad_norm": 7.090742589535237, |
| "learning_rate": 1.1560693641618497e-07, |
| "loss": 1.1688, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005780346820809248, |
| "grad_norm": 6.961285906182364, |
| "learning_rate": 2.3121387283236994e-07, |
| "loss": 1.152, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.008670520231213872, |
| "grad_norm": 6.803426809209352, |
| "learning_rate": 3.468208092485549e-07, |
| "loss": 1.1394, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.011560693641618497, |
| "grad_norm": 6.908189824429038, |
| "learning_rate": 4.624277456647399e-07, |
| "loss": 1.1456, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.014450867052023121, |
| "grad_norm": 6.896200809170114, |
| "learning_rate": 5.780346820809249e-07, |
| "loss": 1.1535, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.017341040462427744, |
| "grad_norm": 6.805578082000228, |
| "learning_rate": 6.936416184971098e-07, |
| "loss": 1.1366, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.02023121387283237, |
| "grad_norm": 6.703973957994694, |
| "learning_rate": 8.092485549132949e-07, |
| "loss": 1.1601, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.023121387283236993, |
| "grad_norm": 6.208768024958665, |
| "learning_rate": 9.248554913294798e-07, |
| "loss": 1.0901, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02601156069364162, |
| "grad_norm": 6.0765892308895255, |
| "learning_rate": 1.040462427745665e-06, |
| "loss": 1.0838, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.028901734104046242, |
| "grad_norm": 4.955277796409471, |
| "learning_rate": 1.1560693641618499e-06, |
| "loss": 1.0948, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.031791907514450865, |
| "grad_norm": 4.694651793751496, |
| "learning_rate": 1.2716763005780348e-06, |
| "loss": 1.0693, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03468208092485549, |
| "grad_norm": 4.519565070186095, |
| "learning_rate": 1.3872832369942195e-06, |
| "loss": 1.0612, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03757225433526012, |
| "grad_norm": 4.197708724581753, |
| "learning_rate": 1.502890173410405e-06, |
| "loss": 1.071, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.04046242774566474, |
| "grad_norm": 2.892414618232367, |
| "learning_rate": 1.6184971098265898e-06, |
| "loss": 1.0341, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04335260115606936, |
| "grad_norm": 2.8567945230902487, |
| "learning_rate": 1.7341040462427746e-06, |
| "loss": 1.0408, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.046242774566473986, |
| "grad_norm": 2.728165100469004, |
| "learning_rate": 1.8497109826589595e-06, |
| "loss": 1.0101, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.049132947976878616, |
| "grad_norm": 2.6771874838000933, |
| "learning_rate": 1.965317919075145e-06, |
| "loss": 0.9826, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05202312138728324, |
| "grad_norm": 2.1602714711793687, |
| "learning_rate": 2.08092485549133e-06, |
| "loss": 0.9661, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.05491329479768786, |
| "grad_norm": 2.566414332647808, |
| "learning_rate": 2.1965317919075148e-06, |
| "loss": 0.9596, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.057803468208092484, |
| "grad_norm": 2.551741054940349, |
| "learning_rate": 2.3121387283236997e-06, |
| "loss": 0.9482, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06069364161849711, |
| "grad_norm": 2.5386956864281904, |
| "learning_rate": 2.4277456647398847e-06, |
| "loss": 0.9411, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06358381502890173, |
| "grad_norm": 2.1389476290630047, |
| "learning_rate": 2.5433526011560696e-06, |
| "loss": 0.9384, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.06647398843930635, |
| "grad_norm": 1.8798822954453593, |
| "learning_rate": 2.658959537572254e-06, |
| "loss": 0.9385, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.06936416184971098, |
| "grad_norm": 1.6443784984730219, |
| "learning_rate": 2.774566473988439e-06, |
| "loss": 0.9346, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07225433526011561, |
| "grad_norm": 1.8190270220824762, |
| "learning_rate": 2.890173410404625e-06, |
| "loss": 0.8848, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07514450867052024, |
| "grad_norm": 2.0826997260801092, |
| "learning_rate": 3.00578034682081e-06, |
| "loss": 0.8725, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.07803468208092486, |
| "grad_norm": 1.7656621345001091, |
| "learning_rate": 3.1213872832369948e-06, |
| "loss": 0.8621, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.08092485549132948, |
| "grad_norm": 1.4044326746181084, |
| "learning_rate": 3.2369942196531797e-06, |
| "loss": 0.8399, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0838150289017341, |
| "grad_norm": 1.1511152676326295, |
| "learning_rate": 3.3526011560693642e-06, |
| "loss": 0.8481, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.08670520231213873, |
| "grad_norm": 1.1197545053925908, |
| "learning_rate": 3.468208092485549e-06, |
| "loss": 0.8354, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08959537572254335, |
| "grad_norm": 1.254685408280926, |
| "learning_rate": 3.583815028901734e-06, |
| "loss": 0.8387, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.09248554913294797, |
| "grad_norm": 1.3419102903202926, |
| "learning_rate": 3.699421965317919e-06, |
| "loss": 0.8525, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0953757225433526, |
| "grad_norm": 1.2271588730403455, |
| "learning_rate": 3.815028901734104e-06, |
| "loss": 0.851, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.09826589595375723, |
| "grad_norm": 1.0577363822256058, |
| "learning_rate": 3.93063583815029e-06, |
| "loss": 0.8317, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.10115606936416185, |
| "grad_norm": 1.0440985673514709, |
| "learning_rate": 4.046242774566474e-06, |
| "loss": 0.8313, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.10404624277456648, |
| "grad_norm": 1.0098807937064038, |
| "learning_rate": 4.16184971098266e-06, |
| "loss": 0.8044, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1069364161849711, |
| "grad_norm": 0.8580054683034375, |
| "learning_rate": 4.277456647398844e-06, |
| "loss": 0.8237, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.10982658959537572, |
| "grad_norm": 0.9796431941968685, |
| "learning_rate": 4.3930635838150296e-06, |
| "loss": 0.806, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.11271676300578035, |
| "grad_norm": 0.9439424055584376, |
| "learning_rate": 4.508670520231214e-06, |
| "loss": 0.8056, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.11560693641618497, |
| "grad_norm": 0.9203463890270979, |
| "learning_rate": 4.6242774566473994e-06, |
| "loss": 0.7844, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11849710982658959, |
| "grad_norm": 0.8780398374830074, |
| "learning_rate": 4.739884393063584e-06, |
| "loss": 0.8, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.12138728323699421, |
| "grad_norm": 0.7825324337099361, |
| "learning_rate": 4.855491329479769e-06, |
| "loss": 0.7943, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.12427745664739884, |
| "grad_norm": 0.8066246593788152, |
| "learning_rate": 4.971098265895954e-06, |
| "loss": 0.789, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.12716763005780346, |
| "grad_norm": 0.8289688925666006, |
| "learning_rate": 5.086705202312139e-06, |
| "loss": 0.7821, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.13005780346820808, |
| "grad_norm": 0.8808962397795624, |
| "learning_rate": 5.202312138728324e-06, |
| "loss": 0.7866, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.1329479768786127, |
| "grad_norm": 0.79942280517295, |
| "learning_rate": 5.317919075144508e-06, |
| "loss": 0.7666, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.13583815028901733, |
| "grad_norm": 0.724258148574199, |
| "learning_rate": 5.433526011560694e-06, |
| "loss": 0.7692, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.13872832369942195, |
| "grad_norm": 0.7437020529632945, |
| "learning_rate": 5.549132947976878e-06, |
| "loss": 0.7589, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.1416184971098266, |
| "grad_norm": 0.7700612704319088, |
| "learning_rate": 5.664739884393064e-06, |
| "loss": 0.7591, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.14450867052023122, |
| "grad_norm": 0.7923035177832577, |
| "learning_rate": 5.78034682080925e-06, |
| "loss": 0.7683, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14739884393063585, |
| "grad_norm": 0.7088889817945515, |
| "learning_rate": 5.895953757225434e-06, |
| "loss": 0.7353, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.15028901734104047, |
| "grad_norm": 0.676374698276162, |
| "learning_rate": 6.01156069364162e-06, |
| "loss": 0.7644, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1531791907514451, |
| "grad_norm": 0.6924940688568527, |
| "learning_rate": 6.127167630057804e-06, |
| "loss": 0.752, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.15606936416184972, |
| "grad_norm": 0.8527665295088717, |
| "learning_rate": 6.2427745664739895e-06, |
| "loss": 0.7639, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.15895953757225434, |
| "grad_norm": 0.6888695783169247, |
| "learning_rate": 6.358381502890174e-06, |
| "loss": 0.7509, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.16184971098265896, |
| "grad_norm": 0.7490728100821497, |
| "learning_rate": 6.473988439306359e-06, |
| "loss": 0.7484, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.16473988439306358, |
| "grad_norm": 0.9157288705931512, |
| "learning_rate": 6.589595375722544e-06, |
| "loss": 0.7408, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1676300578034682, |
| "grad_norm": 0.700876792326669, |
| "learning_rate": 6.7052023121387284e-06, |
| "loss": 0.7599, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.17052023121387283, |
| "grad_norm": 0.7075983505256961, |
| "learning_rate": 6.820809248554914e-06, |
| "loss": 0.7253, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.17341040462427745, |
| "grad_norm": 0.7662259009582316, |
| "learning_rate": 6.936416184971098e-06, |
| "loss": 0.7493, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17630057803468208, |
| "grad_norm": 0.7106648098423958, |
| "learning_rate": 7.052023121387284e-06, |
| "loss": 0.7491, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.1791907514450867, |
| "grad_norm": 0.8234290971439716, |
| "learning_rate": 7.167630057803468e-06, |
| "loss": 0.7437, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.18208092485549132, |
| "grad_norm": 0.7893881123491794, |
| "learning_rate": 7.283236994219654e-06, |
| "loss": 0.7516, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.18497109826589594, |
| "grad_norm": 0.7820983140830451, |
| "learning_rate": 7.398843930635838e-06, |
| "loss": 0.7288, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.18786127167630057, |
| "grad_norm": 0.8347449630731727, |
| "learning_rate": 7.5144508670520235e-06, |
| "loss": 0.7576, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.1907514450867052, |
| "grad_norm": 0.6638382496430837, |
| "learning_rate": 7.630057803468209e-06, |
| "loss": 0.7289, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1936416184971098, |
| "grad_norm": 0.8106026549694, |
| "learning_rate": 7.745664739884393e-06, |
| "loss": 0.7267, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.19653179190751446, |
| "grad_norm": 0.7500535606813568, |
| "learning_rate": 7.86127167630058e-06, |
| "loss": 0.7516, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1994219653179191, |
| "grad_norm": 0.669136491901921, |
| "learning_rate": 7.976878612716764e-06, |
| "loss": 0.7149, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.2023121387283237, |
| "grad_norm": 0.7266271821143816, |
| "learning_rate": 8.092485549132949e-06, |
| "loss": 0.7229, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.20520231213872833, |
| "grad_norm": 0.8328723885762708, |
| "learning_rate": 8.208092485549133e-06, |
| "loss": 0.7367, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.20809248554913296, |
| "grad_norm": 0.6611812919714526, |
| "learning_rate": 8.32369942196532e-06, |
| "loss": 0.7394, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.21098265895953758, |
| "grad_norm": 0.7232692335331032, |
| "learning_rate": 8.439306358381504e-06, |
| "loss": 0.7383, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2138728323699422, |
| "grad_norm": 0.7028412695126045, |
| "learning_rate": 8.554913294797688e-06, |
| "loss": 0.7222, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.21676300578034682, |
| "grad_norm": 0.7274403497707242, |
| "learning_rate": 8.670520231213873e-06, |
| "loss": 0.7311, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.21965317919075145, |
| "grad_norm": 0.6871391487835555, |
| "learning_rate": 8.786127167630059e-06, |
| "loss": 0.7182, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.22254335260115607, |
| "grad_norm": 0.7418967633087836, |
| "learning_rate": 8.901734104046244e-06, |
| "loss": 0.7141, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2254335260115607, |
| "grad_norm": 0.7513431838317323, |
| "learning_rate": 9.017341040462428e-06, |
| "loss": 0.7138, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.22832369942196531, |
| "grad_norm": 0.6956743141202641, |
| "learning_rate": 9.132947976878613e-06, |
| "loss": 0.7006, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.23121387283236994, |
| "grad_norm": 0.6747351175950522, |
| "learning_rate": 9.248554913294799e-06, |
| "loss": 0.7094, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.23410404624277456, |
| "grad_norm": 0.6938212711390582, |
| "learning_rate": 9.364161849710983e-06, |
| "loss": 0.7294, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.23699421965317918, |
| "grad_norm": 0.7336859571024911, |
| "learning_rate": 9.479768786127168e-06, |
| "loss": 0.734, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.2398843930635838, |
| "grad_norm": 0.6857197575458358, |
| "learning_rate": 9.595375722543352e-06, |
| "loss": 0.7166, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.24277456647398843, |
| "grad_norm": 0.7287371485546202, |
| "learning_rate": 9.710982658959539e-06, |
| "loss": 0.7245, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.24566473988439305, |
| "grad_norm": 0.7055914697217203, |
| "learning_rate": 9.826589595375723e-06, |
| "loss": 0.7045, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.24855491329479767, |
| "grad_norm": 0.8230354709821103, |
| "learning_rate": 9.942196531791908e-06, |
| "loss": 0.7053, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.2514450867052023, |
| "grad_norm": 0.7267067340623982, |
| "learning_rate": 1.0057803468208094e-05, |
| "loss": 0.6994, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2543352601156069, |
| "grad_norm": 0.7721448103371933, |
| "learning_rate": 1.0173410404624278e-05, |
| "loss": 0.694, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.25722543352601157, |
| "grad_norm": 0.7325214157840806, |
| "learning_rate": 1.0289017341040463e-05, |
| "loss": 0.7096, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.26011560693641617, |
| "grad_norm": 0.6829001987547941, |
| "learning_rate": 1.0404624277456647e-05, |
| "loss": 0.7114, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2630057803468208, |
| "grad_norm": 0.8080043630636083, |
| "learning_rate": 1.0520231213872834e-05, |
| "loss": 0.7306, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2658959537572254, |
| "grad_norm": 0.7590424295029534, |
| "learning_rate": 1.0635838150289017e-05, |
| "loss": 0.6958, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.26878612716763006, |
| "grad_norm": 0.8001743955786633, |
| "learning_rate": 1.0751445086705203e-05, |
| "loss": 0.7386, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.27167630057803466, |
| "grad_norm": 0.8387588767102132, |
| "learning_rate": 1.0867052023121387e-05, |
| "loss": 0.7083, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2745664739884393, |
| "grad_norm": 0.8688553902234525, |
| "learning_rate": 1.0982658959537573e-05, |
| "loss": 0.7364, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.2774566473988439, |
| "grad_norm": 0.7464318068601791, |
| "learning_rate": 1.1098265895953756e-05, |
| "loss": 0.7149, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.28034682080924855, |
| "grad_norm": 0.757546152793497, |
| "learning_rate": 1.1213872832369943e-05, |
| "loss": 0.6983, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2832369942196532, |
| "grad_norm": 0.7438008526685018, |
| "learning_rate": 1.1329479768786129e-05, |
| "loss": 0.7057, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2861271676300578, |
| "grad_norm": 0.8578457525033802, |
| "learning_rate": 1.1445086705202313e-05, |
| "loss": 0.7249, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.28901734104046245, |
| "grad_norm": 0.746719183950257, |
| "learning_rate": 1.15606936416185e-05, |
| "loss": 0.711, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.29190751445086704, |
| "grad_norm": 1.0078208261088115, |
| "learning_rate": 1.1676300578034682e-05, |
| "loss": 0.7094, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2947976878612717, |
| "grad_norm": 0.7362091784291579, |
| "learning_rate": 1.1791907514450869e-05, |
| "loss": 0.696, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2976878612716763, |
| "grad_norm": 0.9770080398344241, |
| "learning_rate": 1.1907514450867053e-05, |
| "loss": 0.7214, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.30057803468208094, |
| "grad_norm": 0.7751464484631613, |
| "learning_rate": 1.202312138728324e-05, |
| "loss": 0.712, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.30346820809248554, |
| "grad_norm": 0.9674784175128227, |
| "learning_rate": 1.2138728323699422e-05, |
| "loss": 0.7091, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.3063583815028902, |
| "grad_norm": 0.8537449224492079, |
| "learning_rate": 1.2254335260115608e-05, |
| "loss": 0.6907, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3092485549132948, |
| "grad_norm": 0.9801860723729289, |
| "learning_rate": 1.2369942196531793e-05, |
| "loss": 0.7041, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.31213872832369943, |
| "grad_norm": 0.9229373895628625, |
| "learning_rate": 1.2485549132947979e-05, |
| "loss": 0.7204, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.315028901734104, |
| "grad_norm": 0.8638908853520371, |
| "learning_rate": 1.2601156069364162e-05, |
| "loss": 0.7009, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.3179190751445087, |
| "grad_norm": 0.7773844197376821, |
| "learning_rate": 1.2716763005780348e-05, |
| "loss": 0.6945, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3208092485549133, |
| "grad_norm": 0.8125551782431788, |
| "learning_rate": 1.2832369942196533e-05, |
| "loss": 0.7291, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3236994219653179, |
| "grad_norm": 0.7481929293692252, |
| "learning_rate": 1.2947976878612719e-05, |
| "loss": 0.6884, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3265895953757225, |
| "grad_norm": 0.8853702590472872, |
| "learning_rate": 1.3063583815028902e-05, |
| "loss": 0.7021, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.32947976878612717, |
| "grad_norm": 0.7260205643390303, |
| "learning_rate": 1.3179190751445088e-05, |
| "loss": 0.6719, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.33236994219653176, |
| "grad_norm": 0.8978278445772442, |
| "learning_rate": 1.3294797687861272e-05, |
| "loss": 0.7254, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3352601156069364, |
| "grad_norm": 0.7958403914693805, |
| "learning_rate": 1.3410404624277457e-05, |
| "loss": 0.7031, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.33815028901734107, |
| "grad_norm": 0.7737575346947156, |
| "learning_rate": 1.3526011560693643e-05, |
| "loss": 0.6987, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.34104046242774566, |
| "grad_norm": 1.127441618739487, |
| "learning_rate": 1.3641618497109828e-05, |
| "loss": 0.6682, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3439306358381503, |
| "grad_norm": 0.8463337023953159, |
| "learning_rate": 1.3757225433526014e-05, |
| "loss": 0.6923, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3468208092485549, |
| "grad_norm": 0.9189231681321044, |
| "learning_rate": 1.3872832369942197e-05, |
| "loss": 0.7202, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.34971098265895956, |
| "grad_norm": 0.7824948205964652, |
| "learning_rate": 1.3988439306358383e-05, |
| "loss": 0.6936, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.35260115606936415, |
| "grad_norm": 1.0322221560229676, |
| "learning_rate": 1.4104046242774567e-05, |
| "loss": 0.6995, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3554913294797688, |
| "grad_norm": 0.8141648711576799, |
| "learning_rate": 1.4219653179190754e-05, |
| "loss": 0.6927, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.3583815028901734, |
| "grad_norm": 0.8449673387537935, |
| "learning_rate": 1.4335260115606936e-05, |
| "loss": 0.7048, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.36127167630057805, |
| "grad_norm": 0.9002017571970037, |
| "learning_rate": 1.4450867052023123e-05, |
| "loss": 0.6681, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.36416184971098264, |
| "grad_norm": 0.7809488544709244, |
| "learning_rate": 1.4566473988439307e-05, |
| "loss": 0.707, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.3670520231213873, |
| "grad_norm": 0.9600878348909891, |
| "learning_rate": 1.4682080924855493e-05, |
| "loss": 0.6886, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.3699421965317919, |
| "grad_norm": 0.8728269186167633, |
| "learning_rate": 1.4797687861271676e-05, |
| "loss": 0.7056, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.37283236994219654, |
| "grad_norm": 0.896265984559966, |
| "learning_rate": 1.4913294797687862e-05, |
| "loss": 0.691, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.37572254335260113, |
| "grad_norm": 0.7956220341021001, |
| "learning_rate": 1.5028901734104047e-05, |
| "loss": 0.6827, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3786127167630058, |
| "grad_norm": 0.8059535895529943, |
| "learning_rate": 1.5144508670520233e-05, |
| "loss": 0.6998, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.3815028901734104, |
| "grad_norm": 0.8563980216140917, |
| "learning_rate": 1.5260115606936418e-05, |
| "loss": 0.705, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.38439306358381503, |
| "grad_norm": 0.9640073892946409, |
| "learning_rate": 1.5375722543352604e-05, |
| "loss": 0.7163, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3872832369942196, |
| "grad_norm": 0.9671494807729399, |
| "learning_rate": 1.5491329479768787e-05, |
| "loss": 0.6882, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.3901734104046243, |
| "grad_norm": 0.9263949710988968, |
| "learning_rate": 1.5606936416184973e-05, |
| "loss": 0.7018, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.3930635838150289, |
| "grad_norm": 0.8605012636577827, |
| "learning_rate": 1.572254335260116e-05, |
| "loss": 0.697, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3959537572254335, |
| "grad_norm": 0.967358301859874, |
| "learning_rate": 1.5838150289017342e-05, |
| "loss": 0.6914, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3988439306358382, |
| "grad_norm": 0.9342870144851555, |
| "learning_rate": 1.5953757225433528e-05, |
| "loss": 0.706, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.40173410404624277, |
| "grad_norm": 0.8328745505321132, |
| "learning_rate": 1.606936416184971e-05, |
| "loss": 0.6902, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.4046242774566474, |
| "grad_norm": 0.8241167968873063, |
| "learning_rate": 1.6184971098265897e-05, |
| "loss": 0.6893, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.407514450867052, |
| "grad_norm": 0.7412040936941583, |
| "learning_rate": 1.6300578034682083e-05, |
| "loss": 0.6977, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.41040462427745666, |
| "grad_norm": 0.8819984014049386, |
| "learning_rate": 1.6416184971098266e-05, |
| "loss": 0.6668, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.41329479768786126, |
| "grad_norm": 0.8385796939119565, |
| "learning_rate": 1.6531791907514452e-05, |
| "loss": 0.7113, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.4161849710982659, |
| "grad_norm": 0.8604832658397593, |
| "learning_rate": 1.664739884393064e-05, |
| "loss": 0.7026, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4190751445086705, |
| "grad_norm": 1.0461593299403098, |
| "learning_rate": 1.676300578034682e-05, |
| "loss": 0.6878, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.42196531791907516, |
| "grad_norm": 0.811865171671389, |
| "learning_rate": 1.6878612716763008e-05, |
| "loss": 0.6796, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.42485549132947975, |
| "grad_norm": 0.9481614894881139, |
| "learning_rate": 1.699421965317919e-05, |
| "loss": 0.7025, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4277456647398844, |
| "grad_norm": 0.8343305893997477, |
| "learning_rate": 1.7109826589595377e-05, |
| "loss": 0.6874, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.430635838150289, |
| "grad_norm": 0.9421367720906404, |
| "learning_rate": 1.722543352601156e-05, |
| "loss": 0.6868, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.43352601156069365, |
| "grad_norm": 0.8390255587190718, |
| "learning_rate": 1.7341040462427746e-05, |
| "loss": 0.6938, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.43641618497109824, |
| "grad_norm": 0.8951037803089976, |
| "learning_rate": 1.7456647398843932e-05, |
| "loss": 0.6899, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.4393063583815029, |
| "grad_norm": 0.7867290840778911, |
| "learning_rate": 1.7572254335260118e-05, |
| "loss": 0.7028, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.4421965317919075, |
| "grad_norm": 0.995998766689234, |
| "learning_rate": 1.76878612716763e-05, |
| "loss": 0.7028, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.44508670520231214, |
| "grad_norm": 0.9880944969572394, |
| "learning_rate": 1.7803468208092487e-05, |
| "loss": 0.6727, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4479768786127168, |
| "grad_norm": 1.284513930041935, |
| "learning_rate": 1.7919075144508673e-05, |
| "loss": 0.6922, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4508670520231214, |
| "grad_norm": 0.9511258625683117, |
| "learning_rate": 1.8034682080924856e-05, |
| "loss": 0.6903, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.45375722543352603, |
| "grad_norm": 1.7388405496528023, |
| "learning_rate": 1.8150289017341043e-05, |
| "loss": 0.6819, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.45664739884393063, |
| "grad_norm": 1.3452366087747083, |
| "learning_rate": 1.8265895953757225e-05, |
| "loss": 0.7148, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4595375722543353, |
| "grad_norm": 1.271970881953204, |
| "learning_rate": 1.838150289017341e-05, |
| "loss": 0.6929, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.4624277456647399, |
| "grad_norm": 1.0994489504483136, |
| "learning_rate": 1.8497109826589598e-05, |
| "loss": 0.6782, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.4653179190751445, |
| "grad_norm": 1.1657326994628787, |
| "learning_rate": 1.8612716763005784e-05, |
| "loss": 0.6887, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.4682080924855491, |
| "grad_norm": 0.9854583993441037, |
| "learning_rate": 1.8728323699421967e-05, |
| "loss": 0.6839, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.47109826589595377, |
| "grad_norm": 1.3286703766678492, |
| "learning_rate": 1.8843930635838153e-05, |
| "loss": 0.6842, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.47398843930635837, |
| "grad_norm": 1.0185723644859102, |
| "learning_rate": 1.8959537572254336e-05, |
| "loss": 0.6988, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.476878612716763, |
| "grad_norm": 1.1241175262812633, |
| "learning_rate": 1.9075144508670522e-05, |
| "loss": 0.6887, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.4797687861271676, |
| "grad_norm": 0.9824433207331327, |
| "learning_rate": 1.9190751445086705e-05, |
| "loss": 0.6952, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.48265895953757226, |
| "grad_norm": 1.1152282227331158, |
| "learning_rate": 1.930635838150289e-05, |
| "loss": 0.6602, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.48554913294797686, |
| "grad_norm": 1.034043682955398, |
| "learning_rate": 1.9421965317919077e-05, |
| "loss": 0.696, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.4884393063583815, |
| "grad_norm": 0.8508152573776165, |
| "learning_rate": 1.9537572254335264e-05, |
| "loss": 0.6959, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.4913294797687861, |
| "grad_norm": 1.0495255954356906, |
| "learning_rate": 1.9653179190751446e-05, |
| "loss": 0.6751, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.49421965317919075, |
| "grad_norm": 0.9582542481428932, |
| "learning_rate": 1.9768786127167633e-05, |
| "loss": 0.696, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.49710982658959535, |
| "grad_norm": 1.0332331094000415, |
| "learning_rate": 1.9884393063583815e-05, |
| "loss": 0.7099, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.883896628507405, |
| "learning_rate": 2e-05, |
| "loss": 0.6689, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5028901734104047, |
| "grad_norm": 0.8240741765805385, |
| "learning_rate": 1.9999979644003047e-05, |
| "loss": 0.666, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5057803468208093, |
| "grad_norm": 0.9906783783216854, |
| "learning_rate": 1.9999918576095053e-05, |
| "loss": 0.7095, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5086705202312138, |
| "grad_norm": 0.8757602478752999, |
| "learning_rate": 1.9999816796524642e-05, |
| "loss": 0.6636, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5115606936416185, |
| "grad_norm": 0.9816660715471465, |
| "learning_rate": 1.9999674305706178e-05, |
| "loss": 0.6844, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5144508670520231, |
| "grad_norm": 0.9646033715264206, |
| "learning_rate": 1.999949110421977e-05, |
| "loss": 0.708, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5173410404624278, |
| "grad_norm": 1.054095791885763, |
| "learning_rate": 1.999926719281127e-05, |
| "loss": 0.7004, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5202312138728323, |
| "grad_norm": 1.1074464249197975, |
| "learning_rate": 1.9999002572392253e-05, |
| "loss": 0.6768, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.523121387283237, |
| "grad_norm": 1.1267450710175495, |
| "learning_rate": 1.9998697244040062e-05, |
| "loss": 0.6909, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5260115606936416, |
| "grad_norm": 1.2160503421027011, |
| "learning_rate": 1.9998351208997734e-05, |
| "loss": 0.686, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5289017341040463, |
| "grad_norm": 1.2058488485981815, |
| "learning_rate": 1.9997964468674055e-05, |
| "loss": 0.6785, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5317919075144508, |
| "grad_norm": 1.4265640706372753, |
| "learning_rate": 1.999753702464352e-05, |
| "loss": 0.6852, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5346820809248555, |
| "grad_norm": 1.3345764329808851, |
| "learning_rate": 1.9997068878646332e-05, |
| "loss": 0.686, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5375722543352601, |
| "grad_norm": 1.119659843212298, |
| "learning_rate": 1.999656003258842e-05, |
| "loss": 0.6994, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5404624277456648, |
| "grad_norm": 1.4079713400525928, |
| "learning_rate": 1.9996010488541384e-05, |
| "loss": 0.6895, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5433526011560693, |
| "grad_norm": 1.066543075627955, |
| "learning_rate": 1.9995420248742534e-05, |
| "loss": 0.6929, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.546242774566474, |
| "grad_norm": 1.1769931562935132, |
| "learning_rate": 1.9994789315594854e-05, |
| "loss": 0.6971, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.5491329479768786, |
| "grad_norm": 1.0303545526204303, |
| "learning_rate": 1.9994117691667e-05, |
| "loss": 0.6864, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5520231213872833, |
| "grad_norm": 1.1897764422644492, |
| "learning_rate": 1.999340537969329e-05, |
| "loss": 0.6997, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.5549132947976878, |
| "grad_norm": 1.0634156313689362, |
| "learning_rate": 1.9992652382573677e-05, |
| "loss": 0.6858, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5578034682080925, |
| "grad_norm": 1.2297353763416312, |
| "learning_rate": 1.999185870337377e-05, |
| "loss": 0.7016, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5606936416184971, |
| "grad_norm": 0.9673042975439411, |
| "learning_rate": 1.9991024345324793e-05, |
| "loss": 0.6755, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5635838150289018, |
| "grad_norm": 1.3338585851272673, |
| "learning_rate": 1.9990149311823586e-05, |
| "loss": 0.6709, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5664739884393064, |
| "grad_norm": 1.293864196693847, |
| "learning_rate": 1.9989233606432587e-05, |
| "loss": 0.6916, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.569364161849711, |
| "grad_norm": 1.0355424413471204, |
| "learning_rate": 1.9988277232879806e-05, |
| "loss": 0.6607, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.5722543352601156, |
| "grad_norm": 1.113803363521732, |
| "learning_rate": 1.998728019505884e-05, |
| "loss": 0.6693, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.5751445086705202, |
| "grad_norm": 1.1763066495609689, |
| "learning_rate": 1.9986242497028823e-05, |
| "loss": 0.6801, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5780346820809249, |
| "grad_norm": 1.112264303614586, |
| "learning_rate": 1.9985164143014433e-05, |
| "loss": 0.6726, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5809248554913294, |
| "grad_norm": 1.3725438934255987, |
| "learning_rate": 1.9984045137405864e-05, |
| "loss": 0.6865, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5838150289017341, |
| "grad_norm": 1.0494739786278526, |
| "learning_rate": 1.9982885484758813e-05, |
| "loss": 0.6453, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5867052023121387, |
| "grad_norm": 0.9977419788918668, |
| "learning_rate": 1.9981685189794457e-05, |
| "loss": 0.6843, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5895953757225434, |
| "grad_norm": 1.0197936450519736, |
| "learning_rate": 1.9980444257399428e-05, |
| "loss": 0.6981, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5924855491329479, |
| "grad_norm": 0.8579217853259621, |
| "learning_rate": 1.9979162692625817e-05, |
| "loss": 0.6855, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5953757225433526, |
| "grad_norm": 1.0477044603584735, |
| "learning_rate": 1.9977840500691133e-05, |
| "loss": 0.6869, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5982658959537572, |
| "grad_norm": 0.9027615423442625, |
| "learning_rate": 1.9976477686978274e-05, |
| "loss": 0.6627, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.6011560693641619, |
| "grad_norm": 1.1728694608856245, |
| "learning_rate": 1.9975074257035533e-05, |
| "loss": 0.6831, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6040462427745664, |
| "grad_norm": 0.830419992847188, |
| "learning_rate": 1.9973630216576547e-05, |
| "loss": 0.6689, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6069364161849711, |
| "grad_norm": 1.0672356976307946, |
| "learning_rate": 1.9972145571480296e-05, |
| "loss": 0.6975, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6098265895953757, |
| "grad_norm": 0.9508554177191132, |
| "learning_rate": 1.997062032779107e-05, |
| "loss": 0.6528, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6127167630057804, |
| "grad_norm": 0.7800609305235056, |
| "learning_rate": 1.9969054491718437e-05, |
| "loss": 0.6849, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.615606936416185, |
| "grad_norm": 1.0095266619422791, |
| "learning_rate": 1.9967448069637224e-05, |
| "loss": 0.6746, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6184971098265896, |
| "grad_norm": 0.8623507582546789, |
| "learning_rate": 1.9965801068087504e-05, |
| "loss": 0.6556, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6213872832369942, |
| "grad_norm": 0.8697052589621063, |
| "learning_rate": 1.996411349377454e-05, |
| "loss": 0.6805, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6242774566473989, |
| "grad_norm": 0.9633918268862216, |
| "learning_rate": 1.9962385353568787e-05, |
| "loss": 0.6675, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6271676300578035, |
| "grad_norm": 1.0957605776766808, |
| "learning_rate": 1.9960616654505852e-05, |
| "loss": 0.6406, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.630057803468208, |
| "grad_norm": 0.9991877306193689, |
| "learning_rate": 1.9958807403786452e-05, |
| "loss": 0.6661, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.6329479768786127, |
| "grad_norm": 0.8868819421389161, |
| "learning_rate": 1.995695760877642e-05, |
| "loss": 0.6556, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.6358381502890174, |
| "grad_norm": 0.8810851372838674, |
| "learning_rate": 1.9955067277006633e-05, |
| "loss": 0.6687, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.638728323699422, |
| "grad_norm": 0.8373803462646919, |
| "learning_rate": 1.9953136416173005e-05, |
| "loss": 0.6872, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.6416184971098265, |
| "grad_norm": 0.8240195012502505, |
| "learning_rate": 1.9951165034136465e-05, |
| "loss": 0.6606, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6445086705202312, |
| "grad_norm": 0.8497327904586488, |
| "learning_rate": 1.9949153138922897e-05, |
| "loss": 0.6727, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.6473988439306358, |
| "grad_norm": 0.9574664025504449, |
| "learning_rate": 1.9947100738723126e-05, |
| "loss": 0.6903, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6502890173410405, |
| "grad_norm": 0.7959314545389815, |
| "learning_rate": 1.9945007841892886e-05, |
| "loss": 0.648, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.653179190751445, |
| "grad_norm": 0.8327210631747088, |
| "learning_rate": 1.994287445695277e-05, |
| "loss": 0.6723, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6560693641618497, |
| "grad_norm": 0.8491750263082852, |
| "learning_rate": 1.9940700592588228e-05, |
| "loss": 0.6951, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.6589595375722543, |
| "grad_norm": 0.9783543443859294, |
| "learning_rate": 1.9938486257649483e-05, |
| "loss": 0.6521, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.661849710982659, |
| "grad_norm": 0.8848987100308845, |
| "learning_rate": 1.9936231461151535e-05, |
| "loss": 0.6786, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.6647398843930635, |
| "grad_norm": 0.880197695167373, |
| "learning_rate": 1.9933936212274115e-05, |
| "loss": 0.6619, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6676300578034682, |
| "grad_norm": 1.0154012991829307, |
| "learning_rate": 1.9931600520361637e-05, |
| "loss": 0.6549, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.6705202312138728, |
| "grad_norm": 0.9283345521485898, |
| "learning_rate": 1.9929224394923167e-05, |
| "loss": 0.6781, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.6734104046242775, |
| "grad_norm": 1.015308580501105, |
| "learning_rate": 1.9926807845632385e-05, |
| "loss": 0.6814, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.6763005780346821, |
| "grad_norm": 0.9736049842517275, |
| "learning_rate": 1.9924350882327545e-05, |
| "loss": 0.6834, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.6791907514450867, |
| "grad_norm": 0.9968601538853321, |
| "learning_rate": 1.992185351501144e-05, |
| "loss": 0.6735, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6820809248554913, |
| "grad_norm": 0.9532926457310584, |
| "learning_rate": 1.9919315753851343e-05, |
| "loss": 0.6593, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.684971098265896, |
| "grad_norm": 1.2870145989902115, |
| "learning_rate": 1.991673760917899e-05, |
| "loss": 0.67, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.6878612716763006, |
| "grad_norm": 1.1226345328773792, |
| "learning_rate": 1.991411909149052e-05, |
| "loss": 0.6785, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.6907514450867052, |
| "grad_norm": 1.204681411549356, |
| "learning_rate": 1.9911460211446445e-05, |
| "loss": 0.6795, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6936416184971098, |
| "grad_norm": 1.1282608884854723, |
| "learning_rate": 1.990876097987159e-05, |
| "loss": 0.6641, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6965317919075145, |
| "grad_norm": 1.2759929528296277, |
| "learning_rate": 1.9906021407755064e-05, |
| "loss": 0.6738, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.6994219653179191, |
| "grad_norm": 1.0879712951989373, |
| "learning_rate": 1.990324150625022e-05, |
| "loss": 0.6688, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.7023121387283237, |
| "grad_norm": 1.4403079444759717, |
| "learning_rate": 1.9900421286674582e-05, |
| "loss": 0.6769, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.7052023121387283, |
| "grad_norm": 1.191228603950231, |
| "learning_rate": 1.9897560760509834e-05, |
| "loss": 0.667, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.708092485549133, |
| "grad_norm": 1.275787843674223, |
| "learning_rate": 1.989465993940174e-05, |
| "loss": 0.6734, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7109826589595376, |
| "grad_norm": 1.1457956849938589, |
| "learning_rate": 1.9891718835160128e-05, |
| "loss": 0.662, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7138728323699421, |
| "grad_norm": 1.2858432685195738, |
| "learning_rate": 1.9888737459758817e-05, |
| "loss": 0.6875, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7167630057803468, |
| "grad_norm": 0.9398046224404962, |
| "learning_rate": 1.9885715825335584e-05, |
| "loss": 0.6548, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7196531791907514, |
| "grad_norm": 1.6358172197686662, |
| "learning_rate": 1.9882653944192095e-05, |
| "loss": 0.6954, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7225433526011561, |
| "grad_norm": 0.9961704138959488, |
| "learning_rate": 1.9879551828793893e-05, |
| "loss": 0.6615, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7254335260115607, |
| "grad_norm": 2.0842179603061424, |
| "learning_rate": 1.98764094917703e-05, |
| "loss": 0.6884, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.7283236994219653, |
| "grad_norm": 2.330063826601232, |
| "learning_rate": 1.9873226945914394e-05, |
| "loss": 0.6705, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7312138728323699, |
| "grad_norm": 0.8889229347289838, |
| "learning_rate": 1.9870004204182962e-05, |
| "loss": 0.6749, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.7341040462427746, |
| "grad_norm": 2.0009447904173965, |
| "learning_rate": 1.986674127969642e-05, |
| "loss": 0.6542, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.7369942196531792, |
| "grad_norm": 1.8777338758058673, |
| "learning_rate": 1.986343818573879e-05, |
| "loss": 0.6766, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.7398843930635838, |
| "grad_norm": 1.2106551936340486, |
| "learning_rate": 1.986009493575763e-05, |
| "loss": 0.6674, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7427745664739884, |
| "grad_norm": 1.576538210921238, |
| "learning_rate": 1.985671154336396e-05, |
| "loss": 0.6727, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.7456647398843931, |
| "grad_norm": 0.8573792365014181, |
| "learning_rate": 1.9853288022332267e-05, |
| "loss": 0.669, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7485549132947977, |
| "grad_norm": 1.8092997648187275, |
| "learning_rate": 1.9849824386600372e-05, |
| "loss": 0.6675, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.7514450867052023, |
| "grad_norm": 1.5944489420897012, |
| "learning_rate": 1.984632065026943e-05, |
| "loss": 0.675, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7543352601156069, |
| "grad_norm": 0.9900937681751832, |
| "learning_rate": 1.9842776827603858e-05, |
| "loss": 0.6704, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.7572254335260116, |
| "grad_norm": 1.5977627858005359, |
| "learning_rate": 1.9839192933031253e-05, |
| "loss": 0.6922, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.7601156069364162, |
| "grad_norm": 1.273943078477883, |
| "learning_rate": 1.9835568981142376e-05, |
| "loss": 0.6667, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.7630057803468208, |
| "grad_norm": 0.951797646828311, |
| "learning_rate": 1.9831904986691045e-05, |
| "loss": 0.6702, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.7658959537572254, |
| "grad_norm": 1.4628752552787214, |
| "learning_rate": 1.9828200964594125e-05, |
| "loss": 0.6732, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7687861271676301, |
| "grad_norm": 1.0303268490984354, |
| "learning_rate": 1.9824456929931417e-05, |
| "loss": 0.6823, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.7716763005780347, |
| "grad_norm": 1.0447939758438316, |
| "learning_rate": 1.982067289794564e-05, |
| "loss": 0.6739, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.7745664739884393, |
| "grad_norm": 0.9399400681706109, |
| "learning_rate": 1.9816848884042342e-05, |
| "loss": 0.6619, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7774566473988439, |
| "grad_norm": 1.001440688633843, |
| "learning_rate": 1.9812984903789845e-05, |
| "loss": 0.6618, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.7803468208092486, |
| "grad_norm": 0.8766732359528921, |
| "learning_rate": 1.9809080972919182e-05, |
| "loss": 0.6831, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7832369942196532, |
| "grad_norm": 1.059517462713445, |
| "learning_rate": 1.9805137107324034e-05, |
| "loss": 0.6717, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.7861271676300579, |
| "grad_norm": 0.9174226778841795, |
| "learning_rate": 1.9801153323060667e-05, |
| "loss": 0.6666, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.7890173410404624, |
| "grad_norm": 0.9500149856474327, |
| "learning_rate": 1.9797129636347857e-05, |
| "loss": 0.6756, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.791907514450867, |
| "grad_norm": 1.1277366658505596, |
| "learning_rate": 1.979306606356684e-05, |
| "loss": 0.6678, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.7947976878612717, |
| "grad_norm": 0.8597685475695553, |
| "learning_rate": 1.9788962621261224e-05, |
| "loss": 0.6746, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7976878612716763, |
| "grad_norm": 0.9175327448204571, |
| "learning_rate": 1.9784819326136947e-05, |
| "loss": 0.6663, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8005780346820809, |
| "grad_norm": 0.8783811136618952, |
| "learning_rate": 1.978063619506219e-05, |
| "loss": 0.6849, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.8034682080924855, |
| "grad_norm": 0.9725568607677347, |
| "learning_rate": 1.9776413245067308e-05, |
| "loss": 0.6833, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8063583815028902, |
| "grad_norm": 1.0920362225909976, |
| "learning_rate": 1.9772150493344776e-05, |
| "loss": 0.6777, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.8092485549132948, |
| "grad_norm": 0.8283527251152283, |
| "learning_rate": 1.9767847957249107e-05, |
| "loss": 0.6652, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8121387283236994, |
| "grad_norm": 0.9586230775456167, |
| "learning_rate": 1.9763505654296782e-05, |
| "loss": 0.6551, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.815028901734104, |
| "grad_norm": 0.9276724961475561, |
| "learning_rate": 1.9759123602166183e-05, |
| "loss": 0.6629, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8179190751445087, |
| "grad_norm": 0.8478213794038515, |
| "learning_rate": 1.9754701818697515e-05, |
| "loss": 0.6754, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.8208092485549133, |
| "grad_norm": 0.8916244711990654, |
| "learning_rate": 1.9750240321892743e-05, |
| "loss": 0.6653, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8236994219653179, |
| "grad_norm": 1.0196060923330346, |
| "learning_rate": 1.9745739129915508e-05, |
| "loss": 0.658, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8265895953757225, |
| "grad_norm": 0.7769568302916775, |
| "learning_rate": 1.9741198261091067e-05, |
| "loss": 0.6569, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8294797687861272, |
| "grad_norm": 1.0029214919025133, |
| "learning_rate": 1.973661773390619e-05, |
| "loss": 0.6749, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.8323699421965318, |
| "grad_norm": 0.8327215361003341, |
| "learning_rate": 1.9731997567009125e-05, |
| "loss": 0.6593, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8352601156069365, |
| "grad_norm": 0.9901656491825068, |
| "learning_rate": 1.9727337779209496e-05, |
| "loss": 0.6544, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.838150289017341, |
| "grad_norm": 0.9449653589975971, |
| "learning_rate": 1.9722638389478218e-05, |
| "loss": 0.6785, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8410404624277457, |
| "grad_norm": 0.9008910405778384, |
| "learning_rate": 1.9717899416947452e-05, |
| "loss": 0.6581, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.8439306358381503, |
| "grad_norm": 0.9359751850845522, |
| "learning_rate": 1.9713120880910495e-05, |
| "loss": 0.6596, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.846820809248555, |
| "grad_norm": 1.019479157630927, |
| "learning_rate": 1.9708302800821724e-05, |
| "loss": 0.6785, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.8497109826589595, |
| "grad_norm": 0.807717813745512, |
| "learning_rate": 1.97034451962965e-05, |
| "loss": 0.6578, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.8526011560693642, |
| "grad_norm": 0.914205212195178, |
| "learning_rate": 1.9698548087111104e-05, |
| "loss": 0.6594, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.8554913294797688, |
| "grad_norm": 0.8701390512807908, |
| "learning_rate": 1.9693611493202637e-05, |
| "loss": 0.6449, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.8583815028901735, |
| "grad_norm": 0.941351078554254, |
| "learning_rate": 1.9688635434668962e-05, |
| "loss": 0.6832, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.861271676300578, |
| "grad_norm": 0.8193353541415879, |
| "learning_rate": 1.9683619931768606e-05, |
| "loss": 0.657, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.8641618497109826, |
| "grad_norm": 0.8451216158980733, |
| "learning_rate": 1.967856500492068e-05, |
| "loss": 0.6463, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.8670520231213873, |
| "grad_norm": 1.014043705928693, |
| "learning_rate": 1.96734706747048e-05, |
| "loss": 0.6678, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.869942196531792, |
| "grad_norm": 0.9180712848357285, |
| "learning_rate": 1.9668336961861004e-05, |
| "loss": 0.6571, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.8728323699421965, |
| "grad_norm": 0.9771471615649878, |
| "learning_rate": 1.966316388728965e-05, |
| "loss": 0.6841, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.8757225433526011, |
| "grad_norm": 0.9172877654678109, |
| "learning_rate": 1.9657951472051363e-05, |
| "loss": 0.6673, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.8786127167630058, |
| "grad_norm": 0.7838927377379438, |
| "learning_rate": 1.9652699737366924e-05, |
| "loss": 0.6799, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.8815028901734104, |
| "grad_norm": 1.032296971095877, |
| "learning_rate": 1.964740870461719e-05, |
| "loss": 0.656, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.884393063583815, |
| "grad_norm": 0.9338887493109845, |
| "learning_rate": 1.9642078395343015e-05, |
| "loss": 0.6778, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.8872832369942196, |
| "grad_norm": 1.0366456810470164, |
| "learning_rate": 1.9636708831245147e-05, |
| "loss": 0.6687, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.8901734104046243, |
| "grad_norm": 1.0352798236975347, |
| "learning_rate": 1.9631300034184155e-05, |
| "loss": 0.6614, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.8930635838150289, |
| "grad_norm": 1.0719560333820592, |
| "learning_rate": 1.9625852026180325e-05, |
| "loss": 0.6511, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.8959537572254336, |
| "grad_norm": 1.3788790895580383, |
| "learning_rate": 1.962036482941359e-05, |
| "loss": 0.6278, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8988439306358381, |
| "grad_norm": 1.2490418926438716, |
| "learning_rate": 1.9614838466223415e-05, |
| "loss": 0.6527, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.9017341040462428, |
| "grad_norm": 1.0890036074128466, |
| "learning_rate": 1.9609272959108737e-05, |
| "loss": 0.6713, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.9046242774566474, |
| "grad_norm": 1.2685369636239598, |
| "learning_rate": 1.9603668330727835e-05, |
| "loss": 0.6687, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.9075144508670521, |
| "grad_norm": 0.9375781230616366, |
| "learning_rate": 1.9598024603898272e-05, |
| "loss": 0.6601, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.9104046242774566, |
| "grad_norm": 0.9805983483939692, |
| "learning_rate": 1.959234180159679e-05, |
| "loss": 0.6636, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.9132947976878613, |
| "grad_norm": 0.8345215644291492, |
| "learning_rate": 1.95866199469592e-05, |
| "loss": 0.6662, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.9161849710982659, |
| "grad_norm": 1.1419645380118422, |
| "learning_rate": 1.9580859063280326e-05, |
| "loss": 0.6721, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.9190751445086706, |
| "grad_norm": 0.8518284778478276, |
| "learning_rate": 1.9575059174013865e-05, |
| "loss": 0.6452, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.9219653179190751, |
| "grad_norm": 1.0259765421703095, |
| "learning_rate": 1.9569220302772324e-05, |
| "loss": 0.68, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.9248554913294798, |
| "grad_norm": 0.9338132523901228, |
| "learning_rate": 1.9563342473326915e-05, |
| "loss": 0.6669, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9277456647398844, |
| "grad_norm": 0.780487742190609, |
| "learning_rate": 1.955742570960745e-05, |
| "loss": 0.6569, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.930635838150289, |
| "grad_norm": 1.063738587393258, |
| "learning_rate": 1.9551470035702254e-05, |
| "loss": 0.6612, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9335260115606936, |
| "grad_norm": 0.9243223458852142, |
| "learning_rate": 1.9545475475858068e-05, |
| "loss": 0.6542, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.9364161849710982, |
| "grad_norm": 0.8371252573118421, |
| "learning_rate": 1.9539442054479932e-05, |
| "loss": 0.6642, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9393063583815029, |
| "grad_norm": 1.0489130551960444, |
| "learning_rate": 1.953336979613112e-05, |
| "loss": 0.6582, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9421965317919075, |
| "grad_norm": 0.8634370818517902, |
| "learning_rate": 1.952725872553299e-05, |
| "loss": 0.6517, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.9450867052023122, |
| "grad_norm": 0.996549774465454, |
| "learning_rate": 1.9521108867564945e-05, |
| "loss": 0.6567, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.9479768786127167, |
| "grad_norm": 0.8744840085025122, |
| "learning_rate": 1.9514920247264272e-05, |
| "loss": 0.6778, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.9508670520231214, |
| "grad_norm": 0.7679718534975754, |
| "learning_rate": 1.9508692889826085e-05, |
| "loss": 0.641, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.953757225433526, |
| "grad_norm": 1.0185045543013254, |
| "learning_rate": 1.9502426820603194e-05, |
| "loss": 0.6538, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.9566473988439307, |
| "grad_norm": 0.7892516767280732, |
| "learning_rate": 1.9496122065106017e-05, |
| "loss": 0.6689, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.9595375722543352, |
| "grad_norm": 0.9977508723060716, |
| "learning_rate": 1.948977864900247e-05, |
| "loss": 0.687, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.9624277456647399, |
| "grad_norm": 0.8319535102491462, |
| "learning_rate": 1.9483396598117865e-05, |
| "loss": 0.6688, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.9653179190751445, |
| "grad_norm": 0.799972427515052, |
| "learning_rate": 1.9476975938434807e-05, |
| "loss": 0.6519, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.9682080924855492, |
| "grad_norm": 0.8287106660579454, |
| "learning_rate": 1.9470516696093075e-05, |
| "loss": 0.6804, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.9710982658959537, |
| "grad_norm": 0.8408671077437165, |
| "learning_rate": 1.946401889738954e-05, |
| "loss": 0.6735, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.9739884393063584, |
| "grad_norm": 0.778788680444376, |
| "learning_rate": 1.945748256877803e-05, |
| "loss": 0.6424, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.976878612716763, |
| "grad_norm": 0.8202129621633388, |
| "learning_rate": 1.9450907736869244e-05, |
| "loss": 0.6616, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.9797687861271677, |
| "grad_norm": 0.7097697012893457, |
| "learning_rate": 1.9444294428430637e-05, |
| "loss": 0.6468, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.9826589595375722, |
| "grad_norm": 0.8208343688250482, |
| "learning_rate": 1.9437642670386303e-05, |
| "loss": 0.6354, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9855491329479769, |
| "grad_norm": 0.8621583549435662, |
| "learning_rate": 1.9430952489816876e-05, |
| "loss": 0.667, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.9884393063583815, |
| "grad_norm": 0.7289222420622824, |
| "learning_rate": 1.9424223913959415e-05, |
| "loss": 0.6473, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.9913294797687862, |
| "grad_norm": 0.7902494174858328, |
| "learning_rate": 1.9417456970207294e-05, |
| "loss": 0.6506, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.9942196531791907, |
| "grad_norm": 0.8836340418645006, |
| "learning_rate": 1.941065168611009e-05, |
| "loss": 0.6588, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.9971098265895953, |
| "grad_norm": 0.8029794711667372, |
| "learning_rate": 1.9403808089373473e-05, |
| "loss": 0.6514, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7974385031738896, |
| "learning_rate": 1.9396926207859085e-05, |
| "loss": 0.6676, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.0028901734104045, |
| "grad_norm": 0.759112688070958, |
| "learning_rate": 1.9390006069584443e-05, |
| "loss": 0.6119, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.0057803468208093, |
| "grad_norm": 0.8862274468565814, |
| "learning_rate": 1.938304770272281e-05, |
| "loss": 0.6021, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.0086705202312138, |
| "grad_norm": 0.7814352524501207, |
| "learning_rate": 1.9376051135603082e-05, |
| "loss": 0.6044, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.0115606936416186, |
| "grad_norm": 0.9449437554187625, |
| "learning_rate": 1.936901639670968e-05, |
| "loss": 0.579, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.0144508670520231, |
| "grad_norm": 0.8063769589686405, |
| "learning_rate": 1.9361943514682426e-05, |
| "loss": 0.6396, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.0173410404624277, |
| "grad_norm": 0.854490910174394, |
| "learning_rate": 1.935483251831644e-05, |
| "loss": 0.5835, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.0202312138728324, |
| "grad_norm": 0.8605749465708546, |
| "learning_rate": 1.9347683436562e-05, |
| "loss": 0.5972, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.023121387283237, |
| "grad_norm": 0.8097614763630925, |
| "learning_rate": 1.9340496298524444e-05, |
| "loss": 0.5751, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.0260115606936415, |
| "grad_norm": 0.8683551593034882, |
| "learning_rate": 1.9333271133464048e-05, |
| "loss": 0.5863, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.0289017341040463, |
| "grad_norm": 0.7888873961905672, |
| "learning_rate": 1.932600797079589e-05, |
| "loss": 0.5917, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.0317919075144508, |
| "grad_norm": 0.8782926408362381, |
| "learning_rate": 1.9318706840089766e-05, |
| "loss": 0.5962, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.0346820809248556, |
| "grad_norm": 0.8178114356790833, |
| "learning_rate": 1.9311367771070025e-05, |
| "loss": 0.5864, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.0375722543352601, |
| "grad_norm": 0.8629398360113137, |
| "learning_rate": 1.9303990793615485e-05, |
| "loss": 0.5984, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.0404624277456647, |
| "grad_norm": 0.6814483238738642, |
| "learning_rate": 1.9296575937759293e-05, |
| "loss": 0.5891, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.0433526011560694, |
| "grad_norm": 0.7624866277749663, |
| "learning_rate": 1.92891232336888e-05, |
| "loss": 0.5957, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.046242774566474, |
| "grad_norm": 0.6997601771348588, |
| "learning_rate": 1.928163271174546e-05, |
| "loss": 0.5858, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.0491329479768785, |
| "grad_norm": 0.6875497438405674, |
| "learning_rate": 1.9274104402424672e-05, |
| "loss": 0.5929, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.0520231213872833, |
| "grad_norm": 0.7298576901663, |
| "learning_rate": 1.926653833637569e-05, |
| "loss": 0.5528, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.0549132947976878, |
| "grad_norm": 0.7597749551660024, |
| "learning_rate": 1.9258934544401474e-05, |
| "loss": 0.5807, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.0578034682080926, |
| "grad_norm": 0.8341419746232556, |
| "learning_rate": 1.925129305745858e-05, |
| "loss": 0.6007, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.060693641618497, |
| "grad_norm": 0.8363319211825032, |
| "learning_rate": 1.924361390665702e-05, |
| "loss": 0.5866, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.0635838150289016, |
| "grad_norm": 0.8341959472463382, |
| "learning_rate": 1.9235897123260155e-05, |
| "loss": 0.5955, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.0664739884393064, |
| "grad_norm": 0.8133892730497435, |
| "learning_rate": 1.9228142738684546e-05, |
| "loss": 0.5784, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.069364161849711, |
| "grad_norm": 1.2777260488694913, |
| "learning_rate": 1.922035078449984e-05, |
| "loss": 0.5847, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.0722543352601157, |
| "grad_norm": 0.8653900558337858, |
| "learning_rate": 1.921252129242863e-05, |
| "loss": 0.5924, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.0751445086705202, |
| "grad_norm": 0.9385954594567523, |
| "learning_rate": 1.9204654294346345e-05, |
| "loss": 0.6041, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.0780346820809248, |
| "grad_norm": 0.87509462682995, |
| "learning_rate": 1.91967498222811e-05, |
| "loss": 0.5982, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.0809248554913296, |
| "grad_norm": 0.94107834956112, |
| "learning_rate": 1.918880790841358e-05, |
| "loss": 0.5929, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.083815028901734, |
| "grad_norm": 1.039879945508619, |
| "learning_rate": 1.91808285850769e-05, |
| "loss": 0.5743, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0867052023121386, |
| "grad_norm": 0.9683102748368639, |
| "learning_rate": 1.917281188475647e-05, |
| "loss": 0.5854, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.0895953757225434, |
| "grad_norm": 0.9833933562897522, |
| "learning_rate": 1.9164757840089888e-05, |
| "loss": 0.5907, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.092485549132948, |
| "grad_norm": 0.8360949807989344, |
| "learning_rate": 1.9156666483866764e-05, |
| "loss": 0.5704, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.0953757225433527, |
| "grad_norm": 0.8195838450969974, |
| "learning_rate": 1.9148537849028624e-05, |
| "loss": 0.5908, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.0982658959537572, |
| "grad_norm": 0.8210293235148685, |
| "learning_rate": 1.9140371968668767e-05, |
| "loss": 0.5777, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.1011560693641618, |
| "grad_norm": 0.8082878324842184, |
| "learning_rate": 1.9132168876032115e-05, |
| "loss": 0.5913, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.1040462427745665, |
| "grad_norm": 0.7053348622324761, |
| "learning_rate": 1.9123928604515093e-05, |
| "loss": 0.5811, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.106936416184971, |
| "grad_norm": 0.8393957205841531, |
| "learning_rate": 1.9115651187665496e-05, |
| "loss": 0.5947, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.1098265895953756, |
| "grad_norm": 0.8570047895468481, |
| "learning_rate": 1.9107336659182335e-05, |
| "loss": 0.5966, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.1127167630057804, |
| "grad_norm": 0.7561662655121975, |
| "learning_rate": 1.909898505291571e-05, |
| "loss": 0.5923, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.115606936416185, |
| "grad_norm": 0.8914708422505475, |
| "learning_rate": 1.909059640286668e-05, |
| "loss": 0.5947, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.1184971098265897, |
| "grad_norm": 0.7647763521555886, |
| "learning_rate": 1.9082170743187107e-05, |
| "loss": 0.5834, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.1213872832369942, |
| "grad_norm": 0.7433559499879944, |
| "learning_rate": 1.9073708108179536e-05, |
| "loss": 0.6127, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.1242774566473988, |
| "grad_norm": 0.7292753284766555, |
| "learning_rate": 1.9065208532297043e-05, |
| "loss": 0.5756, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.1271676300578035, |
| "grad_norm": 0.6464374837535348, |
| "learning_rate": 1.9056672050143087e-05, |
| "loss": 0.5833, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.130057803468208, |
| "grad_norm": 0.7753730546050381, |
| "learning_rate": 1.90480986964714e-05, |
| "loss": 0.5922, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.1329479768786128, |
| "grad_norm": 0.6435645880482299, |
| "learning_rate": 1.903948850618581e-05, |
| "loss": 0.6059, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.1358381502890174, |
| "grad_norm": 0.7434435721364454, |
| "learning_rate": 1.9030841514340116e-05, |
| "loss": 0.602, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.138728323699422, |
| "grad_norm": 60.08083679049716, |
| "learning_rate": 1.9022157756137948e-05, |
| "loss": 0.5944, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.1416184971098267, |
| "grad_norm": 37.44031875571649, |
| "learning_rate": 1.9013437266932616e-05, |
| "loss": 0.6008, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.1445086705202312, |
| "grad_norm": 1.6749714439835397, |
| "learning_rate": 1.900468008222697e-05, |
| "loss": 0.5846, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.147398843930636, |
| "grad_norm": 0.9344473190434486, |
| "learning_rate": 1.8995886237673254e-05, |
| "loss": 0.598, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.1502890173410405, |
| "grad_norm": 1.0836943912001913, |
| "learning_rate": 1.8987055769072973e-05, |
| "loss": 0.5762, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.153179190751445, |
| "grad_norm": 1.2528088342761161, |
| "learning_rate": 1.897818871237671e-05, |
| "loss": 0.5925, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.1560693641618498, |
| "grad_norm": 0.9917984973637564, |
| "learning_rate": 1.8969285103684033e-05, |
| "loss": 0.5671, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1589595375722543, |
| "grad_norm": 1.5567160801823279, |
| "learning_rate": 1.89603449792433e-05, |
| "loss": 0.5696, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.1618497109826589, |
| "grad_norm": 1.4218814005716265, |
| "learning_rate": 1.8951368375451547e-05, |
| "loss": 0.5835, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.1647398843930636, |
| "grad_norm": 1.073972437970269, |
| "learning_rate": 1.894235532885431e-05, |
| "loss": 0.579, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.1676300578034682, |
| "grad_norm": 1.404716962457925, |
| "learning_rate": 1.8933305876145508e-05, |
| "loss": 0.5946, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.1705202312138727, |
| "grad_norm": 0.7867748017486164, |
| "learning_rate": 1.8924220054167258e-05, |
| "loss": 0.6072, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.1734104046242775, |
| "grad_norm": 1.2268275967968227, |
| "learning_rate": 1.891509789990976e-05, |
| "loss": 0.6026, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.176300578034682, |
| "grad_norm": 0.782129228131316, |
| "learning_rate": 1.8905939450511117e-05, |
| "loss": 0.6027, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.1791907514450868, |
| "grad_norm": 1.0957712387505731, |
| "learning_rate": 1.889674474325721e-05, |
| "loss": 0.5833, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.1820809248554913, |
| "grad_norm": 0.7457776282138502, |
| "learning_rate": 1.888751381558152e-05, |
| "loss": 0.5905, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.1849710982658959, |
| "grad_norm": 0.9771507342260098, |
| "learning_rate": 1.8878246705064995e-05, |
| "loss": 0.592, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.1878612716763006, |
| "grad_norm": 0.7874916056588455, |
| "learning_rate": 1.886894344943589e-05, |
| "loss": 0.6106, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.1907514450867052, |
| "grad_norm": 0.8386954062604738, |
| "learning_rate": 1.8859604086569616e-05, |
| "loss": 0.5819, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.19364161849711, |
| "grad_norm": 0.6800465330568464, |
| "learning_rate": 1.885022865448858e-05, |
| "loss": 0.579, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.1965317919075145, |
| "grad_norm": 0.84906552256597, |
| "learning_rate": 1.8840817191362033e-05, |
| "loss": 0.5783, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.199421965317919, |
| "grad_norm": 0.7608413013366039, |
| "learning_rate": 1.883136973550592e-05, |
| "loss": 0.5881, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.2023121387283238, |
| "grad_norm": 0.7850338345722294, |
| "learning_rate": 1.8821886325382718e-05, |
| "loss": 0.5625, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.2052023121387283, |
| "grad_norm": 0.7620713234702672, |
| "learning_rate": 1.881236699960128e-05, |
| "loss": 0.6098, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.208092485549133, |
| "grad_norm": 0.673183012859137, |
| "learning_rate": 1.8802811796916677e-05, |
| "loss": 0.5877, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.2109826589595376, |
| "grad_norm": 0.725146027963119, |
| "learning_rate": 1.879322075623005e-05, |
| "loss": 0.5781, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.2138728323699421, |
| "grad_norm": 0.8111014658303358, |
| "learning_rate": 1.878359391658843e-05, |
| "loss": 0.5909, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.216763005780347, |
| "grad_norm": 0.7229365039244683, |
| "learning_rate": 1.8773931317184607e-05, |
| "loss": 0.581, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.2196531791907514, |
| "grad_norm": 0.7844203333806608, |
| "learning_rate": 1.876423299735695e-05, |
| "loss": 0.602, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.222543352601156, |
| "grad_norm": 0.7203643454120146, |
| "learning_rate": 1.875449899658925e-05, |
| "loss": 0.5783, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.2254335260115607, |
| "grad_norm": 0.7232973902101298, |
| "learning_rate": 1.8744729354510566e-05, |
| "loss": 0.5731, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.2283236994219653, |
| "grad_norm": 0.8824647184156448, |
| "learning_rate": 1.8734924110895056e-05, |
| "loss": 0.5805, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.2312138728323698, |
| "grad_norm": 0.7228862642569523, |
| "learning_rate": 1.8725083305661825e-05, |
| "loss": 0.5799, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.2341040462427746, |
| "grad_norm": 0.8780482452876094, |
| "learning_rate": 1.8715206978874753e-05, |
| "loss": 0.5855, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.2369942196531791, |
| "grad_norm": 0.9453380989200908, |
| "learning_rate": 1.870529517074234e-05, |
| "loss": 0.5794, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.239884393063584, |
| "grad_norm": 0.78977409227397, |
| "learning_rate": 1.869534792161752e-05, |
| "loss": 0.5858, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.2427745664739884, |
| "grad_norm": 0.8341640471700158, |
| "learning_rate": 1.8685365271997543e-05, |
| "loss": 0.5889, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.245664739884393, |
| "grad_norm": 0.8883116241060414, |
| "learning_rate": 1.8675347262523756e-05, |
| "loss": 0.5744, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.2485549132947977, |
| "grad_norm": 0.822861993276753, |
| "learning_rate": 1.866529393398148e-05, |
| "loss": 0.5805, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.2514450867052023, |
| "grad_norm": 0.8668081481118199, |
| "learning_rate": 1.8655205327299813e-05, |
| "loss": 0.5986, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.254335260115607, |
| "grad_norm": 0.7150804000976962, |
| "learning_rate": 1.864508148355149e-05, |
| "loss": 0.5978, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.2572254335260116, |
| "grad_norm": 0.9364709689963131, |
| "learning_rate": 1.8634922443952692e-05, |
| "loss": 0.5956, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.260115606936416, |
| "grad_norm": 0.73444038368573, |
| "learning_rate": 1.86247282498629e-05, |
| "loss": 0.5738, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.2630057803468209, |
| "grad_norm": 0.7427485980050539, |
| "learning_rate": 1.861449894278471e-05, |
| "loss": 0.5847, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.2658959537572254, |
| "grad_norm": 0.7263094084914291, |
| "learning_rate": 1.8604234564363667e-05, |
| "loss": 0.5709, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.2687861271676302, |
| "grad_norm": 0.6408013443679268, |
| "learning_rate": 1.8593935156388104e-05, |
| "loss": 0.5758, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.2716763005780347, |
| "grad_norm": 0.7502681132648505, |
| "learning_rate": 1.8583600760788967e-05, |
| "loss": 0.5942, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.2745664739884393, |
| "grad_norm": 0.745513544179955, |
| "learning_rate": 1.857323141963964e-05, |
| "loss": 0.6025, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.2774566473988438, |
| "grad_norm": 0.6942252533539747, |
| "learning_rate": 1.8562827175155777e-05, |
| "loss": 0.6167, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.2803468208092486, |
| "grad_norm": 0.718098643343022, |
| "learning_rate": 1.855238806969513e-05, |
| "loss": 0.5633, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.2832369942196533, |
| "grad_norm": 0.7839064869449918, |
| "learning_rate": 1.8541914145757383e-05, |
| "loss": 0.5997, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.2861271676300579, |
| "grad_norm": 0.7956894509101192, |
| "learning_rate": 1.853140544598397e-05, |
| "loss": 0.5964, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.2890173410404624, |
| "grad_norm": 0.7422825022983778, |
| "learning_rate": 1.8520862013157898e-05, |
| "loss": 0.5919, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.291907514450867, |
| "grad_norm": 0.7641838189194473, |
| "learning_rate": 1.8510283890203585e-05, |
| "loss": 0.587, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.2947976878612717, |
| "grad_norm": 0.7606612268729219, |
| "learning_rate": 1.8499671120186683e-05, |
| "loss": 0.5858, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.2976878612716762, |
| "grad_norm": 0.7372439820132116, |
| "learning_rate": 1.848902374631389e-05, |
| "loss": 0.5837, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.300578034682081, |
| "grad_norm": 0.967164565722023, |
| "learning_rate": 1.847834181193279e-05, |
| "loss": 0.6018, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.3034682080924855, |
| "grad_norm": 0.8255799111031907, |
| "learning_rate": 1.846762536053167e-05, |
| "loss": 0.6087, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.30635838150289, |
| "grad_norm": 0.663848287941645, |
| "learning_rate": 1.8456874435739337e-05, |
| "loss": 0.5849, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.3092485549132948, |
| "grad_norm": 0.8432857541018854, |
| "learning_rate": 1.8446089081324947e-05, |
| "loss": 0.5681, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.3121387283236994, |
| "grad_norm": 0.6729630606749915, |
| "learning_rate": 1.8435269341197836e-05, |
| "loss": 0.5841, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.3150289017341041, |
| "grad_norm": 0.7541439660338949, |
| "learning_rate": 1.8424415259407317e-05, |
| "loss": 0.5722, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.3179190751445087, |
| "grad_norm": 0.8533593226136065, |
| "learning_rate": 1.8413526880142524e-05, |
| "loss": 0.5863, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.3208092485549132, |
| "grad_norm": 0.7611155456663428, |
| "learning_rate": 1.8402604247732224e-05, |
| "loss": 0.6055, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.323699421965318, |
| "grad_norm": 0.8690834992386883, |
| "learning_rate": 1.839164740664462e-05, |
| "loss": 0.5986, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.3265895953757225, |
| "grad_norm": 0.8169643185444966, |
| "learning_rate": 1.8380656401487208e-05, |
| "loss": 0.6068, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.3294797687861273, |
| "grad_norm": 0.8979789289219052, |
| "learning_rate": 1.8369631277006556e-05, |
| "loss": 0.6058, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.3323699421965318, |
| "grad_norm": 0.681886344899434, |
| "learning_rate": 1.8358572078088144e-05, |
| "loss": 0.5847, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.3352601156069364, |
| "grad_norm": 0.9141104041541918, |
| "learning_rate": 1.8347478849756182e-05, |
| "loss": 0.564, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.3381502890173411, |
| "grad_norm": 0.6669758128756574, |
| "learning_rate": 1.8336351637173405e-05, |
| "loss": 0.5694, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.3410404624277457, |
| "grad_norm": 0.9401071775838993, |
| "learning_rate": 1.8325190485640924e-05, |
| "loss": 0.5824, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.3439306358381504, |
| "grad_norm": 0.7492669020477855, |
| "learning_rate": 1.8313995440598002e-05, |
| "loss": 0.5916, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.346820809248555, |
| "grad_norm": 0.7722595161498734, |
| "learning_rate": 1.830276654762191e-05, |
| "loss": 0.5841, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.3497109826589595, |
| "grad_norm": 0.770036690413647, |
| "learning_rate": 1.8291503852427698e-05, |
| "loss": 0.5941, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.352601156069364, |
| "grad_norm": 0.822537024839394, |
| "learning_rate": 1.8280207400868057e-05, |
| "loss": 0.6011, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.3554913294797688, |
| "grad_norm": 0.7717763755818814, |
| "learning_rate": 1.8268877238933084e-05, |
| "loss": 0.5704, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.3583815028901733, |
| "grad_norm": 0.6273504834608445, |
| "learning_rate": 1.825751341275013e-05, |
| "loss": 0.573, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.361271676300578, |
| "grad_norm": 0.7204175643195626, |
| "learning_rate": 1.8246115968583597e-05, |
| "loss": 0.5836, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.3641618497109826, |
| "grad_norm": 0.7107257229105907, |
| "learning_rate": 1.8234684952834758e-05, |
| "loss": 0.585, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.3670520231213872, |
| "grad_norm": 0.7589385988495859, |
| "learning_rate": 1.8223220412041552e-05, |
| "loss": 0.587, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.369942196531792, |
| "grad_norm": 0.8390223582562657, |
| "learning_rate": 1.821172239287841e-05, |
| "loss": 0.5989, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.3728323699421965, |
| "grad_norm": 0.7671563822376801, |
| "learning_rate": 1.8200190942156063e-05, |
| "loss": 0.5843, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.3757225433526012, |
| "grad_norm": 0.8165672732561593, |
| "learning_rate": 1.8188626106821346e-05, |
| "loss": 0.5922, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.3786127167630058, |
| "grad_norm": 0.7548591808890495, |
| "learning_rate": 1.817702793395701e-05, |
| "loss": 0.5714, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.3815028901734103, |
| "grad_norm": 0.7895371681175447, |
| "learning_rate": 1.816539647078153e-05, |
| "loss": 0.5876, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.384393063583815, |
| "grad_norm": 0.7362383319580541, |
| "learning_rate": 1.8153731764648907e-05, |
| "loss": 0.5896, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.3872832369942196, |
| "grad_norm": 0.8682834177952186, |
| "learning_rate": 1.8142033863048487e-05, |
| "loss": 0.5916, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.3901734104046244, |
| "grad_norm": 0.843342860143743, |
| "learning_rate": 1.8130302813604762e-05, |
| "loss": 0.5968, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.393063583815029, |
| "grad_norm": 0.9357147713641858, |
| "learning_rate": 1.8118538664077175e-05, |
| "loss": 0.5776, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.3959537572254335, |
| "grad_norm": 0.726516144204578, |
| "learning_rate": 1.810674146235992e-05, |
| "loss": 0.5837, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.3988439306358382, |
| "grad_norm": 0.7231249100124149, |
| "learning_rate": 1.8094911256481765e-05, |
| "loss": 0.5676, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.4017341040462428, |
| "grad_norm": 0.8236757722841949, |
| "learning_rate": 1.8083048094605826e-05, |
| "loss": 0.5874, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.4046242774566475, |
| "grad_norm": 0.9619067429859167, |
| "learning_rate": 1.807115202502941e-05, |
| "loss": 0.5861, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.407514450867052, |
| "grad_norm": 0.7062417807318491, |
| "learning_rate": 1.805922309618378e-05, |
| "loss": 0.5912, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.4104046242774566, |
| "grad_norm": 0.7900913898127147, |
| "learning_rate": 1.804726135663399e-05, |
| "loss": 0.5937, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.4132947976878611, |
| "grad_norm": 0.876647311651091, |
| "learning_rate": 1.8035266855078663e-05, |
| "loss": 0.6093, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.416184971098266, |
| "grad_norm": 0.795569177139578, |
| "learning_rate": 1.8023239640349814e-05, |
| "loss": 0.5921, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.4190751445086704, |
| "grad_norm": 0.8846891720315945, |
| "learning_rate": 1.801117976141262e-05, |
| "loss": 0.58, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.4219653179190752, |
| "grad_norm": 0.7553556873287146, |
| "learning_rate": 1.7999087267365265e-05, |
| "loss": 0.5704, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.4248554913294798, |
| "grad_norm": 0.7580506512265982, |
| "learning_rate": 1.7986962207438692e-05, |
| "loss": 0.5928, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.4277456647398843, |
| "grad_norm": 0.8692821453087939, |
| "learning_rate": 1.797480463099645e-05, |
| "loss": 0.577, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.430635838150289, |
| "grad_norm": 0.681599198571664, |
| "learning_rate": 1.7962614587534448e-05, |
| "loss": 0.5753, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.4335260115606936, |
| "grad_norm": 0.8477803298084209, |
| "learning_rate": 1.795039212668078e-05, |
| "loss": 0.5764, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.4364161849710984, |
| "grad_norm": 0.7899636423682991, |
| "learning_rate": 1.793813729819553e-05, |
| "loss": 0.5931, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.439306358381503, |
| "grad_norm": 0.7751880599800394, |
| "learning_rate": 1.792585015197055e-05, |
| "loss": 0.6065, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.4421965317919074, |
| "grad_norm": 0.7819584492038231, |
| "learning_rate": 1.7913530738029252e-05, |
| "loss": 0.5922, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.4450867052023122, |
| "grad_norm": 0.7410757363702914, |
| "learning_rate": 1.7901179106526438e-05, |
| "loss": 0.5901, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4479768786127167, |
| "grad_norm": 0.8294915410142112, |
| "learning_rate": 1.788879530774805e-05, |
| "loss": 0.6094, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.4508670520231215, |
| "grad_norm": 0.825270638932142, |
| "learning_rate": 1.7876379392111012e-05, |
| "loss": 0.5941, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.453757225433526, |
| "grad_norm": 0.8695263847370847, |
| "learning_rate": 1.7863931410162987e-05, |
| "loss": 0.5728, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.4566473988439306, |
| "grad_norm": 0.7280349111620275, |
| "learning_rate": 1.7851451412582196e-05, |
| "loss": 0.5911, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.4595375722543353, |
| "grad_norm": 1.0251637552257442, |
| "learning_rate": 1.783893945017719e-05, |
| "loss": 0.5721, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.4624277456647399, |
| "grad_norm": 0.7591697317876812, |
| "learning_rate": 1.782639557388667e-05, |
| "loss": 0.5854, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.4653179190751446, |
| "grad_norm": 0.9795699818825268, |
| "learning_rate": 1.7813819834779258e-05, |
| "loss": 0.5845, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.4682080924855492, |
| "grad_norm": 0.8818773497711714, |
| "learning_rate": 1.780121228405329e-05, |
| "loss": 0.5863, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.4710982658959537, |
| "grad_norm": 0.6655630594025235, |
| "learning_rate": 1.778857297303662e-05, |
| "loss": 0.5792, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.4739884393063583, |
| "grad_norm": 0.8556541902196599, |
| "learning_rate": 1.777590195318641e-05, |
| "loss": 0.5746, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.476878612716763, |
| "grad_norm": 0.6319453690933835, |
| "learning_rate": 1.77631992760889e-05, |
| "loss": 0.5774, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.4797687861271676, |
| "grad_norm": 0.6634365810897271, |
| "learning_rate": 1.775046499345922e-05, |
| "loss": 0.6016, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.4826589595375723, |
| "grad_norm": 0.7453817549418363, |
| "learning_rate": 1.773769915714118e-05, |
| "loss": 0.5818, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.4855491329479769, |
| "grad_norm": 0.7329107259848197, |
| "learning_rate": 1.7724901819107047e-05, |
| "loss": 0.5789, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.4884393063583814, |
| "grad_norm": 0.7397082974840387, |
| "learning_rate": 1.7712073031457332e-05, |
| "loss": 0.595, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.4913294797687862, |
| "grad_norm": 0.7751550546024663, |
| "learning_rate": 1.769921284642058e-05, |
| "loss": 0.5936, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.4942196531791907, |
| "grad_norm": 0.6978582914470572, |
| "learning_rate": 1.768632131635318e-05, |
| "loss": 0.5873, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.4971098265895955, |
| "grad_norm": 0.7745359745262627, |
| "learning_rate": 1.767339849373912e-05, |
| "loss": 0.5946, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.7287263926354944, |
| "learning_rate": 1.766044443118978e-05, |
| "loss": 0.5682, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.5028901734104045, |
| "grad_norm": 0.7722236362673658, |
| "learning_rate": 1.764745918144374e-05, |
| "loss": 0.5556, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.5057803468208093, |
| "grad_norm": 0.7299445800710093, |
| "learning_rate": 1.7634442797366537e-05, |
| "loss": 0.5786, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.5086705202312138, |
| "grad_norm": 0.6751944887927009, |
| "learning_rate": 1.7621395331950464e-05, |
| "loss": 0.5804, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.5115606936416186, |
| "grad_norm": 0.6793007632896605, |
| "learning_rate": 1.7608316838314355e-05, |
| "loss": 0.5649, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.5144508670520231, |
| "grad_norm": 0.6875547584975547, |
| "learning_rate": 1.759520736970337e-05, |
| "loss": 0.6058, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.5173410404624277, |
| "grad_norm": 0.7035985997297632, |
| "learning_rate": 1.7582066979488764e-05, |
| "loss": 0.5863, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.5202312138728322, |
| "grad_norm": 0.6824434177269557, |
| "learning_rate": 1.756889572116769e-05, |
| "loss": 0.5831, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.523121387283237, |
| "grad_norm": 0.7919452976996221, |
| "learning_rate": 1.755569364836296e-05, |
| "loss": 0.581, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.5260115606936417, |
| "grad_norm": 0.6716502550573372, |
| "learning_rate": 1.7542460814822853e-05, |
| "loss": 0.5966, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.5289017341040463, |
| "grad_norm": 0.7018593765581507, |
| "learning_rate": 1.752919727442087e-05, |
| "loss": 0.5976, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.5317919075144508, |
| "grad_norm": 0.7171067506633475, |
| "learning_rate": 1.7515903081155525e-05, |
| "loss": 0.5766, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.5346820809248554, |
| "grad_norm": 0.7058408714374818, |
| "learning_rate": 1.7502578289150137e-05, |
| "loss": 0.5859, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.5375722543352601, |
| "grad_norm": 0.7707834529909837, |
| "learning_rate": 1.748922295265258e-05, |
| "loss": 0.5989, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.5404624277456649, |
| "grad_norm": 0.730864740486449, |
| "learning_rate": 1.7475837126035105e-05, |
| "loss": 0.5797, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.5433526011560694, |
| "grad_norm": 0.7443397273692416, |
| "learning_rate": 1.7462420863794075e-05, |
| "loss": 0.5942, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.546242774566474, |
| "grad_norm": 0.6699753066639157, |
| "learning_rate": 1.7448974220549765e-05, |
| "loss": 0.5843, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.5491329479768785, |
| "grad_norm": 0.7834712901051348, |
| "learning_rate": 1.743549725104614e-05, |
| "loss": 0.5854, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.5520231213872833, |
| "grad_norm": 0.7375236904395911, |
| "learning_rate": 1.742199001015064e-05, |
| "loss": 0.5873, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.5549132947976878, |
| "grad_norm": 0.7520797737041237, |
| "learning_rate": 1.740845255285393e-05, |
| "loss": 0.5737, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.5578034682080926, |
| "grad_norm": 0.750510184354196, |
| "learning_rate": 1.7394884934269695e-05, |
| "loss": 0.564, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.560693641618497, |
| "grad_norm": 0.7504237975136158, |
| "learning_rate": 1.7381287209634417e-05, |
| "loss": 0.6057, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.5635838150289016, |
| "grad_norm": 0.6607096876637601, |
| "learning_rate": 1.736765943430715e-05, |
| "loss": 0.5931, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.5664739884393064, |
| "grad_norm": 0.7692482037901299, |
| "learning_rate": 1.7354001663769278e-05, |
| "loss": 0.5989, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.569364161849711, |
| "grad_norm": 0.6528108961728156, |
| "learning_rate": 1.734031395362431e-05, |
| "loss": 0.5681, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.5722543352601157, |
| "grad_norm": 0.8314040661899244, |
| "learning_rate": 1.7326596359597646e-05, |
| "loss": 0.5808, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.5751445086705202, |
| "grad_norm": 0.6551143635361942, |
| "learning_rate": 1.731284893753634e-05, |
| "loss": 0.5824, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.5780346820809248, |
| "grad_norm": 0.7504473441299307, |
| "learning_rate": 1.7299071743408894e-05, |
| "loss": 0.6188, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.5809248554913293, |
| "grad_norm": 0.7011203491881448, |
| "learning_rate": 1.728526483330501e-05, |
| "loss": 0.5731, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.583815028901734, |
| "grad_norm": 0.7685686045630253, |
| "learning_rate": 1.7271428263435373e-05, |
| "loss": 0.5714, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.5867052023121389, |
| "grad_norm": 0.7044883437721119, |
| "learning_rate": 1.7257562090131422e-05, |
| "loss": 0.573, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.5895953757225434, |
| "grad_norm": 0.8203622929135783, |
| "learning_rate": 1.7243666369845104e-05, |
| "loss": 0.5845, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.592485549132948, |
| "grad_norm": 0.6889783456040496, |
| "learning_rate": 1.7229741159148676e-05, |
| "loss": 0.5933, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.5953757225433525, |
| "grad_norm": 0.9765254946088467, |
| "learning_rate": 1.7215786514734438e-05, |
| "loss": 0.5665, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.5982658959537572, |
| "grad_norm": 0.6955017748996175, |
| "learning_rate": 1.7201802493414538e-05, |
| "loss": 0.5571, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.601156069364162, |
| "grad_norm": 0.9455064828292016, |
| "learning_rate": 1.7187789152120712e-05, |
| "loss": 0.5948, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.6040462427745665, |
| "grad_norm": 0.8420994974130152, |
| "learning_rate": 1.7173746547904065e-05, |
| "loss": 0.5988, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.606936416184971, |
| "grad_norm": 0.7954238132099828, |
| "learning_rate": 1.7159674737934843e-05, |
| "loss": 0.5639, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.6098265895953756, |
| "grad_norm": 0.894451573841629, |
| "learning_rate": 1.7145573779502185e-05, |
| "loss": 0.596, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.6127167630057804, |
| "grad_norm": 0.7857493523002436, |
| "learning_rate": 1.7131443730013907e-05, |
| "loss": 0.5838, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.6156069364161851, |
| "grad_norm": 0.9094958240641396, |
| "learning_rate": 1.7117284646996256e-05, |
| "loss": 0.577, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.6184971098265897, |
| "grad_norm": 0.7426411606349894, |
| "learning_rate": 1.7103096588093686e-05, |
| "loss": 0.5964, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.6213872832369942, |
| "grad_norm": 0.7420940463823611, |
| "learning_rate": 1.708887961106861e-05, |
| "loss": 0.554, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.6242774566473988, |
| "grad_norm": 0.8081581100315017, |
| "learning_rate": 1.707463377380118e-05, |
| "loss": 0.605, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.6271676300578035, |
| "grad_norm": 0.6331916502112901, |
| "learning_rate": 1.706035913428904e-05, |
| "loss": 0.5921, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.630057803468208, |
| "grad_norm": 0.7310464650272858, |
| "learning_rate": 1.704605575064709e-05, |
| "loss": 0.5707, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.6329479768786128, |
| "grad_norm": 0.7523392612827752, |
| "learning_rate": 1.7031723681107254e-05, |
| "loss": 0.595, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.6358381502890174, |
| "grad_norm": 0.7449756861349839, |
| "learning_rate": 1.7017362984018256e-05, |
| "loss": 0.5841, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.638728323699422, |
| "grad_norm": 0.831280655903283, |
| "learning_rate": 1.7002973717845345e-05, |
| "loss": 0.5902, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.6416184971098264, |
| "grad_norm": 0.83598321136025, |
| "learning_rate": 1.69885559411701e-05, |
| "loss": 0.5823, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.6445086705202312, |
| "grad_norm": 0.7138345328389332, |
| "learning_rate": 1.6974109712690163e-05, |
| "loss": 0.5889, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.647398843930636, |
| "grad_norm": 0.7110975012072633, |
| "learning_rate": 1.695963509121901e-05, |
| "loss": 0.5686, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.6502890173410405, |
| "grad_norm": 0.8042107045528893, |
| "learning_rate": 1.6945132135685712e-05, |
| "loss": 0.5947, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.653179190751445, |
| "grad_norm": 0.7013763940024369, |
| "learning_rate": 1.6930600905134688e-05, |
| "loss": 0.5604, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.6560693641618496, |
| "grad_norm": 0.7785765624779681, |
| "learning_rate": 1.6916041458725486e-05, |
| "loss": 0.5963, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.6589595375722543, |
| "grad_norm": 0.7821908290786825, |
| "learning_rate": 1.6901453855732503e-05, |
| "loss": 0.5621, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.661849710982659, |
| "grad_norm": 0.7536723428025114, |
| "learning_rate": 1.6886838155544786e-05, |
| "loss": 0.6013, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.6647398843930636, |
| "grad_norm": 0.7614681747533972, |
| "learning_rate": 1.6872194417665767e-05, |
| "loss": 0.5888, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.6676300578034682, |
| "grad_norm": 0.7311883818551561, |
| "learning_rate": 1.6857522701713016e-05, |
| "loss": 0.5788, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.6705202312138727, |
| "grad_norm": 0.7241599297519454, |
| "learning_rate": 1.684282306741802e-05, |
| "loss": 0.5602, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.6734104046242775, |
| "grad_norm": 0.7858570405102655, |
| "learning_rate": 1.6828095574625917e-05, |
| "loss": 0.5684, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.6763005780346822, |
| "grad_norm": 0.7306983679762703, |
| "learning_rate": 1.6813340283295265e-05, |
| "loss": 0.5733, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.6791907514450868, |
| "grad_norm": 0.8176853531742918, |
| "learning_rate": 1.6798557253497802e-05, |
| "loss": 0.5927, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.6820809248554913, |
| "grad_norm": 0.7464641313209691, |
| "learning_rate": 1.678374654541819e-05, |
| "loss": 0.5862, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.6849710982658959, |
| "grad_norm": 0.7413436012267695, |
| "learning_rate": 1.6768908219353774e-05, |
| "loss": 0.5804, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.6878612716763006, |
| "grad_norm": 0.7126278020137226, |
| "learning_rate": 1.6754042335714333e-05, |
| "loss": 0.5759, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.6907514450867052, |
| "grad_norm": 0.7809699652694174, |
| "learning_rate": 1.6739148955021854e-05, |
| "loss": 0.5877, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.69364161849711, |
| "grad_norm": 0.6640250271493856, |
| "learning_rate": 1.6724228137910247e-05, |
| "loss": 0.5947, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.6965317919075145, |
| "grad_norm": 0.7176063205379599, |
| "learning_rate": 1.670927994512514e-05, |
| "loss": 0.6073, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.699421965317919, |
| "grad_norm": 0.7913883631540534, |
| "learning_rate": 1.6694304437523608e-05, |
| "loss": 0.5699, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.7023121387283235, |
| "grad_norm": 0.6369673178626757, |
| "learning_rate": 1.6679301676073923e-05, |
| "loss": 0.5606, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.7052023121387283, |
| "grad_norm": 0.7416646166687233, |
| "learning_rate": 1.6664271721855325e-05, |
| "loss": 0.5834, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.708092485549133, |
| "grad_norm": 0.6721518206723244, |
| "learning_rate": 1.664921463605775e-05, |
| "loss": 0.579, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.7109826589595376, |
| "grad_norm": 0.6573326596850098, |
| "learning_rate": 1.6634130479981597e-05, |
| "loss": 0.5853, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.7138728323699421, |
| "grad_norm": 0.7290689810748527, |
| "learning_rate": 1.6619019315037472e-05, |
| "loss": 0.5886, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.7167630057803467, |
| "grad_norm": 0.8025709845059936, |
| "learning_rate": 1.6603881202745942e-05, |
| "loss": 0.5613, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.7196531791907514, |
| "grad_norm": 0.7682312752649727, |
| "learning_rate": 1.6588716204737282e-05, |
| "loss": 0.5921, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.7225433526011562, |
| "grad_norm": 0.6035407151326911, |
| "learning_rate": 1.657352438275122e-05, |
| "loss": 0.5655, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.7254335260115607, |
| "grad_norm": 0.7977173862284962, |
| "learning_rate": 1.6558305798636697e-05, |
| "loss": 0.5904, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.7283236994219653, |
| "grad_norm": 0.6749869989363424, |
| "learning_rate": 1.6543060514351594e-05, |
| "loss": 0.5812, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.7312138728323698, |
| "grad_norm": 0.7134556633614937, |
| "learning_rate": 1.6527788591962516e-05, |
| "loss": 0.6047, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.7341040462427746, |
| "grad_norm": 0.8293158717228061, |
| "learning_rate": 1.6512490093644494e-05, |
| "loss": 0.5879, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7369942196531793, |
| "grad_norm": 0.6811278610703695, |
| "learning_rate": 1.6497165081680766e-05, |
| "loss": 0.5921, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.739884393063584, |
| "grad_norm": 0.7337901858628713, |
| "learning_rate": 1.6481813618462513e-05, |
| "loss": 0.5894, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.7427745664739884, |
| "grad_norm": 0.689549851951907, |
| "learning_rate": 1.646643576648861e-05, |
| "loss": 0.5843, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.745664739884393, |
| "grad_norm": 0.7939807044818971, |
| "learning_rate": 1.6451031588365345e-05, |
| "loss": 0.577, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.7485549132947977, |
| "grad_norm": 0.7021558092806988, |
| "learning_rate": 1.643560114680621e-05, |
| "loss": 0.5683, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.7514450867052023, |
| "grad_norm": 0.7212482485557895, |
| "learning_rate": 1.6420144504631604e-05, |
| "loss": 0.5791, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.754335260115607, |
| "grad_norm": 0.6706672483451787, |
| "learning_rate": 1.6404661724768595e-05, |
| "loss": 0.6124, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.7572254335260116, |
| "grad_norm": 0.7432156095439864, |
| "learning_rate": 1.6389152870250677e-05, |
| "loss": 0.6018, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.760115606936416, |
| "grad_norm": 0.6406322039091135, |
| "learning_rate": 1.6373618004217483e-05, |
| "loss": 0.6029, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.7630057803468207, |
| "grad_norm": 0.7828798092411525, |
| "learning_rate": 1.6358057189914552e-05, |
| "loss": 0.5625, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.7658959537572254, |
| "grad_norm": 0.643002344168019, |
| "learning_rate": 1.634247049069306e-05, |
| "loss": 0.5912, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.7687861271676302, |
| "grad_norm": 0.8961535556561148, |
| "learning_rate": 1.6326857970009568e-05, |
| "loss": 0.5687, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.7716763005780347, |
| "grad_norm": 0.6866826745827112, |
| "learning_rate": 1.6311219691425764e-05, |
| "loss": 0.6037, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.7745664739884393, |
| "grad_norm": 0.7766521938446624, |
| "learning_rate": 1.629555571860819e-05, |
| "loss": 0.5952, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.7774566473988438, |
| "grad_norm": 0.7822299785872244, |
| "learning_rate": 1.6279866115328015e-05, |
| "loss": 0.5882, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.7803468208092486, |
| "grad_norm": 0.6732758382850704, |
| "learning_rate": 1.6264150945460726e-05, |
| "loss": 0.5907, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.7832369942196533, |
| "grad_norm": 0.6998293133649915, |
| "learning_rate": 1.6248410272985928e-05, |
| "loss": 0.595, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.7861271676300579, |
| "grad_norm": 0.6427786891334882, |
| "learning_rate": 1.623264416198703e-05, |
| "loss": 0.5647, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.7890173410404624, |
| "grad_norm": 0.769900518742028, |
| "learning_rate": 1.6216852676651016e-05, |
| "loss": 0.5719, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.791907514450867, |
| "grad_norm": 0.6494905375694605, |
| "learning_rate": 1.6201035881268168e-05, |
| "loss": 0.6067, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.7947976878612717, |
| "grad_norm": 0.7126382140280971, |
| "learning_rate": 1.6185193840231818e-05, |
| "loss": 0.5755, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.7976878612716765, |
| "grad_norm": 0.7579906096389109, |
| "learning_rate": 1.6169326618038067e-05, |
| "loss": 0.5847, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.800578034682081, |
| "grad_norm": 0.7487652525196908, |
| "learning_rate": 1.615343427928555e-05, |
| "loss": 0.5767, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.8034682080924855, |
| "grad_norm": 0.7661053667162049, |
| "learning_rate": 1.6137516888675143e-05, |
| "loss": 0.5772, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.80635838150289, |
| "grad_norm": 0.7960982048126151, |
| "learning_rate": 1.6121574511009712e-05, |
| "loss": 0.5838, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.8092485549132948, |
| "grad_norm": 0.7049898412611506, |
| "learning_rate": 1.6105607211193862e-05, |
| "loss": 0.5893, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.8121387283236994, |
| "grad_norm": 0.8326584145081675, |
| "learning_rate": 1.608961505423365e-05, |
| "loss": 0.5952, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.8150289017341041, |
| "grad_norm": 0.8354389404361381, |
| "learning_rate": 1.6073598105236338e-05, |
| "loss": 0.5819, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.8179190751445087, |
| "grad_norm": 0.6945106150715511, |
| "learning_rate": 1.605755642941012e-05, |
| "loss": 0.5849, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.8208092485549132, |
| "grad_norm": 0.7338790569970648, |
| "learning_rate": 1.6041490092063853e-05, |
| "loss": 0.5963, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.8236994219653178, |
| "grad_norm": 0.6655937405423681, |
| "learning_rate": 1.60253991586068e-05, |
| "loss": 0.5774, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.8265895953757225, |
| "grad_norm": 0.6996809292538112, |
| "learning_rate": 1.6009283694548365e-05, |
| "loss": 0.6022, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.8294797687861273, |
| "grad_norm": 0.6925368246008983, |
| "learning_rate": 1.5993143765497812e-05, |
| "loss": 0.5755, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.8323699421965318, |
| "grad_norm": 0.6529851507970346, |
| "learning_rate": 1.597697943716401e-05, |
| "loss": 0.5908, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.8352601156069364, |
| "grad_norm": 0.7321812506216079, |
| "learning_rate": 1.596079077535516e-05, |
| "loss": 0.5692, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.838150289017341, |
| "grad_norm": 0.6191201485424263, |
| "learning_rate": 1.5944577845978534e-05, |
| "loss": 0.5988, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.8410404624277457, |
| "grad_norm": 0.646374783015124, |
| "learning_rate": 1.5928340715040204e-05, |
| "loss": 0.5767, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.8439306358381504, |
| "grad_norm": 0.6819560688437241, |
| "learning_rate": 1.5912079448644765e-05, |
| "loss": 0.5937, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.846820809248555, |
| "grad_norm": 0.7164886573033494, |
| "learning_rate": 1.5895794112995074e-05, |
| "loss": 0.5966, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.8497109826589595, |
| "grad_norm": 0.6644650088840722, |
| "learning_rate": 1.587948477439198e-05, |
| "loss": 0.5557, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.852601156069364, |
| "grad_norm": 0.7455243137484561, |
| "learning_rate": 1.5863151499234053e-05, |
| "loss": 0.5913, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.8554913294797688, |
| "grad_norm": 0.6655201268971009, |
| "learning_rate": 1.584679435401731e-05, |
| "loss": 0.5881, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.8583815028901736, |
| "grad_norm": 0.6620269723317904, |
| "learning_rate": 1.5830413405334954e-05, |
| "loss": 0.5953, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.861271676300578, |
| "grad_norm": 0.6763021820636497, |
| "learning_rate": 1.5814008719877096e-05, |
| "loss": 0.5629, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.8641618497109826, |
| "grad_norm": 0.6941229988108782, |
| "learning_rate": 1.5797580364430475e-05, |
| "loss": 0.5896, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.8670520231213872, |
| "grad_norm": 0.6665773655498453, |
| "learning_rate": 1.5781128405878202e-05, |
| "loss": 0.5801, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.869942196531792, |
| "grad_norm": 0.7132340160866797, |
| "learning_rate": 1.5764652911199488e-05, |
| "loss": 0.5945, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.8728323699421965, |
| "grad_norm": 0.6665820793295506, |
| "learning_rate": 1.5748153947469348e-05, |
| "loss": 0.5814, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.8757225433526012, |
| "grad_norm": 0.6871939300797527, |
| "learning_rate": 1.573163158185836e-05, |
| "loss": 0.594, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.8786127167630058, |
| "grad_norm": 0.6388515167925531, |
| "learning_rate": 1.5715085881632366e-05, |
| "loss": 0.5777, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.8815028901734103, |
| "grad_norm": 0.7241320262561728, |
| "learning_rate": 1.569851691415221e-05, |
| "loss": 0.5658, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.8843930635838149, |
| "grad_norm": 0.6666561142088108, |
| "learning_rate": 1.568192474687346e-05, |
| "loss": 0.583, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.8872832369942196, |
| "grad_norm": 0.787208876465865, |
| "learning_rate": 1.5665309447346145e-05, |
| "loss": 0.5604, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.8901734104046244, |
| "grad_norm": 0.7039870339303322, |
| "learning_rate": 1.5648671083214454e-05, |
| "loss": 0.5824, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.893063583815029, |
| "grad_norm": 0.7121898583798462, |
| "learning_rate": 1.5632009722216493e-05, |
| "loss": 0.5903, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.8959537572254335, |
| "grad_norm": 0.7757465203962726, |
| "learning_rate": 1.5615325432183977e-05, |
| "loss": 0.5795, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.898843930635838, |
| "grad_norm": 0.6402622736455414, |
| "learning_rate": 1.559861828104198e-05, |
| "loss": 0.581, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.9017341040462428, |
| "grad_norm": 0.7782034708328404, |
| "learning_rate": 1.558188833680865e-05, |
| "loss": 0.5742, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.9046242774566475, |
| "grad_norm": 0.6324314813341162, |
| "learning_rate": 1.5565135667594916e-05, |
| "loss": 0.5714, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.907514450867052, |
| "grad_norm": 0.7762351957222875, |
| "learning_rate": 1.5548360341604246e-05, |
| "loss": 0.5917, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.9104046242774566, |
| "grad_norm": 0.8247409287869959, |
| "learning_rate": 1.5531562427132327e-05, |
| "loss": 0.566, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.9132947976878611, |
| "grad_norm": 0.6626264291918831, |
| "learning_rate": 1.5514741992566826e-05, |
| "loss": 0.5767, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.916184971098266, |
| "grad_norm": 0.9294772053397637, |
| "learning_rate": 1.549789910638708e-05, |
| "loss": 0.576, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.9190751445086707, |
| "grad_norm": 0.6981208084909047, |
| "learning_rate": 1.5481033837163844e-05, |
| "loss": 0.58, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.9219653179190752, |
| "grad_norm": 0.7557466097080571, |
| "learning_rate": 1.5464146253558987e-05, |
| "loss": 0.5751, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.9248554913294798, |
| "grad_norm": 0.8115938243522869, |
| "learning_rate": 1.5447236424325226e-05, |
| "loss": 0.6041, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.9277456647398843, |
| "grad_norm": 0.7042039351280144, |
| "learning_rate": 1.5430304418305853e-05, |
| "loss": 0.5815, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.930635838150289, |
| "grad_norm": 0.8255191008863216, |
| "learning_rate": 1.541335030443444e-05, |
| "loss": 0.5897, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.9335260115606936, |
| "grad_norm": 0.6302561443016468, |
| "learning_rate": 1.5396374151734564e-05, |
| "loss": 0.5841, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.9364161849710984, |
| "grad_norm": 0.6704642962297728, |
| "learning_rate": 1.5379376029319525e-05, |
| "loss": 0.5891, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.939306358381503, |
| "grad_norm": 0.7314361115096814, |
| "learning_rate": 1.5362356006392073e-05, |
| "loss": 0.591, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.9421965317919074, |
| "grad_norm": 0.7058800141993237, |
| "learning_rate": 1.534531415224411e-05, |
| "loss": 0.578, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.9450867052023122, |
| "grad_norm": 0.8166135309836842, |
| "learning_rate": 1.532825053625643e-05, |
| "loss": 0.5933, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.9479768786127167, |
| "grad_norm": 0.752724434644058, |
| "learning_rate": 1.5311165227898407e-05, |
| "loss": 0.5771, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.9508670520231215, |
| "grad_norm": 0.6785478290130587, |
| "learning_rate": 1.529405829672775e-05, |
| "loss": 0.5889, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.953757225433526, |
| "grad_norm": 0.7944689622366047, |
| "learning_rate": 1.527692981239017e-05, |
| "loss": 0.5785, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.9566473988439306, |
| "grad_norm": 0.7205575606177707, |
| "learning_rate": 1.5259779844619152e-05, |
| "loss": 0.5993, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.9595375722543351, |
| "grad_norm": 0.7506990852727408, |
| "learning_rate": 1.5242608463235638e-05, |
| "loss": 0.5646, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.9624277456647399, |
| "grad_norm": 0.7726824362277099, |
| "learning_rate": 1.5225415738147735e-05, |
| "loss": 0.5786, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.9653179190751446, |
| "grad_norm": 0.7716448033850631, |
| "learning_rate": 1.520820173935046e-05, |
| "loss": 0.5803, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.9682080924855492, |
| "grad_norm": 0.7829292348981737, |
| "learning_rate": 1.5190966536925445e-05, |
| "loss": 0.5425, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.9710982658959537, |
| "grad_norm": 0.7948589901415222, |
| "learning_rate": 1.5173710201040615e-05, |
| "loss": 0.5952, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.9739884393063583, |
| "grad_norm": 0.7215578037880686, |
| "learning_rate": 1.5156432801949971e-05, |
| "loss": 0.5704, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.976878612716763, |
| "grad_norm": 0.7284053994843306, |
| "learning_rate": 1.5139134409993239e-05, |
| "loss": 0.5972, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.9797687861271678, |
| "grad_norm": 0.7013744195894402, |
| "learning_rate": 1.5121815095595631e-05, |
| "loss": 0.6006, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.9826589595375723, |
| "grad_norm": 0.6538513212308937, |
| "learning_rate": 1.510447492926752e-05, |
| "loss": 0.5731, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.9855491329479769, |
| "grad_norm": 0.62634361862159, |
| "learning_rate": 1.508711398160419e-05, |
| "loss": 0.5587, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.9884393063583814, |
| "grad_norm": 0.6596385884566939, |
| "learning_rate": 1.5069732323285511e-05, |
| "loss": 0.5756, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.9913294797687862, |
| "grad_norm": 0.6799960306298413, |
| "learning_rate": 1.5052330025075687e-05, |
| "loss": 0.5736, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.9942196531791907, |
| "grad_norm": 0.7728857944499702, |
| "learning_rate": 1.5034907157822943e-05, |
| "loss": 0.5829, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.9971098265895955, |
| "grad_norm": 0.7361233644197462, |
| "learning_rate": 1.501746379245924e-05, |
| "loss": 0.5655, |
| "step": 691 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.7892457565262699, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.5565, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.0028901734104045, |
| "grad_norm": 0.8917557233017886, |
| "learning_rate": 1.4982515851543808e-05, |
| "loss": 0.49, |
| "step": 693 |
| }, |
| { |
| "epoch": 2.005780346820809, |
| "grad_norm": 0.731826726927124, |
| "learning_rate": 1.4965011418272115e-05, |
| "loss": 0.5039, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.008670520231214, |
| "grad_norm": 0.9584484699434676, |
| "learning_rate": 1.4947486771448955e-05, |
| "loss": 0.492, |
| "step": 695 |
| }, |
| { |
| "epoch": 2.0115606936416186, |
| "grad_norm": 0.7924464564666959, |
| "learning_rate": 1.4929941982420665e-05, |
| "loss": 0.4976, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.014450867052023, |
| "grad_norm": 0.7694228147087654, |
| "learning_rate": 1.4912377122615582e-05, |
| "loss": 0.4684, |
| "step": 697 |
| }, |
| { |
| "epoch": 2.0173410404624277, |
| "grad_norm": 0.8337400822313988, |
| "learning_rate": 1.4894792263543744e-05, |
| "loss": 0.4821, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.020231213872832, |
| "grad_norm": 0.81377528853195, |
| "learning_rate": 1.4877187476796626e-05, |
| "loss": 0.4809, |
| "step": 699 |
| }, |
| { |
| "epoch": 2.023121387283237, |
| "grad_norm": 0.8880995758760973, |
| "learning_rate": 1.4859562834046821e-05, |
| "loss": 0.4959, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.0260115606936417, |
| "grad_norm": 0.8842076932638161, |
| "learning_rate": 1.4841918407047766e-05, |
| "loss": 0.4838, |
| "step": 701 |
| }, |
| { |
| "epoch": 2.0289017341040463, |
| "grad_norm": 0.8021024993393874, |
| "learning_rate": 1.482425426763344e-05, |
| "loss": 0.4743, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.031791907514451, |
| "grad_norm": 0.8562302068077103, |
| "learning_rate": 1.4806570487718076e-05, |
| "loss": 0.4999, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.0346820809248554, |
| "grad_norm": 0.6671176985974866, |
| "learning_rate": 1.478886713929587e-05, |
| "loss": 0.475, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.03757225433526, |
| "grad_norm": 0.8076830275058707, |
| "learning_rate": 1.4771144294440682e-05, |
| "loss": 0.4763, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.040462427745665, |
| "grad_norm": 0.6776188308044887, |
| "learning_rate": 1.475340202530575e-05, |
| "loss": 0.4797, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.0433526011560694, |
| "grad_norm": 0.8562085572335955, |
| "learning_rate": 1.4735640404123384e-05, |
| "loss": 0.4905, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.046242774566474, |
| "grad_norm": 0.666760122889654, |
| "learning_rate": 1.4717859503204693e-05, |
| "loss": 0.4993, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.0491329479768785, |
| "grad_norm": 0.8158052894657422, |
| "learning_rate": 1.4700059394939259e-05, |
| "loss": 0.4724, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.052023121387283, |
| "grad_norm": 0.7725444120997191, |
| "learning_rate": 1.4682240151794881e-05, |
| "loss": 0.499, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.054913294797688, |
| "grad_norm": 0.7224644944519482, |
| "learning_rate": 1.4664401846317246e-05, |
| "loss": 0.4895, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.0578034682080926, |
| "grad_norm": 0.6375736963910287, |
| "learning_rate": 1.4646544551129658e-05, |
| "loss": 0.471, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.060693641618497, |
| "grad_norm": 0.6785909939945065, |
| "learning_rate": 1.4628668338932721e-05, |
| "loss": 0.476, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.0635838150289016, |
| "grad_norm": 0.6480648234855784, |
| "learning_rate": 1.461077328250406e-05, |
| "loss": 0.4755, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.066473988439306, |
| "grad_norm": 0.5973861172716439, |
| "learning_rate": 1.4592859454698021e-05, |
| "loss": 0.4741, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.069364161849711, |
| "grad_norm": 0.6341756244691673, |
| "learning_rate": 1.4574926928445366e-05, |
| "loss": 0.4755, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.0722543352601157, |
| "grad_norm": 0.6404979607956962, |
| "learning_rate": 1.4556975776752987e-05, |
| "loss": 0.4772, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.0751445086705202, |
| "grad_norm": 0.6990360916968367, |
| "learning_rate": 1.4539006072703596e-05, |
| "loss": 0.4997, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.078034682080925, |
| "grad_norm": 0.587308996267915, |
| "learning_rate": 1.4521017889455449e-05, |
| "loss": 0.481, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.0809248554913293, |
| "grad_norm": 0.6861567135110433, |
| "learning_rate": 1.4503011300242023e-05, |
| "loss": 0.4837, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.0838150289017343, |
| "grad_norm": 0.5852818001000318, |
| "learning_rate": 1.4484986378371733e-05, |
| "loss": 0.5066, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.086705202312139, |
| "grad_norm": 0.613823797692949, |
| "learning_rate": 1.446694319722763e-05, |
| "loss": 0.4941, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.0895953757225434, |
| "grad_norm": 0.6226013333663201, |
| "learning_rate": 1.44488818302671e-05, |
| "loss": 0.487, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.092485549132948, |
| "grad_norm": 0.6031779435865413, |
| "learning_rate": 1.4430802351021576e-05, |
| "loss": 0.4789, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.0953757225433525, |
| "grad_norm": 0.667404269101217, |
| "learning_rate": 1.4412704833096215e-05, |
| "loss": 0.4956, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.098265895953757, |
| "grad_norm": 0.6511613836239132, |
| "learning_rate": 1.4394589350169628e-05, |
| "loss": 0.4893, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.101156069364162, |
| "grad_norm": 0.7171807632032887, |
| "learning_rate": 1.4376455975993553e-05, |
| "loss": 0.4953, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.1040462427745665, |
| "grad_norm": 0.651541501340034, |
| "learning_rate": 1.4358304784392569e-05, |
| "loss": 0.4794, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.106936416184971, |
| "grad_norm": 0.6388598045623446, |
| "learning_rate": 1.4340135849263808e-05, |
| "loss": 0.4831, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.1098265895953756, |
| "grad_norm": 0.5985686832414758, |
| "learning_rate": 1.4321949244576616e-05, |
| "loss": 0.4699, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.11271676300578, |
| "grad_norm": 0.6741219856319731, |
| "learning_rate": 1.4303745044372293e-05, |
| "loss": 0.4803, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.115606936416185, |
| "grad_norm": 0.6241553007984232, |
| "learning_rate": 1.4285523322763763e-05, |
| "loss": 0.4901, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.1184971098265897, |
| "grad_norm": 0.6342238231670317, |
| "learning_rate": 1.4267284153935295e-05, |
| "loss": 0.4857, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.121387283236994, |
| "grad_norm": 0.6615212499200348, |
| "learning_rate": 1.4249027612142174e-05, |
| "loss": 0.494, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.1242774566473988, |
| "grad_norm": 0.6287401379522876, |
| "learning_rate": 1.4230753771710428e-05, |
| "loss": 0.4641, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.1271676300578033, |
| "grad_norm": 0.6156860490644589, |
| "learning_rate": 1.4212462707036501e-05, |
| "loss": 0.4755, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.1300578034682083, |
| "grad_norm": 0.8425617665798458, |
| "learning_rate": 1.4194154492586967e-05, |
| "loss": 0.4899, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.132947976878613, |
| "grad_norm": 0.610199754195904, |
| "learning_rate": 1.4175829202898216e-05, |
| "loss": 0.475, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.1358381502890174, |
| "grad_norm": 0.6664221509260175, |
| "learning_rate": 1.415748691257616e-05, |
| "loss": 0.4632, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.138728323699422, |
| "grad_norm": 0.6198598713665869, |
| "learning_rate": 1.4139127696295913e-05, |
| "loss": 0.492, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.1416184971098264, |
| "grad_norm": 0.6627895786844057, |
| "learning_rate": 1.4120751628801512e-05, |
| "loss": 0.4887, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.1445086705202314, |
| "grad_norm": 0.6174039399372138, |
| "learning_rate": 1.4102358784905587e-05, |
| "loss": 0.4965, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.147398843930636, |
| "grad_norm": 0.6139937983480757, |
| "learning_rate": 1.4083949239489077e-05, |
| "loss": 0.474, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.1502890173410405, |
| "grad_norm": 0.6250851147746078, |
| "learning_rate": 1.4065523067500905e-05, |
| "loss": 0.4651, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.153179190751445, |
| "grad_norm": 0.6141885023959468, |
| "learning_rate": 1.4047080343957701e-05, |
| "loss": 0.4856, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.1560693641618496, |
| "grad_norm": 0.6398962354405308, |
| "learning_rate": 1.4028621143943463e-05, |
| "loss": 0.4617, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.1589595375722546, |
| "grad_norm": 0.5634682725662835, |
| "learning_rate": 1.4010145542609277e-05, |
| "loss": 0.469, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.161849710982659, |
| "grad_norm": 0.6086661562226492, |
| "learning_rate": 1.3991653615173002e-05, |
| "loss": 0.4704, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.1647398843930636, |
| "grad_norm": 0.6557665168728067, |
| "learning_rate": 1.3973145436918957e-05, |
| "loss": 0.4605, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.167630057803468, |
| "grad_norm": 0.5541968389544825, |
| "learning_rate": 1.3954621083197629e-05, |
| "loss": 0.4871, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.1705202312138727, |
| "grad_norm": 0.6466833656315703, |
| "learning_rate": 1.3936080629425356e-05, |
| "loss": 0.4775, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.1734104046242773, |
| "grad_norm": 0.6256229255740271, |
| "learning_rate": 1.3917524151084019e-05, |
| "loss": 0.5013, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.1763005780346822, |
| "grad_norm": 0.5926214989262687, |
| "learning_rate": 1.3898951723720743e-05, |
| "loss": 0.4802, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.179190751445087, |
| "grad_norm": 0.7398296579967744, |
| "learning_rate": 1.388036342294759e-05, |
| "loss": 0.472, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.1820809248554913, |
| "grad_norm": 0.6205573840232363, |
| "learning_rate": 1.3861759324441225e-05, |
| "loss": 0.4813, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.184971098265896, |
| "grad_norm": 0.7578188848578328, |
| "learning_rate": 1.3843139503942653e-05, |
| "loss": 0.4745, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.1878612716763004, |
| "grad_norm": 0.6298054483595766, |
| "learning_rate": 1.3824504037256872e-05, |
| "loss": 0.5086, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.1907514450867054, |
| "grad_norm": 0.6325488679123493, |
| "learning_rate": 1.3805853000252584e-05, |
| "loss": 0.4926, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.19364161849711, |
| "grad_norm": 0.6047656801596065, |
| "learning_rate": 1.3787186468861875e-05, |
| "loss": 0.4763, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.1965317919075145, |
| "grad_norm": 0.7132919959963034, |
| "learning_rate": 1.3768504519079923e-05, |
| "loss": 0.5122, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.199421965317919, |
| "grad_norm": 0.6238046703872717, |
| "learning_rate": 1.374980722696467e-05, |
| "loss": 0.4685, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.2023121387283235, |
| "grad_norm": 0.6243010760642064, |
| "learning_rate": 1.3731094668636516e-05, |
| "loss": 0.4916, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.2052023121387285, |
| "grad_norm": 0.6843853662389136, |
| "learning_rate": 1.3712366920278017e-05, |
| "loss": 0.477, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.208092485549133, |
| "grad_norm": 0.5871073798328166, |
| "learning_rate": 1.3693624058133575e-05, |
| "loss": 0.4677, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.2109826589595376, |
| "grad_norm": 0.7228739280407132, |
| "learning_rate": 1.3674866158509116e-05, |
| "loss": 0.487, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.213872832369942, |
| "grad_norm": 0.6275057686003038, |
| "learning_rate": 1.365609329777179e-05, |
| "loss": 0.488, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.2167630057803467, |
| "grad_norm": 0.6804954453923439, |
| "learning_rate": 1.3637305552349656e-05, |
| "loss": 0.4942, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.2196531791907512, |
| "grad_norm": 0.6318708333825124, |
| "learning_rate": 1.3618502998731372e-05, |
| "loss": 0.4957, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.222543352601156, |
| "grad_norm": 0.6122840766568456, |
| "learning_rate": 1.3599685713465882e-05, |
| "loss": 0.472, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.2254335260115607, |
| "grad_norm": 0.6265270578425641, |
| "learning_rate": 1.358085377316211e-05, |
| "loss": 0.4866, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.2283236994219653, |
| "grad_norm": 0.6832285523902126, |
| "learning_rate": 1.3562007254488634e-05, |
| "loss": 0.477, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.23121387283237, |
| "grad_norm": 0.5864628346946127, |
| "learning_rate": 1.3543146234173395e-05, |
| "loss": 0.4772, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.2341040462427744, |
| "grad_norm": 0.6951998969486958, |
| "learning_rate": 1.3524270789003362e-05, |
| "loss": 0.4999, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.2369942196531793, |
| "grad_norm": 0.5765080186621816, |
| "learning_rate": 1.350538099582424e-05, |
| "loss": 0.4799, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.239884393063584, |
| "grad_norm": 0.6479445268775318, |
| "learning_rate": 1.3486476931540145e-05, |
| "loss": 0.4889, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.2427745664739884, |
| "grad_norm": 0.6266173892843901, |
| "learning_rate": 1.3467558673113286e-05, |
| "loss": 0.4754, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.245664739884393, |
| "grad_norm": 0.5691738042569816, |
| "learning_rate": 1.344862629756367e-05, |
| "loss": 0.4905, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.2485549132947975, |
| "grad_norm": 0.6745684340820571, |
| "learning_rate": 1.342967988196877e-05, |
| "loss": 0.4878, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.2514450867052025, |
| "grad_norm": 0.5991121170038285, |
| "learning_rate": 1.3410719503463225e-05, |
| "loss": 0.5049, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.254335260115607, |
| "grad_norm": 0.6662560641610981, |
| "learning_rate": 1.3391745239238509e-05, |
| "loss": 0.4745, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.2572254335260116, |
| "grad_norm": 0.7675180134150684, |
| "learning_rate": 1.3372757166542647e-05, |
| "loss": 0.4982, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.260115606936416, |
| "grad_norm": 0.5630850126003241, |
| "learning_rate": 1.3353755362679856e-05, |
| "loss": 0.4794, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.2630057803468207, |
| "grad_norm": 0.7541638366537354, |
| "learning_rate": 1.333473990501028e-05, |
| "loss": 0.4716, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.2658959537572256, |
| "grad_norm": 0.6226190873894291, |
| "learning_rate": 1.3315710870949632e-05, |
| "loss": 0.4982, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.26878612716763, |
| "grad_norm": 0.6071189139845049, |
| "learning_rate": 1.3296668337968904e-05, |
| "loss": 0.4954, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.2716763005780347, |
| "grad_norm": 0.6478422528890034, |
| "learning_rate": 1.3277612383594045e-05, |
| "loss": 0.4836, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.2745664739884393, |
| "grad_norm": 0.5724767544283145, |
| "learning_rate": 1.3258543085405649e-05, |
| "loss": 0.4822, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.277456647398844, |
| "grad_norm": 0.6411210199107057, |
| "learning_rate": 1.3239460521038626e-05, |
| "loss": 0.4791, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.2803468208092488, |
| "grad_norm": 0.58890510940359, |
| "learning_rate": 1.3220364768181901e-05, |
| "loss": 0.4837, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.2832369942196533, |
| "grad_norm": 0.6087119937815791, |
| "learning_rate": 1.3201255904578094e-05, |
| "loss": 0.4758, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.286127167630058, |
| "grad_norm": 0.580194410281497, |
| "learning_rate": 1.3182134008023198e-05, |
| "loss": 0.4655, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.2890173410404624, |
| "grad_norm": 0.6231249120490117, |
| "learning_rate": 1.3162999156366265e-05, |
| "loss": 0.4724, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.291907514450867, |
| "grad_norm": 0.5683956777923486, |
| "learning_rate": 1.3143851427509093e-05, |
| "loss": 0.4944, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.294797687861272, |
| "grad_norm": 0.5813223298187833, |
| "learning_rate": 1.3124690899405903e-05, |
| "loss": 0.4716, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.2976878612716765, |
| "grad_norm": 0.5910053696331238, |
| "learning_rate": 1.3105517650063026e-05, |
| "loss": 0.48, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.300578034682081, |
| "grad_norm": 0.5717961169940149, |
| "learning_rate": 1.3086331757538582e-05, |
| "loss": 0.4843, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.3034682080924855, |
| "grad_norm": 0.5610354183402453, |
| "learning_rate": 1.3067133299942172e-05, |
| "loss": 0.4785, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.30635838150289, |
| "grad_norm": 0.6113141495689187, |
| "learning_rate": 1.3047922355434533e-05, |
| "loss": 0.4818, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.3092485549132946, |
| "grad_norm": 0.5861183126949195, |
| "learning_rate": 1.3028699002227256e-05, |
| "loss": 0.4963, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.3121387283236996, |
| "grad_norm": 0.6521803246079821, |
| "learning_rate": 1.3009463318582448e-05, |
| "loss": 0.4724, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.315028901734104, |
| "grad_norm": 0.6034099548310038, |
| "learning_rate": 1.2990215382812408e-05, |
| "loss": 0.4703, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.3179190751445087, |
| "grad_norm": 0.6878646284940723, |
| "learning_rate": 1.2970955273279324e-05, |
| "loss": 0.4742, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.320809248554913, |
| "grad_norm": 0.6163785271184989, |
| "learning_rate": 1.2951683068394941e-05, |
| "loss": 0.4877, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.3236994219653178, |
| "grad_norm": 0.7017702766508197, |
| "learning_rate": 1.2932398846620244e-05, |
| "loss": 0.4985, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.3265895953757223, |
| "grad_norm": 0.6514042902173288, |
| "learning_rate": 1.291310268646515e-05, |
| "loss": 0.4869, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.3294797687861273, |
| "grad_norm": 0.6582908602483947, |
| "learning_rate": 1.2893794666488175e-05, |
| "loss": 0.4681, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.332369942196532, |
| "grad_norm": 0.9008026884763592, |
| "learning_rate": 1.2874474865296112e-05, |
| "loss": 0.4951, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.3352601156069364, |
| "grad_norm": 0.7098416272580851, |
| "learning_rate": 1.285514336154373e-05, |
| "loss": 0.4928, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.338150289017341, |
| "grad_norm": 0.7763595007460438, |
| "learning_rate": 1.283580023393343e-05, |
| "loss": 0.4944, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.3410404624277454, |
| "grad_norm": 0.6153457741221146, |
| "learning_rate": 1.2816445561214947e-05, |
| "loss": 0.5017, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.3439306358381504, |
| "grad_norm": 0.6256664364781374, |
| "learning_rate": 1.2797079422185013e-05, |
| "loss": 0.5045, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.346820809248555, |
| "grad_norm": 0.6221766375998093, |
| "learning_rate": 1.2777701895687034e-05, |
| "loss": 0.4917, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.3497109826589595, |
| "grad_norm": 0.576313700159756, |
| "learning_rate": 1.275831306061079e-05, |
| "loss": 0.489, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.352601156069364, |
| "grad_norm": 0.6741249523223151, |
| "learning_rate": 1.2738912995892095e-05, |
| "loss": 0.4737, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.3554913294797686, |
| "grad_norm": 0.6292357151471923, |
| "learning_rate": 1.2719501780512475e-05, |
| "loss": 0.4832, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.3583815028901736, |
| "grad_norm": 0.5467682217417565, |
| "learning_rate": 1.2700079493498864e-05, |
| "loss": 0.4887, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.361271676300578, |
| "grad_norm": 0.6152550291647984, |
| "learning_rate": 1.2680646213923264e-05, |
| "loss": 0.4892, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.3641618497109826, |
| "grad_norm": 0.6023318829337962, |
| "learning_rate": 1.2661202020902432e-05, |
| "loss": 0.4617, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.367052023121387, |
| "grad_norm": 0.6204224802741845, |
| "learning_rate": 1.2641746993597547e-05, |
| "loss": 0.4812, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.3699421965317917, |
| "grad_norm": 0.5972288325305012, |
| "learning_rate": 1.2622281211213916e-05, |
| "loss": 0.4897, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.3728323699421967, |
| "grad_norm": 0.6005242709543999, |
| "learning_rate": 1.2602804753000611e-05, |
| "loss": 0.4961, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.3757225433526012, |
| "grad_norm": 0.6075305884093503, |
| "learning_rate": 1.2583317698250184e-05, |
| "loss": 0.496, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.378612716763006, |
| "grad_norm": 0.6080984427620649, |
| "learning_rate": 1.2563820126298313e-05, |
| "loss": 0.5099, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.3815028901734103, |
| "grad_norm": 0.6183012204818146, |
| "learning_rate": 1.2544312116523508e-05, |
| "loss": 0.5213, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.384393063583815, |
| "grad_norm": 0.6124206661136334, |
| "learning_rate": 1.252479374834676e-05, |
| "loss": 0.482, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.38728323699422, |
| "grad_norm": 0.6338531257566175, |
| "learning_rate": 1.2505265101231243e-05, |
| "loss": 0.4884, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.3901734104046244, |
| "grad_norm": 0.582075543595997, |
| "learning_rate": 1.2485726254681971e-05, |
| "loss": 0.5121, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.393063583815029, |
| "grad_norm": 0.6033739430944867, |
| "learning_rate": 1.2466177288245486e-05, |
| "loss": 0.4869, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.3959537572254335, |
| "grad_norm": 0.5995179007325128, |
| "learning_rate": 1.2446618281509526e-05, |
| "loss": 0.4728, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.398843930635838, |
| "grad_norm": 0.5561904176522109, |
| "learning_rate": 1.2427049314102708e-05, |
| "loss": 0.5027, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.401734104046243, |
| "grad_norm": 0.5803956118986667, |
| "learning_rate": 1.24074704656942e-05, |
| "loss": 0.4874, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.4046242774566475, |
| "grad_norm": 0.6133935938899633, |
| "learning_rate": 1.2387881815993396e-05, |
| "loss": 0.472, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.407514450867052, |
| "grad_norm": 0.5570740395566359, |
| "learning_rate": 1.2368283444749602e-05, |
| "loss": 0.4774, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.4104046242774566, |
| "grad_norm": 0.6263728298267713, |
| "learning_rate": 1.2348675431751685e-05, |
| "loss": 0.4907, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.413294797687861, |
| "grad_norm": 0.659842661942265, |
| "learning_rate": 1.2329057856827781e-05, |
| "loss": 0.4961, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.416184971098266, |
| "grad_norm": 0.6564033064344027, |
| "learning_rate": 1.230943079984495e-05, |
| "loss": 0.4588, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.4190751445086707, |
| "grad_norm": 0.6595577915231365, |
| "learning_rate": 1.2289794340708855e-05, |
| "loss": 0.4797, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.421965317919075, |
| "grad_norm": 0.6674280365531452, |
| "learning_rate": 1.2270148559363429e-05, |
| "loss": 0.4848, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.4248554913294798, |
| "grad_norm": 0.6890891118583778, |
| "learning_rate": 1.2250493535790574e-05, |
| "loss": 0.4943, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.4277456647398843, |
| "grad_norm": 0.6128280313168251, |
| "learning_rate": 1.2230829350009805e-05, |
| "loss": 0.4873, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.430635838150289, |
| "grad_norm": 0.7196046761480148, |
| "learning_rate": 1.2211156082077945e-05, |
| "loss": 0.4816, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.433526011560694, |
| "grad_norm": 0.5682730567737198, |
| "learning_rate": 1.219147381208879e-05, |
| "loss": 0.4851, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.4364161849710984, |
| "grad_norm": 0.7132152076959606, |
| "learning_rate": 1.2171782620172787e-05, |
| "loss": 0.4909, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.439306358381503, |
| "grad_norm": 0.6805602468690977, |
| "learning_rate": 1.2152082586496701e-05, |
| "loss": 0.487, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.4421965317919074, |
| "grad_norm": 0.6756276813728793, |
| "learning_rate": 1.21323737912633e-05, |
| "loss": 0.4845, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.445086705202312, |
| "grad_norm": 0.6410611739001532, |
| "learning_rate": 1.2112656314711017e-05, |
| "loss": 0.4819, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.447976878612717, |
| "grad_norm": 0.6613685466896656, |
| "learning_rate": 1.2092930237113637e-05, |
| "loss": 0.5023, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.4508670520231215, |
| "grad_norm": 0.6028188017759273, |
| "learning_rate": 1.2073195638779944e-05, |
| "loss": 0.5049, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.453757225433526, |
| "grad_norm": 0.5882497100696249, |
| "learning_rate": 1.2053452600053435e-05, |
| "loss": 0.4728, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.4566473988439306, |
| "grad_norm": 0.6097538357238959, |
| "learning_rate": 1.2033701201311945e-05, |
| "loss": 0.4936, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.459537572254335, |
| "grad_norm": 0.6036063170459649, |
| "learning_rate": 1.2013941522967365e-05, |
| "loss": 0.4718, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.4624277456647397, |
| "grad_norm": 0.5859989981371178, |
| "learning_rate": 1.1994173645465283e-05, |
| "loss": 0.4912, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.4653179190751446, |
| "grad_norm": 0.5423461898525301, |
| "learning_rate": 1.197439764928467e-05, |
| "loss": 0.4851, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.468208092485549, |
| "grad_norm": 0.6108838211505494, |
| "learning_rate": 1.1954613614937549e-05, |
| "loss": 0.4621, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.4710982658959537, |
| "grad_norm": 0.5689087773053834, |
| "learning_rate": 1.193482162296867e-05, |
| "loss": 0.4786, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.4739884393063583, |
| "grad_norm": 0.7301557323716112, |
| "learning_rate": 1.1915021753955178e-05, |
| "loss": 0.4852, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.476878612716763, |
| "grad_norm": 0.5652516257706838, |
| "learning_rate": 1.1895214088506284e-05, |
| "loss": 0.4906, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.479768786127168, |
| "grad_norm": 0.5913465429932605, |
| "learning_rate": 1.187539870726295e-05, |
| "loss": 0.464, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.4826589595375723, |
| "grad_norm": 0.6112116690553486, |
| "learning_rate": 1.185557569089754e-05, |
| "loss": 0.5007, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.485549132947977, |
| "grad_norm": 0.544535409192471, |
| "learning_rate": 1.183574512011351e-05, |
| "loss": 0.4896, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.4884393063583814, |
| "grad_norm": 0.5999298855888491, |
| "learning_rate": 1.181590707564506e-05, |
| "loss": 0.4689, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.491329479768786, |
| "grad_norm": 0.63230318229943, |
| "learning_rate": 1.1796061638256831e-05, |
| "loss": 0.4984, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.494219653179191, |
| "grad_norm": 0.5875879683407528, |
| "learning_rate": 1.1776208888743554e-05, |
| "loss": 0.4876, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.4971098265895955, |
| "grad_norm": 0.5740009661133425, |
| "learning_rate": 1.1756348907929733e-05, |
| "loss": 0.4996, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.6467715034441753, |
| "learning_rate": 1.1736481776669307e-05, |
| "loss": 0.4899, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.5028901734104045, |
| "grad_norm": 0.6658254556399299, |
| "learning_rate": 1.1716607575845327e-05, |
| "loss": 0.4849, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.505780346820809, |
| "grad_norm": 0.6304554371559444, |
| "learning_rate": 1.1696726386369627e-05, |
| "loss": 0.4749, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.508670520231214, |
| "grad_norm": 0.553572005776321, |
| "learning_rate": 1.1676838289182502e-05, |
| "loss": 0.4692, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.5115606936416186, |
| "grad_norm": 0.5989811250733211, |
| "learning_rate": 1.1656943365252351e-05, |
| "loss": 0.491, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.514450867052023, |
| "grad_norm": 0.5750522619417978, |
| "learning_rate": 1.1637041695575383e-05, |
| "loss": 0.4971, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.5173410404624277, |
| "grad_norm": 0.6037262595078183, |
| "learning_rate": 1.1617133361175257e-05, |
| "loss": 0.472, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.520231213872832, |
| "grad_norm": 0.6374098762461646, |
| "learning_rate": 1.159721844310278e-05, |
| "loss": 0.5009, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.523121387283237, |
| "grad_norm": 0.5574371831910762, |
| "learning_rate": 1.1577297022435548e-05, |
| "loss": 0.4644, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.5260115606936417, |
| "grad_norm": 0.6285846684044717, |
| "learning_rate": 1.155736918027764e-05, |
| "loss": 0.4657, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.5289017341040463, |
| "grad_norm": 0.5680264691810969, |
| "learning_rate": 1.153743499775927e-05, |
| "loss": 0.5, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.531791907514451, |
| "grad_norm": 0.6392558071023373, |
| "learning_rate": 1.1517494556036477e-05, |
| "loss": 0.5144, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.5346820809248554, |
| "grad_norm": 0.5635030096595876, |
| "learning_rate": 1.149754793629077e-05, |
| "loss": 0.4854, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.5375722543352603, |
| "grad_norm": 0.5188679966546239, |
| "learning_rate": 1.1477595219728817e-05, |
| "loss": 0.4893, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.540462427745665, |
| "grad_norm": 0.5485891984650414, |
| "learning_rate": 1.1457636487582104e-05, |
| "loss": 0.4676, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.5433526011560694, |
| "grad_norm": 0.5917527574622287, |
| "learning_rate": 1.143767182110661e-05, |
| "loss": 0.5032, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.546242774566474, |
| "grad_norm": 0.5373968873975472, |
| "learning_rate": 1.1417701301582476e-05, |
| "loss": 0.4932, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.5491329479768785, |
| "grad_norm": 0.5661442996520238, |
| "learning_rate": 1.1397725010313666e-05, |
| "loss": 0.4959, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.5520231213872835, |
| "grad_norm": 0.581891800537539, |
| "learning_rate": 1.137774302862764e-05, |
| "loss": 0.499, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.5549132947976876, |
| "grad_norm": 0.5480548231275786, |
| "learning_rate": 1.135775543787504e-05, |
| "loss": 0.483, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.5578034682080926, |
| "grad_norm": 0.5677370412015945, |
| "learning_rate": 1.1337762319429326e-05, |
| "loss": 0.4628, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.560693641618497, |
| "grad_norm": 0.5586053485925068, |
| "learning_rate": 1.1317763754686474e-05, |
| "loss": 0.4548, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.5635838150289016, |
| "grad_norm": 0.5578888374471789, |
| "learning_rate": 1.1297759825064624e-05, |
| "loss": 0.5068, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.5664739884393066, |
| "grad_norm": 0.5930380142370744, |
| "learning_rate": 1.1277750612003767e-05, |
| "loss": 0.4723, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.5693641618497107, |
| "grad_norm": 0.5963766508065118, |
| "learning_rate": 1.1257736196965392e-05, |
| "loss": 0.4845, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.5722543352601157, |
| "grad_norm": 0.5901899876827236, |
| "learning_rate": 1.123771666143218e-05, |
| "loss": 0.4797, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.5751445086705202, |
| "grad_norm": 0.605878081712083, |
| "learning_rate": 1.1217692086907653e-05, |
| "loss": 0.4768, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.578034682080925, |
| "grad_norm": 0.5815366679890774, |
| "learning_rate": 1.1197662554915836e-05, |
| "loss": 0.4831, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.5809248554913293, |
| "grad_norm": 0.5410869254991583, |
| "learning_rate": 1.1177628147000961e-05, |
| "loss": 0.4843, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.583815028901734, |
| "grad_norm": 0.5840458561279575, |
| "learning_rate": 1.1157588944727087e-05, |
| "loss": 0.5055, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.586705202312139, |
| "grad_norm": 0.5641649806183132, |
| "learning_rate": 1.113754502967781e-05, |
| "loss": 0.4994, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.5895953757225434, |
| "grad_norm": 0.5514001572280435, |
| "learning_rate": 1.1117496483455898e-05, |
| "loss": 0.4731, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.592485549132948, |
| "grad_norm": 0.5887720885509165, |
| "learning_rate": 1.1097443387682984e-05, |
| "loss": 0.4717, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.5953757225433525, |
| "grad_norm": 0.6310085081831622, |
| "learning_rate": 1.1077385823999218e-05, |
| "loss": 0.4806, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.598265895953757, |
| "grad_norm": 0.6074205452371207, |
| "learning_rate": 1.1057323874062942e-05, |
| "loss": 0.4841, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.601156069364162, |
| "grad_norm": 0.5338180979357099, |
| "learning_rate": 1.1037257619550353e-05, |
| "loss": 0.4805, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.6040462427745665, |
| "grad_norm": 0.5762991464150397, |
| "learning_rate": 1.1017187142155174e-05, |
| "loss": 0.4719, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.606936416184971, |
| "grad_norm": 0.6045308731955183, |
| "learning_rate": 1.0997112523588322e-05, |
| "loss": 0.4924, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.6098265895953756, |
| "grad_norm": 0.6210687094964608, |
| "learning_rate": 1.0977033845577574e-05, |
| "loss": 0.4912, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.61271676300578, |
| "grad_norm": 0.5971492004599623, |
| "learning_rate": 1.0956951189867224e-05, |
| "loss": 0.4893, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.615606936416185, |
| "grad_norm": 0.5160192763229764, |
| "learning_rate": 1.0936864638217777e-05, |
| "loss": 0.4858, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.6184971098265897, |
| "grad_norm": 0.6613318429592981, |
| "learning_rate": 1.0916774272405581e-05, |
| "loss": 0.4604, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.621387283236994, |
| "grad_norm": 0.6634111921366653, |
| "learning_rate": 1.089668017422253e-05, |
| "loss": 0.4689, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.6242774566473988, |
| "grad_norm": 0.582676908856468, |
| "learning_rate": 1.0876582425475694e-05, |
| "loss": 0.4932, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.6271676300578033, |
| "grad_norm": 0.5759991723829018, |
| "learning_rate": 1.0856481107987024e-05, |
| "loss": 0.4909, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.6300578034682083, |
| "grad_norm": 0.6602743078348354, |
| "learning_rate": 1.0836376303592991e-05, |
| "loss": 0.4854, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.632947976878613, |
| "grad_norm": 0.6242047289159153, |
| "learning_rate": 1.0816268094144257e-05, |
| "loss": 0.4885, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.6358381502890174, |
| "grad_norm": 0.6142487967868995, |
| "learning_rate": 1.0796156561505355e-05, |
| "loss": 0.4852, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.638728323699422, |
| "grad_norm": 0.6562842223395773, |
| "learning_rate": 1.0776041787554348e-05, |
| "loss": 0.4965, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.6416184971098264, |
| "grad_norm": 0.6288923772138604, |
| "learning_rate": 1.0755923854182484e-05, |
| "loss": 0.4812, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.6445086705202314, |
| "grad_norm": 0.593632937537067, |
| "learning_rate": 1.0735802843293888e-05, |
| "loss": 0.4802, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.647398843930636, |
| "grad_norm": 0.5534510402258848, |
| "learning_rate": 1.0715678836805204e-05, |
| "loss": 0.4793, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.6502890173410405, |
| "grad_norm": 0.5820026877478321, |
| "learning_rate": 1.0695551916645273e-05, |
| "loss": 0.4899, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.653179190751445, |
| "grad_norm": 0.5938224966700754, |
| "learning_rate": 1.0675422164754808e-05, |
| "loss": 0.4823, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.6560693641618496, |
| "grad_norm": 0.5738073249813049, |
| "learning_rate": 1.065528966308603e-05, |
| "loss": 0.4726, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.6589595375722546, |
| "grad_norm": 0.5912461023132076, |
| "learning_rate": 1.063515449360238e-05, |
| "loss": 0.4905, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.661849710982659, |
| "grad_norm": 0.5913815785583798, |
| "learning_rate": 1.0615016738278138e-05, |
| "loss": 0.4794, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.6647398843930636, |
| "grad_norm": 0.5272010239980836, |
| "learning_rate": 1.059487647909813e-05, |
| "loss": 0.4904, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.667630057803468, |
| "grad_norm": 0.6053997555307846, |
| "learning_rate": 1.0574733798057359e-05, |
| "loss": 0.5063, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.6705202312138727, |
| "grad_norm": 0.6350430971708139, |
| "learning_rate": 1.05545887771607e-05, |
| "loss": 0.5098, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.6734104046242777, |
| "grad_norm": 0.5647027666397387, |
| "learning_rate": 1.0534441498422552e-05, |
| "loss": 0.4815, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.6763005780346822, |
| "grad_norm": 0.5956593825863362, |
| "learning_rate": 1.0514292043866498e-05, |
| "loss": 0.4849, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.679190751445087, |
| "grad_norm": 0.5922804270276485, |
| "learning_rate": 1.0494140495524989e-05, |
| "loss": 0.4805, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.6820809248554913, |
| "grad_norm": 0.6253790789940853, |
| "learning_rate": 1.0473986935438997e-05, |
| "loss": 0.4874, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.684971098265896, |
| "grad_norm": 0.5946787261865738, |
| "learning_rate": 1.045383144565768e-05, |
| "loss": 0.4961, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.687861271676301, |
| "grad_norm": 0.6003433405556385, |
| "learning_rate": 1.0433674108238058e-05, |
| "loss": 0.4952, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.690751445086705, |
| "grad_norm": 0.5866411248720449, |
| "learning_rate": 1.0413515005244673e-05, |
| "loss": 0.4741, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.69364161849711, |
| "grad_norm": 0.6495719140271753, |
| "learning_rate": 1.0393354218749247e-05, |
| "loss": 0.4786, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.6965317919075145, |
| "grad_norm": 0.6131357075260606, |
| "learning_rate": 1.0373191830830367e-05, |
| "loss": 0.4779, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.699421965317919, |
| "grad_norm": 0.569930378251785, |
| "learning_rate": 1.0353027923573134e-05, |
| "loss": 0.4615, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.7023121387283235, |
| "grad_norm": 0.6128028358275525, |
| "learning_rate": 1.033286257906883e-05, |
| "loss": 0.4796, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.705202312138728, |
| "grad_norm": 0.6255418069337046, |
| "learning_rate": 1.0312695879414598e-05, |
| "loss": 0.4729, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.708092485549133, |
| "grad_norm": 0.5709860292638275, |
| "learning_rate": 1.0292527906713092e-05, |
| "loss": 0.4711, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.7109826589595376, |
| "grad_norm": 0.5902086869559793, |
| "learning_rate": 1.0272358743072152e-05, |
| "loss": 0.4703, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.713872832369942, |
| "grad_norm": 0.5640946249724696, |
| "learning_rate": 1.0252188470604462e-05, |
| "loss": 0.4892, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.7167630057803467, |
| "grad_norm": 0.5894758987503583, |
| "learning_rate": 1.0232017171427225e-05, |
| "loss": 0.48, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.7196531791907512, |
| "grad_norm": 0.5615067004061257, |
| "learning_rate": 1.021184492766182e-05, |
| "loss": 0.4987, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.722543352601156, |
| "grad_norm": 0.5453863485675157, |
| "learning_rate": 1.0191671821433474e-05, |
| "loss": 0.4639, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.7254335260115607, |
| "grad_norm": 0.6129944896000652, |
| "learning_rate": 1.0171497934870927e-05, |
| "loss": 0.4963, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.7283236994219653, |
| "grad_norm": 0.5875588066436983, |
| "learning_rate": 1.0151323350106088e-05, |
| "loss": 0.4985, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.73121387283237, |
| "grad_norm": 0.5679917028336635, |
| "learning_rate": 1.0131148149273724e-05, |
| "loss": 0.4777, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.7341040462427744, |
| "grad_norm": 0.5778596279001901, |
| "learning_rate": 1.0110972414511089e-05, |
| "loss": 0.4752, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.7369942196531793, |
| "grad_norm": 0.5596499004879297, |
| "learning_rate": 1.0090796227957633e-05, |
| "loss": 0.4583, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.739884393063584, |
| "grad_norm": 0.6132924162564561, |
| "learning_rate": 1.0070619671754624e-05, |
| "loss": 0.486, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.7427745664739884, |
| "grad_norm": 0.5653037104976039, |
| "learning_rate": 1.0050442828044853e-05, |
| "loss": 0.4966, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.745664739884393, |
| "grad_norm": 0.5883371672044092, |
| "learning_rate": 1.003026577897227e-05, |
| "loss": 0.48, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.7485549132947975, |
| "grad_norm": 0.6451986399775324, |
| "learning_rate": 1.0010088606681666e-05, |
| "loss": 0.4679, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.7514450867052025, |
| "grad_norm": 0.5665043387089622, |
| "learning_rate": 9.989911393318334e-06, |
| "loss": 0.4627, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.754335260115607, |
| "grad_norm": 0.6220616186244058, |
| "learning_rate": 9.969734221027732e-06, |
| "loss": 0.496, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.7572254335260116, |
| "grad_norm": 0.6152265052786257, |
| "learning_rate": 9.94955717195515e-06, |
| "loss": 0.4848, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.760115606936416, |
| "grad_norm": 0.5661554513001946, |
| "learning_rate": 9.929380328245378e-06, |
| "loss": 0.4872, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.7630057803468207, |
| "grad_norm": 0.6052397841615009, |
| "learning_rate": 9.909203772042369e-06, |
| "loss": 0.4748, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.7658959537572256, |
| "grad_norm": 0.591030265881203, |
| "learning_rate": 9.889027585488914e-06, |
| "loss": 0.4935, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.76878612716763, |
| "grad_norm": 0.6729411575926139, |
| "learning_rate": 9.868851850726281e-06, |
| "loss": 0.4791, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.7716763005780347, |
| "grad_norm": 0.5739839955739133, |
| "learning_rate": 9.848676649893912e-06, |
| "loss": 0.4984, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.7745664739884393, |
| "grad_norm": 0.6792832962777284, |
| "learning_rate": 9.828502065129076e-06, |
| "loss": 0.4687, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.777456647398844, |
| "grad_norm": 0.6192342735329593, |
| "learning_rate": 9.808328178566531e-06, |
| "loss": 0.4772, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.7803468208092488, |
| "grad_norm": 0.5352041062460219, |
| "learning_rate": 9.788155072338185e-06, |
| "loss": 0.4875, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.7832369942196533, |
| "grad_norm": 0.6095338051981292, |
| "learning_rate": 9.767982828572778e-06, |
| "loss": 0.4924, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.786127167630058, |
| "grad_norm": 0.6221863600074393, |
| "learning_rate": 9.74781152939554e-06, |
| "loss": 0.4967, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.7890173410404624, |
| "grad_norm": 0.5988117572685457, |
| "learning_rate": 9.72764125692785e-06, |
| "loss": 0.4866, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.791907514450867, |
| "grad_norm": 0.5678508843431816, |
| "learning_rate": 9.707472093286906e-06, |
| "loss": 0.4703, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.794797687861272, |
| "grad_norm": 0.7097948148937187, |
| "learning_rate": 9.687304120585405e-06, |
| "loss": 0.5112, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.7976878612716765, |
| "grad_norm": 0.6722679540742882, |
| "learning_rate": 9.667137420931174e-06, |
| "loss": 0.4931, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.800578034682081, |
| "grad_norm": 0.5801107851006374, |
| "learning_rate": 9.64697207642687e-06, |
| "loss": 0.4782, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.8034682080924855, |
| "grad_norm": 0.598003373613563, |
| "learning_rate": 9.626808169169635e-06, |
| "loss": 0.4734, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.80635838150289, |
| "grad_norm": 0.6432491971726029, |
| "learning_rate": 9.606645781250754e-06, |
| "loss": 0.5032, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.809248554913295, |
| "grad_norm": 0.5811311713045815, |
| "learning_rate": 9.586484994755332e-06, |
| "loss": 0.4731, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.812138728323699, |
| "grad_norm": 0.5536057155729461, |
| "learning_rate": 9.566325891761944e-06, |
| "loss": 0.4922, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.815028901734104, |
| "grad_norm": 0.5537928759336738, |
| "learning_rate": 9.546168554342323e-06, |
| "loss": 0.4901, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.8179190751445087, |
| "grad_norm": 0.5930344405673645, |
| "learning_rate": 9.526013064561006e-06, |
| "loss": 0.4901, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.820809248554913, |
| "grad_norm": 0.5781843724894278, |
| "learning_rate": 9.505859504475015e-06, |
| "loss": 0.4737, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.8236994219653178, |
| "grad_norm": 0.5517901321295087, |
| "learning_rate": 9.485707956133502e-06, |
| "loss": 0.4852, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.8265895953757223, |
| "grad_norm": 0.5720443943430279, |
| "learning_rate": 9.465558501577451e-06, |
| "loss": 0.4988, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.8294797687861273, |
| "grad_norm": 0.6164460011881768, |
| "learning_rate": 9.445411222839301e-06, |
| "loss": 0.4622, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.832369942196532, |
| "grad_norm": 0.6438301207439332, |
| "learning_rate": 9.425266201942646e-06, |
| "loss": 0.4874, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.8352601156069364, |
| "grad_norm": 0.5870456593275332, |
| "learning_rate": 9.405123520901871e-06, |
| "loss": 0.4748, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.838150289017341, |
| "grad_norm": 0.6667924069896521, |
| "learning_rate": 9.384983261721865e-06, |
| "loss": 0.4682, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.8410404624277454, |
| "grad_norm": 0.5473846946457717, |
| "learning_rate": 9.364845506397625e-06, |
| "loss": 0.4857, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.8439306358381504, |
| "grad_norm": 0.5383036248842854, |
| "learning_rate": 9.344710336913971e-06, |
| "loss": 0.4799, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.846820809248555, |
| "grad_norm": 0.5949748460882582, |
| "learning_rate": 9.324577835245197e-06, |
| "loss": 0.4979, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.8497109826589595, |
| "grad_norm": 0.6178058328203948, |
| "learning_rate": 9.30444808335473e-06, |
| "loss": 0.4975, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.852601156069364, |
| "grad_norm": 0.630418096260648, |
| "learning_rate": 9.284321163194801e-06, |
| "loss": 0.471, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.8554913294797686, |
| "grad_norm": 0.5495926281924244, |
| "learning_rate": 9.264197156706115e-06, |
| "loss": 0.4809, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.8583815028901736, |
| "grad_norm": 0.7086518063216776, |
| "learning_rate": 9.244076145817518e-06, |
| "loss": 0.4614, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.861271676300578, |
| "grad_norm": 0.6924691302106323, |
| "learning_rate": 9.223958212445657e-06, |
| "loss": 0.5023, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.8641618497109826, |
| "grad_norm": 0.5755074319539322, |
| "learning_rate": 9.203843438494644e-06, |
| "loss": 0.5028, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.867052023121387, |
| "grad_norm": 0.7339069507333836, |
| "learning_rate": 9.183731905855746e-06, |
| "loss": 0.481, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.8699421965317917, |
| "grad_norm": 0.5573775780123593, |
| "learning_rate": 9.163623696407014e-06, |
| "loss": 0.4907, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.8728323699421967, |
| "grad_norm": 0.6062346515497278, |
| "learning_rate": 9.14351889201298e-06, |
| "loss": 0.4749, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.8757225433526012, |
| "grad_norm": 0.631590029057518, |
| "learning_rate": 9.123417574524308e-06, |
| "loss": 0.4883, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.878612716763006, |
| "grad_norm": 0.5722689231967004, |
| "learning_rate": 9.103319825777475e-06, |
| "loss": 0.4915, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.8815028901734103, |
| "grad_norm": 0.6002470468969344, |
| "learning_rate": 9.083225727594422e-06, |
| "loss": 0.4914, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.884393063583815, |
| "grad_norm": 0.5315745683150092, |
| "learning_rate": 9.063135361782228e-06, |
| "loss": 0.4761, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.88728323699422, |
| "grad_norm": 0.5901949821752347, |
| "learning_rate": 9.043048810132778e-06, |
| "loss": 0.4927, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.8901734104046244, |
| "grad_norm": 0.696770939108106, |
| "learning_rate": 9.02296615442243e-06, |
| "loss": 0.4979, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.893063583815029, |
| "grad_norm": 0.5335722900416059, |
| "learning_rate": 9.002887476411681e-06, |
| "loss": 0.4506, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.8959537572254335, |
| "grad_norm": 0.5448959769913916, |
| "learning_rate": 8.982812857844826e-06, |
| "loss": 0.4768, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.898843930635838, |
| "grad_norm": 0.6178055948176863, |
| "learning_rate": 8.96274238044965e-06, |
| "loss": 0.4867, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.901734104046243, |
| "grad_norm": 0.6091101645045837, |
| "learning_rate": 8.942676125937063e-06, |
| "loss": 0.4854, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.9046242774566475, |
| "grad_norm": 0.57752939009839, |
| "learning_rate": 8.922614176000783e-06, |
| "loss": 0.47, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.907514450867052, |
| "grad_norm": 0.532063897992589, |
| "learning_rate": 8.902556612317017e-06, |
| "loss": 0.5041, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.9104046242774566, |
| "grad_norm": 0.6190519199948481, |
| "learning_rate": 8.882503516544105e-06, |
| "loss": 0.4674, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.913294797687861, |
| "grad_norm": 0.6052048247524936, |
| "learning_rate": 8.862454970322194e-06, |
| "loss": 0.4713, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.916184971098266, |
| "grad_norm": 0.6017438939459462, |
| "learning_rate": 8.842411055272913e-06, |
| "loss": 0.478, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.9190751445086707, |
| "grad_norm": 0.5768744304740853, |
| "learning_rate": 8.82237185299904e-06, |
| "loss": 0.4875, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.921965317919075, |
| "grad_norm": 0.5951970872451218, |
| "learning_rate": 8.802337445084165e-06, |
| "loss": 0.4814, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.9248554913294798, |
| "grad_norm": 0.5608710672407204, |
| "learning_rate": 8.782307913092354e-06, |
| "loss": 0.4752, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.9277456647398843, |
| "grad_norm": 0.5509512795540918, |
| "learning_rate": 8.762283338567822e-06, |
| "loss": 0.492, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.9306358381502893, |
| "grad_norm": 0.5975138590995895, |
| "learning_rate": 8.74226380303461e-06, |
| "loss": 0.4828, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.9335260115606934, |
| "grad_norm": 0.5864321790518813, |
| "learning_rate": 8.722249387996238e-06, |
| "loss": 0.4859, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.9364161849710984, |
| "grad_norm": 0.5555533284809581, |
| "learning_rate": 8.702240174935377e-06, |
| "loss": 0.5051, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.939306358381503, |
| "grad_norm": 0.5650980172734773, |
| "learning_rate": 8.68223624531353e-06, |
| "loss": 0.4781, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.9421965317919074, |
| "grad_norm": 0.5991051719573498, |
| "learning_rate": 8.662237680570676e-06, |
| "loss": 0.4972, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.9450867052023124, |
| "grad_norm": 0.6209924930788805, |
| "learning_rate": 8.642244562124962e-06, |
| "loss": 0.4771, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.9479768786127165, |
| "grad_norm": 0.5411586227886513, |
| "learning_rate": 8.622256971372361e-06, |
| "loss": 0.4847, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.9508670520231215, |
| "grad_norm": 0.5668517081503266, |
| "learning_rate": 8.602274989686338e-06, |
| "loss": 0.4848, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.953757225433526, |
| "grad_norm": 0.5759877651439558, |
| "learning_rate": 8.582298698417529e-06, |
| "loss": 0.4884, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.9566473988439306, |
| "grad_norm": 0.5862302492886513, |
| "learning_rate": 8.562328178893392e-06, |
| "loss": 0.4932, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.959537572254335, |
| "grad_norm": 0.5481516742462423, |
| "learning_rate": 8.542363512417898e-06, |
| "loss": 0.5047, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.9624277456647397, |
| "grad_norm": 0.513093768869479, |
| "learning_rate": 8.522404780271187e-06, |
| "loss": 0.4664, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.9653179190751446, |
| "grad_norm": 0.5575656175840487, |
| "learning_rate": 8.502452063709235e-06, |
| "loss": 0.4606, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.968208092485549, |
| "grad_norm": 0.5556502229251714, |
| "learning_rate": 8.482505443963526e-06, |
| "loss": 0.4775, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.9710982658959537, |
| "grad_norm": 0.5733747154067709, |
| "learning_rate": 8.462565002240733e-06, |
| "loss": 0.4666, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.9739884393063583, |
| "grad_norm": 0.5612472015255541, |
| "learning_rate": 8.442630819722365e-06, |
| "loss": 0.4949, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.976878612716763, |
| "grad_norm": 0.6413581942492151, |
| "learning_rate": 8.422702977564453e-06, |
| "loss": 0.4987, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.979768786127168, |
| "grad_norm": 0.5763965026676654, |
| "learning_rate": 8.402781556897222e-06, |
| "loss": 0.4829, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.9826589595375723, |
| "grad_norm": 0.5273947424887864, |
| "learning_rate": 8.382866638824745e-06, |
| "loss": 0.4574, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.985549132947977, |
| "grad_norm": 0.7908791681969232, |
| "learning_rate": 8.362958304424622e-06, |
| "loss": 0.4724, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.9884393063583814, |
| "grad_norm": 0.6609996660422045, |
| "learning_rate": 8.34305663474765e-06, |
| "loss": 0.481, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.991329479768786, |
| "grad_norm": 0.6358348872654778, |
| "learning_rate": 8.323161710817502e-06, |
| "loss": 0.4858, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.994219653179191, |
| "grad_norm": 0.6714732526625259, |
| "learning_rate": 8.303273613630376e-06, |
| "loss": 0.5107, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.9971098265895955, |
| "grad_norm": 0.5450824548582315, |
| "learning_rate": 8.28339242415468e-06, |
| "loss": 0.4952, |
| "step": 1037 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.6874759980679123, |
| "learning_rate": 8.263518223330698e-06, |
| "loss": 0.4575, |
| "step": 1038 |
| }, |
| { |
| "epoch": 3.0028901734104045, |
| "grad_norm": 0.9527898393580617, |
| "learning_rate": 8.24365109207027e-06, |
| "loss": 0.3802, |
| "step": 1039 |
| }, |
| { |
| "epoch": 3.005780346820809, |
| "grad_norm": 0.7089585065956318, |
| "learning_rate": 8.223791111256447e-06, |
| "loss": 0.3941, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.008670520231214, |
| "grad_norm": 1.1904284295357626, |
| "learning_rate": 8.203938361743169e-06, |
| "loss": 0.3994, |
| "step": 1041 |
| }, |
| { |
| "epoch": 3.0115606936416186, |
| "grad_norm": 0.6023200450037057, |
| "learning_rate": 8.184092924354943e-06, |
| "loss": 0.3836, |
| "step": 1042 |
| }, |
| { |
| "epoch": 3.014450867052023, |
| "grad_norm": 0.7823993760566921, |
| "learning_rate": 8.164254879886496e-06, |
| "loss": 0.4116, |
| "step": 1043 |
| }, |
| { |
| "epoch": 3.0173410404624277, |
| "grad_norm": 0.7376436181873413, |
| "learning_rate": 8.144424309102463e-06, |
| "loss": 0.4203, |
| "step": 1044 |
| }, |
| { |
| "epoch": 3.020231213872832, |
| "grad_norm": 0.614281631401755, |
| "learning_rate": 8.124601292737051e-06, |
| "loss": 0.3856, |
| "step": 1045 |
| }, |
| { |
| "epoch": 3.023121387283237, |
| "grad_norm": 0.6160206160107256, |
| "learning_rate": 8.10478591149372e-06, |
| "loss": 0.4013, |
| "step": 1046 |
| }, |
| { |
| "epoch": 3.0260115606936417, |
| "grad_norm": 0.6319733094936663, |
| "learning_rate": 8.084978246044827e-06, |
| "loss": 0.3874, |
| "step": 1047 |
| }, |
| { |
| "epoch": 3.0289017341040463, |
| "grad_norm": 0.5472529492540473, |
| "learning_rate": 8.065178377031333e-06, |
| "loss": 0.3672, |
| "step": 1048 |
| }, |
| { |
| "epoch": 3.031791907514451, |
| "grad_norm": 0.6252569965995163, |
| "learning_rate": 8.045386385062453e-06, |
| "loss": 0.4023, |
| "step": 1049 |
| }, |
| { |
| "epoch": 3.0346820809248554, |
| "grad_norm": 0.5822538115356047, |
| "learning_rate": 8.025602350715332e-06, |
| "loss": 0.3942, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.03757225433526, |
| "grad_norm": 0.567501237074041, |
| "learning_rate": 8.005826354534722e-06, |
| "loss": 0.3783, |
| "step": 1051 |
| }, |
| { |
| "epoch": 3.040462427745665, |
| "grad_norm": 0.5400206076485143, |
| "learning_rate": 7.986058477032638e-06, |
| "loss": 0.3871, |
| "step": 1052 |
| }, |
| { |
| "epoch": 3.0433526011560694, |
| "grad_norm": 0.5846560249755006, |
| "learning_rate": 7.966298798688059e-06, |
| "loss": 0.3816, |
| "step": 1053 |
| }, |
| { |
| "epoch": 3.046242774566474, |
| "grad_norm": 0.5676434874248799, |
| "learning_rate": 7.94654739994657e-06, |
| "loss": 0.3924, |
| "step": 1054 |
| }, |
| { |
| "epoch": 3.0491329479768785, |
| "grad_norm": 0.5333952954526797, |
| "learning_rate": 7.926804361220056e-06, |
| "loss": 0.3878, |
| "step": 1055 |
| }, |
| { |
| "epoch": 3.052023121387283, |
| "grad_norm": 0.5851971305918012, |
| "learning_rate": 7.907069762886366e-06, |
| "loss": 0.3824, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.054913294797688, |
| "grad_norm": 0.578493351433017, |
| "learning_rate": 7.887343685288985e-06, |
| "loss": 0.3799, |
| "step": 1057 |
| }, |
| { |
| "epoch": 3.0578034682080926, |
| "grad_norm": 0.5889989245232254, |
| "learning_rate": 7.867626208736703e-06, |
| "loss": 0.3819, |
| "step": 1058 |
| }, |
| { |
| "epoch": 3.060693641618497, |
| "grad_norm": 0.5723245049646746, |
| "learning_rate": 7.8479174135033e-06, |
| "loss": 0.3722, |
| "step": 1059 |
| }, |
| { |
| "epoch": 3.0635838150289016, |
| "grad_norm": 0.6215894667582502, |
| "learning_rate": 7.828217379827216e-06, |
| "loss": 0.3899, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.066473988439306, |
| "grad_norm": 0.5938247793105494, |
| "learning_rate": 7.808526187911214e-06, |
| "loss": 0.431, |
| "step": 1061 |
| }, |
| { |
| "epoch": 3.069364161849711, |
| "grad_norm": 0.5228702312038274, |
| "learning_rate": 7.788843917922056e-06, |
| "loss": 0.3647, |
| "step": 1062 |
| }, |
| { |
| "epoch": 3.0722543352601157, |
| "grad_norm": 0.6091134386113599, |
| "learning_rate": 7.769170649990198e-06, |
| "loss": 0.3816, |
| "step": 1063 |
| }, |
| { |
| "epoch": 3.0751445086705202, |
| "grad_norm": 0.5642938672189238, |
| "learning_rate": 7.749506464209428e-06, |
| "loss": 0.3939, |
| "step": 1064 |
| }, |
| { |
| "epoch": 3.078034682080925, |
| "grad_norm": 0.5421265675643979, |
| "learning_rate": 7.729851440636576e-06, |
| "loss": 0.3886, |
| "step": 1065 |
| }, |
| { |
| "epoch": 3.0809248554913293, |
| "grad_norm": 0.5902121269790457, |
| "learning_rate": 7.710205659291147e-06, |
| "loss": 0.372, |
| "step": 1066 |
| }, |
| { |
| "epoch": 3.0838150289017343, |
| "grad_norm": 0.5532348889649435, |
| "learning_rate": 7.690569200155052e-06, |
| "loss": 0.3899, |
| "step": 1067 |
| }, |
| { |
| "epoch": 3.086705202312139, |
| "grad_norm": 0.5288570224088542, |
| "learning_rate": 7.67094214317222e-06, |
| "loss": 0.3981, |
| "step": 1068 |
| }, |
| { |
| "epoch": 3.0895953757225434, |
| "grad_norm": 0.5529303979607351, |
| "learning_rate": 7.651324568248319e-06, |
| "loss": 0.3915, |
| "step": 1069 |
| }, |
| { |
| "epoch": 3.092485549132948, |
| "grad_norm": 0.574796902074762, |
| "learning_rate": 7.631716555250401e-06, |
| "loss": 0.3738, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.0953757225433525, |
| "grad_norm": 0.5997982476864651, |
| "learning_rate": 7.612118184006606e-06, |
| "loss": 0.4025, |
| "step": 1071 |
| }, |
| { |
| "epoch": 3.098265895953757, |
| "grad_norm": 0.5586255763175132, |
| "learning_rate": 7.592529534305805e-06, |
| "loss": 0.3829, |
| "step": 1072 |
| }, |
| { |
| "epoch": 3.101156069364162, |
| "grad_norm": 0.5662255742739997, |
| "learning_rate": 7.572950685897295e-06, |
| "loss": 0.3796, |
| "step": 1073 |
| }, |
| { |
| "epoch": 3.1040462427745665, |
| "grad_norm": 0.5455105617146326, |
| "learning_rate": 7.5533817184904765e-06, |
| "loss": 0.3812, |
| "step": 1074 |
| }, |
| { |
| "epoch": 3.106936416184971, |
| "grad_norm": 0.5541786996420895, |
| "learning_rate": 7.533822711754516e-06, |
| "loss": 0.3903, |
| "step": 1075 |
| }, |
| { |
| "epoch": 3.1098265895953756, |
| "grad_norm": 0.4957281719052488, |
| "learning_rate": 7.514273745318032e-06, |
| "loss": 0.4175, |
| "step": 1076 |
| }, |
| { |
| "epoch": 3.11271676300578, |
| "grad_norm": 0.5194029043856342, |
| "learning_rate": 7.494734898768758e-06, |
| "loss": 0.3985, |
| "step": 1077 |
| }, |
| { |
| "epoch": 3.115606936416185, |
| "grad_norm": 0.5467787010237095, |
| "learning_rate": 7.475206251653242e-06, |
| "loss": 0.3798, |
| "step": 1078 |
| }, |
| { |
| "epoch": 3.1184971098265897, |
| "grad_norm": 0.4946840290814573, |
| "learning_rate": 7.4556878834764965e-06, |
| "loss": 0.3862, |
| "step": 1079 |
| }, |
| { |
| "epoch": 3.121387283236994, |
| "grad_norm": 0.5675664027874467, |
| "learning_rate": 7.436179873701687e-06, |
| "loss": 0.404, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.1242774566473988, |
| "grad_norm": 0.5758919601256814, |
| "learning_rate": 7.416682301749819e-06, |
| "loss": 0.3864, |
| "step": 1081 |
| }, |
| { |
| "epoch": 3.1271676300578033, |
| "grad_norm": 0.4944933437081969, |
| "learning_rate": 7.397195246999391e-06, |
| "loss": 0.3918, |
| "step": 1082 |
| }, |
| { |
| "epoch": 3.1300578034682083, |
| "grad_norm": 0.5370826053866485, |
| "learning_rate": 7.377718788786088e-06, |
| "loss": 0.3908, |
| "step": 1083 |
| }, |
| { |
| "epoch": 3.132947976878613, |
| "grad_norm": 0.5074787765859067, |
| "learning_rate": 7.358253006402454e-06, |
| "loss": 0.3911, |
| "step": 1084 |
| }, |
| { |
| "epoch": 3.1358381502890174, |
| "grad_norm": 0.5337743052057817, |
| "learning_rate": 7.338797979097572e-06, |
| "loss": 0.3871, |
| "step": 1085 |
| }, |
| { |
| "epoch": 3.138728323699422, |
| "grad_norm": 0.5266563755134518, |
| "learning_rate": 7.319353786076739e-06, |
| "loss": 0.3826, |
| "step": 1086 |
| }, |
| { |
| "epoch": 3.1416184971098264, |
| "grad_norm": 0.6054998252332005, |
| "learning_rate": 7.299920506501135e-06, |
| "loss": 0.3885, |
| "step": 1087 |
| }, |
| { |
| "epoch": 3.1445086705202314, |
| "grad_norm": 0.5609546262425186, |
| "learning_rate": 7.280498219487526e-06, |
| "loss": 0.4117, |
| "step": 1088 |
| }, |
| { |
| "epoch": 3.147398843930636, |
| "grad_norm": 0.5517930019790442, |
| "learning_rate": 7.261087004107909e-06, |
| "loss": 0.4003, |
| "step": 1089 |
| }, |
| { |
| "epoch": 3.1502890173410405, |
| "grad_norm": 0.518951997928602, |
| "learning_rate": 7.241686939389214e-06, |
| "loss": 0.4002, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.153179190751445, |
| "grad_norm": 0.5541234205878507, |
| "learning_rate": 7.222298104312966e-06, |
| "loss": 0.3917, |
| "step": 1091 |
| }, |
| { |
| "epoch": 3.1560693641618496, |
| "grad_norm": 0.6050039791749773, |
| "learning_rate": 7.202920577814991e-06, |
| "loss": 0.4036, |
| "step": 1092 |
| }, |
| { |
| "epoch": 3.1589595375722546, |
| "grad_norm": 0.48174559599735306, |
| "learning_rate": 7.183554438785054e-06, |
| "loss": 0.3919, |
| "step": 1093 |
| }, |
| { |
| "epoch": 3.161849710982659, |
| "grad_norm": 0.5735781752986523, |
| "learning_rate": 7.164199766066571e-06, |
| "loss": 0.3865, |
| "step": 1094 |
| }, |
| { |
| "epoch": 3.1647398843930636, |
| "grad_norm": 0.54707725193784, |
| "learning_rate": 7.144856638456271e-06, |
| "loss": 0.3848, |
| "step": 1095 |
| }, |
| { |
| "epoch": 3.167630057803468, |
| "grad_norm": 0.5156168830536783, |
| "learning_rate": 7.125525134703891e-06, |
| "loss": 0.3659, |
| "step": 1096 |
| }, |
| { |
| "epoch": 3.1705202312138727, |
| "grad_norm": 0.5268935444771893, |
| "learning_rate": 7.106205333511829e-06, |
| "loss": 0.3818, |
| "step": 1097 |
| }, |
| { |
| "epoch": 3.1734104046242773, |
| "grad_norm": 0.47788719205051533, |
| "learning_rate": 7.086897313534851e-06, |
| "loss": 0.4042, |
| "step": 1098 |
| }, |
| { |
| "epoch": 3.1763005780346822, |
| "grad_norm": 0.5259915318997668, |
| "learning_rate": 7.067601153379757e-06, |
| "loss": 0.4052, |
| "step": 1099 |
| }, |
| { |
| "epoch": 3.179190751445087, |
| "grad_norm": 0.5283480560745818, |
| "learning_rate": 7.048316931605062e-06, |
| "loss": 0.4134, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.1820809248554913, |
| "grad_norm": 0.4586208673070595, |
| "learning_rate": 7.029044726720681e-06, |
| "loss": 0.4114, |
| "step": 1101 |
| }, |
| { |
| "epoch": 3.184971098265896, |
| "grad_norm": 0.49357031094595183, |
| "learning_rate": 7.009784617187594e-06, |
| "loss": 0.4041, |
| "step": 1102 |
| }, |
| { |
| "epoch": 3.1878612716763004, |
| "grad_norm": 0.5037877175085824, |
| "learning_rate": 6.990536681417555e-06, |
| "loss": 0.4198, |
| "step": 1103 |
| }, |
| { |
| "epoch": 3.1907514450867054, |
| "grad_norm": 0.5017313490764773, |
| "learning_rate": 6.971300997772746e-06, |
| "loss": 0.3801, |
| "step": 1104 |
| }, |
| { |
| "epoch": 3.19364161849711, |
| "grad_norm": 0.4879258565266107, |
| "learning_rate": 6.9520776445654696e-06, |
| "loss": 0.4155, |
| "step": 1105 |
| }, |
| { |
| "epoch": 3.1965317919075145, |
| "grad_norm": 0.5095618152215767, |
| "learning_rate": 6.932866700057832e-06, |
| "loss": 0.396, |
| "step": 1106 |
| }, |
| { |
| "epoch": 3.199421965317919, |
| "grad_norm": 0.5221572691725388, |
| "learning_rate": 6.91366824246142e-06, |
| "loss": 0.3836, |
| "step": 1107 |
| }, |
| { |
| "epoch": 3.2023121387283235, |
| "grad_norm": 0.5089182169867535, |
| "learning_rate": 6.894482349936977e-06, |
| "loss": 0.3892, |
| "step": 1108 |
| }, |
| { |
| "epoch": 3.2052023121387285, |
| "grad_norm": 0.4800548236411057, |
| "learning_rate": 6.875309100594098e-06, |
| "loss": 0.3998, |
| "step": 1109 |
| }, |
| { |
| "epoch": 3.208092485549133, |
| "grad_norm": 0.5125112881492649, |
| "learning_rate": 6.8561485724909105e-06, |
| "loss": 0.4056, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.2109826589595376, |
| "grad_norm": 0.5757448225017041, |
| "learning_rate": 6.83700084363374e-06, |
| "loss": 0.4033, |
| "step": 1111 |
| }, |
| { |
| "epoch": 3.213872832369942, |
| "grad_norm": 0.5119217186783843, |
| "learning_rate": 6.8178659919768056e-06, |
| "loss": 0.3985, |
| "step": 1112 |
| }, |
| { |
| "epoch": 3.2167630057803467, |
| "grad_norm": 0.5094018785467254, |
| "learning_rate": 6.798744095421909e-06, |
| "loss": 0.3727, |
| "step": 1113 |
| }, |
| { |
| "epoch": 3.2196531791907512, |
| "grad_norm": 0.5371490562135045, |
| "learning_rate": 6.7796352318181015e-06, |
| "loss": 0.4222, |
| "step": 1114 |
| }, |
| { |
| "epoch": 3.222543352601156, |
| "grad_norm": 0.530597597226532, |
| "learning_rate": 6.76053947896138e-06, |
| "loss": 0.4138, |
| "step": 1115 |
| }, |
| { |
| "epoch": 3.2254335260115607, |
| "grad_norm": 0.5360287546084145, |
| "learning_rate": 6.741456914594353e-06, |
| "loss": 0.406, |
| "step": 1116 |
| }, |
| { |
| "epoch": 3.2283236994219653, |
| "grad_norm": 0.5254677842306964, |
| "learning_rate": 6.722387616405957e-06, |
| "loss": 0.3964, |
| "step": 1117 |
| }, |
| { |
| "epoch": 3.23121387283237, |
| "grad_norm": 0.5118935706321023, |
| "learning_rate": 6.7033316620310985e-06, |
| "loss": 0.3854, |
| "step": 1118 |
| }, |
| { |
| "epoch": 3.2341040462427744, |
| "grad_norm": 0.542111399954812, |
| "learning_rate": 6.68428912905037e-06, |
| "loss": 0.3889, |
| "step": 1119 |
| }, |
| { |
| "epoch": 3.2369942196531793, |
| "grad_norm": 0.5231724529368768, |
| "learning_rate": 6.6652600949897206e-06, |
| "loss": 0.4199, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.239884393063584, |
| "grad_norm": 0.4928163479094274, |
| "learning_rate": 6.646244637320145e-06, |
| "loss": 0.3981, |
| "step": 1121 |
| }, |
| { |
| "epoch": 3.2427745664739884, |
| "grad_norm": 0.5345331053208331, |
| "learning_rate": 6.627242833457357e-06, |
| "loss": 0.3847, |
| "step": 1122 |
| }, |
| { |
| "epoch": 3.245664739884393, |
| "grad_norm": 0.559335727163023, |
| "learning_rate": 6.60825476076149e-06, |
| "loss": 0.402, |
| "step": 1123 |
| }, |
| { |
| "epoch": 3.2485549132947975, |
| "grad_norm": 0.5029209372564127, |
| "learning_rate": 6.5892804965367785e-06, |
| "loss": 0.3916, |
| "step": 1124 |
| }, |
| { |
| "epoch": 3.2514450867052025, |
| "grad_norm": 0.49343284170126384, |
| "learning_rate": 6.570320118031233e-06, |
| "loss": 0.4022, |
| "step": 1125 |
| }, |
| { |
| "epoch": 3.254335260115607, |
| "grad_norm": 0.5401891502634477, |
| "learning_rate": 6.551373702436335e-06, |
| "loss": 0.3948, |
| "step": 1126 |
| }, |
| { |
| "epoch": 3.2572254335260116, |
| "grad_norm": 0.49439119896843026, |
| "learning_rate": 6.532441326886716e-06, |
| "loss": 0.3892, |
| "step": 1127 |
| }, |
| { |
| "epoch": 3.260115606936416, |
| "grad_norm": 0.4974225213503987, |
| "learning_rate": 6.513523068459859e-06, |
| "loss": 0.4086, |
| "step": 1128 |
| }, |
| { |
| "epoch": 3.2630057803468207, |
| "grad_norm": 0.5162026470577603, |
| "learning_rate": 6.494619004175761e-06, |
| "loss": 0.403, |
| "step": 1129 |
| }, |
| { |
| "epoch": 3.2658959537572256, |
| "grad_norm": 0.524460346825612, |
| "learning_rate": 6.475729210996637e-06, |
| "loss": 0.3998, |
| "step": 1130 |
| }, |
| { |
| "epoch": 3.26878612716763, |
| "grad_norm": 0.5513128758798284, |
| "learning_rate": 6.456853765826609e-06, |
| "loss": 0.3894, |
| "step": 1131 |
| }, |
| { |
| "epoch": 3.2716763005780347, |
| "grad_norm": 0.48701380796712024, |
| "learning_rate": 6.43799274551137e-06, |
| "loss": 0.4033, |
| "step": 1132 |
| }, |
| { |
| "epoch": 3.2745664739884393, |
| "grad_norm": 0.4970848753735955, |
| "learning_rate": 6.419146226837894e-06, |
| "loss": 0.4109, |
| "step": 1133 |
| }, |
| { |
| "epoch": 3.277456647398844, |
| "grad_norm": 0.5093548978079553, |
| "learning_rate": 6.400314286534119e-06, |
| "loss": 0.391, |
| "step": 1134 |
| }, |
| { |
| "epoch": 3.2803468208092488, |
| "grad_norm": 0.5429732351329991, |
| "learning_rate": 6.381497001268631e-06, |
| "loss": 0.3989, |
| "step": 1135 |
| }, |
| { |
| "epoch": 3.2832369942196533, |
| "grad_norm": 0.5457996431220703, |
| "learning_rate": 6.3626944476503485e-06, |
| "loss": 0.3822, |
| "step": 1136 |
| }, |
| { |
| "epoch": 3.286127167630058, |
| "grad_norm": 0.5885691843952315, |
| "learning_rate": 6.343906702228213e-06, |
| "loss": 0.4026, |
| "step": 1137 |
| }, |
| { |
| "epoch": 3.2890173410404624, |
| "grad_norm": 0.675751707823907, |
| "learning_rate": 6.325133841490887e-06, |
| "loss": 0.3897, |
| "step": 1138 |
| }, |
| { |
| "epoch": 3.291907514450867, |
| "grad_norm": 0.544504709401962, |
| "learning_rate": 6.306375941866428e-06, |
| "loss": 0.3808, |
| "step": 1139 |
| }, |
| { |
| "epoch": 3.294797687861272, |
| "grad_norm": 0.5233663308147422, |
| "learning_rate": 6.287633079721986e-06, |
| "loss": 0.3775, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.2976878612716765, |
| "grad_norm": 0.5962514875461025, |
| "learning_rate": 6.268905331363487e-06, |
| "loss": 0.4067, |
| "step": 1141 |
| }, |
| { |
| "epoch": 3.300578034682081, |
| "grad_norm": 0.7022260424825366, |
| "learning_rate": 6.250192773035333e-06, |
| "loss": 0.4003, |
| "step": 1142 |
| }, |
| { |
| "epoch": 3.3034682080924855, |
| "grad_norm": 0.489621823505066, |
| "learning_rate": 6.231495480920078e-06, |
| "loss": 0.4192, |
| "step": 1143 |
| }, |
| { |
| "epoch": 3.30635838150289, |
| "grad_norm": 0.5934308896956955, |
| "learning_rate": 6.212813531138124e-06, |
| "loss": 0.3865, |
| "step": 1144 |
| }, |
| { |
| "epoch": 3.3092485549132946, |
| "grad_norm": 0.5852026626951878, |
| "learning_rate": 6.194146999747419e-06, |
| "loss": 0.4077, |
| "step": 1145 |
| }, |
| { |
| "epoch": 3.3121387283236996, |
| "grad_norm": 0.5034245128179445, |
| "learning_rate": 6.1754959627431325e-06, |
| "loss": 0.4108, |
| "step": 1146 |
| }, |
| { |
| "epoch": 3.315028901734104, |
| "grad_norm": 0.48420407726634407, |
| "learning_rate": 6.156860496057351e-06, |
| "loss": 0.4003, |
| "step": 1147 |
| }, |
| { |
| "epoch": 3.3179190751445087, |
| "grad_norm": 0.4906099299312336, |
| "learning_rate": 6.138240675558778e-06, |
| "loss": 0.4012, |
| "step": 1148 |
| }, |
| { |
| "epoch": 3.320809248554913, |
| "grad_norm": 0.5112625863289267, |
| "learning_rate": 6.119636577052414e-06, |
| "loss": 0.3972, |
| "step": 1149 |
| }, |
| { |
| "epoch": 3.3236994219653178, |
| "grad_norm": 0.4758612139780586, |
| "learning_rate": 6.101048276279259e-06, |
| "loss": 0.3867, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.3265895953757223, |
| "grad_norm": 0.5099399487696842, |
| "learning_rate": 6.082475848915985e-06, |
| "loss": 0.4004, |
| "step": 1151 |
| }, |
| { |
| "epoch": 3.3294797687861273, |
| "grad_norm": 0.49659635472420044, |
| "learning_rate": 6.063919370574647e-06, |
| "loss": 0.4203, |
| "step": 1152 |
| }, |
| { |
| "epoch": 3.332369942196532, |
| "grad_norm": 0.5246518362455117, |
| "learning_rate": 6.045378916802374e-06, |
| "loss": 0.3774, |
| "step": 1153 |
| }, |
| { |
| "epoch": 3.3352601156069364, |
| "grad_norm": 0.505469636516935, |
| "learning_rate": 6.026854563081046e-06, |
| "loss": 0.4151, |
| "step": 1154 |
| }, |
| { |
| "epoch": 3.338150289017341, |
| "grad_norm": 0.515524111669419, |
| "learning_rate": 6.008346384827e-06, |
| "loss": 0.3923, |
| "step": 1155 |
| }, |
| { |
| "epoch": 3.3410404624277454, |
| "grad_norm": 0.5294276547027134, |
| "learning_rate": 5.989854457390725e-06, |
| "loss": 0.3772, |
| "step": 1156 |
| }, |
| { |
| "epoch": 3.3439306358381504, |
| "grad_norm": 0.5162545524791332, |
| "learning_rate": 5.97137885605654e-06, |
| "loss": 0.4161, |
| "step": 1157 |
| }, |
| { |
| "epoch": 3.346820809248555, |
| "grad_norm": 0.4833823444894276, |
| "learning_rate": 5.952919656042302e-06, |
| "loss": 0.4192, |
| "step": 1158 |
| }, |
| { |
| "epoch": 3.3497109826589595, |
| "grad_norm": 0.5098142017762833, |
| "learning_rate": 5.934476932499096e-06, |
| "loss": 0.4095, |
| "step": 1159 |
| }, |
| { |
| "epoch": 3.352601156069364, |
| "grad_norm": 0.49912178138612384, |
| "learning_rate": 5.916050760510928e-06, |
| "loss": 0.4193, |
| "step": 1160 |
| }, |
| { |
| "epoch": 3.3554913294797686, |
| "grad_norm": 0.48744060348108537, |
| "learning_rate": 5.897641215094417e-06, |
| "loss": 0.4107, |
| "step": 1161 |
| }, |
| { |
| "epoch": 3.3583815028901736, |
| "grad_norm": 0.5061412741063811, |
| "learning_rate": 5.879248371198492e-06, |
| "loss": 0.379, |
| "step": 1162 |
| }, |
| { |
| "epoch": 3.361271676300578, |
| "grad_norm": 0.4994719238375096, |
| "learning_rate": 5.8608723037040894e-06, |
| "loss": 0.4095, |
| "step": 1163 |
| }, |
| { |
| "epoch": 3.3641618497109826, |
| "grad_norm": 0.5358940583528377, |
| "learning_rate": 5.842513087423844e-06, |
| "loss": 0.3814, |
| "step": 1164 |
| }, |
| { |
| "epoch": 3.367052023121387, |
| "grad_norm": 0.5235124787983995, |
| "learning_rate": 5.824170797101787e-06, |
| "loss": 0.3933, |
| "step": 1165 |
| }, |
| { |
| "epoch": 3.3699421965317917, |
| "grad_norm": 0.5039212115911841, |
| "learning_rate": 5.805845507413032e-06, |
| "loss": 0.3985, |
| "step": 1166 |
| }, |
| { |
| "epoch": 3.3728323699421967, |
| "grad_norm": 0.5060043732907776, |
| "learning_rate": 5.7875372929635e-06, |
| "loss": 0.3974, |
| "step": 1167 |
| }, |
| { |
| "epoch": 3.3757225433526012, |
| "grad_norm": 0.5581048525929695, |
| "learning_rate": 5.769246228289578e-06, |
| "loss": 0.3877, |
| "step": 1168 |
| }, |
| { |
| "epoch": 3.378612716763006, |
| "grad_norm": 0.5495580859568931, |
| "learning_rate": 5.750972387857827e-06, |
| "loss": 0.3819, |
| "step": 1169 |
| }, |
| { |
| "epoch": 3.3815028901734103, |
| "grad_norm": 0.5067255290694974, |
| "learning_rate": 5.732715846064707e-06, |
| "loss": 0.4018, |
| "step": 1170 |
| }, |
| { |
| "epoch": 3.384393063583815, |
| "grad_norm": 0.5077609535944343, |
| "learning_rate": 5.714476677236238e-06, |
| "loss": 0.395, |
| "step": 1171 |
| }, |
| { |
| "epoch": 3.38728323699422, |
| "grad_norm": 0.5579418064478606, |
| "learning_rate": 5.6962549556277134e-06, |
| "loss": 0.3984, |
| "step": 1172 |
| }, |
| { |
| "epoch": 3.3901734104046244, |
| "grad_norm": 0.49258839842495805, |
| "learning_rate": 5.678050755423385e-06, |
| "loss": 0.4233, |
| "step": 1173 |
| }, |
| { |
| "epoch": 3.393063583815029, |
| "grad_norm": 0.5423286857222281, |
| "learning_rate": 5.659864150736194e-06, |
| "loss": 0.4053, |
| "step": 1174 |
| }, |
| { |
| "epoch": 3.3959537572254335, |
| "grad_norm": 0.5232668352102138, |
| "learning_rate": 5.64169521560743e-06, |
| "loss": 0.3846, |
| "step": 1175 |
| }, |
| { |
| "epoch": 3.398843930635838, |
| "grad_norm": 0.491353654719555, |
| "learning_rate": 5.623544024006451e-06, |
| "loss": 0.3775, |
| "step": 1176 |
| }, |
| { |
| "epoch": 3.401734104046243, |
| "grad_norm": 0.51410989795105, |
| "learning_rate": 5.605410649830375e-06, |
| "loss": 0.3864, |
| "step": 1177 |
| }, |
| { |
| "epoch": 3.4046242774566475, |
| "grad_norm": 0.5219888675404522, |
| "learning_rate": 5.587295166903785e-06, |
| "loss": 0.3727, |
| "step": 1178 |
| }, |
| { |
| "epoch": 3.407514450867052, |
| "grad_norm": 0.5458655990227211, |
| "learning_rate": 5.569197648978427e-06, |
| "loss": 0.3958, |
| "step": 1179 |
| }, |
| { |
| "epoch": 3.4104046242774566, |
| "grad_norm": 0.4953302314125495, |
| "learning_rate": 5.551118169732901e-06, |
| "loss": 0.3569, |
| "step": 1180 |
| }, |
| { |
| "epoch": 3.413294797687861, |
| "grad_norm": 0.5476231027492036, |
| "learning_rate": 5.533056802772374e-06, |
| "loss": 0.3827, |
| "step": 1181 |
| }, |
| { |
| "epoch": 3.416184971098266, |
| "grad_norm": 0.5230969438602984, |
| "learning_rate": 5.515013621628271e-06, |
| "loss": 0.3857, |
| "step": 1182 |
| }, |
| { |
| "epoch": 3.4190751445086707, |
| "grad_norm": 0.5335942107594615, |
| "learning_rate": 5.496988699757982e-06, |
| "loss": 0.4121, |
| "step": 1183 |
| }, |
| { |
| "epoch": 3.421965317919075, |
| "grad_norm": 0.5220805134558412, |
| "learning_rate": 5.478982110544555e-06, |
| "loss": 0.3929, |
| "step": 1184 |
| }, |
| { |
| "epoch": 3.4248554913294798, |
| "grad_norm": 0.481695491417452, |
| "learning_rate": 5.460993927296408e-06, |
| "loss": 0.4079, |
| "step": 1185 |
| }, |
| { |
| "epoch": 3.4277456647398843, |
| "grad_norm": 0.5091817629646063, |
| "learning_rate": 5.44302422324702e-06, |
| "loss": 0.4012, |
| "step": 1186 |
| }, |
| { |
| "epoch": 3.430635838150289, |
| "grad_norm": 0.5603932046574709, |
| "learning_rate": 5.425073071554634e-06, |
| "loss": 0.3784, |
| "step": 1187 |
| }, |
| { |
| "epoch": 3.433526011560694, |
| "grad_norm": 0.5131330214838938, |
| "learning_rate": 5.4071405453019786e-06, |
| "loss": 0.3929, |
| "step": 1188 |
| }, |
| { |
| "epoch": 3.4364161849710984, |
| "grad_norm": 0.5512023006176935, |
| "learning_rate": 5.389226717495943e-06, |
| "loss": 0.4071, |
| "step": 1189 |
| }, |
| { |
| "epoch": 3.439306358381503, |
| "grad_norm": 0.5472275221249205, |
| "learning_rate": 5.371331661067284e-06, |
| "loss": 0.4022, |
| "step": 1190 |
| }, |
| { |
| "epoch": 3.4421965317919074, |
| "grad_norm": 0.46058321008388775, |
| "learning_rate": 5.353455448870344e-06, |
| "loss": 0.414, |
| "step": 1191 |
| }, |
| { |
| "epoch": 3.445086705202312, |
| "grad_norm": 0.5247383099681343, |
| "learning_rate": 5.335598153682754e-06, |
| "loss": 0.3908, |
| "step": 1192 |
| }, |
| { |
| "epoch": 3.447976878612717, |
| "grad_norm": 0.5232079526618546, |
| "learning_rate": 5.317759848205124e-06, |
| "loss": 0.3983, |
| "step": 1193 |
| }, |
| { |
| "epoch": 3.4508670520231215, |
| "grad_norm": 0.4903680174032517, |
| "learning_rate": 5.299940605060743e-06, |
| "loss": 0.4113, |
| "step": 1194 |
| }, |
| { |
| "epoch": 3.453757225433526, |
| "grad_norm": 0.5314891850338158, |
| "learning_rate": 5.2821404967953116e-06, |
| "loss": 0.3715, |
| "step": 1195 |
| }, |
| { |
| "epoch": 3.4566473988439306, |
| "grad_norm": 0.5397537307529937, |
| "learning_rate": 5.264359595876618e-06, |
| "loss": 0.4043, |
| "step": 1196 |
| }, |
| { |
| "epoch": 3.459537572254335, |
| "grad_norm": 0.47328749106274787, |
| "learning_rate": 5.2465979746942565e-06, |
| "loss": 0.4137, |
| "step": 1197 |
| }, |
| { |
| "epoch": 3.4624277456647397, |
| "grad_norm": 0.5362144233759804, |
| "learning_rate": 5.228855705559319e-06, |
| "loss": 0.3914, |
| "step": 1198 |
| }, |
| { |
| "epoch": 3.4653179190751446, |
| "grad_norm": 0.5422695568332802, |
| "learning_rate": 5.211132860704131e-06, |
| "loss": 0.4199, |
| "step": 1199 |
| }, |
| { |
| "epoch": 3.468208092485549, |
| "grad_norm": 0.4718961202791377, |
| "learning_rate": 5.193429512281925e-06, |
| "loss": 0.4026, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.4710982658959537, |
| "grad_norm": 0.4876987449009726, |
| "learning_rate": 5.1757457323665625e-06, |
| "loss": 0.3996, |
| "step": 1201 |
| }, |
| { |
| "epoch": 3.4739884393063583, |
| "grad_norm": 0.5155793556037093, |
| "learning_rate": 5.158081592952237e-06, |
| "loss": 0.3901, |
| "step": 1202 |
| }, |
| { |
| "epoch": 3.476878612716763, |
| "grad_norm": 0.5272401689022481, |
| "learning_rate": 5.1404371659531806e-06, |
| "loss": 0.4087, |
| "step": 1203 |
| }, |
| { |
| "epoch": 3.479768786127168, |
| "grad_norm": 0.48712333585359746, |
| "learning_rate": 5.122812523203376e-06, |
| "loss": 0.3904, |
| "step": 1204 |
| }, |
| { |
| "epoch": 3.4826589595375723, |
| "grad_norm": 0.5184846661575929, |
| "learning_rate": 5.105207736456257e-06, |
| "loss": 0.4176, |
| "step": 1205 |
| }, |
| { |
| "epoch": 3.485549132947977, |
| "grad_norm": 0.4786955304589319, |
| "learning_rate": 5.087622877384422e-06, |
| "loss": 0.4181, |
| "step": 1206 |
| }, |
| { |
| "epoch": 3.4884393063583814, |
| "grad_norm": 0.5042504732169668, |
| "learning_rate": 5.070058017579335e-06, |
| "loss": 0.3857, |
| "step": 1207 |
| }, |
| { |
| "epoch": 3.491329479768786, |
| "grad_norm": 0.4799858953189233, |
| "learning_rate": 5.052513228551048e-06, |
| "loss": 0.3914, |
| "step": 1208 |
| }, |
| { |
| "epoch": 3.494219653179191, |
| "grad_norm": 0.4658279526771844, |
| "learning_rate": 5.034988581727891e-06, |
| "loss": 0.3988, |
| "step": 1209 |
| }, |
| { |
| "epoch": 3.4971098265895955, |
| "grad_norm": 0.5108447742628904, |
| "learning_rate": 5.017484148456195e-06, |
| "loss": 0.4215, |
| "step": 1210 |
| }, |
| { |
| "epoch": 3.5, |
| "grad_norm": 0.5402881254642645, |
| "learning_rate": 5.000000000000003e-06, |
| "loss": 0.3946, |
| "step": 1211 |
| }, |
| { |
| "epoch": 3.5028901734104045, |
| "grad_norm": 0.51161404379665, |
| "learning_rate": 4.98253620754076e-06, |
| "loss": 0.4165, |
| "step": 1212 |
| }, |
| { |
| "epoch": 3.505780346820809, |
| "grad_norm": 0.4974311157637146, |
| "learning_rate": 4.965092842177063e-06, |
| "loss": 0.393, |
| "step": 1213 |
| }, |
| { |
| "epoch": 3.508670520231214, |
| "grad_norm": 0.50083094507991, |
| "learning_rate": 4.947669974924318e-06, |
| "loss": 0.3925, |
| "step": 1214 |
| }, |
| { |
| "epoch": 3.5115606936416186, |
| "grad_norm": 0.5084864388305871, |
| "learning_rate": 4.930267676714493e-06, |
| "loss": 0.4202, |
| "step": 1215 |
| }, |
| { |
| "epoch": 3.514450867052023, |
| "grad_norm": 0.48637370325861295, |
| "learning_rate": 4.912886018395812e-06, |
| "loss": 0.3939, |
| "step": 1216 |
| }, |
| { |
| "epoch": 3.5173410404624277, |
| "grad_norm": 0.4827838362996411, |
| "learning_rate": 4.89552507073248e-06, |
| "loss": 0.4036, |
| "step": 1217 |
| }, |
| { |
| "epoch": 3.520231213872832, |
| "grad_norm": 0.5081515501494188, |
| "learning_rate": 4.878184904404375e-06, |
| "loss": 0.4024, |
| "step": 1218 |
| }, |
| { |
| "epoch": 3.523121387283237, |
| "grad_norm": 0.509383375704138, |
| "learning_rate": 4.86086559000676e-06, |
| "loss": 0.4007, |
| "step": 1219 |
| }, |
| { |
| "epoch": 3.5260115606936417, |
| "grad_norm": 0.5044115703037381, |
| "learning_rate": 4.843567198050031e-06, |
| "loss": 0.4243, |
| "step": 1220 |
| }, |
| { |
| "epoch": 3.5289017341040463, |
| "grad_norm": 0.5019649447142344, |
| "learning_rate": 4.8262897989593846e-06, |
| "loss": 0.3872, |
| "step": 1221 |
| }, |
| { |
| "epoch": 3.531791907514451, |
| "grad_norm": 0.4958060052336904, |
| "learning_rate": 4.809033463074564e-06, |
| "loss": 0.3898, |
| "step": 1222 |
| }, |
| { |
| "epoch": 3.5346820809248554, |
| "grad_norm": 0.4656358996491728, |
| "learning_rate": 4.791798260649538e-06, |
| "loss": 0.382, |
| "step": 1223 |
| }, |
| { |
| "epoch": 3.5375722543352603, |
| "grad_norm": 0.5046209644376203, |
| "learning_rate": 4.774584261852266e-06, |
| "loss": 0.3973, |
| "step": 1224 |
| }, |
| { |
| "epoch": 3.540462427745665, |
| "grad_norm": 0.473802279248715, |
| "learning_rate": 4.757391536764366e-06, |
| "loss": 0.3669, |
| "step": 1225 |
| }, |
| { |
| "epoch": 3.5433526011560694, |
| "grad_norm": 0.47987460911133734, |
| "learning_rate": 4.74022015538085e-06, |
| "loss": 0.4229, |
| "step": 1226 |
| }, |
| { |
| "epoch": 3.546242774566474, |
| "grad_norm": 0.47898689908086095, |
| "learning_rate": 4.723070187609834e-06, |
| "loss": 0.3934, |
| "step": 1227 |
| }, |
| { |
| "epoch": 3.5491329479768785, |
| "grad_norm": 0.5192852542397911, |
| "learning_rate": 4.705941703272256e-06, |
| "loss": 0.3769, |
| "step": 1228 |
| }, |
| { |
| "epoch": 3.5520231213872835, |
| "grad_norm": 0.4773762683627811, |
| "learning_rate": 4.688834772101593e-06, |
| "loss": 0.4012, |
| "step": 1229 |
| }, |
| { |
| "epoch": 3.5549132947976876, |
| "grad_norm": 0.4845549925124719, |
| "learning_rate": 4.671749463743572e-06, |
| "loss": 0.3907, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.5578034682080926, |
| "grad_norm": 0.48089857297464916, |
| "learning_rate": 4.65468584775589e-06, |
| "loss": 0.384, |
| "step": 1231 |
| }, |
| { |
| "epoch": 3.560693641618497, |
| "grad_norm": 0.5174166036846797, |
| "learning_rate": 4.637643993607928e-06, |
| "loss": 0.3983, |
| "step": 1232 |
| }, |
| { |
| "epoch": 3.5635838150289016, |
| "grad_norm": 0.5121238560523732, |
| "learning_rate": 4.620623970680473e-06, |
| "loss": 0.3893, |
| "step": 1233 |
| }, |
| { |
| "epoch": 3.5664739884393066, |
| "grad_norm": 0.516055886227455, |
| "learning_rate": 4.60362584826544e-06, |
| "loss": 0.3895, |
| "step": 1234 |
| }, |
| { |
| "epoch": 3.5693641618497107, |
| "grad_norm": 0.4991463204559961, |
| "learning_rate": 4.586649695565563e-06, |
| "loss": 0.3996, |
| "step": 1235 |
| }, |
| { |
| "epoch": 3.5722543352601157, |
| "grad_norm": 0.4823769438999723, |
| "learning_rate": 4.569695581694149e-06, |
| "loss": 0.399, |
| "step": 1236 |
| }, |
| { |
| "epoch": 3.5751445086705202, |
| "grad_norm": 0.547610891529702, |
| "learning_rate": 4.552763575674772e-06, |
| "loss": 0.3975, |
| "step": 1237 |
| }, |
| { |
| "epoch": 3.578034682080925, |
| "grad_norm": 0.49499114644780257, |
| "learning_rate": 4.535853746441018e-06, |
| "loss": 0.3927, |
| "step": 1238 |
| }, |
| { |
| "epoch": 3.5809248554913293, |
| "grad_norm": 0.48678386713561067, |
| "learning_rate": 4.518966162836159e-06, |
| "loss": 0.3871, |
| "step": 1239 |
| }, |
| { |
| "epoch": 3.583815028901734, |
| "grad_norm": 0.5034769399989679, |
| "learning_rate": 4.502100893612922e-06, |
| "loss": 0.416, |
| "step": 1240 |
| }, |
| { |
| "epoch": 3.586705202312139, |
| "grad_norm": 0.5066444239071285, |
| "learning_rate": 4.485258007433175e-06, |
| "loss": 0.4096, |
| "step": 1241 |
| }, |
| { |
| "epoch": 3.5895953757225434, |
| "grad_norm": 0.5016451005013188, |
| "learning_rate": 4.468437572867673e-06, |
| "loss": 0.4118, |
| "step": 1242 |
| }, |
| { |
| "epoch": 3.592485549132948, |
| "grad_norm": 0.5150332892840315, |
| "learning_rate": 4.451639658395761e-06, |
| "loss": 0.3969, |
| "step": 1243 |
| }, |
| { |
| "epoch": 3.5953757225433525, |
| "grad_norm": 0.5549645396194977, |
| "learning_rate": 4.434864332405085e-06, |
| "loss": 0.4178, |
| "step": 1244 |
| }, |
| { |
| "epoch": 3.598265895953757, |
| "grad_norm": 0.565355807978924, |
| "learning_rate": 4.418111663191355e-06, |
| "loss": 0.3883, |
| "step": 1245 |
| }, |
| { |
| "epoch": 3.601156069364162, |
| "grad_norm": 0.5472283526926208, |
| "learning_rate": 4.401381718958022e-06, |
| "loss": 0.4079, |
| "step": 1246 |
| }, |
| { |
| "epoch": 3.6040462427745665, |
| "grad_norm": 0.5124990525593747, |
| "learning_rate": 4.384674567816029e-06, |
| "loss": 0.4001, |
| "step": 1247 |
| }, |
| { |
| "epoch": 3.606936416184971, |
| "grad_norm": 0.4889182687602902, |
| "learning_rate": 4.36799027778351e-06, |
| "loss": 0.3888, |
| "step": 1248 |
| }, |
| { |
| "epoch": 3.6098265895953756, |
| "grad_norm": 0.48849681036045084, |
| "learning_rate": 4.3513289167855464e-06, |
| "loss": 0.3973, |
| "step": 1249 |
| }, |
| { |
| "epoch": 3.61271676300578, |
| "grad_norm": 0.5325960144702762, |
| "learning_rate": 4.3346905526538576e-06, |
| "loss": 0.4119, |
| "step": 1250 |
| }, |
| { |
| "epoch": 3.615606936416185, |
| "grad_norm": 0.5252422646593361, |
| "learning_rate": 4.31807525312654e-06, |
| "loss": 0.3844, |
| "step": 1251 |
| }, |
| { |
| "epoch": 3.6184971098265897, |
| "grad_norm": 0.515346473715291, |
| "learning_rate": 4.301483085847793e-06, |
| "loss": 0.406, |
| "step": 1252 |
| }, |
| { |
| "epoch": 3.621387283236994, |
| "grad_norm": 0.5590943402054944, |
| "learning_rate": 4.284914118367637e-06, |
| "loss": 0.3971, |
| "step": 1253 |
| }, |
| { |
| "epoch": 3.6242774566473988, |
| "grad_norm": 0.5178331984088388, |
| "learning_rate": 4.268368418141642e-06, |
| "loss": 0.4122, |
| "step": 1254 |
| }, |
| { |
| "epoch": 3.6271676300578033, |
| "grad_norm": 0.49667709804077503, |
| "learning_rate": 4.251846052530653e-06, |
| "loss": 0.3802, |
| "step": 1255 |
| }, |
| { |
| "epoch": 3.6300578034682083, |
| "grad_norm": 0.5084603762461885, |
| "learning_rate": 4.235347088800515e-06, |
| "loss": 0.3916, |
| "step": 1256 |
| }, |
| { |
| "epoch": 3.632947976878613, |
| "grad_norm": 0.5694214487049762, |
| "learning_rate": 4.2188715941217996e-06, |
| "loss": 0.4046, |
| "step": 1257 |
| }, |
| { |
| "epoch": 3.6358381502890174, |
| "grad_norm": 0.5171477728373481, |
| "learning_rate": 4.2024196355695254e-06, |
| "loss": 0.3905, |
| "step": 1258 |
| }, |
| { |
| "epoch": 3.638728323699422, |
| "grad_norm": 0.47173687115678026, |
| "learning_rate": 4.185991280122909e-06, |
| "loss": 0.3884, |
| "step": 1259 |
| }, |
| { |
| "epoch": 3.6416184971098264, |
| "grad_norm": 0.5208263137887216, |
| "learning_rate": 4.169586594665048e-06, |
| "loss": 0.3914, |
| "step": 1260 |
| }, |
| { |
| "epoch": 3.6445086705202314, |
| "grad_norm": 0.5659791491973168, |
| "learning_rate": 4.153205645982693e-06, |
| "loss": 0.3951, |
| "step": 1261 |
| }, |
| { |
| "epoch": 3.647398843930636, |
| "grad_norm": 0.6075893121263456, |
| "learning_rate": 4.136848500765948e-06, |
| "loss": 0.3949, |
| "step": 1262 |
| }, |
| { |
| "epoch": 3.6502890173410405, |
| "grad_norm": 0.512881571716333, |
| "learning_rate": 4.120515225608025e-06, |
| "loss": 0.3849, |
| "step": 1263 |
| }, |
| { |
| "epoch": 3.653179190751445, |
| "grad_norm": 0.5075832985581248, |
| "learning_rate": 4.104205887004931e-06, |
| "loss": 0.3824, |
| "step": 1264 |
| }, |
| { |
| "epoch": 3.6560693641618496, |
| "grad_norm": 0.5076914828789318, |
| "learning_rate": 4.08792055135524e-06, |
| "loss": 0.4022, |
| "step": 1265 |
| }, |
| { |
| "epoch": 3.6589595375722546, |
| "grad_norm": 0.5025241926533359, |
| "learning_rate": 4.0716592849597955e-06, |
| "loss": 0.3905, |
| "step": 1266 |
| }, |
| { |
| "epoch": 3.661849710982659, |
| "grad_norm": 0.496222438413654, |
| "learning_rate": 4.055422154021465e-06, |
| "loss": 0.3892, |
| "step": 1267 |
| }, |
| { |
| "epoch": 3.6647398843930636, |
| "grad_norm": 0.48534032089531876, |
| "learning_rate": 4.039209224644845e-06, |
| "loss": 0.3886, |
| "step": 1268 |
| }, |
| { |
| "epoch": 3.667630057803468, |
| "grad_norm": 0.5418497860771618, |
| "learning_rate": 4.0230205628359945e-06, |
| "loss": 0.3982, |
| "step": 1269 |
| }, |
| { |
| "epoch": 3.6705202312138727, |
| "grad_norm": 0.5147052856265083, |
| "learning_rate": 4.006856234502191e-06, |
| "loss": 0.39, |
| "step": 1270 |
| }, |
| { |
| "epoch": 3.6734104046242777, |
| "grad_norm": 0.494017251636487, |
| "learning_rate": 3.990716305451636e-06, |
| "loss": 0.4083, |
| "step": 1271 |
| }, |
| { |
| "epoch": 3.6763005780346822, |
| "grad_norm": 0.5717362220657668, |
| "learning_rate": 3.974600841393204e-06, |
| "loss": 0.3824, |
| "step": 1272 |
| }, |
| { |
| "epoch": 3.679190751445087, |
| "grad_norm": 0.5763029540574294, |
| "learning_rate": 3.958509907936149e-06, |
| "loss": 0.3934, |
| "step": 1273 |
| }, |
| { |
| "epoch": 3.6820809248554913, |
| "grad_norm": 0.47695488593530383, |
| "learning_rate": 3.942443570589882e-06, |
| "loss": 0.4102, |
| "step": 1274 |
| }, |
| { |
| "epoch": 3.684971098265896, |
| "grad_norm": 0.46522466243173516, |
| "learning_rate": 3.9264018947636626e-06, |
| "loss": 0.4064, |
| "step": 1275 |
| }, |
| { |
| "epoch": 3.687861271676301, |
| "grad_norm": 0.4901388621576566, |
| "learning_rate": 3.910384945766351e-06, |
| "loss": 0.4064, |
| "step": 1276 |
| }, |
| { |
| "epoch": 3.690751445086705, |
| "grad_norm": 0.5319476053231882, |
| "learning_rate": 3.894392788806141e-06, |
| "loss": 0.3584, |
| "step": 1277 |
| }, |
| { |
| "epoch": 3.69364161849711, |
| "grad_norm": 0.50332879249549, |
| "learning_rate": 3.878425488990289e-06, |
| "loss": 0.4003, |
| "step": 1278 |
| }, |
| { |
| "epoch": 3.6965317919075145, |
| "grad_norm": 0.4718359464324147, |
| "learning_rate": 3.862483111324861e-06, |
| "loss": 0.4125, |
| "step": 1279 |
| }, |
| { |
| "epoch": 3.699421965317919, |
| "grad_norm": 0.5047398998313649, |
| "learning_rate": 3.846565720714451e-06, |
| "loss": 0.395, |
| "step": 1280 |
| }, |
| { |
| "epoch": 3.7023121387283235, |
| "grad_norm": 0.5229923048712791, |
| "learning_rate": 3.830673381961933e-06, |
| "loss": 0.3812, |
| "step": 1281 |
| }, |
| { |
| "epoch": 3.705202312138728, |
| "grad_norm": 0.5775482777590354, |
| "learning_rate": 3.814806159768187e-06, |
| "loss": 0.3894, |
| "step": 1282 |
| }, |
| { |
| "epoch": 3.708092485549133, |
| "grad_norm": 0.49362257902570184, |
| "learning_rate": 3.7989641187318327e-06, |
| "loss": 0.3914, |
| "step": 1283 |
| }, |
| { |
| "epoch": 3.7109826589595376, |
| "grad_norm": 0.4725314340419177, |
| "learning_rate": 3.783147323348989e-06, |
| "loss": 0.4157, |
| "step": 1284 |
| }, |
| { |
| "epoch": 3.713872832369942, |
| "grad_norm": 0.493051522080004, |
| "learning_rate": 3.767355838012974e-06, |
| "loss": 0.3932, |
| "step": 1285 |
| }, |
| { |
| "epoch": 3.7167630057803467, |
| "grad_norm": 0.4938182282472529, |
| "learning_rate": 3.7515897270140755e-06, |
| "loss": 0.4056, |
| "step": 1286 |
| }, |
| { |
| "epoch": 3.7196531791907512, |
| "grad_norm": 0.5050054128840156, |
| "learning_rate": 3.7358490545392734e-06, |
| "loss": 0.3993, |
| "step": 1287 |
| }, |
| { |
| "epoch": 3.722543352601156, |
| "grad_norm": 0.4855493281052087, |
| "learning_rate": 3.7201338846719926e-06, |
| "loss": 0.397, |
| "step": 1288 |
| }, |
| { |
| "epoch": 3.7254335260115607, |
| "grad_norm": 0.4948276672840782, |
| "learning_rate": 3.7044442813918125e-06, |
| "loss": 0.4146, |
| "step": 1289 |
| }, |
| { |
| "epoch": 3.7283236994219653, |
| "grad_norm": 0.465286865663395, |
| "learning_rate": 3.688780308574238e-06, |
| "loss": 0.4193, |
| "step": 1290 |
| }, |
| { |
| "epoch": 3.73121387283237, |
| "grad_norm": 0.5040094181912368, |
| "learning_rate": 3.6731420299904317e-06, |
| "loss": 0.3888, |
| "step": 1291 |
| }, |
| { |
| "epoch": 3.7341040462427744, |
| "grad_norm": 0.484768931579548, |
| "learning_rate": 3.6575295093069397e-06, |
| "loss": 0.3908, |
| "step": 1292 |
| }, |
| { |
| "epoch": 3.7369942196531793, |
| "grad_norm": 0.49290459637271655, |
| "learning_rate": 3.6419428100854515e-06, |
| "loss": 0.3783, |
| "step": 1293 |
| }, |
| { |
| "epoch": 3.739884393063584, |
| "grad_norm": 0.5173781080231862, |
| "learning_rate": 3.6263819957825163e-06, |
| "loss": 0.4028, |
| "step": 1294 |
| }, |
| { |
| "epoch": 3.7427745664739884, |
| "grad_norm": 0.48333701877259566, |
| "learning_rate": 3.610847129749323e-06, |
| "loss": 0.3908, |
| "step": 1295 |
| }, |
| { |
| "epoch": 3.745664739884393, |
| "grad_norm": 0.46514377347373626, |
| "learning_rate": 3.5953382752314036e-06, |
| "loss": 0.4052, |
| "step": 1296 |
| }, |
| { |
| "epoch": 3.7485549132947975, |
| "grad_norm": 0.4611540713015221, |
| "learning_rate": 3.579855495368403e-06, |
| "loss": 0.4148, |
| "step": 1297 |
| }, |
| { |
| "epoch": 3.7514450867052025, |
| "grad_norm": 0.4918418237562193, |
| "learning_rate": 3.5643988531937923e-06, |
| "loss": 0.3999, |
| "step": 1298 |
| }, |
| { |
| "epoch": 3.754335260115607, |
| "grad_norm": 0.48102179878442763, |
| "learning_rate": 3.5489684116346556e-06, |
| "loss": 0.3781, |
| "step": 1299 |
| }, |
| { |
| "epoch": 3.7572254335260116, |
| "grad_norm": 0.5010976552573143, |
| "learning_rate": 3.533564233511394e-06, |
| "loss": 0.3775, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.760115606936416, |
| "grad_norm": 0.46729685486727424, |
| "learning_rate": 3.518186381537486e-06, |
| "loss": 0.373, |
| "step": 1301 |
| }, |
| { |
| "epoch": 3.7630057803468207, |
| "grad_norm": 0.48872167812753237, |
| "learning_rate": 3.5028349183192365e-06, |
| "loss": 0.3839, |
| "step": 1302 |
| }, |
| { |
| "epoch": 3.7658959537572256, |
| "grad_norm": 0.4833733217230953, |
| "learning_rate": 3.4875099063555105e-06, |
| "loss": 0.4164, |
| "step": 1303 |
| }, |
| { |
| "epoch": 3.76878612716763, |
| "grad_norm": 0.48933797219192665, |
| "learning_rate": 3.472211408037488e-06, |
| "loss": 0.4062, |
| "step": 1304 |
| }, |
| { |
| "epoch": 3.7716763005780347, |
| "grad_norm": 0.48178223947944576, |
| "learning_rate": 3.456939485648406e-06, |
| "loss": 0.3915, |
| "step": 1305 |
| }, |
| { |
| "epoch": 3.7745664739884393, |
| "grad_norm": 0.5220222059582195, |
| "learning_rate": 3.441694201363307e-06, |
| "loss": 0.3781, |
| "step": 1306 |
| }, |
| { |
| "epoch": 3.777456647398844, |
| "grad_norm": 0.5024321900572036, |
| "learning_rate": 3.4264756172487813e-06, |
| "loss": 0.3738, |
| "step": 1307 |
| }, |
| { |
| "epoch": 3.7803468208092488, |
| "grad_norm": 0.5223109129718773, |
| "learning_rate": 3.4112837952627175e-06, |
| "loss": 0.4036, |
| "step": 1308 |
| }, |
| { |
| "epoch": 3.7832369942196533, |
| "grad_norm": 0.4840530871394166, |
| "learning_rate": 3.3961187972540598e-06, |
| "loss": 0.3788, |
| "step": 1309 |
| }, |
| { |
| "epoch": 3.786127167630058, |
| "grad_norm": 0.5403498006249526, |
| "learning_rate": 3.3809806849625315e-06, |
| "loss": 0.3727, |
| "step": 1310 |
| }, |
| { |
| "epoch": 3.7890173410404624, |
| "grad_norm": 0.4959578369395937, |
| "learning_rate": 3.3658695200184075e-06, |
| "loss": 0.3714, |
| "step": 1311 |
| }, |
| { |
| "epoch": 3.791907514450867, |
| "grad_norm": 0.46367701058442384, |
| "learning_rate": 3.3507853639422514e-06, |
| "loss": 0.3886, |
| "step": 1312 |
| }, |
| { |
| "epoch": 3.794797687861272, |
| "grad_norm": 0.5315085676034551, |
| "learning_rate": 3.3357282781446787e-06, |
| "loss": 0.4034, |
| "step": 1313 |
| }, |
| { |
| "epoch": 3.7976878612716765, |
| "grad_norm": 0.5185261007954255, |
| "learning_rate": 3.320698323926079e-06, |
| "loss": 0.3988, |
| "step": 1314 |
| }, |
| { |
| "epoch": 3.800578034682081, |
| "grad_norm": 0.5194949353467634, |
| "learning_rate": 3.305695562476393e-06, |
| "loss": 0.3799, |
| "step": 1315 |
| }, |
| { |
| "epoch": 3.8034682080924855, |
| "grad_norm": 0.47443505146882026, |
| "learning_rate": 3.29072005487486e-06, |
| "loss": 0.4158, |
| "step": 1316 |
| }, |
| { |
| "epoch": 3.80635838150289, |
| "grad_norm": 0.4720956993326887, |
| "learning_rate": 3.2757718620897574e-06, |
| "loss": 0.4226, |
| "step": 1317 |
| }, |
| { |
| "epoch": 3.809248554913295, |
| "grad_norm": 0.5187600274566376, |
| "learning_rate": 3.2608510449781526e-06, |
| "loss": 0.4108, |
| "step": 1318 |
| }, |
| { |
| "epoch": 3.812138728323699, |
| "grad_norm": 0.48257622577269477, |
| "learning_rate": 3.245957664285666e-06, |
| "loss": 0.4029, |
| "step": 1319 |
| }, |
| { |
| "epoch": 3.815028901734104, |
| "grad_norm": 0.48180267094515367, |
| "learning_rate": 3.2310917806462274e-06, |
| "loss": 0.398, |
| "step": 1320 |
| }, |
| { |
| "epoch": 3.8179190751445087, |
| "grad_norm": 0.5990856236553259, |
| "learning_rate": 3.2162534545818104e-06, |
| "loss": 0.385, |
| "step": 1321 |
| }, |
| { |
| "epoch": 3.820809248554913, |
| "grad_norm": 0.5016426592485167, |
| "learning_rate": 3.2014427465022014e-06, |
| "loss": 0.3844, |
| "step": 1322 |
| }, |
| { |
| "epoch": 3.8236994219653178, |
| "grad_norm": 0.48857761234195757, |
| "learning_rate": 3.1866597167047364e-06, |
| "loss": 0.4028, |
| "step": 1323 |
| }, |
| { |
| "epoch": 3.8265895953757223, |
| "grad_norm": 0.47082788199594566, |
| "learning_rate": 3.1719044253740874e-06, |
| "loss": 0.4185, |
| "step": 1324 |
| }, |
| { |
| "epoch": 3.8294797687861273, |
| "grad_norm": 0.4839708323294498, |
| "learning_rate": 3.1571769325819834e-06, |
| "loss": 0.3902, |
| "step": 1325 |
| }, |
| { |
| "epoch": 3.832369942196532, |
| "grad_norm": 0.46860357381932183, |
| "learning_rate": 3.1424772982869866e-06, |
| "loss": 0.3887, |
| "step": 1326 |
| }, |
| { |
| "epoch": 3.8352601156069364, |
| "grad_norm": 0.4867042348950247, |
| "learning_rate": 3.127805582334236e-06, |
| "loss": 0.3944, |
| "step": 1327 |
| }, |
| { |
| "epoch": 3.838150289017341, |
| "grad_norm": 0.5089482015595527, |
| "learning_rate": 3.1131618444552148e-06, |
| "loss": 0.3823, |
| "step": 1328 |
| }, |
| { |
| "epoch": 3.8410404624277454, |
| "grad_norm": 0.4886105855662341, |
| "learning_rate": 3.098546144267499e-06, |
| "loss": 0.4218, |
| "step": 1329 |
| }, |
| { |
| "epoch": 3.8439306358381504, |
| "grad_norm": 0.5143763846812575, |
| "learning_rate": 3.0839585412745178e-06, |
| "loss": 0.3961, |
| "step": 1330 |
| }, |
| { |
| "epoch": 3.846820809248555, |
| "grad_norm": 0.47071190340225716, |
| "learning_rate": 3.069399094865312e-06, |
| "loss": 0.3972, |
| "step": 1331 |
| }, |
| { |
| "epoch": 3.8497109826589595, |
| "grad_norm": 0.49795601399511025, |
| "learning_rate": 3.0548678643142916e-06, |
| "loss": 0.3891, |
| "step": 1332 |
| }, |
| { |
| "epoch": 3.852601156069364, |
| "grad_norm": 0.5066155576475394, |
| "learning_rate": 3.0403649087809905e-06, |
| "loss": 0.4085, |
| "step": 1333 |
| }, |
| { |
| "epoch": 3.8554913294797686, |
| "grad_norm": 0.4720442115733795, |
| "learning_rate": 3.0258902873098406e-06, |
| "loss": 0.3927, |
| "step": 1334 |
| }, |
| { |
| "epoch": 3.8583815028901736, |
| "grad_norm": 0.5187384473204723, |
| "learning_rate": 3.011444058829903e-06, |
| "loss": 0.3834, |
| "step": 1335 |
| }, |
| { |
| "epoch": 3.861271676300578, |
| "grad_norm": 0.46952384983808076, |
| "learning_rate": 2.997026282154658e-06, |
| "loss": 0.4172, |
| "step": 1336 |
| }, |
| { |
| "epoch": 3.8641618497109826, |
| "grad_norm": 0.47076026733477655, |
| "learning_rate": 2.9826370159817465e-06, |
| "loss": 0.3982, |
| "step": 1337 |
| }, |
| { |
| "epoch": 3.867052023121387, |
| "grad_norm": 0.48222677969659716, |
| "learning_rate": 2.9682763188927476e-06, |
| "loss": 0.3895, |
| "step": 1338 |
| }, |
| { |
| "epoch": 3.8699421965317917, |
| "grad_norm": 0.4707754155595722, |
| "learning_rate": 2.953944249352916e-06, |
| "loss": 0.3982, |
| "step": 1339 |
| }, |
| { |
| "epoch": 3.8728323699421967, |
| "grad_norm": 0.48597477112642706, |
| "learning_rate": 2.939640865710961e-06, |
| "loss": 0.3828, |
| "step": 1340 |
| }, |
| { |
| "epoch": 3.8757225433526012, |
| "grad_norm": 0.4893204505305053, |
| "learning_rate": 2.925366226198819e-06, |
| "loss": 0.384, |
| "step": 1341 |
| }, |
| { |
| "epoch": 3.878612716763006, |
| "grad_norm": 0.4511534069636998, |
| "learning_rate": 2.911120388931392e-06, |
| "loss": 0.4129, |
| "step": 1342 |
| }, |
| { |
| "epoch": 3.8815028901734103, |
| "grad_norm": 0.4665021353659143, |
| "learning_rate": 2.8969034119063176e-06, |
| "loss": 0.3853, |
| "step": 1343 |
| }, |
| { |
| "epoch": 3.884393063583815, |
| "grad_norm": 0.46725384145430443, |
| "learning_rate": 2.8827153530037444e-06, |
| "loss": 0.3926, |
| "step": 1344 |
| }, |
| { |
| "epoch": 3.88728323699422, |
| "grad_norm": 0.4463766065379448, |
| "learning_rate": 2.868556269986096e-06, |
| "loss": 0.4163, |
| "step": 1345 |
| }, |
| { |
| "epoch": 3.8901734104046244, |
| "grad_norm": 0.4641928583572942, |
| "learning_rate": 2.8544262204978178e-06, |
| "loss": 0.379, |
| "step": 1346 |
| }, |
| { |
| "epoch": 3.893063583815029, |
| "grad_norm": 0.6233225476901676, |
| "learning_rate": 2.840325262065161e-06, |
| "loss": 0.3705, |
| "step": 1347 |
| }, |
| { |
| "epoch": 3.8959537572254335, |
| "grad_norm": 0.46924299510074996, |
| "learning_rate": 2.826253452095937e-06, |
| "loss": 0.4119, |
| "step": 1348 |
| }, |
| { |
| "epoch": 3.898843930635838, |
| "grad_norm": 0.4834134656368184, |
| "learning_rate": 2.812210847879291e-06, |
| "loss": 0.3861, |
| "step": 1349 |
| }, |
| { |
| "epoch": 3.901734104046243, |
| "grad_norm": 0.46251839056953825, |
| "learning_rate": 2.798197506585464e-06, |
| "loss": 0.3822, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.9046242774566475, |
| "grad_norm": 0.5271881209856408, |
| "learning_rate": 2.7842134852655635e-06, |
| "loss": 0.3849, |
| "step": 1351 |
| }, |
| { |
| "epoch": 3.907514450867052, |
| "grad_norm": 0.4798577219404323, |
| "learning_rate": 2.7702588408513276e-06, |
| "loss": 0.3879, |
| "step": 1352 |
| }, |
| { |
| "epoch": 3.9104046242774566, |
| "grad_norm": 0.47590125838213543, |
| "learning_rate": 2.756333630154897e-06, |
| "loss": 0.4067, |
| "step": 1353 |
| }, |
| { |
| "epoch": 3.913294797687861, |
| "grad_norm": 0.4909876690916432, |
| "learning_rate": 2.7424379098685806e-06, |
| "loss": 0.3943, |
| "step": 1354 |
| }, |
| { |
| "epoch": 3.916184971098266, |
| "grad_norm": 0.4989923470121859, |
| "learning_rate": 2.7285717365646258e-06, |
| "loss": 0.4009, |
| "step": 1355 |
| }, |
| { |
| "epoch": 3.9190751445086707, |
| "grad_norm": 0.47528546667752497, |
| "learning_rate": 2.7147351666949904e-06, |
| "loss": 0.3988, |
| "step": 1356 |
| }, |
| { |
| "epoch": 3.921965317919075, |
| "grad_norm": 0.43780206676111283, |
| "learning_rate": 2.7009282565911077e-06, |
| "loss": 0.3959, |
| "step": 1357 |
| }, |
| { |
| "epoch": 3.9248554913294798, |
| "grad_norm": 0.4679114324049943, |
| "learning_rate": 2.6871510624636586e-06, |
| "loss": 0.3951, |
| "step": 1358 |
| }, |
| { |
| "epoch": 3.9277456647398843, |
| "grad_norm": 0.4637620747382237, |
| "learning_rate": 2.673403640402359e-06, |
| "loss": 0.4088, |
| "step": 1359 |
| }, |
| { |
| "epoch": 3.9306358381502893, |
| "grad_norm": 0.45457986314823645, |
| "learning_rate": 2.6596860463756935e-06, |
| "loss": 0.3929, |
| "step": 1360 |
| }, |
| { |
| "epoch": 3.9335260115606934, |
| "grad_norm": 0.4761238450741067, |
| "learning_rate": 2.6459983362307263e-06, |
| "loss": 0.383, |
| "step": 1361 |
| }, |
| { |
| "epoch": 3.9364161849710984, |
| "grad_norm": 0.4927539770863447, |
| "learning_rate": 2.632340565692851e-06, |
| "loss": 0.3787, |
| "step": 1362 |
| }, |
| { |
| "epoch": 3.939306358381503, |
| "grad_norm": 0.4858307892706921, |
| "learning_rate": 2.618712790365585e-06, |
| "loss": 0.3924, |
| "step": 1363 |
| }, |
| { |
| "epoch": 3.9421965317919074, |
| "grad_norm": 0.46312831934840626, |
| "learning_rate": 2.6051150657303103e-06, |
| "loss": 0.4016, |
| "step": 1364 |
| }, |
| { |
| "epoch": 3.9450867052023124, |
| "grad_norm": 0.4777949934568993, |
| "learning_rate": 2.5915474471460733e-06, |
| "loss": 0.3923, |
| "step": 1365 |
| }, |
| { |
| "epoch": 3.9479768786127165, |
| "grad_norm": 0.506011577237325, |
| "learning_rate": 2.5780099898493617e-06, |
| "loss": 0.3773, |
| "step": 1366 |
| }, |
| { |
| "epoch": 3.9508670520231215, |
| "grad_norm": 0.49038674746439304, |
| "learning_rate": 2.5645027489538633e-06, |
| "loss": 0.3909, |
| "step": 1367 |
| }, |
| { |
| "epoch": 3.953757225433526, |
| "grad_norm": 0.47337641434225514, |
| "learning_rate": 2.5510257794502423e-06, |
| "loss": 0.3887, |
| "step": 1368 |
| }, |
| { |
| "epoch": 3.9566473988439306, |
| "grad_norm": 0.47126439390870567, |
| "learning_rate": 2.5375791362059287e-06, |
| "loss": 0.3946, |
| "step": 1369 |
| }, |
| { |
| "epoch": 3.959537572254335, |
| "grad_norm": 0.49310721717359374, |
| "learning_rate": 2.524162873964896e-06, |
| "loss": 0.4, |
| "step": 1370 |
| }, |
| { |
| "epoch": 3.9624277456647397, |
| "grad_norm": 0.5107504252689111, |
| "learning_rate": 2.510777047347419e-06, |
| "loss": 0.3738, |
| "step": 1371 |
| }, |
| { |
| "epoch": 3.9653179190751446, |
| "grad_norm": 0.48531406074206546, |
| "learning_rate": 2.4974217108498656e-06, |
| "loss": 0.3981, |
| "step": 1372 |
| }, |
| { |
| "epoch": 3.968208092485549, |
| "grad_norm": 0.49546193426834495, |
| "learning_rate": 2.4840969188444754e-06, |
| "loss": 0.3835, |
| "step": 1373 |
| }, |
| { |
| "epoch": 3.9710982658959537, |
| "grad_norm": 0.46548489662595355, |
| "learning_rate": 2.4708027255791323e-06, |
| "loss": 0.4155, |
| "step": 1374 |
| }, |
| { |
| "epoch": 3.9739884393063583, |
| "grad_norm": 0.44586516274034266, |
| "learning_rate": 2.457539185177148e-06, |
| "loss": 0.3964, |
| "step": 1375 |
| }, |
| { |
| "epoch": 3.976878612716763, |
| "grad_norm": 0.5251475706820591, |
| "learning_rate": 2.4443063516370415e-06, |
| "loss": 0.3769, |
| "step": 1376 |
| }, |
| { |
| "epoch": 3.979768786127168, |
| "grad_norm": 0.45717894373015, |
| "learning_rate": 2.431104278832315e-06, |
| "loss": 0.4089, |
| "step": 1377 |
| }, |
| { |
| "epoch": 3.9826589595375723, |
| "grad_norm": 0.4770020764557666, |
| "learning_rate": 2.4179330205112383e-06, |
| "loss": 0.3949, |
| "step": 1378 |
| }, |
| { |
| "epoch": 3.985549132947977, |
| "grad_norm": 0.48235033068032995, |
| "learning_rate": 2.404792630296633e-06, |
| "loss": 0.3924, |
| "step": 1379 |
| }, |
| { |
| "epoch": 3.9884393063583814, |
| "grad_norm": 0.4878128649867659, |
| "learning_rate": 2.3916831616856475e-06, |
| "loss": 0.4063, |
| "step": 1380 |
| }, |
| { |
| "epoch": 3.991329479768786, |
| "grad_norm": 0.46889111549403895, |
| "learning_rate": 2.37860466804954e-06, |
| "loss": 0.3921, |
| "step": 1381 |
| }, |
| { |
| "epoch": 3.994219653179191, |
| "grad_norm": 0.4566000580810184, |
| "learning_rate": 2.3655572026334674e-06, |
| "loss": 0.4113, |
| "step": 1382 |
| }, |
| { |
| "epoch": 3.9971098265895955, |
| "grad_norm": 0.5291952784963643, |
| "learning_rate": 2.3525408185562626e-06, |
| "loss": 0.3827, |
| "step": 1383 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.5182380653301021, |
| "learning_rate": 2.339555568810221e-06, |
| "loss": 0.3698, |
| "step": 1384 |
| }, |
| { |
| "epoch": 4.002890173410405, |
| "grad_norm": 0.6698862793434772, |
| "learning_rate": 2.3266015062608837e-06, |
| "loss": 0.3456, |
| "step": 1385 |
| }, |
| { |
| "epoch": 4.005780346820809, |
| "grad_norm": 0.5774888203625725, |
| "learning_rate": 2.3136786836468204e-06, |
| "loss": 0.3233, |
| "step": 1386 |
| }, |
| { |
| "epoch": 4.008670520231214, |
| "grad_norm": 0.496192301374043, |
| "learning_rate": 2.300787153579418e-06, |
| "loss": 0.338, |
| "step": 1387 |
| }, |
| { |
| "epoch": 4.011560693641618, |
| "grad_norm": 0.6361731482559027, |
| "learning_rate": 2.2879269685426742e-06, |
| "loss": 0.3243, |
| "step": 1388 |
| }, |
| { |
| "epoch": 4.014450867052023, |
| "grad_norm": 0.7439747647099445, |
| "learning_rate": 2.2750981808929563e-06, |
| "loss": 0.349, |
| "step": 1389 |
| }, |
| { |
| "epoch": 4.017341040462428, |
| "grad_norm": 0.5589574759688413, |
| "learning_rate": 2.262300842858818e-06, |
| "loss": 0.3521, |
| "step": 1390 |
| }, |
| { |
| "epoch": 4.020231213872832, |
| "grad_norm": 0.4896141267394832, |
| "learning_rate": 2.2495350065407797e-06, |
| "loss": 0.3426, |
| "step": 1391 |
| }, |
| { |
| "epoch": 4.023121387283237, |
| "grad_norm": 0.5639827670600378, |
| "learning_rate": 2.236800723911107e-06, |
| "loss": 0.3395, |
| "step": 1392 |
| }, |
| { |
| "epoch": 4.026011560693641, |
| "grad_norm": 0.5752090316401731, |
| "learning_rate": 2.2240980468135962e-06, |
| "loss": 0.3644, |
| "step": 1393 |
| }, |
| { |
| "epoch": 4.028901734104046, |
| "grad_norm": 0.5788393068913655, |
| "learning_rate": 2.21142702696338e-06, |
| "loss": 0.3454, |
| "step": 1394 |
| }, |
| { |
| "epoch": 4.031791907514451, |
| "grad_norm": 0.5063205120505316, |
| "learning_rate": 2.198787715946712e-06, |
| "loss": 0.334, |
| "step": 1395 |
| }, |
| { |
| "epoch": 4.034682080924855, |
| "grad_norm": 0.46871942776478365, |
| "learning_rate": 2.1861801652207472e-06, |
| "loss": 0.352, |
| "step": 1396 |
| }, |
| { |
| "epoch": 4.03757225433526, |
| "grad_norm": 0.4814257774715175, |
| "learning_rate": 2.1736044261133305e-06, |
| "loss": 0.3537, |
| "step": 1397 |
| }, |
| { |
| "epoch": 4.040462427745664, |
| "grad_norm": 0.4944947737271302, |
| "learning_rate": 2.161060549822811e-06, |
| "loss": 0.3443, |
| "step": 1398 |
| }, |
| { |
| "epoch": 4.043352601156069, |
| "grad_norm": 0.5422000937512472, |
| "learning_rate": 2.148548587417808e-06, |
| "loss": 0.3629, |
| "step": 1399 |
| }, |
| { |
| "epoch": 4.046242774566474, |
| "grad_norm": 0.5033172587407463, |
| "learning_rate": 2.1360685898370147e-06, |
| "loss": 0.37, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.0491329479768785, |
| "grad_norm": 0.47142049139932146, |
| "learning_rate": 2.123620607888991e-06, |
| "loss": 0.3451, |
| "step": 1401 |
| }, |
| { |
| "epoch": 4.0520231213872835, |
| "grad_norm": 0.4592932152977094, |
| "learning_rate": 2.1112046922519524e-06, |
| "loss": 0.3365, |
| "step": 1402 |
| }, |
| { |
| "epoch": 4.054913294797688, |
| "grad_norm": 0.4677681799519137, |
| "learning_rate": 2.0988208934735664e-06, |
| "loss": 0.332, |
| "step": 1403 |
| }, |
| { |
| "epoch": 4.057803468208093, |
| "grad_norm": 0.47746092825207925, |
| "learning_rate": 2.0864692619707493e-06, |
| "loss": 0.3152, |
| "step": 1404 |
| }, |
| { |
| "epoch": 4.0606936416184976, |
| "grad_norm": 0.48786302102807205, |
| "learning_rate": 2.074149848029453e-06, |
| "loss": 0.3359, |
| "step": 1405 |
| }, |
| { |
| "epoch": 4.063583815028902, |
| "grad_norm": 0.4775605547438963, |
| "learning_rate": 2.06186270180447e-06, |
| "loss": 0.3126, |
| "step": 1406 |
| }, |
| { |
| "epoch": 4.066473988439307, |
| "grad_norm": 0.46623710415414105, |
| "learning_rate": 2.0496078733192216e-06, |
| "loss": 0.3415, |
| "step": 1407 |
| }, |
| { |
| "epoch": 4.069364161849711, |
| "grad_norm": 0.5176099022694585, |
| "learning_rate": 2.037385412465558e-06, |
| "loss": 0.3048, |
| "step": 1408 |
| }, |
| { |
| "epoch": 4.072254335260116, |
| "grad_norm": 0.4787733747514296, |
| "learning_rate": 2.0251953690035543e-06, |
| "loss": 0.3398, |
| "step": 1409 |
| }, |
| { |
| "epoch": 4.07514450867052, |
| "grad_norm": 0.47788782780581107, |
| "learning_rate": 2.01303779256131e-06, |
| "loss": 0.3416, |
| "step": 1410 |
| }, |
| { |
| "epoch": 4.078034682080925, |
| "grad_norm": 0.4601914046139569, |
| "learning_rate": 2.00091273263474e-06, |
| "loss": 0.3271, |
| "step": 1411 |
| }, |
| { |
| "epoch": 4.08092485549133, |
| "grad_norm": 0.4361807898527824, |
| "learning_rate": 1.988820238587379e-06, |
| "loss": 0.3391, |
| "step": 1412 |
| }, |
| { |
| "epoch": 4.083815028901734, |
| "grad_norm": 0.46670742374819324, |
| "learning_rate": 1.9767603596501915e-06, |
| "loss": 0.3339, |
| "step": 1413 |
| }, |
| { |
| "epoch": 4.086705202312139, |
| "grad_norm": 0.43632480253238454, |
| "learning_rate": 1.9647331449213393e-06, |
| "loss": 0.3686, |
| "step": 1414 |
| }, |
| { |
| "epoch": 4.089595375722543, |
| "grad_norm": 0.44832973060002657, |
| "learning_rate": 1.952738643366011e-06, |
| "loss": 0.3505, |
| "step": 1415 |
| }, |
| { |
| "epoch": 4.092485549132948, |
| "grad_norm": 0.45939235288075486, |
| "learning_rate": 1.9407769038162216e-06, |
| "loss": 0.3297, |
| "step": 1416 |
| }, |
| { |
| "epoch": 4.095375722543353, |
| "grad_norm": 0.4409722647472371, |
| "learning_rate": 1.9288479749705967e-06, |
| "loss": 0.3128, |
| "step": 1417 |
| }, |
| { |
| "epoch": 4.098265895953757, |
| "grad_norm": 0.47224181678349825, |
| "learning_rate": 1.9169519053941786e-06, |
| "loss": 0.3208, |
| "step": 1418 |
| }, |
| { |
| "epoch": 4.101156069364162, |
| "grad_norm": 0.47439726771277907, |
| "learning_rate": 1.9050887435182386e-06, |
| "loss": 0.3398, |
| "step": 1419 |
| }, |
| { |
| "epoch": 4.104046242774566, |
| "grad_norm": 0.4658176671090241, |
| "learning_rate": 1.8932585376400803e-06, |
| "loss": 0.3396, |
| "step": 1420 |
| }, |
| { |
| "epoch": 4.106936416184971, |
| "grad_norm": 0.45373368834737315, |
| "learning_rate": 1.8814613359228296e-06, |
| "loss": 0.3579, |
| "step": 1421 |
| }, |
| { |
| "epoch": 4.109826589595376, |
| "grad_norm": 0.4416328012965478, |
| "learning_rate": 1.8696971863952385e-06, |
| "loss": 0.3299, |
| "step": 1422 |
| }, |
| { |
| "epoch": 4.11271676300578, |
| "grad_norm": 0.48886940224883624, |
| "learning_rate": 1.8579661369515155e-06, |
| "loss": 0.3488, |
| "step": 1423 |
| }, |
| { |
| "epoch": 4.115606936416185, |
| "grad_norm": 0.4590041411360483, |
| "learning_rate": 1.8462682353510974e-06, |
| "loss": 0.331, |
| "step": 1424 |
| }, |
| { |
| "epoch": 4.118497109826589, |
| "grad_norm": 0.4495947318171395, |
| "learning_rate": 1.834603529218475e-06, |
| "loss": 0.3447, |
| "step": 1425 |
| }, |
| { |
| "epoch": 4.121387283236994, |
| "grad_norm": 0.5098247907708044, |
| "learning_rate": 1.8229720660429916e-06, |
| "loss": 0.3313, |
| "step": 1426 |
| }, |
| { |
| "epoch": 4.124277456647399, |
| "grad_norm": 0.45941639945045426, |
| "learning_rate": 1.8113738931786551e-06, |
| "loss": 0.3509, |
| "step": 1427 |
| }, |
| { |
| "epoch": 4.127167630057803, |
| "grad_norm": 0.46263196317597743, |
| "learning_rate": 1.7998090578439375e-06, |
| "loss": 0.3466, |
| "step": 1428 |
| }, |
| { |
| "epoch": 4.130057803468208, |
| "grad_norm": 0.4743888485808943, |
| "learning_rate": 1.7882776071215912e-06, |
| "loss": 0.3103, |
| "step": 1429 |
| }, |
| { |
| "epoch": 4.132947976878612, |
| "grad_norm": 0.4553318759212632, |
| "learning_rate": 1.7767795879584504e-06, |
| "loss": 0.352, |
| "step": 1430 |
| }, |
| { |
| "epoch": 4.135838150289017, |
| "grad_norm": 0.4535461955976109, |
| "learning_rate": 1.7653150471652435e-06, |
| "loss": 0.349, |
| "step": 1431 |
| }, |
| { |
| "epoch": 4.138728323699422, |
| "grad_norm": 0.43364595489226815, |
| "learning_rate": 1.753884031416403e-06, |
| "loss": 0.3393, |
| "step": 1432 |
| }, |
| { |
| "epoch": 4.141618497109826, |
| "grad_norm": 0.49226388211750033, |
| "learning_rate": 1.742486587249873e-06, |
| "loss": 0.349, |
| "step": 1433 |
| }, |
| { |
| "epoch": 4.144508670520231, |
| "grad_norm": 0.4588798495243272, |
| "learning_rate": 1.7311227610669202e-06, |
| "loss": 0.3455, |
| "step": 1434 |
| }, |
| { |
| "epoch": 4.1473988439306355, |
| "grad_norm": 0.4548756292362763, |
| "learning_rate": 1.7197925991319486e-06, |
| "loss": 0.3469, |
| "step": 1435 |
| }, |
| { |
| "epoch": 4.1502890173410405, |
| "grad_norm": 0.4582296941792938, |
| "learning_rate": 1.708496147572305e-06, |
| "loss": 0.3342, |
| "step": 1436 |
| }, |
| { |
| "epoch": 4.1531791907514455, |
| "grad_norm": 0.4708824572032611, |
| "learning_rate": 1.697233452378093e-06, |
| "loss": 0.3441, |
| "step": 1437 |
| }, |
| { |
| "epoch": 4.15606936416185, |
| "grad_norm": 0.45376778758330355, |
| "learning_rate": 1.6860045594020003e-06, |
| "loss": 0.3307, |
| "step": 1438 |
| }, |
| { |
| "epoch": 4.158959537572255, |
| "grad_norm": 0.4725339238588213, |
| "learning_rate": 1.6748095143590804e-06, |
| "loss": 0.3535, |
| "step": 1439 |
| }, |
| { |
| "epoch": 4.161849710982659, |
| "grad_norm": 0.4692515895943304, |
| "learning_rate": 1.6636483628265942e-06, |
| "loss": 0.3573, |
| "step": 1440 |
| }, |
| { |
| "epoch": 4.164739884393064, |
| "grad_norm": 0.4371382256412352, |
| "learning_rate": 1.6525211502438188e-06, |
| "loss": 0.3616, |
| "step": 1441 |
| }, |
| { |
| "epoch": 4.167630057803469, |
| "grad_norm": 0.4604484221618385, |
| "learning_rate": 1.6414279219118568e-06, |
| "loss": 0.354, |
| "step": 1442 |
| }, |
| { |
| "epoch": 4.170520231213873, |
| "grad_norm": 0.4431830293421041, |
| "learning_rate": 1.6303687229934461e-06, |
| "loss": 0.3476, |
| "step": 1443 |
| }, |
| { |
| "epoch": 4.173410404624278, |
| "grad_norm": 0.44939578224273397, |
| "learning_rate": 1.6193435985127926e-06, |
| "loss": 0.331, |
| "step": 1444 |
| }, |
| { |
| "epoch": 4.176300578034682, |
| "grad_norm": 0.4512301766319807, |
| "learning_rate": 1.60835259335538e-06, |
| "loss": 0.3471, |
| "step": 1445 |
| }, |
| { |
| "epoch": 4.179190751445087, |
| "grad_norm": 0.4476904324897404, |
| "learning_rate": 1.5973957522677818e-06, |
| "loss": 0.3319, |
| "step": 1446 |
| }, |
| { |
| "epoch": 4.182080924855491, |
| "grad_norm": 0.45894696212505476, |
| "learning_rate": 1.5864731198574768e-06, |
| "loss": 0.3534, |
| "step": 1447 |
| }, |
| { |
| "epoch": 4.184971098265896, |
| "grad_norm": 0.4431828943221232, |
| "learning_rate": 1.575584740592685e-06, |
| "loss": 0.3523, |
| "step": 1448 |
| }, |
| { |
| "epoch": 4.187861271676301, |
| "grad_norm": 0.4761638484119464, |
| "learning_rate": 1.5647306588021672e-06, |
| "loss": 0.3376, |
| "step": 1449 |
| }, |
| { |
| "epoch": 4.190751445086705, |
| "grad_norm": 0.4847200373219092, |
| "learning_rate": 1.5539109186750544e-06, |
| "loss": 0.3097, |
| "step": 1450 |
| }, |
| { |
| "epoch": 4.19364161849711, |
| "grad_norm": 0.46747500615396664, |
| "learning_rate": 1.543125564260668e-06, |
| "loss": 0.3339, |
| "step": 1451 |
| }, |
| { |
| "epoch": 4.196531791907514, |
| "grad_norm": 0.47459635670072037, |
| "learning_rate": 1.5323746394683348e-06, |
| "loss": 0.3398, |
| "step": 1452 |
| }, |
| { |
| "epoch": 4.199421965317919, |
| "grad_norm": 0.4544955022339449, |
| "learning_rate": 1.5216581880672122e-06, |
| "loss": 0.3489, |
| "step": 1453 |
| }, |
| { |
| "epoch": 4.202312138728324, |
| "grad_norm": 0.4726389666592722, |
| "learning_rate": 1.5109762536861127e-06, |
| "loss": 0.3438, |
| "step": 1454 |
| }, |
| { |
| "epoch": 4.205202312138728, |
| "grad_norm": 0.4311706192954136, |
| "learning_rate": 1.5003288798133197e-06, |
| "loss": 0.3499, |
| "step": 1455 |
| }, |
| { |
| "epoch": 4.208092485549133, |
| "grad_norm": 0.45368588849949304, |
| "learning_rate": 1.4897161097964164e-06, |
| "loss": 0.3261, |
| "step": 1456 |
| }, |
| { |
| "epoch": 4.210982658959537, |
| "grad_norm": 0.44332312547505837, |
| "learning_rate": 1.4791379868421052e-06, |
| "loss": 0.3467, |
| "step": 1457 |
| }, |
| { |
| "epoch": 4.213872832369942, |
| "grad_norm": 0.4530598562314223, |
| "learning_rate": 1.4685945540160328e-06, |
| "loss": 0.3245, |
| "step": 1458 |
| }, |
| { |
| "epoch": 4.216763005780347, |
| "grad_norm": 0.46322900429248803, |
| "learning_rate": 1.458085854242617e-06, |
| "loss": 0.3452, |
| "step": 1459 |
| }, |
| { |
| "epoch": 4.219653179190751, |
| "grad_norm": 0.45588154749357934, |
| "learning_rate": 1.4476119303048709e-06, |
| "loss": 0.3262, |
| "step": 1460 |
| }, |
| { |
| "epoch": 4.222543352601156, |
| "grad_norm": 0.44527165693981313, |
| "learning_rate": 1.437172824844224e-06, |
| "loss": 0.3373, |
| "step": 1461 |
| }, |
| { |
| "epoch": 4.22543352601156, |
| "grad_norm": 0.45716413877466067, |
| "learning_rate": 1.4267685803603615e-06, |
| "loss": 0.3352, |
| "step": 1462 |
| }, |
| { |
| "epoch": 4.228323699421965, |
| "grad_norm": 0.46836574889519467, |
| "learning_rate": 1.416399239211036e-06, |
| "loss": 0.3178, |
| "step": 1463 |
| }, |
| { |
| "epoch": 4.23121387283237, |
| "grad_norm": 0.4534593906103841, |
| "learning_rate": 1.4060648436118985e-06, |
| "loss": 0.3415, |
| "step": 1464 |
| }, |
| { |
| "epoch": 4.234104046242774, |
| "grad_norm": 0.45695949138372266, |
| "learning_rate": 1.395765435636335e-06, |
| "loss": 0.3449, |
| "step": 1465 |
| }, |
| { |
| "epoch": 4.236994219653179, |
| "grad_norm": 0.4651531043657846, |
| "learning_rate": 1.3855010572152927e-06, |
| "loss": 0.3426, |
| "step": 1466 |
| }, |
| { |
| "epoch": 4.2398843930635834, |
| "grad_norm": 0.4434611463546597, |
| "learning_rate": 1.3752717501371037e-06, |
| "loss": 0.3437, |
| "step": 1467 |
| }, |
| { |
| "epoch": 4.242774566473988, |
| "grad_norm": 0.4569031499337852, |
| "learning_rate": 1.3650775560473118e-06, |
| "loss": 0.3317, |
| "step": 1468 |
| }, |
| { |
| "epoch": 4.245664739884393, |
| "grad_norm": 0.4377009893024095, |
| "learning_rate": 1.3549185164485135e-06, |
| "loss": 0.3469, |
| "step": 1469 |
| }, |
| { |
| "epoch": 4.2485549132947975, |
| "grad_norm": 0.439428219289406, |
| "learning_rate": 1.3447946727001881e-06, |
| "loss": 0.3508, |
| "step": 1470 |
| }, |
| { |
| "epoch": 4.2514450867052025, |
| "grad_norm": 0.4240581035879888, |
| "learning_rate": 1.3347060660185251e-06, |
| "loss": 0.3337, |
| "step": 1471 |
| }, |
| { |
| "epoch": 4.254335260115607, |
| "grad_norm": 0.44821038073621283, |
| "learning_rate": 1.324652737476244e-06, |
| "loss": 0.3334, |
| "step": 1472 |
| }, |
| { |
| "epoch": 4.257225433526012, |
| "grad_norm": 0.4717586682934258, |
| "learning_rate": 1.3146347280024586e-06, |
| "loss": 0.3285, |
| "step": 1473 |
| }, |
| { |
| "epoch": 4.2601156069364166, |
| "grad_norm": 0.45970185661583424, |
| "learning_rate": 1.304652078382479e-06, |
| "loss": 0.3345, |
| "step": 1474 |
| }, |
| { |
| "epoch": 4.263005780346821, |
| "grad_norm": 0.4537595102615837, |
| "learning_rate": 1.2947048292576635e-06, |
| "loss": 0.3471, |
| "step": 1475 |
| }, |
| { |
| "epoch": 4.265895953757226, |
| "grad_norm": 0.4488199865437098, |
| "learning_rate": 1.284793021125247e-06, |
| "loss": 0.338, |
| "step": 1476 |
| }, |
| { |
| "epoch": 4.26878612716763, |
| "grad_norm": 0.4765959434746097, |
| "learning_rate": 1.2749166943381763e-06, |
| "loss": 0.3462, |
| "step": 1477 |
| }, |
| { |
| "epoch": 4.271676300578035, |
| "grad_norm": 0.45420852625737385, |
| "learning_rate": 1.2650758891049464e-06, |
| "loss": 0.3381, |
| "step": 1478 |
| }, |
| { |
| "epoch": 4.27456647398844, |
| "grad_norm": 0.4537304824757465, |
| "learning_rate": 1.255270645489438e-06, |
| "loss": 0.3455, |
| "step": 1479 |
| }, |
| { |
| "epoch": 4.277456647398844, |
| "grad_norm": 0.43717037502746525, |
| "learning_rate": 1.2455010034107529e-06, |
| "loss": 0.3358, |
| "step": 1480 |
| }, |
| { |
| "epoch": 4.280346820809249, |
| "grad_norm": 0.4507791761925461, |
| "learning_rate": 1.2357670026430524e-06, |
| "loss": 0.3448, |
| "step": 1481 |
| }, |
| { |
| "epoch": 4.283236994219653, |
| "grad_norm": 0.4511509118103023, |
| "learning_rate": 1.2260686828153934e-06, |
| "loss": 0.3451, |
| "step": 1482 |
| }, |
| { |
| "epoch": 4.286127167630058, |
| "grad_norm": 0.4734480889620212, |
| "learning_rate": 1.216406083411571e-06, |
| "loss": 0.3321, |
| "step": 1483 |
| }, |
| { |
| "epoch": 4.289017341040463, |
| "grad_norm": 0.4537221581801662, |
| "learning_rate": 1.2067792437699532e-06, |
| "loss": 0.3322, |
| "step": 1484 |
| }, |
| { |
| "epoch": 4.291907514450867, |
| "grad_norm": 0.4665588809965585, |
| "learning_rate": 1.1971882030833248e-06, |
| "loss": 0.3377, |
| "step": 1485 |
| }, |
| { |
| "epoch": 4.294797687861272, |
| "grad_norm": 0.42934685126372396, |
| "learning_rate": 1.1876330003987214e-06, |
| "loss": 0.348, |
| "step": 1486 |
| }, |
| { |
| "epoch": 4.297687861271676, |
| "grad_norm": 0.44276344174103444, |
| "learning_rate": 1.178113674617285e-06, |
| "loss": 0.34, |
| "step": 1487 |
| }, |
| { |
| "epoch": 4.300578034682081, |
| "grad_norm": 0.4370964807675226, |
| "learning_rate": 1.1686302644940817e-06, |
| "loss": 0.3395, |
| "step": 1488 |
| }, |
| { |
| "epoch": 4.303468208092486, |
| "grad_norm": 0.4473311511207275, |
| "learning_rate": 1.1591828086379697e-06, |
| "loss": 0.3225, |
| "step": 1489 |
| }, |
| { |
| "epoch": 4.30635838150289, |
| "grad_norm": 0.46150180784367567, |
| "learning_rate": 1.1497713455114212e-06, |
| "loss": 0.3517, |
| "step": 1490 |
| }, |
| { |
| "epoch": 4.309248554913295, |
| "grad_norm": 0.44794545147604187, |
| "learning_rate": 1.1403959134303832e-06, |
| "loss": 0.3388, |
| "step": 1491 |
| }, |
| { |
| "epoch": 4.312138728323699, |
| "grad_norm": 0.4453836249012523, |
| "learning_rate": 1.1310565505641114e-06, |
| "loss": 0.3366, |
| "step": 1492 |
| }, |
| { |
| "epoch": 4.315028901734104, |
| "grad_norm": 0.4639655297914257, |
| "learning_rate": 1.1217532949350075e-06, |
| "loss": 0.3541, |
| "step": 1493 |
| }, |
| { |
| "epoch": 4.317919075144509, |
| "grad_norm": 0.42738098566365257, |
| "learning_rate": 1.1124861844184809e-06, |
| "loss": 0.3328, |
| "step": 1494 |
| }, |
| { |
| "epoch": 4.320809248554913, |
| "grad_norm": 0.42359690830348806, |
| "learning_rate": 1.1032552567427911e-06, |
| "loss": 0.3401, |
| "step": 1495 |
| }, |
| { |
| "epoch": 4.323699421965318, |
| "grad_norm": 0.44323975529607346, |
| "learning_rate": 1.0940605494888856e-06, |
| "loss": 0.3309, |
| "step": 1496 |
| }, |
| { |
| "epoch": 4.326589595375722, |
| "grad_norm": 0.42295793743010124, |
| "learning_rate": 1.0849021000902427e-06, |
| "loss": 0.3474, |
| "step": 1497 |
| }, |
| { |
| "epoch": 4.329479768786127, |
| "grad_norm": 0.4531839136253995, |
| "learning_rate": 1.0757799458327445e-06, |
| "loss": 0.3272, |
| "step": 1498 |
| }, |
| { |
| "epoch": 4.332369942196531, |
| "grad_norm": 0.452886303528469, |
| "learning_rate": 1.0666941238544958e-06, |
| "loss": 0.3374, |
| "step": 1499 |
| }, |
| { |
| "epoch": 4.335260115606936, |
| "grad_norm": 0.4724238524003501, |
| "learning_rate": 1.0576446711456933e-06, |
| "loss": 0.3302, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.338150289017341, |
| "grad_norm": 0.42187648027212826, |
| "learning_rate": 1.0486316245484574e-06, |
| "loss": 0.3506, |
| "step": 1501 |
| }, |
| { |
| "epoch": 4.341040462427745, |
| "grad_norm": 0.4473258685192444, |
| "learning_rate": 1.0396550207567014e-06, |
| "loss": 0.3541, |
| "step": 1502 |
| }, |
| { |
| "epoch": 4.34393063583815, |
| "grad_norm": 0.4382796239703373, |
| "learning_rate": 1.0307148963159697e-06, |
| "loss": 0.341, |
| "step": 1503 |
| }, |
| { |
| "epoch": 4.3468208092485545, |
| "grad_norm": 0.46978904741617084, |
| "learning_rate": 1.021811287623291e-06, |
| "loss": 0.3073, |
| "step": 1504 |
| }, |
| { |
| "epoch": 4.3497109826589595, |
| "grad_norm": 0.4391751386270589, |
| "learning_rate": 1.012944230927031e-06, |
| "loss": 0.3378, |
| "step": 1505 |
| }, |
| { |
| "epoch": 4.3526011560693645, |
| "grad_norm": 0.43296798929341374, |
| "learning_rate": 1.0041137623267461e-06, |
| "loss": 0.3312, |
| "step": 1506 |
| }, |
| { |
| "epoch": 4.355491329479769, |
| "grad_norm": 0.44452658529893974, |
| "learning_rate": 9.953199177730334e-07, |
| "loss": 0.344, |
| "step": 1507 |
| }, |
| { |
| "epoch": 4.358381502890174, |
| "grad_norm": 0.44374319164635884, |
| "learning_rate": 9.86562733067389e-07, |
| "loss": 0.3248, |
| "step": 1508 |
| }, |
| { |
| "epoch": 4.361271676300578, |
| "grad_norm": 0.451766337725714, |
| "learning_rate": 9.778422438620572e-07, |
| "loss": 0.3158, |
| "step": 1509 |
| }, |
| { |
| "epoch": 4.364161849710983, |
| "grad_norm": 0.46955060860789766, |
| "learning_rate": 9.69158485659889e-07, |
| "loss": 0.3491, |
| "step": 1510 |
| }, |
| { |
| "epoch": 4.367052023121388, |
| "grad_norm": 0.47181813714230825, |
| "learning_rate": 9.605114938141935e-07, |
| "loss": 0.3294, |
| "step": 1511 |
| }, |
| { |
| "epoch": 4.369942196531792, |
| "grad_norm": 0.45297589475510724, |
| "learning_rate": 9.519013035286029e-07, |
| "loss": 0.3428, |
| "step": 1512 |
| }, |
| { |
| "epoch": 4.372832369942197, |
| "grad_norm": 0.42322621216412715, |
| "learning_rate": 9.433279498569147e-07, |
| "loss": 0.3656, |
| "step": 1513 |
| }, |
| { |
| "epoch": 4.375722543352601, |
| "grad_norm": 0.456838453411949, |
| "learning_rate": 9.347914677029624e-07, |
| "loss": 0.3144, |
| "step": 1514 |
| }, |
| { |
| "epoch": 4.378612716763006, |
| "grad_norm": 0.459132470959214, |
| "learning_rate": 9.262918918204644e-07, |
| "loss": 0.3401, |
| "step": 1515 |
| }, |
| { |
| "epoch": 4.381502890173411, |
| "grad_norm": 0.44484864784662287, |
| "learning_rate": 9.178292568128944e-07, |
| "loss": 0.343, |
| "step": 1516 |
| }, |
| { |
| "epoch": 4.384393063583815, |
| "grad_norm": 0.43728677906015745, |
| "learning_rate": 9.094035971333248e-07, |
| "loss": 0.3569, |
| "step": 1517 |
| }, |
| { |
| "epoch": 4.38728323699422, |
| "grad_norm": 0.45722288513195564, |
| "learning_rate": 9.010149470842933e-07, |
| "loss": 0.3682, |
| "step": 1518 |
| }, |
| { |
| "epoch": 4.390173410404624, |
| "grad_norm": 0.4468422842165502, |
| "learning_rate": 8.926633408176677e-07, |
| "loss": 0.3587, |
| "step": 1519 |
| }, |
| { |
| "epoch": 4.393063583815029, |
| "grad_norm": 0.4523020085308276, |
| "learning_rate": 8.843488123345045e-07, |
| "loss": 0.3693, |
| "step": 1520 |
| }, |
| { |
| "epoch": 4.395953757225434, |
| "grad_norm": 0.46504497106862763, |
| "learning_rate": 8.760713954849087e-07, |
| "loss": 0.348, |
| "step": 1521 |
| }, |
| { |
| "epoch": 4.398843930635838, |
| "grad_norm": 0.4572345065882788, |
| "learning_rate": 8.678311239678872e-07, |
| "loss": 0.3329, |
| "step": 1522 |
| }, |
| { |
| "epoch": 4.401734104046243, |
| "grad_norm": 0.45212729648907957, |
| "learning_rate": 8.596280313312355e-07, |
| "loss": 0.3667, |
| "step": 1523 |
| }, |
| { |
| "epoch": 4.404624277456647, |
| "grad_norm": 0.44257006814068606, |
| "learning_rate": 8.514621509713772e-07, |
| "loss": 0.3469, |
| "step": 1524 |
| }, |
| { |
| "epoch": 4.407514450867052, |
| "grad_norm": 0.4876093667366179, |
| "learning_rate": 8.433335161332412e-07, |
| "loss": 0.3238, |
| "step": 1525 |
| }, |
| { |
| "epoch": 4.410404624277457, |
| "grad_norm": 0.48545364939390045, |
| "learning_rate": 8.352421599101157e-07, |
| "loss": 0.3584, |
| "step": 1526 |
| }, |
| { |
| "epoch": 4.413294797687861, |
| "grad_norm": 0.4582224069454899, |
| "learning_rate": 8.27188115243529e-07, |
| "loss": 0.3395, |
| "step": 1527 |
| }, |
| { |
| "epoch": 4.416184971098266, |
| "grad_norm": 0.44141560547767483, |
| "learning_rate": 8.191714149231022e-07, |
| "loss": 0.3486, |
| "step": 1528 |
| }, |
| { |
| "epoch": 4.41907514450867, |
| "grad_norm": 0.4337129493212092, |
| "learning_rate": 8.111920915864213e-07, |
| "loss": 0.3648, |
| "step": 1529 |
| }, |
| { |
| "epoch": 4.421965317919075, |
| "grad_norm": 0.4485339769994593, |
| "learning_rate": 8.032501777189016e-07, |
| "loss": 0.3587, |
| "step": 1530 |
| }, |
| { |
| "epoch": 4.424855491329479, |
| "grad_norm": 0.43414525391667297, |
| "learning_rate": 7.953457056536596e-07, |
| "loss": 0.3399, |
| "step": 1531 |
| }, |
| { |
| "epoch": 4.427745664739884, |
| "grad_norm": 0.43376869094234677, |
| "learning_rate": 7.874787075713742e-07, |
| "loss": 0.3462, |
| "step": 1532 |
| }, |
| { |
| "epoch": 4.430635838150289, |
| "grad_norm": 0.45750126312142625, |
| "learning_rate": 7.796492155001656e-07, |
| "loss": 0.3271, |
| "step": 1533 |
| }, |
| { |
| "epoch": 4.433526011560693, |
| "grad_norm": 0.46580876925404385, |
| "learning_rate": 7.718572613154574e-07, |
| "loss": 0.3321, |
| "step": 1534 |
| }, |
| { |
| "epoch": 4.436416184971098, |
| "grad_norm": 0.4446063454528743, |
| "learning_rate": 7.641028767398473e-07, |
| "loss": 0.3328, |
| "step": 1535 |
| }, |
| { |
| "epoch": 4.4393063583815024, |
| "grad_norm": 0.47054539897600545, |
| "learning_rate": 7.563860933429789e-07, |
| "loss": 0.3315, |
| "step": 1536 |
| }, |
| { |
| "epoch": 4.442196531791907, |
| "grad_norm": 0.4673864472062917, |
| "learning_rate": 7.48706942541424e-07, |
| "loss": 0.3197, |
| "step": 1537 |
| }, |
| { |
| "epoch": 4.445086705202312, |
| "grad_norm": 0.44842080291220054, |
| "learning_rate": 7.410654555985287e-07, |
| "loss": 0.3515, |
| "step": 1538 |
| }, |
| { |
| "epoch": 4.4479768786127165, |
| "grad_norm": 0.41012475610075394, |
| "learning_rate": 7.33461663624313e-07, |
| "loss": 0.3612, |
| "step": 1539 |
| }, |
| { |
| "epoch": 4.4508670520231215, |
| "grad_norm": 0.45327829164620953, |
| "learning_rate": 7.258955975753279e-07, |
| "loss": 0.3518, |
| "step": 1540 |
| }, |
| { |
| "epoch": 4.453757225433526, |
| "grad_norm": 0.45932612475028833, |
| "learning_rate": 7.183672882545401e-07, |
| "loss": 0.3439, |
| "step": 1541 |
| }, |
| { |
| "epoch": 4.456647398843931, |
| "grad_norm": 0.4859816695971326, |
| "learning_rate": 7.108767663111993e-07, |
| "loss": 0.3407, |
| "step": 1542 |
| }, |
| { |
| "epoch": 4.459537572254336, |
| "grad_norm": 0.4323085447994151, |
| "learning_rate": 7.034240622407085e-07, |
| "loss": 0.3493, |
| "step": 1543 |
| }, |
| { |
| "epoch": 4.46242774566474, |
| "grad_norm": 0.4284267539030077, |
| "learning_rate": 6.960092063845148e-07, |
| "loss": 0.3516, |
| "step": 1544 |
| }, |
| { |
| "epoch": 4.465317919075145, |
| "grad_norm": 0.4418025417466207, |
| "learning_rate": 6.886322289299763e-07, |
| "loss": 0.3059, |
| "step": 1545 |
| }, |
| { |
| "epoch": 4.468208092485549, |
| "grad_norm": 0.429128506405581, |
| "learning_rate": 6.812931599102379e-07, |
| "loss": 0.3352, |
| "step": 1546 |
| }, |
| { |
| "epoch": 4.471098265895954, |
| "grad_norm": 0.43750017936727364, |
| "learning_rate": 6.739920292041102e-07, |
| "loss": 0.3456, |
| "step": 1547 |
| }, |
| { |
| "epoch": 4.473988439306359, |
| "grad_norm": 0.44953049761535996, |
| "learning_rate": 6.667288665359562e-07, |
| "loss": 0.3363, |
| "step": 1548 |
| }, |
| { |
| "epoch": 4.476878612716763, |
| "grad_norm": 0.4489565986882684, |
| "learning_rate": 6.595037014755578e-07, |
| "loss": 0.3266, |
| "step": 1549 |
| }, |
| { |
| "epoch": 4.479768786127168, |
| "grad_norm": 0.4309164502180863, |
| "learning_rate": 6.523165634380047e-07, |
| "loss": 0.3472, |
| "step": 1550 |
| }, |
| { |
| "epoch": 4.482658959537572, |
| "grad_norm": 0.42593312051489646, |
| "learning_rate": 6.451674816835618e-07, |
| "loss": 0.3605, |
| "step": 1551 |
| }, |
| { |
| "epoch": 4.485549132947977, |
| "grad_norm": 0.425417146538673, |
| "learning_rate": 6.380564853175741e-07, |
| "loss": 0.3358, |
| "step": 1552 |
| }, |
| { |
| "epoch": 4.488439306358382, |
| "grad_norm": 0.46193990483385394, |
| "learning_rate": 6.309836032903228e-07, |
| "loss": 0.2985, |
| "step": 1553 |
| }, |
| { |
| "epoch": 4.491329479768786, |
| "grad_norm": 0.43253506570848216, |
| "learning_rate": 6.239488643969205e-07, |
| "loss": 0.3226, |
| "step": 1554 |
| }, |
| { |
| "epoch": 4.494219653179191, |
| "grad_norm": 0.4433378890746248, |
| "learning_rate": 6.169522972771924e-07, |
| "loss": 0.3273, |
| "step": 1555 |
| }, |
| { |
| "epoch": 4.497109826589595, |
| "grad_norm": 0.42184643595929217, |
| "learning_rate": 6.099939304155577e-07, |
| "loss": 0.3202, |
| "step": 1556 |
| }, |
| { |
| "epoch": 4.5, |
| "grad_norm": 0.47640273912437076, |
| "learning_rate": 6.030737921409169e-07, |
| "loss": 0.2722, |
| "step": 1557 |
| }, |
| { |
| "epoch": 4.502890173410405, |
| "grad_norm": 0.45407141264243817, |
| "learning_rate": 5.961919106265313e-07, |
| "loss": 0.3439, |
| "step": 1558 |
| }, |
| { |
| "epoch": 4.505780346820809, |
| "grad_norm": 0.4376449567849091, |
| "learning_rate": 5.893483138899125e-07, |
| "loss": 0.3513, |
| "step": 1559 |
| }, |
| { |
| "epoch": 4.508670520231214, |
| "grad_norm": 0.46675708746409345, |
| "learning_rate": 5.825430297927093e-07, |
| "loss": 0.3303, |
| "step": 1560 |
| }, |
| { |
| "epoch": 4.511560693641618, |
| "grad_norm": 0.44146368613515313, |
| "learning_rate": 5.757760860405859e-07, |
| "loss": 0.332, |
| "step": 1561 |
| }, |
| { |
| "epoch": 4.514450867052023, |
| "grad_norm": 0.418864555410123, |
| "learning_rate": 5.690475101831261e-07, |
| "loss": 0.3472, |
| "step": 1562 |
| }, |
| { |
| "epoch": 4.517341040462428, |
| "grad_norm": 0.4586426558580989, |
| "learning_rate": 5.623573296136997e-07, |
| "loss": 0.3382, |
| "step": 1563 |
| }, |
| { |
| "epoch": 4.520231213872832, |
| "grad_norm": 0.42898089728523714, |
| "learning_rate": 5.557055715693649e-07, |
| "loss": 0.3324, |
| "step": 1564 |
| }, |
| { |
| "epoch": 4.523121387283237, |
| "grad_norm": 0.4676061900308173, |
| "learning_rate": 5.49092263130756e-07, |
| "loss": 0.3348, |
| "step": 1565 |
| }, |
| { |
| "epoch": 4.526011560693641, |
| "grad_norm": 0.4543700925597362, |
| "learning_rate": 5.425174312219739e-07, |
| "loss": 0.3113, |
| "step": 1566 |
| }, |
| { |
| "epoch": 4.528901734104046, |
| "grad_norm": 0.465108477478383, |
| "learning_rate": 5.359811026104645e-07, |
| "loss": 0.3391, |
| "step": 1567 |
| }, |
| { |
| "epoch": 4.531791907514451, |
| "grad_norm": 0.4808284435115195, |
| "learning_rate": 5.294833039069269e-07, |
| "loss": 0.3346, |
| "step": 1568 |
| }, |
| { |
| "epoch": 4.534682080924855, |
| "grad_norm": 0.44287503653196786, |
| "learning_rate": 5.230240615651972e-07, |
| "loss": 0.3357, |
| "step": 1569 |
| }, |
| { |
| "epoch": 4.53757225433526, |
| "grad_norm": 0.43946866441137844, |
| "learning_rate": 5.166034018821364e-07, |
| "loss": 0.3283, |
| "step": 1570 |
| }, |
| { |
| "epoch": 4.540462427745664, |
| "grad_norm": 0.436053347850471, |
| "learning_rate": 5.10221350997534e-07, |
| "loss": 0.3538, |
| "step": 1571 |
| }, |
| { |
| "epoch": 4.543352601156069, |
| "grad_norm": 0.4410619890844322, |
| "learning_rate": 5.03877934893986e-07, |
| "loss": 0.3456, |
| "step": 1572 |
| }, |
| { |
| "epoch": 4.546242774566474, |
| "grad_norm": 0.4711814603971082, |
| "learning_rate": 4.975731793968075e-07, |
| "loss": 0.3352, |
| "step": 1573 |
| }, |
| { |
| "epoch": 4.5491329479768785, |
| "grad_norm": 0.4545605967025349, |
| "learning_rate": 4.913071101739164e-07, |
| "loss": 0.3403, |
| "step": 1574 |
| }, |
| { |
| "epoch": 4.5520231213872835, |
| "grad_norm": 0.43235496189688866, |
| "learning_rate": 4.850797527357287e-07, |
| "loss": 0.3349, |
| "step": 1575 |
| }, |
| { |
| "epoch": 4.554913294797688, |
| "grad_norm": 0.47022676916855527, |
| "learning_rate": 4.788911324350564e-07, |
| "loss": 0.3256, |
| "step": 1576 |
| }, |
| { |
| "epoch": 4.557803468208093, |
| "grad_norm": 0.4452792759670324, |
| "learning_rate": 4.72741274467009e-07, |
| "loss": 0.3511, |
| "step": 1577 |
| }, |
| { |
| "epoch": 4.5606936416184976, |
| "grad_norm": 0.44372850033678257, |
| "learning_rate": 4.6663020386888416e-07, |
| "loss": 0.3181, |
| "step": 1578 |
| }, |
| { |
| "epoch": 4.563583815028902, |
| "grad_norm": 0.4398620339745168, |
| "learning_rate": 4.6055794552006817e-07, |
| "loss": 0.3326, |
| "step": 1579 |
| }, |
| { |
| "epoch": 4.566473988439307, |
| "grad_norm": 0.4662378155864558, |
| "learning_rate": 4.5452452414193495e-07, |
| "loss": 0.3237, |
| "step": 1580 |
| }, |
| { |
| "epoch": 4.569364161849711, |
| "grad_norm": 0.42225338968814385, |
| "learning_rate": 4.485299642977481e-07, |
| "loss": 0.3468, |
| "step": 1581 |
| }, |
| { |
| "epoch": 4.572254335260116, |
| "grad_norm": 0.44278019638014077, |
| "learning_rate": 4.425742903925534e-07, |
| "loss": 0.3204, |
| "step": 1582 |
| }, |
| { |
| "epoch": 4.575144508670521, |
| "grad_norm": 0.5016084179745222, |
| "learning_rate": 4.366575266730888e-07, |
| "loss": 0.3332, |
| "step": 1583 |
| }, |
| { |
| "epoch": 4.578034682080925, |
| "grad_norm": 0.469191932516797, |
| "learning_rate": 4.3077969722767897e-07, |
| "loss": 0.3471, |
| "step": 1584 |
| }, |
| { |
| "epoch": 4.58092485549133, |
| "grad_norm": 0.42826255111061495, |
| "learning_rate": 4.2494082598613875e-07, |
| "loss": 0.3682, |
| "step": 1585 |
| }, |
| { |
| "epoch": 4.583815028901734, |
| "grad_norm": 0.4597551582701016, |
| "learning_rate": 4.191409367196753e-07, |
| "loss": 0.3388, |
| "step": 1586 |
| }, |
| { |
| "epoch": 4.586705202312139, |
| "grad_norm": 0.4373614912267801, |
| "learning_rate": 4.133800530407994e-07, |
| "loss": 0.3203, |
| "step": 1587 |
| }, |
| { |
| "epoch": 4.589595375722544, |
| "grad_norm": 0.43968666610635687, |
| "learning_rate": 4.0765819840321353e-07, |
| "loss": 0.3402, |
| "step": 1588 |
| }, |
| { |
| "epoch": 4.592485549132948, |
| "grad_norm": 0.44351171734497535, |
| "learning_rate": 4.019753961017292e-07, |
| "loss": 0.3357, |
| "step": 1589 |
| }, |
| { |
| "epoch": 4.595375722543353, |
| "grad_norm": 0.4415848556722189, |
| "learning_rate": 3.9633166927216637e-07, |
| "loss": 0.3291, |
| "step": 1590 |
| }, |
| { |
| "epoch": 4.598265895953757, |
| "grad_norm": 0.4652210434796125, |
| "learning_rate": 3.9072704089126666e-07, |
| "loss": 0.3418, |
| "step": 1591 |
| }, |
| { |
| "epoch": 4.601156069364162, |
| "grad_norm": 0.4543224901650214, |
| "learning_rate": 3.8516153377658574e-07, |
| "loss": 0.3356, |
| "step": 1592 |
| }, |
| { |
| "epoch": 4.604046242774566, |
| "grad_norm": 0.4553390702328126, |
| "learning_rate": 3.7963517058641206e-07, |
| "loss": 0.3335, |
| "step": 1593 |
| }, |
| { |
| "epoch": 4.606936416184971, |
| "grad_norm": 0.4352888331046387, |
| "learning_rate": 3.741479738196763e-07, |
| "loss": 0.3502, |
| "step": 1594 |
| }, |
| { |
| "epoch": 4.609826589595376, |
| "grad_norm": 0.4374869535509039, |
| "learning_rate": 3.6869996581584746e-07, |
| "loss": 0.3477, |
| "step": 1595 |
| }, |
| { |
| "epoch": 4.61271676300578, |
| "grad_norm": 0.4348034826002002, |
| "learning_rate": 3.632911687548546e-07, |
| "loss": 0.3609, |
| "step": 1596 |
| }, |
| { |
| "epoch": 4.615606936416185, |
| "grad_norm": 0.448678536253477, |
| "learning_rate": 3.5792160465698555e-07, |
| "loss": 0.3503, |
| "step": 1597 |
| }, |
| { |
| "epoch": 4.618497109826589, |
| "grad_norm": 0.4574142336139396, |
| "learning_rate": 3.5259129538281034e-07, |
| "loss": 0.3355, |
| "step": 1598 |
| }, |
| { |
| "epoch": 4.621387283236994, |
| "grad_norm": 0.45364396763072645, |
| "learning_rate": 3.473002626330779e-07, |
| "loss": 0.3321, |
| "step": 1599 |
| }, |
| { |
| "epoch": 4.624277456647399, |
| "grad_norm": 0.4506428297942708, |
| "learning_rate": 3.4204852794863855e-07, |
| "loss": 0.3357, |
| "step": 1600 |
| }, |
| { |
| "epoch": 4.627167630057803, |
| "grad_norm": 0.4041242550173493, |
| "learning_rate": 3.3683611271035256e-07, |
| "loss": 0.3563, |
| "step": 1601 |
| }, |
| { |
| "epoch": 4.630057803468208, |
| "grad_norm": 0.44434168162177407, |
| "learning_rate": 3.316630381389996e-07, |
| "loss": 0.3306, |
| "step": 1602 |
| }, |
| { |
| "epoch": 4.632947976878612, |
| "grad_norm": 0.47932184626470214, |
| "learning_rate": 3.2652932529519843e-07, |
| "loss": 0.3447, |
| "step": 1603 |
| }, |
| { |
| "epoch": 4.635838150289017, |
| "grad_norm": 0.46908958631347786, |
| "learning_rate": 3.214349950793183e-07, |
| "loss": 0.3006, |
| "step": 1604 |
| }, |
| { |
| "epoch": 4.638728323699422, |
| "grad_norm": 0.4694171525413062, |
| "learning_rate": 3.163800682313933e-07, |
| "loss": 0.3245, |
| "step": 1605 |
| }, |
| { |
| "epoch": 4.641618497109826, |
| "grad_norm": 0.41468178348710893, |
| "learning_rate": 3.113645653310382e-07, |
| "loss": 0.3303, |
| "step": 1606 |
| }, |
| { |
| "epoch": 4.644508670520231, |
| "grad_norm": 0.4462666197450191, |
| "learning_rate": 3.0638850679736485e-07, |
| "loss": 0.3413, |
| "step": 1607 |
| }, |
| { |
| "epoch": 4.6473988439306355, |
| "grad_norm": 0.4469140886647421, |
| "learning_rate": 3.014519128888993e-07, |
| "loss": 0.3368, |
| "step": 1608 |
| }, |
| { |
| "epoch": 4.6502890173410405, |
| "grad_norm": 0.4272319628195244, |
| "learning_rate": 2.965548037035015e-07, |
| "loss": 0.3462, |
| "step": 1609 |
| }, |
| { |
| "epoch": 4.653179190751445, |
| "grad_norm": 0.4643364991741565, |
| "learning_rate": 2.91697199178278e-07, |
| "loss": 0.3222, |
| "step": 1610 |
| }, |
| { |
| "epoch": 4.65606936416185, |
| "grad_norm": 0.457141530509153, |
| "learning_rate": 2.868791190895048e-07, |
| "loss": 0.3429, |
| "step": 1611 |
| }, |
| { |
| "epoch": 4.658959537572255, |
| "grad_norm": 0.4700258494548005, |
| "learning_rate": 2.8210058305255026e-07, |
| "loss": 0.3539, |
| "step": 1612 |
| }, |
| { |
| "epoch": 4.661849710982659, |
| "grad_norm": 0.45216546599088037, |
| "learning_rate": 2.773616105217836e-07, |
| "loss": 0.3421, |
| "step": 1613 |
| }, |
| { |
| "epoch": 4.664739884393064, |
| "grad_norm": 0.44191986169167397, |
| "learning_rate": 2.7266222079050717e-07, |
| "loss": 0.3241, |
| "step": 1614 |
| }, |
| { |
| "epoch": 4.667630057803468, |
| "grad_norm": 0.4334486391581851, |
| "learning_rate": 2.680024329908737e-07, |
| "loss": 0.3407, |
| "step": 1615 |
| }, |
| { |
| "epoch": 4.670520231213873, |
| "grad_norm": 0.46200158841454086, |
| "learning_rate": 2.633822660938112e-07, |
| "loss": 0.3357, |
| "step": 1616 |
| }, |
| { |
| "epoch": 4.673410404624278, |
| "grad_norm": 0.4500995146006072, |
| "learning_rate": 2.588017389089381e-07, |
| "loss": 0.3321, |
| "step": 1617 |
| }, |
| { |
| "epoch": 4.676300578034682, |
| "grad_norm": 0.4541876298171543, |
| "learning_rate": 2.5426087008449173e-07, |
| "loss": 0.3335, |
| "step": 1618 |
| }, |
| { |
| "epoch": 4.679190751445087, |
| "grad_norm": 0.42614932613213036, |
| "learning_rate": 2.4975967810725865e-07, |
| "loss": 0.3252, |
| "step": 1619 |
| }, |
| { |
| "epoch": 4.682080924855491, |
| "grad_norm": 0.453308198867481, |
| "learning_rate": 2.452981813024868e-07, |
| "loss": 0.3569, |
| "step": 1620 |
| }, |
| { |
| "epoch": 4.684971098265896, |
| "grad_norm": 0.46066755061318243, |
| "learning_rate": 2.4087639783382133e-07, |
| "loss": 0.3318, |
| "step": 1621 |
| }, |
| { |
| "epoch": 4.687861271676301, |
| "grad_norm": 0.4253730592911683, |
| "learning_rate": 2.3649434570321984e-07, |
| "loss": 0.3318, |
| "step": 1622 |
| }, |
| { |
| "epoch": 4.690751445086705, |
| "grad_norm": 0.4170677149513208, |
| "learning_rate": 2.3215204275089388e-07, |
| "loss": 0.3237, |
| "step": 1623 |
| }, |
| { |
| "epoch": 4.69364161849711, |
| "grad_norm": 0.43218581173346604, |
| "learning_rate": 2.2784950665522443e-07, |
| "loss": 0.3507, |
| "step": 1624 |
| }, |
| { |
| "epoch": 4.696531791907514, |
| "grad_norm": 0.42990916259901707, |
| "learning_rate": 2.235867549326931e-07, |
| "loss": 0.3425, |
| "step": 1625 |
| }, |
| { |
| "epoch": 4.699421965317919, |
| "grad_norm": 0.43695936554688053, |
| "learning_rate": 2.1936380493781218e-07, |
| "loss": 0.3426, |
| "step": 1626 |
| }, |
| { |
| "epoch": 4.702312138728324, |
| "grad_norm": 0.46400517481162973, |
| "learning_rate": 2.151806738630524e-07, |
| "loss": 0.3274, |
| "step": 1627 |
| }, |
| { |
| "epoch": 4.705202312138728, |
| "grad_norm": 0.4490940433053954, |
| "learning_rate": 2.110373787387754e-07, |
| "loss": 0.335, |
| "step": 1628 |
| }, |
| { |
| "epoch": 4.708092485549133, |
| "grad_norm": 0.4252766476863728, |
| "learning_rate": 2.069339364331624e-07, |
| "loss": 0.3484, |
| "step": 1629 |
| }, |
| { |
| "epoch": 4.710982658959537, |
| "grad_norm": 0.4559621883794528, |
| "learning_rate": 2.028703636521434e-07, |
| "loss": 0.3285, |
| "step": 1630 |
| }, |
| { |
| "epoch": 4.713872832369942, |
| "grad_norm": 0.47333283731170095, |
| "learning_rate": 1.988466769393349e-07, |
| "loss": 0.3169, |
| "step": 1631 |
| }, |
| { |
| "epoch": 4.716763005780347, |
| "grad_norm": 0.44926712131599644, |
| "learning_rate": 1.948628926759666e-07, |
| "loss": 0.3577, |
| "step": 1632 |
| }, |
| { |
| "epoch": 4.719653179190751, |
| "grad_norm": 0.4406937465143532, |
| "learning_rate": 1.909190270808192e-07, |
| "loss": 0.3519, |
| "step": 1633 |
| }, |
| { |
| "epoch": 4.722543352601156, |
| "grad_norm": 0.43131627374523984, |
| "learning_rate": 1.870150962101569e-07, |
| "loss": 0.3606, |
| "step": 1634 |
| }, |
| { |
| "epoch": 4.72543352601156, |
| "grad_norm": 0.4328772609998375, |
| "learning_rate": 1.8315111595765933e-07, |
| "loss": 0.3271, |
| "step": 1635 |
| }, |
| { |
| "epoch": 4.728323699421965, |
| "grad_norm": 0.4351914089691055, |
| "learning_rate": 1.7932710205435966e-07, |
| "loss": 0.3429, |
| "step": 1636 |
| }, |
| { |
| "epoch": 4.73121387283237, |
| "grad_norm": 0.4500492231878512, |
| "learning_rate": 1.755430700685845e-07, |
| "loss": 0.3483, |
| "step": 1637 |
| }, |
| { |
| "epoch": 4.734104046242774, |
| "grad_norm": 0.4351826250212104, |
| "learning_rate": 1.7179903540587962e-07, |
| "loss": 0.3311, |
| "step": 1638 |
| }, |
| { |
| "epoch": 4.736994219653179, |
| "grad_norm": 0.4476955647395091, |
| "learning_rate": 1.680950133089565e-07, |
| "loss": 0.3359, |
| "step": 1639 |
| }, |
| { |
| "epoch": 4.7398843930635834, |
| "grad_norm": 0.4395449881892697, |
| "learning_rate": 1.6443101885762812e-07, |
| "loss": 0.3207, |
| "step": 1640 |
| }, |
| { |
| "epoch": 4.742774566473988, |
| "grad_norm": 0.425174851579623, |
| "learning_rate": 1.6080706696874893e-07, |
| "loss": 0.3344, |
| "step": 1641 |
| }, |
| { |
| "epoch": 4.745664739884393, |
| "grad_norm": 0.47236662938582863, |
| "learning_rate": 1.5722317239614592e-07, |
| "loss": 0.346, |
| "step": 1642 |
| }, |
| { |
| "epoch": 4.7485549132947975, |
| "grad_norm": 0.421518858724491, |
| "learning_rate": 1.5367934973056997e-07, |
| "loss": 0.3268, |
| "step": 1643 |
| }, |
| { |
| "epoch": 4.7514450867052025, |
| "grad_norm": 0.4184742989579567, |
| "learning_rate": 1.5017561339963015e-07, |
| "loss": 0.3537, |
| "step": 1644 |
| }, |
| { |
| "epoch": 4.754335260115607, |
| "grad_norm": 0.428901797065803, |
| "learning_rate": 1.4671197766773616e-07, |
| "loss": 0.3217, |
| "step": 1645 |
| }, |
| { |
| "epoch": 4.757225433526012, |
| "grad_norm": 0.3907897216934688, |
| "learning_rate": 1.432884566360393e-07, |
| "loss": 0.3367, |
| "step": 1646 |
| }, |
| { |
| "epoch": 4.7601156069364166, |
| "grad_norm": 0.4292623463312309, |
| "learning_rate": 1.3990506424237382e-07, |
| "loss": 0.3533, |
| "step": 1647 |
| }, |
| { |
| "epoch": 4.763005780346821, |
| "grad_norm": 0.44620500922357464, |
| "learning_rate": 1.3656181426120907e-07, |
| "loss": 0.3254, |
| "step": 1648 |
| }, |
| { |
| "epoch": 4.765895953757226, |
| "grad_norm": 0.45625749916755926, |
| "learning_rate": 1.3325872030357955e-07, |
| "loss": 0.3317, |
| "step": 1649 |
| }, |
| { |
| "epoch": 4.76878612716763, |
| "grad_norm": 0.47956732191703283, |
| "learning_rate": 1.2999579581703948e-07, |
| "loss": 0.3295, |
| "step": 1650 |
| }, |
| { |
| "epoch": 4.771676300578035, |
| "grad_norm": 0.4548429868833743, |
| "learning_rate": 1.2677305408560602e-07, |
| "loss": 0.3312, |
| "step": 1651 |
| }, |
| { |
| "epoch": 4.77456647398844, |
| "grad_norm": 0.43813428920113245, |
| "learning_rate": 1.2359050822970287e-07, |
| "loss": 0.348, |
| "step": 1652 |
| }, |
| { |
| "epoch": 4.777456647398844, |
| "grad_norm": 0.4354750741426635, |
| "learning_rate": 1.2044817120610896e-07, |
| "loss": 0.3516, |
| "step": 1653 |
| }, |
| { |
| "epoch": 4.780346820809249, |
| "grad_norm": 0.4304180249939501, |
| "learning_rate": 1.1734605580790426e-07, |
| "loss": 0.3552, |
| "step": 1654 |
| }, |
| { |
| "epoch": 4.783236994219653, |
| "grad_norm": 0.46194955068474436, |
| "learning_rate": 1.1428417466442077e-07, |
| "loss": 0.3351, |
| "step": 1655 |
| }, |
| { |
| "epoch": 4.786127167630058, |
| "grad_norm": 0.44976136379540815, |
| "learning_rate": 1.1126254024118488e-07, |
| "loss": 0.3256, |
| "step": 1656 |
| }, |
| { |
| "epoch": 4.789017341040463, |
| "grad_norm": 0.45376970823270474, |
| "learning_rate": 1.0828116483987405e-07, |
| "loss": 0.3103, |
| "step": 1657 |
| }, |
| { |
| "epoch": 4.791907514450867, |
| "grad_norm": 0.4252964208492344, |
| "learning_rate": 1.053400605982613e-07, |
| "loss": 0.3421, |
| "step": 1658 |
| }, |
| { |
| "epoch": 4.794797687861272, |
| "grad_norm": 0.4575525290978499, |
| "learning_rate": 1.0243923949016966e-07, |
| "loss": 0.3318, |
| "step": 1659 |
| }, |
| { |
| "epoch": 4.797687861271676, |
| "grad_norm": 0.4315729396314116, |
| "learning_rate": 9.957871332541891e-08, |
| "loss": 0.3333, |
| "step": 1660 |
| }, |
| { |
| "epoch": 4.800578034682081, |
| "grad_norm": 0.4632976454740385, |
| "learning_rate": 9.67584937497823e-08, |
| "loss": 0.3298, |
| "step": 1661 |
| }, |
| { |
| "epoch": 4.803468208092486, |
| "grad_norm": 0.4499425222875539, |
| "learning_rate": 9.397859224493656e-08, |
| "loss": 0.3576, |
| "step": 1662 |
| }, |
| { |
| "epoch": 4.80635838150289, |
| "grad_norm": 0.4396266921318992, |
| "learning_rate": 9.123902012841301e-08, |
| "loss": 0.3612, |
| "step": 1663 |
| }, |
| { |
| "epoch": 4.809248554913295, |
| "grad_norm": 0.44847121001326007, |
| "learning_rate": 8.853978855355883e-08, |
| "loss": 0.3393, |
| "step": 1664 |
| }, |
| { |
| "epoch": 4.812138728323699, |
| "grad_norm": 0.4644459108259271, |
| "learning_rate": 8.588090850948028e-08, |
| "loss": 0.3246, |
| "step": 1665 |
| }, |
| { |
| "epoch": 4.815028901734104, |
| "grad_norm": 0.4268024970743814, |
| "learning_rate": 8.326239082101173e-08, |
| "loss": 0.3414, |
| "step": 1666 |
| }, |
| { |
| "epoch": 4.817919075144509, |
| "grad_norm": 0.43070189303356615, |
| "learning_rate": 8.0684246148659e-08, |
| "loss": 0.3478, |
| "step": 1667 |
| }, |
| { |
| "epoch": 4.820809248554913, |
| "grad_norm": 0.421476341135529, |
| "learning_rate": 7.81464849885627e-08, |
| "loss": 0.357, |
| "step": 1668 |
| }, |
| { |
| "epoch": 4.823699421965318, |
| "grad_norm": 0.43518704104148476, |
| "learning_rate": 7.564911767245609e-08, |
| "loss": 0.345, |
| "step": 1669 |
| }, |
| { |
| "epoch": 4.826589595375722, |
| "grad_norm": 0.43385274927828715, |
| "learning_rate": 7.31921543676184e-08, |
| "loss": 0.3447, |
| "step": 1670 |
| }, |
| { |
| "epoch": 4.829479768786127, |
| "grad_norm": 0.45581456572701706, |
| "learning_rate": 7.077560507683712e-08, |
| "loss": 0.3302, |
| "step": 1671 |
| }, |
| { |
| "epoch": 4.832369942196532, |
| "grad_norm": 0.4283824645917809, |
| "learning_rate": 6.83994796383669e-08, |
| "loss": 0.3684, |
| "step": 1672 |
| }, |
| { |
| "epoch": 4.835260115606936, |
| "grad_norm": 0.43036551648477195, |
| "learning_rate": 6.60637877258874e-08, |
| "loss": 0.3647, |
| "step": 1673 |
| }, |
| { |
| "epoch": 4.838150289017341, |
| "grad_norm": 0.42690403732855864, |
| "learning_rate": 6.376853884846656e-08, |
| "loss": 0.3455, |
| "step": 1674 |
| }, |
| { |
| "epoch": 4.841040462427745, |
| "grad_norm": 0.4219758733193144, |
| "learning_rate": 6.151374235051966e-08, |
| "loss": 0.3672, |
| "step": 1675 |
| }, |
| { |
| "epoch": 4.84393063583815, |
| "grad_norm": 0.4379144455753565, |
| "learning_rate": 5.929940741177476e-08, |
| "loss": 0.3383, |
| "step": 1676 |
| }, |
| { |
| "epoch": 4.846820809248555, |
| "grad_norm": 0.4434027052467159, |
| "learning_rate": 5.7125543047228395e-08, |
| "loss": 0.321, |
| "step": 1677 |
| }, |
| { |
| "epoch": 4.8497109826589595, |
| "grad_norm": 0.44390976754330297, |
| "learning_rate": 5.4992158107116664e-08, |
| "loss": 0.3346, |
| "step": 1678 |
| }, |
| { |
| "epoch": 4.8526011560693645, |
| "grad_norm": 0.4330405056189658, |
| "learning_rate": 5.289926127687639e-08, |
| "loss": 0.3314, |
| "step": 1679 |
| }, |
| { |
| "epoch": 4.855491329479769, |
| "grad_norm": 0.45680791949365546, |
| "learning_rate": 5.084686107710512e-08, |
| "loss": 0.3207, |
| "step": 1680 |
| }, |
| { |
| "epoch": 4.858381502890174, |
| "grad_norm": 0.4448930539952292, |
| "learning_rate": 4.8834965863536755e-08, |
| "loss": 0.3454, |
| "step": 1681 |
| }, |
| { |
| "epoch": 4.861271676300578, |
| "grad_norm": 0.42026937097621103, |
| "learning_rate": 4.686358382699485e-08, |
| "loss": 0.3387, |
| "step": 1682 |
| }, |
| { |
| "epoch": 4.864161849710983, |
| "grad_norm": 0.4507339735950156, |
| "learning_rate": 4.493272299337048e-08, |
| "loss": 0.3391, |
| "step": 1683 |
| }, |
| { |
| "epoch": 4.867052023121388, |
| "grad_norm": 0.45447507822713396, |
| "learning_rate": 4.3042391223582226e-08, |
| "loss": 0.3041, |
| "step": 1684 |
| }, |
| { |
| "epoch": 4.869942196531792, |
| "grad_norm": 0.4432495734282729, |
| "learning_rate": 4.119259621354843e-08, |
| "loss": 0.3549, |
| "step": 1685 |
| }, |
| { |
| "epoch": 4.872832369942197, |
| "grad_norm": 0.4034187281207412, |
| "learning_rate": 3.938334549415168e-08, |
| "loss": 0.3489, |
| "step": 1686 |
| }, |
| { |
| "epoch": 4.875722543352601, |
| "grad_norm": 0.44807856188638867, |
| "learning_rate": 3.761464643121548e-08, |
| "loss": 0.3437, |
| "step": 1687 |
| }, |
| { |
| "epoch": 4.878612716763006, |
| "grad_norm": 0.42048520977505255, |
| "learning_rate": 3.5886506225463194e-08, |
| "loss": 0.3457, |
| "step": 1688 |
| }, |
| { |
| "epoch": 4.881502890173411, |
| "grad_norm": 0.43027452694512436, |
| "learning_rate": 3.419893191250023e-08, |
| "loss": 0.3163, |
| "step": 1689 |
| }, |
| { |
| "epoch": 4.884393063583815, |
| "grad_norm": 0.4308993314733086, |
| "learning_rate": 3.255193036277637e-08, |
| "loss": 0.3437, |
| "step": 1690 |
| }, |
| { |
| "epoch": 4.88728323699422, |
| "grad_norm": 0.431450612571325, |
| "learning_rate": 3.094550828156573e-08, |
| "loss": 0.3375, |
| "step": 1691 |
| }, |
| { |
| "epoch": 4.890173410404624, |
| "grad_norm": 0.4431635701323724, |
| "learning_rate": 2.937967220893123e-08, |
| "loss": 0.3318, |
| "step": 1692 |
| }, |
| { |
| "epoch": 4.893063583815029, |
| "grad_norm": 0.4330022821758298, |
| "learning_rate": 2.7854428519703546e-08, |
| "loss": 0.3431, |
| "step": 1693 |
| }, |
| { |
| "epoch": 4.895953757225434, |
| "grad_norm": 0.42453430258622377, |
| "learning_rate": 2.636978342345553e-08, |
| "loss": 0.3743, |
| "step": 1694 |
| }, |
| { |
| "epoch": 4.898843930635838, |
| "grad_norm": 0.44637278693106885, |
| "learning_rate": 2.4925742964471145e-08, |
| "loss": 0.3201, |
| "step": 1695 |
| }, |
| { |
| "epoch": 4.901734104046243, |
| "grad_norm": 0.43885017321147307, |
| "learning_rate": 2.3522313021728805e-08, |
| "loss": 0.3423, |
| "step": 1696 |
| }, |
| { |
| "epoch": 4.904624277456647, |
| "grad_norm": 0.43250678490930095, |
| "learning_rate": 2.215949930887029e-08, |
| "loss": 0.3514, |
| "step": 1697 |
| }, |
| { |
| "epoch": 4.907514450867052, |
| "grad_norm": 0.4196196401753552, |
| "learning_rate": 2.083730737418299e-08, |
| "loss": 0.3452, |
| "step": 1698 |
| }, |
| { |
| "epoch": 4.910404624277456, |
| "grad_norm": 0.4542871643674, |
| "learning_rate": 1.9555742600573247e-08, |
| "loss": 0.3408, |
| "step": 1699 |
| }, |
| { |
| "epoch": 4.913294797687861, |
| "grad_norm": 0.43417937342131485, |
| "learning_rate": 1.8314810205547485e-08, |
| "loss": 0.3175, |
| "step": 1700 |
| }, |
| { |
| "epoch": 4.916184971098266, |
| "grad_norm": 0.42866176133661305, |
| "learning_rate": 1.7114515241188902e-08, |
| "loss": 0.3496, |
| "step": 1701 |
| }, |
| { |
| "epoch": 4.91907514450867, |
| "grad_norm": 0.4331937602055982, |
| "learning_rate": 1.5954862594136367e-08, |
| "loss": 0.3539, |
| "step": 1702 |
| }, |
| { |
| "epoch": 4.921965317919075, |
| "grad_norm": 0.4546919927327096, |
| "learning_rate": 1.4835856985568887e-08, |
| "loss": 0.3437, |
| "step": 1703 |
| }, |
| { |
| "epoch": 4.924855491329479, |
| "grad_norm": 0.44616273282584096, |
| "learning_rate": 1.3757502971178948e-08, |
| "loss": 0.3352, |
| "step": 1704 |
| }, |
| { |
| "epoch": 4.927745664739884, |
| "grad_norm": 0.43042187879856225, |
| "learning_rate": 1.2719804941163649e-08, |
| "loss": 0.3277, |
| "step": 1705 |
| }, |
| { |
| "epoch": 4.930635838150289, |
| "grad_norm": 0.42419151713724373, |
| "learning_rate": 1.1722767120196932e-08, |
| "loss": 0.3515, |
| "step": 1706 |
| }, |
| { |
| "epoch": 4.933526011560693, |
| "grad_norm": 0.4330173444705608, |
| "learning_rate": 1.0766393567418487e-08, |
| "loss": 0.365, |
| "step": 1707 |
| }, |
| { |
| "epoch": 4.936416184971098, |
| "grad_norm": 0.4393369170187094, |
| "learning_rate": 9.85068817641599e-09, |
| "loss": 0.3393, |
| "step": 1708 |
| }, |
| { |
| "epoch": 4.9393063583815024, |
| "grad_norm": 0.4237121649080187, |
| "learning_rate": 8.975654675208445e-09, |
| "loss": 0.3511, |
| "step": 1709 |
| }, |
| { |
| "epoch": 4.942196531791907, |
| "grad_norm": 0.45994595122972776, |
| "learning_rate": 8.141296626231754e-09, |
| "loss": 0.3504, |
| "step": 1710 |
| }, |
| { |
| "epoch": 4.945086705202312, |
| "grad_norm": 0.43930885084169213, |
| "learning_rate": 7.347617426325393e-09, |
| "loss": 0.3481, |
| "step": 1711 |
| }, |
| { |
| "epoch": 4.9479768786127165, |
| "grad_norm": 0.4272366397651899, |
| "learning_rate": 6.5946203067135395e-09, |
| "loss": 0.3327, |
| "step": 1712 |
| }, |
| { |
| "epoch": 4.9508670520231215, |
| "grad_norm": 0.4559639493418854, |
| "learning_rate": 5.88230833299841e-09, |
| "loss": 0.3435, |
| "step": 1713 |
| }, |
| { |
| "epoch": 4.953757225433526, |
| "grad_norm": 0.47676301343666305, |
| "learning_rate": 5.210684405144717e-09, |
| "loss": 0.3166, |
| "step": 1714 |
| }, |
| { |
| "epoch": 4.956647398843931, |
| "grad_norm": 0.4231619739273129, |
| "learning_rate": 4.579751257466347e-09, |
| "loss": 0.3723, |
| "step": 1715 |
| }, |
| { |
| "epoch": 4.959537572254336, |
| "grad_norm": 0.45086590601295234, |
| "learning_rate": 3.989511458618589e-09, |
| "loss": 0.3431, |
| "step": 1716 |
| }, |
| { |
| "epoch": 4.96242774566474, |
| "grad_norm": 0.43706998519645374, |
| "learning_rate": 3.43996741158481e-09, |
| "loss": 0.3149, |
| "step": 1717 |
| }, |
| { |
| "epoch": 4.965317919075145, |
| "grad_norm": 0.42743087853849465, |
| "learning_rate": 2.9311213536686868e-09, |
| "loss": 0.3576, |
| "step": 1718 |
| }, |
| { |
| "epoch": 4.968208092485549, |
| "grad_norm": 0.4188172942844786, |
| "learning_rate": 2.4629753564842095e-09, |
| "loss": 0.3515, |
| "step": 1719 |
| }, |
| { |
| "epoch": 4.971098265895954, |
| "grad_norm": 0.4425146472622218, |
| "learning_rate": 2.0355313259468045e-09, |
| "loss": 0.336, |
| "step": 1720 |
| }, |
| { |
| "epoch": 4.973988439306359, |
| "grad_norm": 0.44582007168071364, |
| "learning_rate": 1.6487910022666698e-09, |
| "loss": 0.3322, |
| "step": 1721 |
| }, |
| { |
| "epoch": 4.976878612716763, |
| "grad_norm": 0.43565612155479977, |
| "learning_rate": 1.3027559599410044e-09, |
| "loss": 0.3699, |
| "step": 1722 |
| }, |
| { |
| "epoch": 4.979768786127168, |
| "grad_norm": 0.42224175219522175, |
| "learning_rate": 9.974276077462375e-10, |
| "loss": 0.3458, |
| "step": 1723 |
| }, |
| { |
| "epoch": 4.982658959537572, |
| "grad_norm": 0.43381161082142367, |
| "learning_rate": 7.328071887358068e-10, |
| "loss": 0.3569, |
| "step": 1724 |
| }, |
| { |
| "epoch": 4.985549132947977, |
| "grad_norm": 0.4595886172323835, |
| "learning_rate": 5.088957802323879e-10, |
| "loss": 0.3033, |
| "step": 1725 |
| }, |
| { |
| "epoch": 4.988439306358382, |
| "grad_norm": 0.45716642590282125, |
| "learning_rate": 3.256942938234531e-10, |
| "loss": 0.3148, |
| "step": 1726 |
| }, |
| { |
| "epoch": 4.991329479768786, |
| "grad_norm": 0.4488664794487837, |
| "learning_rate": 1.8320347536016082e-10, |
| "loss": 0.3591, |
| "step": 1727 |
| }, |
| { |
| "epoch": 4.994219653179191, |
| "grad_norm": 0.40480634643061625, |
| "learning_rate": 8.14239049484744e-11, |
| "loss": 0.3753, |
| "step": 1728 |
| }, |
| { |
| "epoch": 4.997109826589595, |
| "grad_norm": 0.4362498372754465, |
| "learning_rate": 2.035599695582313e-11, |
| "loss": 0.3142, |
| "step": 1729 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.43942713509202896, |
| "learning_rate": 0.0, |
| "loss": 0.3327, |
| "step": 1730 |
| }, |
| { |
| "epoch": 5.0, |
| "step": 1730, |
| "total_flos": 5.076921768434729e+18, |
| "train_loss": 0.5051262063442628, |
| "train_runtime": 40991.9822, |
| "train_samples_per_second": 5.397, |
| "train_steps_per_second": 0.042 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1730, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.076921768434729e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |