| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9996064541519087, | |
| "eval_steps": 500, | |
| "global_step": 1270, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0007870916961826052, | |
| "grad_norm": 4.882866791973475, | |
| "learning_rate": 7.8125e-08, | |
| "loss": 0.357, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0015741833923652105, | |
| "grad_norm": 4.89981767485179, | |
| "learning_rate": 1.5625e-07, | |
| "loss": 0.3398, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0023612750885478157, | |
| "grad_norm": 4.908915346462736, | |
| "learning_rate": 2.3437500000000003e-07, | |
| "loss": 0.3326, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.003148366784730421, | |
| "grad_norm": 4.731649267914947, | |
| "learning_rate": 3.125e-07, | |
| "loss": 0.342, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.003935458480913027, | |
| "grad_norm": 4.769633474207938, | |
| "learning_rate": 3.90625e-07, | |
| "loss": 0.3431, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.004722550177095631, | |
| "grad_norm": 4.754569879633701, | |
| "learning_rate": 4.6875000000000006e-07, | |
| "loss": 0.3369, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.005509641873278237, | |
| "grad_norm": 4.354074850343827, | |
| "learning_rate": 5.468750000000001e-07, | |
| "loss": 0.3416, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.006296733569460842, | |
| "grad_norm": 4.51384309102365, | |
| "learning_rate": 6.25e-07, | |
| "loss": 0.3577, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0070838252656434475, | |
| "grad_norm": 4.127868399899779, | |
| "learning_rate": 7.03125e-07, | |
| "loss": 0.3447, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.007870916961826053, | |
| "grad_norm": 4.268670776824985, | |
| "learning_rate": 7.8125e-07, | |
| "loss": 0.318, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008658008658008658, | |
| "grad_norm": 3.7083716156028674, | |
| "learning_rate": 8.59375e-07, | |
| "loss": 0.3106, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.009445100354191263, | |
| "grad_norm": 3.1545864445099263, | |
| "learning_rate": 9.375000000000001e-07, | |
| "loss": 0.3131, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.01023219205037387, | |
| "grad_norm": 3.020045714405798, | |
| "learning_rate": 1.0156250000000001e-06, | |
| "loss": 0.3215, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.011019283746556474, | |
| "grad_norm": 2.748876831681126, | |
| "learning_rate": 1.0937500000000001e-06, | |
| "loss": 0.3072, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.011806375442739079, | |
| "grad_norm": 2.2307366833759485, | |
| "learning_rate": 1.1718750000000001e-06, | |
| "loss": 0.2922, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.012593467138921684, | |
| "grad_norm": 2.219422516987874, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.2842, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.01338055883510429, | |
| "grad_norm": 2.7073031779339973, | |
| "learning_rate": 1.328125e-06, | |
| "loss": 0.2674, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.014167650531286895, | |
| "grad_norm": 2.873035911017537, | |
| "learning_rate": 1.40625e-06, | |
| "loss": 0.294, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0149547422274695, | |
| "grad_norm": 2.119880363778339, | |
| "learning_rate": 1.484375e-06, | |
| "loss": 0.2693, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.015741833923652106, | |
| "grad_norm": 1.7740660860958901, | |
| "learning_rate": 1.5625e-06, | |
| "loss": 0.2607, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01652892561983471, | |
| "grad_norm": 1.654838099179133, | |
| "learning_rate": 1.640625e-06, | |
| "loss": 0.2539, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.017316017316017316, | |
| "grad_norm": 2.1067372096520884, | |
| "learning_rate": 1.71875e-06, | |
| "loss": 0.274, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.01810310901219992, | |
| "grad_norm": 2.227492365997846, | |
| "learning_rate": 1.796875e-06, | |
| "loss": 0.2667, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.018890200708382526, | |
| "grad_norm": 1.9818482942437547, | |
| "learning_rate": 1.8750000000000003e-06, | |
| "loss": 0.2507, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.01967729240456513, | |
| "grad_norm": 1.9916019664977938, | |
| "learning_rate": 1.953125e-06, | |
| "loss": 0.2302, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.02046438410074774, | |
| "grad_norm": 2.0987871479467533, | |
| "learning_rate": 2.0312500000000002e-06, | |
| "loss": 0.2563, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.021251475796930343, | |
| "grad_norm": 1.7851505967742112, | |
| "learning_rate": 2.109375e-06, | |
| "loss": 0.2432, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.02203856749311295, | |
| "grad_norm": 1.6067598902195293, | |
| "learning_rate": 2.1875000000000002e-06, | |
| "loss": 0.2466, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.022825659189295553, | |
| "grad_norm": 1.436243142469347, | |
| "learning_rate": 2.265625e-06, | |
| "loss": 0.2486, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.023612750885478158, | |
| "grad_norm": 1.631080710695958, | |
| "learning_rate": 2.3437500000000002e-06, | |
| "loss": 0.2692, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.024399842581660763, | |
| "grad_norm": 1.42554302342302, | |
| "learning_rate": 2.421875e-06, | |
| "loss": 0.2374, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.025186934277843367, | |
| "grad_norm": 1.479794666013743, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.238, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.025974025974025976, | |
| "grad_norm": 1.3857185652178832, | |
| "learning_rate": 2.5781250000000004e-06, | |
| "loss": 0.2366, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.02676111767020858, | |
| "grad_norm": 1.335993998237778, | |
| "learning_rate": 2.65625e-06, | |
| "loss": 0.2251, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.027548209366391185, | |
| "grad_norm": 1.5950255189913525, | |
| "learning_rate": 2.7343750000000004e-06, | |
| "loss": 0.2506, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.02833530106257379, | |
| "grad_norm": 1.3773024411686232, | |
| "learning_rate": 2.8125e-06, | |
| "loss": 0.2107, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.029122392758756395, | |
| "grad_norm": 1.391558709917223, | |
| "learning_rate": 2.8906250000000004e-06, | |
| "loss": 0.236, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.029909484454939, | |
| "grad_norm": 1.4317153691023394, | |
| "learning_rate": 2.96875e-06, | |
| "loss": 0.2369, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.030696576151121605, | |
| "grad_norm": 1.5982850208202695, | |
| "learning_rate": 3.0468750000000004e-06, | |
| "loss": 0.2135, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.03148366784730421, | |
| "grad_norm": 1.2463748947443163, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.2109, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.032270759543486814, | |
| "grad_norm": 1.489195263138514, | |
| "learning_rate": 3.2031250000000004e-06, | |
| "loss": 0.2254, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.03305785123966942, | |
| "grad_norm": 1.1952984228039816, | |
| "learning_rate": 3.28125e-06, | |
| "loss": 0.2124, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.033844942935852024, | |
| "grad_norm": 1.3331698750786545, | |
| "learning_rate": 3.3593750000000003e-06, | |
| "loss": 0.2192, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.03463203463203463, | |
| "grad_norm": 1.3944936006633961, | |
| "learning_rate": 3.4375e-06, | |
| "loss": 0.2024, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.03541912632821724, | |
| "grad_norm": 1.3992213437238004, | |
| "learning_rate": 3.5156250000000003e-06, | |
| "loss": 0.2274, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03620621802439984, | |
| "grad_norm": 1.3664016160053327, | |
| "learning_rate": 3.59375e-06, | |
| "loss": 0.2152, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.03699330972058245, | |
| "grad_norm": 1.4891884814728509, | |
| "learning_rate": 3.6718750000000003e-06, | |
| "loss": 0.2292, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.03778040141676505, | |
| "grad_norm": 1.3270512221194979, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.1997, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.03856749311294766, | |
| "grad_norm": 1.4002885202427642, | |
| "learning_rate": 3.828125000000001e-06, | |
| "loss": 0.2049, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.03935458480913026, | |
| "grad_norm": 1.3129491965923514, | |
| "learning_rate": 3.90625e-06, | |
| "loss": 0.2108, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04014167650531287, | |
| "grad_norm": 1.5308223134960726, | |
| "learning_rate": 3.984375e-06, | |
| "loss": 0.2185, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.04092876820149548, | |
| "grad_norm": 1.4637864330743848, | |
| "learning_rate": 4.0625000000000005e-06, | |
| "loss": 0.1959, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.04171585989767808, | |
| "grad_norm": 1.3727993193826824, | |
| "learning_rate": 4.140625000000001e-06, | |
| "loss": 0.2079, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.04250295159386069, | |
| "grad_norm": 1.324719262777532, | |
| "learning_rate": 4.21875e-06, | |
| "loss": 0.1986, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.04329004329004329, | |
| "grad_norm": 1.5169702547195179, | |
| "learning_rate": 4.296875e-06, | |
| "loss": 0.2233, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.0440771349862259, | |
| "grad_norm": 1.2762861570952524, | |
| "learning_rate": 4.3750000000000005e-06, | |
| "loss": 0.1887, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.0448642266824085, | |
| "grad_norm": 1.3276694806352698, | |
| "learning_rate": 4.453125000000001e-06, | |
| "loss": 0.2039, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.045651318378591106, | |
| "grad_norm": 1.3146250055299598, | |
| "learning_rate": 4.53125e-06, | |
| "loss": 0.2142, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.046438410074773714, | |
| "grad_norm": 1.333854988794211, | |
| "learning_rate": 4.609375e-06, | |
| "loss": 0.1999, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.047225501770956316, | |
| "grad_norm": 1.3581177171903593, | |
| "learning_rate": 4.6875000000000004e-06, | |
| "loss": 0.1957, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.048012593467138924, | |
| "grad_norm": 1.3130565355707189, | |
| "learning_rate": 4.765625000000001e-06, | |
| "loss": 0.2122, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.048799685163321525, | |
| "grad_norm": 1.4142816132337854, | |
| "learning_rate": 4.84375e-06, | |
| "loss": 0.2147, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.049586776859504134, | |
| "grad_norm": 1.357898232243354, | |
| "learning_rate": 4.921875e-06, | |
| "loss": 0.1877, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.050373868555686735, | |
| "grad_norm": 1.4153342064426397, | |
| "learning_rate": 5e-06, | |
| "loss": 0.1945, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.05116096025186934, | |
| "grad_norm": 1.4495342715013748, | |
| "learning_rate": 4.999991517675219e-06, | |
| "loss": 0.1939, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.05194805194805195, | |
| "grad_norm": 1.1539274129121713, | |
| "learning_rate": 4.999966070758437e-06, | |
| "loss": 0.2003, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.05273514364423455, | |
| "grad_norm": 1.3379283904444008, | |
| "learning_rate": 4.999923659422332e-06, | |
| "loss": 0.2007, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.05352223534041716, | |
| "grad_norm": 1.3492954613335875, | |
| "learning_rate": 4.999864283954702e-06, | |
| "loss": 0.1989, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.05430932703659976, | |
| "grad_norm": 1.1801853129144864, | |
| "learning_rate": 4.99978794475846e-06, | |
| "loss": 0.2114, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.05509641873278237, | |
| "grad_norm": 1.2068999367428581, | |
| "learning_rate": 4.999694642351633e-06, | |
| "loss": 0.2033, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.05588351042896497, | |
| "grad_norm": 1.2287271472480104, | |
| "learning_rate": 4.999584377367359e-06, | |
| "loss": 0.1895, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.05667060212514758, | |
| "grad_norm": 1.3129837217534652, | |
| "learning_rate": 4.99945715055388e-06, | |
| "loss": 0.1905, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.05745769382133018, | |
| "grad_norm": 1.1734967025843308, | |
| "learning_rate": 4.99931296277454e-06, | |
| "loss": 0.213, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.05824478551751279, | |
| "grad_norm": 1.3738466570011791, | |
| "learning_rate": 4.999151815007776e-06, | |
| "loss": 0.2214, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0590318772136954, | |
| "grad_norm": 1.273179655688277, | |
| "learning_rate": 4.9989737083471165e-06, | |
| "loss": 0.1894, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.059818968909878, | |
| "grad_norm": 1.0843431120214646, | |
| "learning_rate": 4.998778644001165e-06, | |
| "loss": 0.1967, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.06060606060606061, | |
| "grad_norm": 1.4896402431576707, | |
| "learning_rate": 4.998566623293603e-06, | |
| "loss": 0.1752, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.06139315230224321, | |
| "grad_norm": 1.3405458603738243, | |
| "learning_rate": 4.9983376476631725e-06, | |
| "loss": 0.1998, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.06218024399842582, | |
| "grad_norm": 1.3641086369593634, | |
| "learning_rate": 4.998091718663671e-06, | |
| "loss": 0.2047, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.06296733569460843, | |
| "grad_norm": 1.3391162585136267, | |
| "learning_rate": 4.997828837963937e-06, | |
| "loss": 0.181, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06375442739079103, | |
| "grad_norm": 1.1899411991269295, | |
| "learning_rate": 4.997549007347843e-06, | |
| "loss": 0.1946, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.06454151908697363, | |
| "grad_norm": 1.3917818646896112, | |
| "learning_rate": 4.997252228714279e-06, | |
| "loss": 0.1919, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.06532861078315624, | |
| "grad_norm": 1.2543099071691322, | |
| "learning_rate": 4.996938504077145e-06, | |
| "loss": 0.1948, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.06611570247933884, | |
| "grad_norm": 1.3941008619735185, | |
| "learning_rate": 4.99660783556533e-06, | |
| "loss": 0.1861, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.06690279417552145, | |
| "grad_norm": 1.1765528133487257, | |
| "learning_rate": 4.9962602254227075e-06, | |
| "loss": 0.1817, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.06768988587170405, | |
| "grad_norm": 1.223066746932356, | |
| "learning_rate": 4.995895676008109e-06, | |
| "loss": 0.1934, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.06847697756788666, | |
| "grad_norm": 1.3140944559909808, | |
| "learning_rate": 4.995514189795316e-06, | |
| "loss": 0.197, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.06926406926406926, | |
| "grad_norm": 1.1819977914205286, | |
| "learning_rate": 4.99511576937304e-06, | |
| "loss": 0.1972, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.07005116096025187, | |
| "grad_norm": 1.3152579578345207, | |
| "learning_rate": 4.994700417444907e-06, | |
| "loss": 0.207, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.07083825265643448, | |
| "grad_norm": 1.2064669225701854, | |
| "learning_rate": 4.994268136829438e-06, | |
| "loss": 0.1953, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07162534435261708, | |
| "grad_norm": 1.1619755664518439, | |
| "learning_rate": 4.993818930460026e-06, | |
| "loss": 0.1982, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.07241243604879968, | |
| "grad_norm": 1.1792837872493809, | |
| "learning_rate": 4.993352801384924e-06, | |
| "loss": 0.1886, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.07319952774498228, | |
| "grad_norm": 1.097328306217708, | |
| "learning_rate": 4.992869752767218e-06, | |
| "loss": 0.1673, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.0739866194411649, | |
| "grad_norm": 1.2788239338552108, | |
| "learning_rate": 4.992369787884809e-06, | |
| "loss": 0.1972, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.0747737111373475, | |
| "grad_norm": 1.1905278770669998, | |
| "learning_rate": 4.991852910130388e-06, | |
| "loss": 0.1872, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.0755608028335301, | |
| "grad_norm": 1.2133270115400816, | |
| "learning_rate": 4.9913191230114154e-06, | |
| "loss": 0.1748, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.07634789452971272, | |
| "grad_norm": 1.2840440499091732, | |
| "learning_rate": 4.990768430150096e-06, | |
| "loss": 0.1942, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.07713498622589532, | |
| "grad_norm": 1.5346248945491554, | |
| "learning_rate": 4.990200835283353e-06, | |
| "loss": 0.1861, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.07792207792207792, | |
| "grad_norm": 1.1936205681426777, | |
| "learning_rate": 4.989616342262807e-06, | |
| "loss": 0.1975, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.07870916961826052, | |
| "grad_norm": 1.2662437794316659, | |
| "learning_rate": 4.989014955054746e-06, | |
| "loss": 0.1853, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07949626131444314, | |
| "grad_norm": 1.116915072535967, | |
| "learning_rate": 4.988396677740097e-06, | |
| "loss": 0.1738, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.08028335301062574, | |
| "grad_norm": 1.1577366023558335, | |
| "learning_rate": 4.9877615145144055e-06, | |
| "loss": 0.2045, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.08107044470680834, | |
| "grad_norm": 1.1022178093801993, | |
| "learning_rate": 4.9871094696878e-06, | |
| "loss": 0.1814, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.08185753640299095, | |
| "grad_norm": 1.218327314143879, | |
| "learning_rate": 4.986440547684963e-06, | |
| "loss": 0.1822, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.08264462809917356, | |
| "grad_norm": 1.0747362510591434, | |
| "learning_rate": 4.985754753045108e-06, | |
| "loss": 0.1639, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.08343171979535616, | |
| "grad_norm": 1.236686976609853, | |
| "learning_rate": 4.9850520904219406e-06, | |
| "loss": 0.1773, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.08421881149153876, | |
| "grad_norm": 1.2843110878866029, | |
| "learning_rate": 4.98433256458363e-06, | |
| "loss": 0.1931, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.08500590318772137, | |
| "grad_norm": 1.2556201190754803, | |
| "learning_rate": 4.983596180412779e-06, | |
| "loss": 0.1891, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.08579299488390398, | |
| "grad_norm": 1.1736861180333642, | |
| "learning_rate": 4.982842942906386e-06, | |
| "loss": 0.1932, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.08658008658008658, | |
| "grad_norm": 1.250703274500956, | |
| "learning_rate": 4.982072857175816e-06, | |
| "loss": 0.1979, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.08736717827626919, | |
| "grad_norm": 1.111910462348759, | |
| "learning_rate": 4.981285928446762e-06, | |
| "loss": 0.1729, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.0881542699724518, | |
| "grad_norm": 1.2267625409230847, | |
| "learning_rate": 4.980482162059214e-06, | |
| "loss": 0.1993, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.0889413616686344, | |
| "grad_norm": 1.350342930816002, | |
| "learning_rate": 4.979661563467415e-06, | |
| "loss": 0.1914, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.089728453364817, | |
| "grad_norm": 0.9837790085016399, | |
| "learning_rate": 4.978824138239835e-06, | |
| "loss": 0.1852, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.09051554506099961, | |
| "grad_norm": 1.1412715811918805, | |
| "learning_rate": 4.977969892059123e-06, | |
| "loss": 0.1791, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.09130263675718221, | |
| "grad_norm": 1.091735318231847, | |
| "learning_rate": 4.977098830722074e-06, | |
| "loss": 0.1879, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.09208972845336481, | |
| "grad_norm": 1.1356995797773966, | |
| "learning_rate": 4.976210960139587e-06, | |
| "loss": 0.1942, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.09287682014954743, | |
| "grad_norm": 1.197221158258512, | |
| "learning_rate": 4.975306286336628e-06, | |
| "loss": 0.1822, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.09366391184573003, | |
| "grad_norm": 1.1622435205009634, | |
| "learning_rate": 4.974384815452187e-06, | |
| "loss": 0.1938, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.09445100354191263, | |
| "grad_norm": 1.1980574826361372, | |
| "learning_rate": 4.9734465537392365e-06, | |
| "loss": 0.1703, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 1.090793092501407, | |
| "learning_rate": 4.972491507564688e-06, | |
| "loss": 0.1681, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.09602518693427785, | |
| "grad_norm": 1.2120296842604672, | |
| "learning_rate": 4.9715196834093525e-06, | |
| "loss": 0.1562, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.09681227863046045, | |
| "grad_norm": 1.1420618659168036, | |
| "learning_rate": 4.97053108786789e-06, | |
| "loss": 0.1812, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.09759937032664305, | |
| "grad_norm": 1.145370838994205, | |
| "learning_rate": 4.969525727648774e-06, | |
| "loss": 0.1873, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.09838646202282567, | |
| "grad_norm": 1.1676600414602372, | |
| "learning_rate": 4.9685036095742365e-06, | |
| "loss": 0.1972, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.09917355371900827, | |
| "grad_norm": 1.204479600477317, | |
| "learning_rate": 4.967464740580228e-06, | |
| "loss": 0.1904, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.09996064541519087, | |
| "grad_norm": 1.119994012971968, | |
| "learning_rate": 4.9664091277163664e-06, | |
| "loss": 0.1851, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.10074773711137347, | |
| "grad_norm": 1.1043684264734095, | |
| "learning_rate": 4.9653367781458946e-06, | |
| "loss": 0.1926, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.10153482880755609, | |
| "grad_norm": 1.127680976136701, | |
| "learning_rate": 4.964247699145626e-06, | |
| "loss": 0.1886, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.10232192050373869, | |
| "grad_norm": 1.1813875803533898, | |
| "learning_rate": 4.963141898105898e-06, | |
| "loss": 0.1858, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10310901219992129, | |
| "grad_norm": 1.1497128287458092, | |
| "learning_rate": 4.962019382530521e-06, | |
| "loss": 0.1724, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1038961038961039, | |
| "grad_norm": 1.223498886081565, | |
| "learning_rate": 4.960880160036728e-06, | |
| "loss": 0.194, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.1046831955922865, | |
| "grad_norm": 1.1861652899170938, | |
| "learning_rate": 4.959724238355124e-06, | |
| "loss": 0.1841, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.1054702872884691, | |
| "grad_norm": 1.0805114288365025, | |
| "learning_rate": 4.958551625329631e-06, | |
| "loss": 0.1646, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.10625737898465171, | |
| "grad_norm": 1.29808710670669, | |
| "learning_rate": 4.957362328917437e-06, | |
| "loss": 0.1833, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.10704447068083432, | |
| "grad_norm": 1.2660501691777906, | |
| "learning_rate": 4.95615635718894e-06, | |
| "loss": 0.1753, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.10783156237701692, | |
| "grad_norm": 1.1429230314494303, | |
| "learning_rate": 4.954933718327697e-06, | |
| "loss": 0.1734, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.10861865407319952, | |
| "grad_norm": 1.114357361335831, | |
| "learning_rate": 4.953694420630361e-06, | |
| "loss": 0.1925, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.10940574576938213, | |
| "grad_norm": 1.1238119767186239, | |
| "learning_rate": 4.952438472506636e-06, | |
| "loss": 0.1805, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.11019283746556474, | |
| "grad_norm": 1.1524735878912507, | |
| "learning_rate": 4.951165882479206e-06, | |
| "loss": 0.1783, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11097992916174734, | |
| "grad_norm": 1.0546198047284017, | |
| "learning_rate": 4.949876659183693e-06, | |
| "loss": 0.1745, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.11176702085792994, | |
| "grad_norm": 1.0925714956018635, | |
| "learning_rate": 4.94857081136858e-06, | |
| "loss": 0.1763, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.11255411255411256, | |
| "grad_norm": 1.1039385653204372, | |
| "learning_rate": 4.947248347895172e-06, | |
| "loss": 0.1777, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.11334120425029516, | |
| "grad_norm": 1.145622347104172, | |
| "learning_rate": 4.945909277737519e-06, | |
| "loss": 0.1804, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.11412829594647776, | |
| "grad_norm": 1.0810330697861197, | |
| "learning_rate": 4.944553609982363e-06, | |
| "loss": 0.18, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.11491538764266036, | |
| "grad_norm": 1.079722871077113, | |
| "learning_rate": 4.943181353829077e-06, | |
| "loss": 0.1805, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.11570247933884298, | |
| "grad_norm": 1.2122723148500483, | |
| "learning_rate": 4.941792518589596e-06, | |
| "loss": 0.2113, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.11648957103502558, | |
| "grad_norm": 1.1619622709214918, | |
| "learning_rate": 4.940387113688364e-06, | |
| "loss": 0.1714, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.11727666273120818, | |
| "grad_norm": 1.0508760593348456, | |
| "learning_rate": 4.93896514866226e-06, | |
| "loss": 0.1625, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.1180637544273908, | |
| "grad_norm": 1.0710088382142664, | |
| "learning_rate": 4.93752663316054e-06, | |
| "loss": 0.1778, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1188508461235734, | |
| "grad_norm": 1.0503531295721205, | |
| "learning_rate": 4.936071576944769e-06, | |
| "loss": 0.1726, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.119637937819756, | |
| "grad_norm": 1.0686610020146463, | |
| "learning_rate": 4.934599989888753e-06, | |
| "loss": 0.1769, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.1204250295159386, | |
| "grad_norm": 1.072378297090023, | |
| "learning_rate": 4.933111881978478e-06, | |
| "loss": 0.1866, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.12121212121212122, | |
| "grad_norm": 1.2495883030259693, | |
| "learning_rate": 4.931607263312033e-06, | |
| "loss": 0.1998, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.12199921290830382, | |
| "grad_norm": 1.109893027407933, | |
| "learning_rate": 4.93008614409955e-06, | |
| "loss": 0.1805, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.12278630460448642, | |
| "grad_norm": 1.1570851370725408, | |
| "learning_rate": 4.928548534663133e-06, | |
| "loss": 0.1725, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.12357339630066903, | |
| "grad_norm": 1.1758781032456742, | |
| "learning_rate": 4.9269944454367815e-06, | |
| "loss": 0.176, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.12436048799685163, | |
| "grad_norm": 1.1408455648753233, | |
| "learning_rate": 4.925423886966328e-06, | |
| "loss": 0.1848, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.12514757969303425, | |
| "grad_norm": 1.1318514267380126, | |
| "learning_rate": 4.923836869909363e-06, | |
| "loss": 0.1764, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.12593467138921685, | |
| "grad_norm": 1.1451300788977063, | |
| "learning_rate": 4.9222334050351595e-06, | |
| "loss": 0.1756, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.12672176308539945, | |
| "grad_norm": 1.1117305593028235, | |
| "learning_rate": 4.920613503224608e-06, | |
| "loss": 0.1797, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.12750885478158205, | |
| "grad_norm": 1.1301581138966732, | |
| "learning_rate": 4.9189771754701335e-06, | |
| "loss": 0.1675, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.12829594647776466, | |
| "grad_norm": 1.0326917294149387, | |
| "learning_rate": 4.917324432875627e-06, | |
| "loss": 0.1784, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.12908303817394726, | |
| "grad_norm": 1.1983588521884831, | |
| "learning_rate": 4.915655286656368e-06, | |
| "loss": 0.1966, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.12987012987012986, | |
| "grad_norm": 1.0140424703790007, | |
| "learning_rate": 4.9139697481389505e-06, | |
| "loss": 0.1744, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.1306572215663125, | |
| "grad_norm": 1.223539092779737, | |
| "learning_rate": 4.9122678287612e-06, | |
| "loss": 0.1831, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.1314443132624951, | |
| "grad_norm": 1.0918972348910556, | |
| "learning_rate": 4.910549540072104e-06, | |
| "loss": 0.1843, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.1322314049586777, | |
| "grad_norm": 1.1292739304249166, | |
| "learning_rate": 4.908814893731728e-06, | |
| "loss": 0.1552, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1330184966548603, | |
| "grad_norm": 1.1923518362383727, | |
| "learning_rate": 4.9070639015111406e-06, | |
| "loss": 0.1895, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.1338055883510429, | |
| "grad_norm": 1.083542335588892, | |
| "learning_rate": 4.905296575292329e-06, | |
| "loss": 0.1745, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1345926800472255, | |
| "grad_norm": 1.2673623015109376, | |
| "learning_rate": 4.90351292706812e-06, | |
| "loss": 0.1726, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.1353797717434081, | |
| "grad_norm": 1.1129476624507257, | |
| "learning_rate": 4.901712968942101e-06, | |
| "loss": 0.1706, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.13616686343959072, | |
| "grad_norm": 1.1735922432656085, | |
| "learning_rate": 4.899896713128536e-06, | |
| "loss": 0.1741, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.13695395513577333, | |
| "grad_norm": 1.2331570034422519, | |
| "learning_rate": 4.898064171952281e-06, | |
| "loss": 0.1946, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.13774104683195593, | |
| "grad_norm": 1.2376618802061816, | |
| "learning_rate": 4.896215357848706e-06, | |
| "loss": 0.1715, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.13852813852813853, | |
| "grad_norm": 1.0860947256302276, | |
| "learning_rate": 4.894350283363603e-06, | |
| "loss": 0.1664, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.13931523022432113, | |
| "grad_norm": 1.1284792933006988, | |
| "learning_rate": 4.892468961153105e-06, | |
| "loss": 0.1721, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.14010232192050373, | |
| "grad_norm": 1.1811695933066144, | |
| "learning_rate": 4.8905714039836026e-06, | |
| "loss": 0.1768, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.14088941361668633, | |
| "grad_norm": 1.1690172197627666, | |
| "learning_rate": 4.888657624731652e-06, | |
| "loss": 0.1784, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.14167650531286896, | |
| "grad_norm": 1.2215187765329307, | |
| "learning_rate": 4.88672763638389e-06, | |
| "loss": 0.1762, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.14246359700905156, | |
| "grad_norm": 1.1657625904368065, | |
| "learning_rate": 4.884781452036948e-06, | |
| "loss": 0.1754, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.14325068870523416, | |
| "grad_norm": 1.0812019740421663, | |
| "learning_rate": 4.88281908489736e-06, | |
| "loss": 0.1745, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.14403778040141677, | |
| "grad_norm": 1.1662972444477193, | |
| "learning_rate": 4.880840548281475e-06, | |
| "loss": 0.1844, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.14482487209759937, | |
| "grad_norm": 1.1318261660435303, | |
| "learning_rate": 4.878845855615364e-06, | |
| "loss": 0.177, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.14561196379378197, | |
| "grad_norm": 1.0454173174935852, | |
| "learning_rate": 4.876835020434733e-06, | |
| "loss": 0.1726, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.14639905548996457, | |
| "grad_norm": 1.1649728572384528, | |
| "learning_rate": 4.874808056384826e-06, | |
| "loss": 0.1829, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.1471861471861472, | |
| "grad_norm": 0.9809711097737751, | |
| "learning_rate": 4.8727649772203375e-06, | |
| "loss": 0.1626, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.1479732388823298, | |
| "grad_norm": 1.0024677570018588, | |
| "learning_rate": 4.8707057968053175e-06, | |
| "loss": 0.1564, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.1487603305785124, | |
| "grad_norm": 1.0801740218719516, | |
| "learning_rate": 4.868630529113075e-06, | |
| "loss": 0.1571, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.149547422274695, | |
| "grad_norm": 1.0633734918657578, | |
| "learning_rate": 4.866539188226086e-06, | |
| "loss": 0.1558, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.1503345139708776, | |
| "grad_norm": 1.1110942685300096, | |
| "learning_rate": 4.864431788335895e-06, | |
| "loss": 0.1739, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.1511216056670602, | |
| "grad_norm": 1.088865739839623, | |
| "learning_rate": 4.862308343743024e-06, | |
| "loss": 0.1705, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.1519086973632428, | |
| "grad_norm": 1.158763785538179, | |
| "learning_rate": 4.86016886885687e-06, | |
| "loss": 0.1754, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.15269578905942544, | |
| "grad_norm": 1.0665033787081621, | |
| "learning_rate": 4.858013378195609e-06, | |
| "loss": 0.1814, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.15348288075560804, | |
| "grad_norm": 1.0347506383595513, | |
| "learning_rate": 4.855841886386099e-06, | |
| "loss": 0.1659, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.15426997245179064, | |
| "grad_norm": 1.3652087096932124, | |
| "learning_rate": 4.8536544081637785e-06, | |
| "loss": 0.1693, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.15505706414797324, | |
| "grad_norm": 1.1571980267809596, | |
| "learning_rate": 4.8514509583725685e-06, | |
| "loss": 0.1735, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.15584415584415584, | |
| "grad_norm": 1.1126611625924816, | |
| "learning_rate": 4.849231551964771e-06, | |
| "loss": 0.1878, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.15663124754033844, | |
| "grad_norm": 1.0666827506948415, | |
| "learning_rate": 4.846996204000967e-06, | |
| "loss": 0.1686, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.15741833923652104, | |
| "grad_norm": 1.1408187983192677, | |
| "learning_rate": 4.844744929649912e-06, | |
| "loss": 0.1785, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15820543093270367, | |
| "grad_norm": 1.1050850982745672, | |
| "learning_rate": 4.842477744188441e-06, | |
| "loss": 0.1663, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.15899252262888627, | |
| "grad_norm": 1.0153624350350885, | |
| "learning_rate": 4.840194663001354e-06, | |
| "loss": 0.1755, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.15977961432506887, | |
| "grad_norm": 1.0251264155888737, | |
| "learning_rate": 4.837895701581322e-06, | |
| "loss": 0.1537, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.16056670602125148, | |
| "grad_norm": 1.0673153393456505, | |
| "learning_rate": 4.835580875528776e-06, | |
| "loss": 0.1633, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.16135379771743408, | |
| "grad_norm": 1.0273828987011315, | |
| "learning_rate": 4.833250200551798e-06, | |
| "loss": 0.1746, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.16214088941361668, | |
| "grad_norm": 1.0964068866663357, | |
| "learning_rate": 4.830903692466023e-06, | |
| "loss": 0.1674, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.16292798110979928, | |
| "grad_norm": 1.1142080493277295, | |
| "learning_rate": 4.828541367194527e-06, | |
| "loss": 0.1828, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.1637150728059819, | |
| "grad_norm": 1.0617790409690397, | |
| "learning_rate": 4.826163240767717e-06, | |
| "loss": 0.1676, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.1645021645021645, | |
| "grad_norm": 1.2859855971245049, | |
| "learning_rate": 4.8237693293232256e-06, | |
| "loss": 0.1942, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.1652892561983471, | |
| "grad_norm": 1.000840540957111, | |
| "learning_rate": 4.821359649105801e-06, | |
| "loss": 0.1686, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1660763478945297, | |
| "grad_norm": 1.049595380158752, | |
| "learning_rate": 4.818934216467195e-06, | |
| "loss": 0.1696, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.16686343959071231, | |
| "grad_norm": 1.0218031530162965, | |
| "learning_rate": 4.816493047866053e-06, | |
| "loss": 0.1653, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.16765053128689492, | |
| "grad_norm": 1.0715206508098112, | |
| "learning_rate": 4.8140361598678034e-06, | |
| "loss": 0.1735, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.16843762298307752, | |
| "grad_norm": 1.093161202120212, | |
| "learning_rate": 4.811563569144544e-06, | |
| "loss": 0.1698, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.16922471467926015, | |
| "grad_norm": 1.078958887147992, | |
| "learning_rate": 4.809075292474929e-06, | |
| "loss": 0.1671, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.17001180637544275, | |
| "grad_norm": 1.1213364259804648, | |
| "learning_rate": 4.806571346744053e-06, | |
| "loss": 0.1798, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.17079889807162535, | |
| "grad_norm": 1.102076724202232, | |
| "learning_rate": 4.804051748943343e-06, | |
| "loss": 0.1845, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.17158598976780795, | |
| "grad_norm": 1.1103430873095865, | |
| "learning_rate": 4.801516516170437e-06, | |
| "loss": 0.177, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.17237308146399055, | |
| "grad_norm": 1.228711789290585, | |
| "learning_rate": 4.798965665629068e-06, | |
| "loss": 0.1636, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.17316017316017315, | |
| "grad_norm": 1.1219855198900837, | |
| "learning_rate": 4.796399214628949e-06, | |
| "loss": 0.1802, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.17394726485635575, | |
| "grad_norm": 1.1846418832749555, | |
| "learning_rate": 4.7938171805856596e-06, | |
| "loss": 0.1717, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.17473435655253838, | |
| "grad_norm": 1.0672386815907553, | |
| "learning_rate": 4.791219581020518e-06, | |
| "loss": 0.1663, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.17552144824872098, | |
| "grad_norm": 1.0398388591323704, | |
| "learning_rate": 4.788606433560473e-06, | |
| "loss": 0.1593, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.1763085399449036, | |
| "grad_norm": 1.1402534682960337, | |
| "learning_rate": 4.785977755937977e-06, | |
| "loss": 0.1876, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.1770956316410862, | |
| "grad_norm": 1.1260603683997887, | |
| "learning_rate": 4.783333565990865e-06, | |
| "loss": 0.172, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.1778827233372688, | |
| "grad_norm": 1.062290554096683, | |
| "learning_rate": 4.780673881662242e-06, | |
| "loss": 0.1709, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.1786698150334514, | |
| "grad_norm": 1.0650729387286197, | |
| "learning_rate": 4.777998721000353e-06, | |
| "loss": 0.1614, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.179456906729634, | |
| "grad_norm": 1.0365419204779498, | |
| "learning_rate": 4.775308102158461e-06, | |
| "loss": 0.1605, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.18024399842581662, | |
| "grad_norm": 1.1444494636007958, | |
| "learning_rate": 4.772602043394731e-06, | |
| "loss": 0.1867, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.18103109012199922, | |
| "grad_norm": 1.1053808430839196, | |
| "learning_rate": 4.769880563072097e-06, | |
| "loss": 0.1627, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 1.0763207393373317, | |
| "learning_rate": 4.767143679658143e-06, | |
| "loss": 0.1703, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.18260527351436442, | |
| "grad_norm": 1.1302336936081483, | |
| "learning_rate": 4.764391411724977e-06, | |
| "loss": 0.1697, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.18339236521054703, | |
| "grad_norm": 1.059980991296742, | |
| "learning_rate": 4.7616237779491026e-06, | |
| "loss": 0.1658, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.18417945690672963, | |
| "grad_norm": 1.0952807461742509, | |
| "learning_rate": 4.758840797111295e-06, | |
| "loss": 0.1833, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.18496654860291223, | |
| "grad_norm": 1.0263555674269131, | |
| "learning_rate": 4.756042488096472e-06, | |
| "loss": 0.1732, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.18575364029909486, | |
| "grad_norm": 1.088261327233659, | |
| "learning_rate": 4.753228869893566e-06, | |
| "loss": 0.1646, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.18654073199527746, | |
| "grad_norm": 1.0644325115229099, | |
| "learning_rate": 4.750399961595395e-06, | |
| "loss": 0.1576, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.18732782369146006, | |
| "grad_norm": 0.9952967090049917, | |
| "learning_rate": 4.747555782398537e-06, | |
| "loss": 0.1598, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.18811491538764266, | |
| "grad_norm": 1.0300249714418026, | |
| "learning_rate": 4.7446963516031904e-06, | |
| "loss": 0.1883, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.18890200708382526, | |
| "grad_norm": 1.0275382678304879, | |
| "learning_rate": 4.741821688613054e-06, | |
| "loss": 0.1704, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.18968909878000786, | |
| "grad_norm": 1.0616733952682182, | |
| "learning_rate": 4.738931812935186e-06, | |
| "loss": 0.1907, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 1.0103628221724312, | |
| "learning_rate": 4.736026744179878e-06, | |
| "loss": 0.1556, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.1912632821723731, | |
| "grad_norm": 1.0535669337117792, | |
| "learning_rate": 4.73310650206052e-06, | |
| "loss": 0.1809, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.1920503738685557, | |
| "grad_norm": 1.0554553563643476, | |
| "learning_rate": 4.730171106393466e-06, | |
| "loss": 0.1675, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.1928374655647383, | |
| "grad_norm": 0.9417424551436594, | |
| "learning_rate": 4.7272205770979e-06, | |
| "loss": 0.1438, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.1936245572609209, | |
| "grad_norm": 1.1154888244817747, | |
| "learning_rate": 4.724254934195698e-06, | |
| "loss": 0.1765, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.1944116489571035, | |
| "grad_norm": 1.1742188581521773, | |
| "learning_rate": 4.721274197811298e-06, | |
| "loss": 0.1711, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.1951987406532861, | |
| "grad_norm": 1.057640390538921, | |
| "learning_rate": 4.71827838817156e-06, | |
| "loss": 0.1678, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.1959858323494687, | |
| "grad_norm": 1.022336905613029, | |
| "learning_rate": 4.715267525605627e-06, | |
| "loss": 0.1552, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.19677292404565133, | |
| "grad_norm": 1.181830506383501, | |
| "learning_rate": 4.712241630544792e-06, | |
| "loss": 0.1765, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.19756001574183393, | |
| "grad_norm": 1.1571296526874602, | |
| "learning_rate": 4.709200723522353e-06, | |
| "loss": 0.1758, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.19834710743801653, | |
| "grad_norm": 1.082056647389628, | |
| "learning_rate": 4.706144825173481e-06, | |
| "loss": 0.1638, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.19913419913419914, | |
| "grad_norm": 1.0648327864294944, | |
| "learning_rate": 4.703073956235071e-06, | |
| "loss": 0.1747, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.19992129083038174, | |
| "grad_norm": 1.1273460773870558, | |
| "learning_rate": 4.6999881375456116e-06, | |
| "loss": 0.1767, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.20070838252656434, | |
| "grad_norm": 1.0782376126285664, | |
| "learning_rate": 4.696887390045035e-06, | |
| "loss": 0.169, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.20149547422274694, | |
| "grad_norm": 1.043398805036875, | |
| "learning_rate": 4.693771734774578e-06, | |
| "loss": 0.1774, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.20228256591892957, | |
| "grad_norm": 1.067320862475683, | |
| "learning_rate": 4.690641192876643e-06, | |
| "loss": 0.1607, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.20306965761511217, | |
| "grad_norm": 1.1843944163744937, | |
| "learning_rate": 4.687495785594646e-06, | |
| "loss": 0.1633, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.20385674931129477, | |
| "grad_norm": 1.0931562611646284, | |
| "learning_rate": 4.684335534272881e-06, | |
| "loss": 0.1687, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.20464384100747737, | |
| "grad_norm": 1.1204870400497637, | |
| "learning_rate": 4.68116046035637e-06, | |
| "loss": 0.1639, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.20543093270365997, | |
| "grad_norm": 1.2082791443480092, | |
| "learning_rate": 4.6779705853907205e-06, | |
| "loss": 0.1683, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.20621802439984258, | |
| "grad_norm": 1.0646518318192153, | |
| "learning_rate": 4.674765931021976e-06, | |
| "loss": 0.1611, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.20700511609602518, | |
| "grad_norm": 1.1268791395123645, | |
| "learning_rate": 4.671546518996473e-06, | |
| "loss": 0.1553, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.2077922077922078, | |
| "grad_norm": 1.0048534045343525, | |
| "learning_rate": 4.668312371160688e-06, | |
| "loss": 0.1571, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.2085792994883904, | |
| "grad_norm": 1.0052893495164037, | |
| "learning_rate": 4.665063509461098e-06, | |
| "loss": 0.1679, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.209366391184573, | |
| "grad_norm": 0.9679422598052939, | |
| "learning_rate": 4.661799955944019e-06, | |
| "loss": 0.1556, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2101534828807556, | |
| "grad_norm": 1.0487292157874373, | |
| "learning_rate": 4.658521732755471e-06, | |
| "loss": 0.183, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.2109405745769382, | |
| "grad_norm": 1.0878511570789495, | |
| "learning_rate": 4.655228862141017e-06, | |
| "loss": 0.1762, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.2117276662731208, | |
| "grad_norm": 0.9275216638767947, | |
| "learning_rate": 4.651921366445613e-06, | |
| "loss": 0.1483, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.21251475796930341, | |
| "grad_norm": 1.0291173856009612, | |
| "learning_rate": 4.648599268113464e-06, | |
| "loss": 0.1657, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.21330184966548604, | |
| "grad_norm": 0.9814951923963836, | |
| "learning_rate": 4.645262589687861e-06, | |
| "loss": 0.1737, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.21408894136166864, | |
| "grad_norm": 0.9574503772544043, | |
| "learning_rate": 4.641911353811038e-06, | |
| "loss": 0.1638, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.21487603305785125, | |
| "grad_norm": 0.9684496500051328, | |
| "learning_rate": 4.638545583224011e-06, | |
| "loss": 0.1649, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.21566312475403385, | |
| "grad_norm": 1.0314787067828541, | |
| "learning_rate": 4.635165300766428e-06, | |
| "loss": 0.1699, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.21645021645021645, | |
| "grad_norm": 1.0287264097080684, | |
| "learning_rate": 4.63177052937641e-06, | |
| "loss": 0.1602, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.21723730814639905, | |
| "grad_norm": 1.1114659065296888, | |
| "learning_rate": 4.628361292090403e-06, | |
| "loss": 0.1783, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.21802439984258165, | |
| "grad_norm": 1.0298788844790752, | |
| "learning_rate": 4.6249376120430115e-06, | |
| "loss": 0.1678, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.21881149153876425, | |
| "grad_norm": 1.0099420287081406, | |
| "learning_rate": 4.621499512466847e-06, | |
| "loss": 0.1672, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.21959858323494688, | |
| "grad_norm": 0.9892117727941296, | |
| "learning_rate": 4.618047016692374e-06, | |
| "loss": 0.1663, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.22038567493112948, | |
| "grad_norm": 0.9289360238552057, | |
| "learning_rate": 4.614580148147744e-06, | |
| "loss": 0.1563, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.22117276662731208, | |
| "grad_norm": 0.9603340451855991, | |
| "learning_rate": 4.61109893035864e-06, | |
| "loss": 0.1561, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.22195985832349469, | |
| "grad_norm": 1.0449269347565262, | |
| "learning_rate": 4.607603386948119e-06, | |
| "loss": 0.165, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.2227469500196773, | |
| "grad_norm": 0.990226128298578, | |
| "learning_rate": 4.604093541636448e-06, | |
| "loss": 0.1704, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.2235340417158599, | |
| "grad_norm": 1.031797952555019, | |
| "learning_rate": 4.600569418240946e-06, | |
| "loss": 0.1677, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.2243211334120425, | |
| "grad_norm": 1.0506428763431659, | |
| "learning_rate": 4.597031040675819e-06, | |
| "loss": 0.1802, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.22510822510822512, | |
| "grad_norm": 0.980146123693525, | |
| "learning_rate": 4.593478432952002e-06, | |
| "loss": 0.1656, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.22589531680440772, | |
| "grad_norm": 1.0058178922055618, | |
| "learning_rate": 4.589911619176993e-06, | |
| "loss": 0.1601, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.22668240850059032, | |
| "grad_norm": 1.1532752501338874, | |
| "learning_rate": 4.586330623554691e-06, | |
| "loss": 0.1707, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.22746950019677292, | |
| "grad_norm": 0.9925104519486038, | |
| "learning_rate": 4.582735470385229e-06, | |
| "loss": 0.1712, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.22825659189295552, | |
| "grad_norm": 1.1312813134045174, | |
| "learning_rate": 4.579126184064814e-06, | |
| "loss": 0.1607, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.22904368358913813, | |
| "grad_norm": 1.2454875330122912, | |
| "learning_rate": 4.575502789085555e-06, | |
| "loss": 0.1656, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.22983077528532073, | |
| "grad_norm": 0.9825183210915687, | |
| "learning_rate": 4.571865310035304e-06, | |
| "loss": 0.1589, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.23061786698150336, | |
| "grad_norm": 1.0887371255437703, | |
| "learning_rate": 4.568213771597484e-06, | |
| "loss": 0.1585, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.23140495867768596, | |
| "grad_norm": 1.0975434488519114, | |
| "learning_rate": 4.564548198550922e-06, | |
| "loss": 0.1435, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.23219205037386856, | |
| "grad_norm": 1.0593259383463134, | |
| "learning_rate": 4.5608686157696844e-06, | |
| "loss": 0.167, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.23297914207005116, | |
| "grad_norm": 1.1536948102561841, | |
| "learning_rate": 4.557175048222901e-06, | |
| "loss": 0.1621, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.23376623376623376, | |
| "grad_norm": 1.1369019291567328, | |
| "learning_rate": 4.5534675209746076e-06, | |
| "loss": 0.1654, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.23455332546241636, | |
| "grad_norm": 0.9585590140764199, | |
| "learning_rate": 4.5497460591835615e-06, | |
| "loss": 0.148, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.23534041715859896, | |
| "grad_norm": 1.2337420030262027, | |
| "learning_rate": 4.546010688103082e-06, | |
| "loss": 0.1599, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.2361275088547816, | |
| "grad_norm": 1.1641848426244756, | |
| "learning_rate": 4.542261433080874e-06, | |
| "loss": 0.1641, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.2369146005509642, | |
| "grad_norm": 0.9715264597638171, | |
| "learning_rate": 4.538498319558854e-06, | |
| "loss": 0.1604, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.2377016922471468, | |
| "grad_norm": 1.2043568904283137, | |
| "learning_rate": 4.534721373072986e-06, | |
| "loss": 0.1561, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.2384887839433294, | |
| "grad_norm": 1.087701432883666, | |
| "learning_rate": 4.530930619253097e-06, | |
| "loss": 0.1573, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.239275875639512, | |
| "grad_norm": 1.0432095830081018, | |
| "learning_rate": 4.527126083822713e-06, | |
| "loss": 0.1576, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.2400629673356946, | |
| "grad_norm": 1.1515388977241858, | |
| "learning_rate": 4.523307792598877e-06, | |
| "loss": 0.1836, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.2408500590318772, | |
| "grad_norm": 1.1236907370811289, | |
| "learning_rate": 4.519475771491978e-06, | |
| "loss": 0.1654, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.24163715072805983, | |
| "grad_norm": 1.0492490872684002, | |
| "learning_rate": 4.515630046505575e-06, | |
| "loss": 0.1604, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.24242424242424243, | |
| "grad_norm": 1.0414505694174347, | |
| "learning_rate": 4.511770643736217e-06, | |
| "loss": 0.1587, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.24321133412042503, | |
| "grad_norm": 0.9963463131455829, | |
| "learning_rate": 4.507897589373272e-06, | |
| "loss": 0.1536, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.24399842581660763, | |
| "grad_norm": 0.9437267739253786, | |
| "learning_rate": 4.504010909698744e-06, | |
| "loss": 0.1573, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.24478551751279023, | |
| "grad_norm": 0.9915304289222059, | |
| "learning_rate": 4.500110631087095e-06, | |
| "loss": 0.1519, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.24557260920897284, | |
| "grad_norm": 0.9782358310573961, | |
| "learning_rate": 4.496196780005069e-06, | |
| "loss": 0.1629, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.24635970090515544, | |
| "grad_norm": 1.0770165377269398, | |
| "learning_rate": 4.492269383011512e-06, | |
| "loss": 0.1623, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.24714679260133807, | |
| "grad_norm": 1.052396599909024, | |
| "learning_rate": 4.4883284667571894e-06, | |
| "loss": 0.1533, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.24793388429752067, | |
| "grad_norm": 1.0084809840218907, | |
| "learning_rate": 4.4843740579846055e-06, | |
| "loss": 0.1512, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.24872097599370327, | |
| "grad_norm": 1.0756395659672484, | |
| "learning_rate": 4.480406183527823e-06, | |
| "loss": 0.1682, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.24950806768988587, | |
| "grad_norm": 1.095604151904482, | |
| "learning_rate": 4.476424870312286e-06, | |
| "loss": 0.1588, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.2502951593860685, | |
| "grad_norm": 1.073871876794014, | |
| "learning_rate": 4.472430145354622e-06, | |
| "loss": 0.1663, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.2510822510822511, | |
| "grad_norm": 1.00181438336178, | |
| "learning_rate": 4.46842203576248e-06, | |
| "loss": 0.1668, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.2518693427784337, | |
| "grad_norm": 1.0179064844212398, | |
| "learning_rate": 4.464400568734327e-06, | |
| "loss": 0.1618, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2526564344746163, | |
| "grad_norm": 1.1266566093245078, | |
| "learning_rate": 4.460365771559275e-06, | |
| "loss": 0.1726, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.2534435261707989, | |
| "grad_norm": 1.0831980755033608, | |
| "learning_rate": 4.456317671616892e-06, | |
| "loss": 0.1674, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.2542306178669815, | |
| "grad_norm": 0.9991360442603613, | |
| "learning_rate": 4.452256296377017e-06, | |
| "loss": 0.1534, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.2550177095631641, | |
| "grad_norm": 0.9497710360440503, | |
| "learning_rate": 4.448181673399573e-06, | |
| "loss": 0.1562, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.25580480125934674, | |
| "grad_norm": 1.1113260986403124, | |
| "learning_rate": 4.444093830334381e-06, | |
| "loss": 0.1639, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2565918929555293, | |
| "grad_norm": 1.1452949830587935, | |
| "learning_rate": 4.4399927949209685e-06, | |
| "loss": 0.1633, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.25737898465171194, | |
| "grad_norm": 1.0842379105419755, | |
| "learning_rate": 4.43587859498839e-06, | |
| "loss": 0.1754, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.2581660763478945, | |
| "grad_norm": 1.0361570331888057, | |
| "learning_rate": 4.431751258455029e-06, | |
| "loss": 0.1629, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.25895316804407714, | |
| "grad_norm": 0.9514704452172565, | |
| "learning_rate": 4.4276108133284115e-06, | |
| "loss": 0.1615, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.2597402597402597, | |
| "grad_norm": 1.0051943736689641, | |
| "learning_rate": 4.4234572877050175e-06, | |
| "loss": 0.1635, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.26052735143644234, | |
| "grad_norm": 1.061826511574687, | |
| "learning_rate": 4.419290709770091e-06, | |
| "loss": 0.1572, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.261314443132625, | |
| "grad_norm": 1.0098180333606226, | |
| "learning_rate": 4.415111107797445e-06, | |
| "loss": 0.1625, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.26210153482880755, | |
| "grad_norm": 0.9258158779374888, | |
| "learning_rate": 4.4109185101492735e-06, | |
| "loss": 0.163, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.2628886265249902, | |
| "grad_norm": 1.031959410480149, | |
| "learning_rate": 4.406712945275955e-06, | |
| "loss": 0.1601, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.26367571822117275, | |
| "grad_norm": 1.098174422684468, | |
| "learning_rate": 4.402494441715864e-06, | |
| "loss": 0.1632, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.2644628099173554, | |
| "grad_norm": 0.9325275936138202, | |
| "learning_rate": 4.398263028095175e-06, | |
| "loss": 0.1568, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.26524990161353795, | |
| "grad_norm": 0.9452361980478395, | |
| "learning_rate": 4.394018733127667e-06, | |
| "loss": 0.1514, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.2660369933097206, | |
| "grad_norm": 0.9440560796701104, | |
| "learning_rate": 4.389761585614531e-06, | |
| "loss": 0.1568, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.2668240850059032, | |
| "grad_norm": 0.9825093172685871, | |
| "learning_rate": 4.3854916144441714e-06, | |
| "loss": 0.1513, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.2676111767020858, | |
| "grad_norm": 0.9909422001877334, | |
| "learning_rate": 4.381208848592017e-06, | |
| "loss": 0.1607, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.2683982683982684, | |
| "grad_norm": 1.026772957857381, | |
| "learning_rate": 4.3769133171203146e-06, | |
| "loss": 0.1579, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.269185360094451, | |
| "grad_norm": 0.9727634660522837, | |
| "learning_rate": 4.372605049177939e-06, | |
| "loss": 0.1611, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.2699724517906336, | |
| "grad_norm": 0.9991705382361779, | |
| "learning_rate": 4.368284074000193e-06, | |
| "loss": 0.1423, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.2707595434868162, | |
| "grad_norm": 1.0413166825567135, | |
| "learning_rate": 4.363950420908608e-06, | |
| "loss": 0.1531, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.2715466351829988, | |
| "grad_norm": 1.051399331371367, | |
| "learning_rate": 4.3596041193107475e-06, | |
| "loss": 0.1537, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.27233372687918145, | |
| "grad_norm": 1.1268002118202416, | |
| "learning_rate": 4.355245198700003e-06, | |
| "loss": 0.1687, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.273120818575364, | |
| "grad_norm": 1.0579162910588005, | |
| "learning_rate": 4.3508736886554e-06, | |
| "loss": 0.1545, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.27390791027154665, | |
| "grad_norm": 1.0780531804812832, | |
| "learning_rate": 4.346489618841393e-06, | |
| "loss": 0.1478, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.2746950019677292, | |
| "grad_norm": 1.1629336261073622, | |
| "learning_rate": 4.342093019007664e-06, | |
| "loss": 0.1507, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.27548209366391185, | |
| "grad_norm": 0.9806357134318359, | |
| "learning_rate": 4.337683918988924e-06, | |
| "loss": 0.1605, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2762691853600944, | |
| "grad_norm": 1.0271547256327147, | |
| "learning_rate": 4.333262348704708e-06, | |
| "loss": 0.1544, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.27705627705627706, | |
| "grad_norm": 1.040963108089893, | |
| "learning_rate": 4.328828338159173e-06, | |
| "loss": 0.1505, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.2778433687524597, | |
| "grad_norm": 1.036202462349552, | |
| "learning_rate": 4.324381917440891e-06, | |
| "loss": 0.1558, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.27863046044864226, | |
| "grad_norm": 0.975994343559266, | |
| "learning_rate": 4.319923116722651e-06, | |
| "loss": 0.1641, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.2794175521448249, | |
| "grad_norm": 1.039409188253541, | |
| "learning_rate": 4.315451966261248e-06, | |
| "loss": 0.1549, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.28020464384100746, | |
| "grad_norm": 1.047725080130562, | |
| "learning_rate": 4.310968496397284e-06, | |
| "loss": 0.165, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.2809917355371901, | |
| "grad_norm": 1.0011313336241248, | |
| "learning_rate": 4.306472737554957e-06, | |
| "loss": 0.1456, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.28177882723337266, | |
| "grad_norm": 0.9015679935576075, | |
| "learning_rate": 4.301964720241857e-06, | |
| "loss": 0.1369, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.2825659189295553, | |
| "grad_norm": 1.049381444767021, | |
| "learning_rate": 4.297444475048755e-06, | |
| "loss": 0.1563, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.2833530106257379, | |
| "grad_norm": 1.0194195709152667, | |
| "learning_rate": 4.292912032649403e-06, | |
| "loss": 0.1649, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2841401023219205, | |
| "grad_norm": 0.957368492301693, | |
| "learning_rate": 4.2883674238003195e-06, | |
| "loss": 0.1515, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.2849271940181031, | |
| "grad_norm": 1.1143901057936236, | |
| "learning_rate": 4.2838106793405825e-06, | |
| "loss": 0.1625, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 1.0882799366794436, | |
| "learning_rate": 4.2792418301916225e-06, | |
| "loss": 0.153, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.2865013774104683, | |
| "grad_norm": 1.0192177035415801, | |
| "learning_rate": 4.274660907357009e-06, | |
| "loss": 0.1645, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.2872884691066509, | |
| "grad_norm": 1.0205240256871457, | |
| "learning_rate": 4.2700679419222415e-06, | |
| "loss": 0.1459, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.28807556080283353, | |
| "grad_norm": 1.2141057030738465, | |
| "learning_rate": 4.265462965054539e-06, | |
| "loss": 0.1597, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.28886265249901616, | |
| "grad_norm": 1.017121088131926, | |
| "learning_rate": 4.260846008002631e-06, | |
| "loss": 0.1619, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.28964974419519873, | |
| "grad_norm": 1.0877328731947116, | |
| "learning_rate": 4.25621710209654e-06, | |
| "loss": 0.1716, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.29043683589138136, | |
| "grad_norm": 1.1099554764936985, | |
| "learning_rate": 4.251576278747372e-06, | |
| "loss": 0.1599, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.29122392758756394, | |
| "grad_norm": 0.962048395782395, | |
| "learning_rate": 4.246923569447105e-06, | |
| "loss": 0.1465, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.29201101928374656, | |
| "grad_norm": 1.0782102946203345, | |
| "learning_rate": 4.24225900576837e-06, | |
| "loss": 0.1584, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.29279811097992914, | |
| "grad_norm": 1.0600722154446367, | |
| "learning_rate": 4.237582619364244e-06, | |
| "loss": 0.1518, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.29358520267611177, | |
| "grad_norm": 1.0154082912245785, | |
| "learning_rate": 4.23289444196803e-06, | |
| "loss": 0.1455, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.2943722943722944, | |
| "grad_norm": 1.1254176051245297, | |
| "learning_rate": 4.228194505393041e-06, | |
| "loss": 0.1544, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.29515938606847697, | |
| "grad_norm": 1.1003313998342341, | |
| "learning_rate": 4.22348284153239e-06, | |
| "loss": 0.1611, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2959464777646596, | |
| "grad_norm": 0.9110264218620379, | |
| "learning_rate": 4.218759482358765e-06, | |
| "loss": 0.1479, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.2967335694608422, | |
| "grad_norm": 1.0433752096490876, | |
| "learning_rate": 4.214024459924221e-06, | |
| "loss": 0.1561, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.2975206611570248, | |
| "grad_norm": 0.9985242964251728, | |
| "learning_rate": 4.209277806359956e-06, | |
| "loss": 0.1486, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.2983077528532074, | |
| "grad_norm": 0.9830203630270159, | |
| "learning_rate": 4.204519553876095e-06, | |
| "loss": 0.153, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.29909484454939, | |
| "grad_norm": 1.0623334389041004, | |
| "learning_rate": 4.199749734761473e-06, | |
| "loss": 0.1584, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.29988193624557263, | |
| "grad_norm": 1.007050119697646, | |
| "learning_rate": 4.194968381383414e-06, | |
| "loss": 0.162, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.3006690279417552, | |
| "grad_norm": 0.9212276043601202, | |
| "learning_rate": 4.1901755261875116e-06, | |
| "loss": 0.1417, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.30145611963793784, | |
| "grad_norm": 1.0195210503773229, | |
| "learning_rate": 4.18537120169741e-06, | |
| "loss": 0.1631, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.3022432113341204, | |
| "grad_norm": 0.9791393783618954, | |
| "learning_rate": 4.1805554405145805e-06, | |
| "loss": 0.151, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.30303030303030304, | |
| "grad_norm": 0.9560471554995319, | |
| "learning_rate": 4.175728275318105e-06, | |
| "loss": 0.1537, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3038173947264856, | |
| "grad_norm": 0.9732207377472094, | |
| "learning_rate": 4.170889738864448e-06, | |
| "loss": 0.1541, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.30460448642266824, | |
| "grad_norm": 1.0273971232086052, | |
| "learning_rate": 4.166039863987241e-06, | |
| "loss": 0.1623, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.30539157811885087, | |
| "grad_norm": 1.0066781633766182, | |
| "learning_rate": 4.161178683597055e-06, | |
| "loss": 0.1623, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.30617866981503344, | |
| "grad_norm": 0.9519906303887405, | |
| "learning_rate": 4.156306230681178e-06, | |
| "loss": 0.1606, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.3069657615112161, | |
| "grad_norm": 1.0274010773396909, | |
| "learning_rate": 4.151422538303393e-06, | |
| "loss": 0.1588, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.30775285320739865, | |
| "grad_norm": 1.0092975668609663, | |
| "learning_rate": 4.1465276396037516e-06, | |
| "loss": 0.1549, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.3085399449035813, | |
| "grad_norm": 0.9952464194936945, | |
| "learning_rate": 4.141621567798351e-06, | |
| "loss": 0.1468, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.30932703659976385, | |
| "grad_norm": 0.9809046515355889, | |
| "learning_rate": 4.136704356179105e-06, | |
| "loss": 0.1509, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.3101141282959465, | |
| "grad_norm": 1.0624718823483572, | |
| "learning_rate": 4.131776038113524e-06, | |
| "loss": 0.1629, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.3109012199921291, | |
| "grad_norm": 0.9361042861540975, | |
| "learning_rate": 4.126836647044484e-06, | |
| "loss": 0.1453, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3116883116883117, | |
| "grad_norm": 1.0675702598561039, | |
| "learning_rate": 4.121886216489999e-06, | |
| "loss": 0.1657, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.3124754033844943, | |
| "grad_norm": 1.0221190108601212, | |
| "learning_rate": 4.116924780042997e-06, | |
| "loss": 0.1609, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.3132624950806769, | |
| "grad_norm": 0.98812521742716, | |
| "learning_rate": 4.111952371371091e-06, | |
| "loss": 0.1488, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.3140495867768595, | |
| "grad_norm": 0.9689235987787954, | |
| "learning_rate": 4.106969024216348e-06, | |
| "loss": 0.1547, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.3148366784730421, | |
| "grad_norm": 1.0046393279094348, | |
| "learning_rate": 4.101974772395066e-06, | |
| "loss": 0.1467, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3156237701692247, | |
| "grad_norm": 0.968527185963086, | |
| "learning_rate": 4.096969649797534e-06, | |
| "loss": 0.1432, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.31641086186540734, | |
| "grad_norm": 1.0188815460176754, | |
| "learning_rate": 4.091953690387815e-06, | |
| "loss": 0.1521, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.3171979535615899, | |
| "grad_norm": 1.035965071382904, | |
| "learning_rate": 4.086926928203506e-06, | |
| "loss": 0.1575, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.31798504525777255, | |
| "grad_norm": 1.0400881738148544, | |
| "learning_rate": 4.081889397355509e-06, | |
| "loss": 0.1646, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.3187721369539551, | |
| "grad_norm": 1.0353365656909388, | |
| "learning_rate": 4.076841132027805e-06, | |
| "loss": 0.1578, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.31955922865013775, | |
| "grad_norm": 0.9785090873779988, | |
| "learning_rate": 4.071782166477213e-06, | |
| "loss": 0.1485, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.3203463203463203, | |
| "grad_norm": 1.0365440161437718, | |
| "learning_rate": 4.066712535033164e-06, | |
| "loss": 0.1644, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.32113341204250295, | |
| "grad_norm": 0.9337858697268638, | |
| "learning_rate": 4.061632272097467e-06, | |
| "loss": 0.1396, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.3219205037386856, | |
| "grad_norm": 0.9930564105014524, | |
| "learning_rate": 4.056541412144073e-06, | |
| "loss": 0.1466, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.32270759543486816, | |
| "grad_norm": 1.0123860857315623, | |
| "learning_rate": 4.051439989718845e-06, | |
| "loss": 0.1718, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3234946871310508, | |
| "grad_norm": 0.9886983463565112, | |
| "learning_rate": 4.0463280394393216e-06, | |
| "loss": 0.1465, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.32428177882723336, | |
| "grad_norm": 0.9489896550219313, | |
| "learning_rate": 4.041205595994478e-06, | |
| "loss": 0.1553, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.325068870523416, | |
| "grad_norm": 0.935055903913981, | |
| "learning_rate": 4.036072694144501e-06, | |
| "loss": 0.1486, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.32585596221959856, | |
| "grad_norm": 1.0109287737515016, | |
| "learning_rate": 4.030929368720539e-06, | |
| "loss": 0.1563, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.3266430539157812, | |
| "grad_norm": 0.9682667210224672, | |
| "learning_rate": 4.025775654624481e-06, | |
| "loss": 0.154, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.3274301456119638, | |
| "grad_norm": 0.9195115794003238, | |
| "learning_rate": 4.020611586828705e-06, | |
| "loss": 0.1433, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.3282172373081464, | |
| "grad_norm": 0.886911970381121, | |
| "learning_rate": 4.015437200375855e-06, | |
| "loss": 0.1374, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.329004329004329, | |
| "grad_norm": 1.021240159520919, | |
| "learning_rate": 4.01025253037859e-06, | |
| "loss": 0.1567, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.3297914207005116, | |
| "grad_norm": 0.9462875663398478, | |
| "learning_rate": 4.005057612019353e-06, | |
| "loss": 0.1516, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.3305785123966942, | |
| "grad_norm": 0.9850150964188347, | |
| "learning_rate": 3.9998524805501335e-06, | |
| "loss": 0.149, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3313656040928768, | |
| "grad_norm": 1.0466582919958791, | |
| "learning_rate": 3.994637171292223e-06, | |
| "loss": 0.1504, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.3321526957890594, | |
| "grad_norm": 0.9892923577104711, | |
| "learning_rate": 3.989411719635979e-06, | |
| "loss": 0.1465, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.33293978748524206, | |
| "grad_norm": 1.0797299646481497, | |
| "learning_rate": 3.984176161040585e-06, | |
| "loss": 0.1655, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.33372687918142463, | |
| "grad_norm": 1.0280855278386611, | |
| "learning_rate": 3.978930531033807e-06, | |
| "loss": 0.1614, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.33451397087760726, | |
| "grad_norm": 1.0013220452351206, | |
| "learning_rate": 3.973674865211754e-06, | |
| "loss": 0.1529, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.33530106257378983, | |
| "grad_norm": 1.0185754306460224, | |
| "learning_rate": 3.968409199238639e-06, | |
| "loss": 0.1535, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.33608815426997246, | |
| "grad_norm": 0.9680713968649773, | |
| "learning_rate": 3.963133568846533e-06, | |
| "loss": 0.1532, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.33687524596615503, | |
| "grad_norm": 1.0465737054070945, | |
| "learning_rate": 3.957848009835125e-06, | |
| "loss": 0.1557, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.33766233766233766, | |
| "grad_norm": 1.0072097384887637, | |
| "learning_rate": 3.952552558071475e-06, | |
| "loss": 0.1686, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.3384494293585203, | |
| "grad_norm": 1.0495298691679416, | |
| "learning_rate": 3.947247249489779e-06, | |
| "loss": 0.1487, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.33923652105470287, | |
| "grad_norm": 1.0214586461562896, | |
| "learning_rate": 3.941932120091116e-06, | |
| "loss": 0.1621, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.3400236127508855, | |
| "grad_norm": 1.0494096714602847, | |
| "learning_rate": 3.93660720594321e-06, | |
| "loss": 0.1598, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.34081070444706807, | |
| "grad_norm": 1.0334818385570048, | |
| "learning_rate": 3.93127254318018e-06, | |
| "loss": 0.1577, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.3415977961432507, | |
| "grad_norm": 0.9700994625756835, | |
| "learning_rate": 3.925928168002302e-06, | |
| "loss": 0.1526, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.34238488783943327, | |
| "grad_norm": 1.047736033995709, | |
| "learning_rate": 3.920574116675756e-06, | |
| "loss": 0.1581, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.3431719795356159, | |
| "grad_norm": 1.0493869403649712, | |
| "learning_rate": 3.915210425532383e-06, | |
| "loss": 0.1495, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.34395907123179853, | |
| "grad_norm": 1.010254528268069, | |
| "learning_rate": 3.90983713096944e-06, | |
| "loss": 0.1539, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.3447461629279811, | |
| "grad_norm": 0.9846398029609658, | |
| "learning_rate": 3.9044542694493515e-06, | |
| "loss": 0.1463, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.34553325462416373, | |
| "grad_norm": 1.2083136674514858, | |
| "learning_rate": 3.899061877499461e-06, | |
| "loss": 0.1601, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.3463203463203463, | |
| "grad_norm": 0.97978554217786, | |
| "learning_rate": 3.893659991711782e-06, | |
| "loss": 0.139, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.34710743801652894, | |
| "grad_norm": 1.1022405018344112, | |
| "learning_rate": 3.888248648742756e-06, | |
| "loss": 0.1617, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.3478945297127115, | |
| "grad_norm": 1.0077367730076683, | |
| "learning_rate": 3.882827885312999e-06, | |
| "loss": 0.1488, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.34868162140889414, | |
| "grad_norm": 1.0119669193080498, | |
| "learning_rate": 3.877397738207051e-06, | |
| "loss": 0.1433, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.34946871310507677, | |
| "grad_norm": 0.9336119872704435, | |
| "learning_rate": 3.8719582442731276e-06, | |
| "loss": 0.1393, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.35025580480125934, | |
| "grad_norm": 1.0144372790745282, | |
| "learning_rate": 3.866509440422873e-06, | |
| "loss": 0.1515, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.35104289649744197, | |
| "grad_norm": 1.0618851735205919, | |
| "learning_rate": 3.861051363631107e-06, | |
| "loss": 0.1403, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.35182998819362454, | |
| "grad_norm": 1.0256940692518137, | |
| "learning_rate": 3.855584050935574e-06, | |
| "loss": 0.1533, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.3526170798898072, | |
| "grad_norm": 1.004262449427633, | |
| "learning_rate": 3.85010753943669e-06, | |
| "loss": 0.1437, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.35340417158598975, | |
| "grad_norm": 0.9608822952661715, | |
| "learning_rate": 3.844621866297295e-06, | |
| "loss": 0.1374, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.3541912632821724, | |
| "grad_norm": 1.032805552257636, | |
| "learning_rate": 3.839127068742399e-06, | |
| "loss": 0.1612, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.354978354978355, | |
| "grad_norm": 1.089158815357864, | |
| "learning_rate": 3.833623184058926e-06, | |
| "loss": 0.1564, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.3557654466745376, | |
| "grad_norm": 1.0527347217082683, | |
| "learning_rate": 3.8281102495954684e-06, | |
| "loss": 0.1475, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.3565525383707202, | |
| "grad_norm": 1.012969201356965, | |
| "learning_rate": 3.8225883027620245e-06, | |
| "loss": 0.1443, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.3573396300669028, | |
| "grad_norm": 0.9952397622221426, | |
| "learning_rate": 3.817057381029752e-06, | |
| "loss": 0.1488, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.3581267217630854, | |
| "grad_norm": 0.9773911500192811, | |
| "learning_rate": 3.811517521930711e-06, | |
| "loss": 0.1419, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.358913813459268, | |
| "grad_norm": 1.04344141144674, | |
| "learning_rate": 3.805968763057609e-06, | |
| "loss": 0.1335, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.3597009051554506, | |
| "grad_norm": 0.9127224357677829, | |
| "learning_rate": 3.8004111420635453e-06, | |
| "loss": 0.1421, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.36048799685163324, | |
| "grad_norm": 0.948335811441799, | |
| "learning_rate": 3.7948446966617568e-06, | |
| "loss": 0.1545, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.3612750885478158, | |
| "grad_norm": 1.054156015531643, | |
| "learning_rate": 3.7892694646253624e-06, | |
| "loss": 0.1462, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.36206218024399844, | |
| "grad_norm": 1.0883694334017704, | |
| "learning_rate": 3.783685483787105e-06, | |
| "loss": 0.1469, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.362849271940181, | |
| "grad_norm": 1.0265972829923478, | |
| "learning_rate": 3.7780927920390965e-06, | |
| "loss": 0.1572, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 0.9351090223515385, | |
| "learning_rate": 3.772491427332557e-06, | |
| "loss": 0.1317, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.3644234553325462, | |
| "grad_norm": 0.96672130032329, | |
| "learning_rate": 3.766881427677563e-06, | |
| "loss": 0.1474, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.36521054702872885, | |
| "grad_norm": 0.9284227954997755, | |
| "learning_rate": 3.761262831142788e-06, | |
| "loss": 0.144, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.3659976387249115, | |
| "grad_norm": 1.02329013434613, | |
| "learning_rate": 3.755635675855238e-06, | |
| "loss": 0.1459, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.36678473042109405, | |
| "grad_norm": 0.9548918394606087, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.1431, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.3675718221172767, | |
| "grad_norm": 1.0029018534160843, | |
| "learning_rate": 3.744355841819983e-06, | |
| "loss": 0.1551, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.36835891381345925, | |
| "grad_norm": 1.0170466682076178, | |
| "learning_rate": 3.7387032396156497e-06, | |
| "loss": 0.1574, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.3691460055096419, | |
| "grad_norm": 0.950504547373793, | |
| "learning_rate": 3.7330422317447686e-06, | |
| "loss": 0.1413, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.36993309720582446, | |
| "grad_norm": 1.00490564394254, | |
| "learning_rate": 3.7273728566221447e-06, | |
| "loss": 0.1539, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3707201889020071, | |
| "grad_norm": 1.0241073155182219, | |
| "learning_rate": 3.721695152719364e-06, | |
| "loss": 0.1505, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.3715072805981897, | |
| "grad_norm": 1.0650129974030413, | |
| "learning_rate": 3.716009158564528e-06, | |
| "loss": 0.1517, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.3722943722943723, | |
| "grad_norm": 1.0412500508709561, | |
| "learning_rate": 3.710314912741997e-06, | |
| "loss": 0.1447, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.3730814639905549, | |
| "grad_norm": 1.0273490151395026, | |
| "learning_rate": 3.7046124538921237e-06, | |
| "loss": 0.1429, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.3738685556867375, | |
| "grad_norm": 0.9952543111661871, | |
| "learning_rate": 3.698901820710995e-06, | |
| "loss": 0.1418, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.3746556473829201, | |
| "grad_norm": 1.0824700054534682, | |
| "learning_rate": 3.693183051950168e-06, | |
| "loss": 0.1437, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.3754427390791027, | |
| "grad_norm": 1.0142752196453109, | |
| "learning_rate": 3.6874561864164056e-06, | |
| "loss": 0.1435, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.3762298307752853, | |
| "grad_norm": 0.9888106276082754, | |
| "learning_rate": 3.6817212629714135e-06, | |
| "loss": 0.1395, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.37701692247146795, | |
| "grad_norm": 0.9673698851206235, | |
| "learning_rate": 3.675978320531579e-06, | |
| "loss": 0.1425, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.3778040141676505, | |
| "grad_norm": 1.096283920214562, | |
| "learning_rate": 3.670227398067705e-06, | |
| "loss": 0.1515, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.37859110586383315, | |
| "grad_norm": 1.0303413811027284, | |
| "learning_rate": 3.664468534604745e-06, | |
| "loss": 0.1462, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.37937819756001573, | |
| "grad_norm": 0.9550517801003708, | |
| "learning_rate": 3.6587017692215387e-06, | |
| "loss": 0.1483, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.38016528925619836, | |
| "grad_norm": 1.0499765951347195, | |
| "learning_rate": 3.6529271410505483e-06, | |
| "loss": 0.1516, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 1.0612285971687154, | |
| "learning_rate": 3.6471446892775896e-06, | |
| "loss": 0.145, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.38173947264856356, | |
| "grad_norm": 0.9976574649139153, | |
| "learning_rate": 3.6413544531415712e-06, | |
| "loss": 0.1493, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.3825265643447462, | |
| "grad_norm": 1.011974051278155, | |
| "learning_rate": 3.635556471934224e-06, | |
| "loss": 0.1557, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.38331365604092876, | |
| "grad_norm": 1.015959048224715, | |
| "learning_rate": 3.629750784999835e-06, | |
| "loss": 0.152, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.3841007477371114, | |
| "grad_norm": 0.9638439392236781, | |
| "learning_rate": 3.623937431734982e-06, | |
| "loss": 0.1464, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.38488783943329397, | |
| "grad_norm": 0.9820530085625633, | |
| "learning_rate": 3.6181164515882663e-06, | |
| "loss": 0.1468, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.3856749311294766, | |
| "grad_norm": 0.9281524539517508, | |
| "learning_rate": 3.6122878840600417e-06, | |
| "loss": 0.1451, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.38646202282565917, | |
| "grad_norm": 1.039305922376239, | |
| "learning_rate": 3.606451768702151e-06, | |
| "loss": 0.1486, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.3872491145218418, | |
| "grad_norm": 1.026987888426606, | |
| "learning_rate": 3.600608145117656e-06, | |
| "loss": 0.1381, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.3880362062180244, | |
| "grad_norm": 1.058827889093346, | |
| "learning_rate": 3.594757052960566e-06, | |
| "loss": 0.1555, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.388823297914207, | |
| "grad_norm": 1.0027016575115129, | |
| "learning_rate": 3.588898531935573e-06, | |
| "loss": 0.1413, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.38961038961038963, | |
| "grad_norm": 1.0766471714794614, | |
| "learning_rate": 3.583032621797778e-06, | |
| "loss": 0.1418, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.3903974813065722, | |
| "grad_norm": 1.0326313481110534, | |
| "learning_rate": 3.5771593623524263e-06, | |
| "loss": 0.1345, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.39118457300275483, | |
| "grad_norm": 0.9649958546178075, | |
| "learning_rate": 3.5712787934546336e-06, | |
| "loss": 0.1397, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.3919716646989374, | |
| "grad_norm": 1.0461258832079, | |
| "learning_rate": 3.5653909550091138e-06, | |
| "loss": 0.16, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.39275875639512003, | |
| "grad_norm": 0.9741702004779168, | |
| "learning_rate": 3.559495886969916e-06, | |
| "loss": 0.1366, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.39354584809130266, | |
| "grad_norm": 0.9875184829637668, | |
| "learning_rate": 3.553593629340144e-06, | |
| "loss": 0.1391, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.39354584809130266, | |
| "eval_loss": 0.14773064851760864, | |
| "eval_runtime": 18.0322, | |
| "eval_samples_per_second": 45.585, | |
| "eval_steps_per_second": 5.712, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.39433293978748524, | |
| "grad_norm": 0.9563741831859393, | |
| "learning_rate": 3.5476842221716915e-06, | |
| "loss": 0.1453, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.39512003148366787, | |
| "grad_norm": 0.9839000041167648, | |
| "learning_rate": 3.541767705564967e-06, | |
| "loss": 0.1509, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.39590712317985044, | |
| "grad_norm": 0.9666175985112762, | |
| "learning_rate": 3.535844119668622e-06, | |
| "loss": 0.1436, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.39669421487603307, | |
| "grad_norm": 1.0513295542177603, | |
| "learning_rate": 3.5299135046792816e-06, | |
| "loss": 0.1371, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.39748130657221564, | |
| "grad_norm": 1.0136623338528887, | |
| "learning_rate": 3.5239759008412666e-06, | |
| "loss": 0.1498, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.39826839826839827, | |
| "grad_norm": 0.9764920494655156, | |
| "learning_rate": 3.518031348446324e-06, | |
| "loss": 0.1371, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.3990554899645809, | |
| "grad_norm": 1.0113031849627157, | |
| "learning_rate": 3.5120798878333544e-06, | |
| "loss": 0.1453, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.3998425816607635, | |
| "grad_norm": 0.9947509560502654, | |
| "learning_rate": 3.506121559388135e-06, | |
| "loss": 0.1233, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.4006296733569461, | |
| "grad_norm": 1.1135464243984814, | |
| "learning_rate": 3.500156403543046e-06, | |
| "loss": 0.151, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.4014167650531287, | |
| "grad_norm": 1.0687025563863246, | |
| "learning_rate": 3.4941844607768007e-06, | |
| "loss": 0.1384, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4022038567493113, | |
| "grad_norm": 0.9654525860741724, | |
| "learning_rate": 3.488205771614164e-06, | |
| "loss": 0.1348, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.4029909484454939, | |
| "grad_norm": 1.07357744190682, | |
| "learning_rate": 3.4822203766256834e-06, | |
| "loss": 0.1412, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.4037780401416765, | |
| "grad_norm": 1.2491546536330014, | |
| "learning_rate": 3.4762283164274104e-06, | |
| "loss": 0.1523, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.40456513183785914, | |
| "grad_norm": 1.0398955239354635, | |
| "learning_rate": 3.4702296316806243e-06, | |
| "loss": 0.1507, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.4053522235340417, | |
| "grad_norm": 0.947562520308943, | |
| "learning_rate": 3.4642243630915606e-06, | |
| "loss": 0.1486, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.40613931523022434, | |
| "grad_norm": 0.9405204018759319, | |
| "learning_rate": 3.45821255141113e-06, | |
| "loss": 0.1287, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.4069264069264069, | |
| "grad_norm": 1.01025400774114, | |
| "learning_rate": 3.452194237434642e-06, | |
| "loss": 0.1349, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.40771349862258954, | |
| "grad_norm": 1.0404932578099988, | |
| "learning_rate": 3.446169462001534e-06, | |
| "loss": 0.1508, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.4085005903187721, | |
| "grad_norm": 1.029425420995215, | |
| "learning_rate": 3.4401382659950868e-06, | |
| "loss": 0.1362, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.40928768201495475, | |
| "grad_norm": 1.025768159905711, | |
| "learning_rate": 3.4341006903421493e-06, | |
| "loss": 0.1437, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.4100747737111374, | |
| "grad_norm": 0.9507044448226175, | |
| "learning_rate": 3.4280567760128658e-06, | |
| "loss": 0.1393, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.41086186540731995, | |
| "grad_norm": 1.0374082813027519, | |
| "learning_rate": 3.4220065640203916e-06, | |
| "loss": 0.16, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.4116489571035026, | |
| "grad_norm": 0.9378353888939086, | |
| "learning_rate": 3.415950095420616e-06, | |
| "loss": 0.1355, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.41243604879968515, | |
| "grad_norm": 0.924561930587711, | |
| "learning_rate": 3.4098874113118863e-06, | |
| "loss": 0.1452, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.4132231404958678, | |
| "grad_norm": 0.9505049489489825, | |
| "learning_rate": 3.403818552834727e-06, | |
| "loss": 0.1448, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.41401023219205035, | |
| "grad_norm": 0.9701870488491394, | |
| "learning_rate": 3.397743561171562e-06, | |
| "loss": 0.1341, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.414797323888233, | |
| "grad_norm": 0.9122288876708122, | |
| "learning_rate": 3.3916624775464318e-06, | |
| "loss": 0.1291, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.4155844155844156, | |
| "grad_norm": 0.985150804267496, | |
| "learning_rate": 3.385575343224718e-06, | |
| "loss": 0.141, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.4163715072805982, | |
| "grad_norm": 0.9910844190276262, | |
| "learning_rate": 3.3794821995128606e-06, | |
| "loss": 0.1473, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.4171585989767808, | |
| "grad_norm": 0.9925292173111532, | |
| "learning_rate": 3.3733830877580796e-06, | |
| "loss": 0.1492, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4179456906729634, | |
| "grad_norm": 0.9483537804421872, | |
| "learning_rate": 3.3672780493480927e-06, | |
| "loss": 0.1476, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.418732782369146, | |
| "grad_norm": 0.9716970355806354, | |
| "learning_rate": 3.3611671257108323e-06, | |
| "loss": 0.1288, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.4195198740653286, | |
| "grad_norm": 1.0525983321400059, | |
| "learning_rate": 3.3550503583141726e-06, | |
| "loss": 0.1541, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.4203069657615112, | |
| "grad_norm": 0.9476841095185634, | |
| "learning_rate": 3.3489277886656373e-06, | |
| "loss": 0.1395, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.42109405745769385, | |
| "grad_norm": 0.8883884320293254, | |
| "learning_rate": 3.342799458312127e-06, | |
| "loss": 0.1374, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.4218811491538764, | |
| "grad_norm": 1.026818858865084, | |
| "learning_rate": 3.336665408839633e-06, | |
| "loss": 0.1413, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.42266824085005905, | |
| "grad_norm": 0.9146805645048051, | |
| "learning_rate": 3.330525681872954e-06, | |
| "loss": 0.1352, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.4234553325462416, | |
| "grad_norm": 1.0439955820386841, | |
| "learning_rate": 3.3243803190754166e-06, | |
| "loss": 0.1482, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.42424242424242425, | |
| "grad_norm": 0.9964413472110166, | |
| "learning_rate": 3.3182293621485923e-06, | |
| "loss": 0.1524, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.42502951593860683, | |
| "grad_norm": 0.9626977177442709, | |
| "learning_rate": 3.312072852832012e-06, | |
| "loss": 0.1427, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.42581660763478946, | |
| "grad_norm": 0.8939878261316884, | |
| "learning_rate": 3.3059108329028845e-06, | |
| "loss": 0.1283, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.4266036993309721, | |
| "grad_norm": 1.0155176108909485, | |
| "learning_rate": 3.299743344175814e-06, | |
| "loss": 0.1434, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.42739079102715466, | |
| "grad_norm": 0.983969699589635, | |
| "learning_rate": 3.293570428502515e-06, | |
| "loss": 0.1479, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.4281778827233373, | |
| "grad_norm": 1.006021089515589, | |
| "learning_rate": 3.287392127771526e-06, | |
| "loss": 0.1386, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.42896497441951986, | |
| "grad_norm": 0.9594215523929834, | |
| "learning_rate": 3.2812084839079316e-06, | |
| "loss": 0.1326, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.4297520661157025, | |
| "grad_norm": 0.958170693889915, | |
| "learning_rate": 3.275019538873071e-06, | |
| "loss": 0.1418, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.43053915781188506, | |
| "grad_norm": 1.0256772435691563, | |
| "learning_rate": 3.268825334664259e-06, | |
| "loss": 0.1526, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.4313262495080677, | |
| "grad_norm": 1.0552921930006323, | |
| "learning_rate": 3.2626259133144955e-06, | |
| "loss": 0.1441, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.43211334120425027, | |
| "grad_norm": 1.0605556822333475, | |
| "learning_rate": 3.2564213168921867e-06, | |
| "loss": 0.1431, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.4329004329004329, | |
| "grad_norm": 0.982864648139079, | |
| "learning_rate": 3.2502115875008523e-06, | |
| "loss": 0.149, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4336875245966155, | |
| "grad_norm": 1.0049025924744737, | |
| "learning_rate": 3.2439967672788462e-06, | |
| "loss": 0.1334, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.4344746162927981, | |
| "grad_norm": 0.9263761254409442, | |
| "learning_rate": 3.2377768983990677e-06, | |
| "loss": 0.1401, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.43526170798898073, | |
| "grad_norm": 1.0390883988019344, | |
| "learning_rate": 3.2315520230686747e-06, | |
| "loss": 0.1493, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.4360487996851633, | |
| "grad_norm": 0.9913895430005143, | |
| "learning_rate": 3.2253221835287984e-06, | |
| "loss": 0.1406, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.43683589138134593, | |
| "grad_norm": 0.9801664753977715, | |
| "learning_rate": 3.2190874220542577e-06, | |
| "loss": 0.1341, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.4376229830775285, | |
| "grad_norm": 0.9519041413125566, | |
| "learning_rate": 3.2128477809532687e-06, | |
| "loss": 0.1469, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.43841007477371113, | |
| "grad_norm": 1.0289764585305627, | |
| "learning_rate": 3.2066033025671612e-06, | |
| "loss": 0.1473, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.43919716646989376, | |
| "grad_norm": 1.0005689404595521, | |
| "learning_rate": 3.200354029270091e-06, | |
| "loss": 0.1477, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.43998425816607634, | |
| "grad_norm": 1.056463362355424, | |
| "learning_rate": 3.1941000034687516e-06, | |
| "loss": 0.1488, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.44077134986225897, | |
| "grad_norm": 0.9574144607007496, | |
| "learning_rate": 3.187841267602084e-06, | |
| "loss": 0.1445, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.44155844155844154, | |
| "grad_norm": 0.9562953543913302, | |
| "learning_rate": 3.1815778641409924e-06, | |
| "loss": 0.1414, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.44234553325462417, | |
| "grad_norm": 0.9444651486667015, | |
| "learning_rate": 3.1753098355880557e-06, | |
| "loss": 0.138, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.44313262495080674, | |
| "grad_norm": 0.9465351053953429, | |
| "learning_rate": 3.169037224477236e-06, | |
| "loss": 0.1437, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.44391971664698937, | |
| "grad_norm": 1.0206836940486426, | |
| "learning_rate": 3.162760073373594e-06, | |
| "loss": 0.1411, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.444706808343172, | |
| "grad_norm": 1.0878905236564318, | |
| "learning_rate": 3.1564784248729965e-06, | |
| "loss": 0.1408, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.4454939000393546, | |
| "grad_norm": 1.0130102955883906, | |
| "learning_rate": 3.15019232160183e-06, | |
| "loss": 0.1428, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.4462809917355372, | |
| "grad_norm": 0.980086016231054, | |
| "learning_rate": 3.1439018062167092e-06, | |
| "loss": 0.143, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.4470680834317198, | |
| "grad_norm": 1.0249915137559014, | |
| "learning_rate": 3.1376069214041917e-06, | |
| "loss": 0.1471, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.4478551751279024, | |
| "grad_norm": 1.1016327132095007, | |
| "learning_rate": 3.1313077098804817e-06, | |
| "loss": 0.1606, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.448642266824085, | |
| "grad_norm": 1.0411771801722989, | |
| "learning_rate": 3.1250042143911462e-06, | |
| "loss": 0.1499, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.4494293585202676, | |
| "grad_norm": 1.0122030093902548, | |
| "learning_rate": 3.118696477710822e-06, | |
| "loss": 0.141, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.45021645021645024, | |
| "grad_norm": 1.0708872672849516, | |
| "learning_rate": 3.1123845426429265e-06, | |
| "loss": 0.128, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.4510035419126328, | |
| "grad_norm": 1.029737403462412, | |
| "learning_rate": 3.106068452019365e-06, | |
| "loss": 0.1383, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.45179063360881544, | |
| "grad_norm": 0.9988296671107193, | |
| "learning_rate": 3.099748248700245e-06, | |
| "loss": 0.1376, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.452577725304998, | |
| "grad_norm": 1.0475513726672416, | |
| "learning_rate": 3.0934239755735782e-06, | |
| "loss": 0.1355, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.45336481700118064, | |
| "grad_norm": 1.0654745191838768, | |
| "learning_rate": 3.0870956755549973e-06, | |
| "loss": 0.143, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.4541519086973632, | |
| "grad_norm": 0.9397526290083124, | |
| "learning_rate": 3.0807633915874585e-06, | |
| "loss": 0.1406, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.45493900039354584, | |
| "grad_norm": 1.052837564760308, | |
| "learning_rate": 3.0744271666409526e-06, | |
| "loss": 0.1454, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.4557260920897285, | |
| "grad_norm": 1.1289865006459998, | |
| "learning_rate": 3.0680870437122145e-06, | |
| "loss": 0.1554, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.45651318378591105, | |
| "grad_norm": 0.9614320131595296, | |
| "learning_rate": 3.0617430658244295e-06, | |
| "loss": 0.1368, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.4573002754820937, | |
| "grad_norm": 0.9849943444472453, | |
| "learning_rate": 3.0553952760269427e-06, | |
| "loss": 0.1372, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.45808736717827625, | |
| "grad_norm": 0.9938446057985301, | |
| "learning_rate": 3.0490437173949656e-06, | |
| "loss": 0.1397, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.4588744588744589, | |
| "grad_norm": 0.9430590894578916, | |
| "learning_rate": 3.0426884330292844e-06, | |
| "loss": 0.1404, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.45966155057064145, | |
| "grad_norm": 0.8998337127762756, | |
| "learning_rate": 3.0363294660559685e-06, | |
| "loss": 0.133, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.4604486422668241, | |
| "grad_norm": 0.9469777276964015, | |
| "learning_rate": 3.0299668596260755e-06, | |
| "loss": 0.1429, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.4612357339630067, | |
| "grad_norm": 0.9961208676961326, | |
| "learning_rate": 3.023600656915362e-06, | |
| "loss": 0.1381, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.4620228256591893, | |
| "grad_norm": 0.9726679702119774, | |
| "learning_rate": 3.017230901123985e-06, | |
| "loss": 0.1391, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.4628099173553719, | |
| "grad_norm": 1.016233754336966, | |
| "learning_rate": 3.0108576354762176e-06, | |
| "loss": 0.1464, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.4635970090515545, | |
| "grad_norm": 0.891890572894692, | |
| "learning_rate": 3.0044809032201448e-06, | |
| "loss": 0.1312, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.4643841007477371, | |
| "grad_norm": 0.9300922465018149, | |
| "learning_rate": 2.9981007476273787e-06, | |
| "loss": 0.1272, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.4651711924439197, | |
| "grad_norm": 1.0381540629264334, | |
| "learning_rate": 2.9917172119927607e-06, | |
| "loss": 0.1479, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.4659582841401023, | |
| "grad_norm": 1.0642195977009175, | |
| "learning_rate": 2.9853303396340695e-06, | |
| "loss": 0.1364, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.46674537583628495, | |
| "grad_norm": 0.9295272897205104, | |
| "learning_rate": 2.9789401738917244e-06, | |
| "loss": 0.1249, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.4675324675324675, | |
| "grad_norm": 1.0180029223750298, | |
| "learning_rate": 2.9725467581284944e-06, | |
| "loss": 0.1407, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.46831955922865015, | |
| "grad_norm": 1.1385262618991847, | |
| "learning_rate": 2.966150135729203e-06, | |
| "loss": 0.1502, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.4691066509248327, | |
| "grad_norm": 1.0067715931565462, | |
| "learning_rate": 2.9597503501004345e-06, | |
| "loss": 0.1286, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.46989374262101535, | |
| "grad_norm": 0.9465710841629198, | |
| "learning_rate": 2.9533474446702346e-06, | |
| "loss": 0.1358, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.4706808343171979, | |
| "grad_norm": 1.04804051578767, | |
| "learning_rate": 2.946941462887824e-06, | |
| "loss": 0.1333, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.47146792601338056, | |
| "grad_norm": 1.0917713383450702, | |
| "learning_rate": 2.940532448223296e-06, | |
| "loss": 0.1462, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.4722550177095632, | |
| "grad_norm": 0.9580513732250364, | |
| "learning_rate": 2.9341204441673267e-06, | |
| "loss": 0.1321, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.47304210940574576, | |
| "grad_norm": 0.9439921102070582, | |
| "learning_rate": 2.927705494230875e-06, | |
| "loss": 0.1441, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.4738292011019284, | |
| "grad_norm": 1.0178216949448748, | |
| "learning_rate": 2.9212876419448943e-06, | |
| "loss": 0.1405, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.47461629279811096, | |
| "grad_norm": 1.0297426762245179, | |
| "learning_rate": 2.9148669308600298e-06, | |
| "loss": 0.1392, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.4754033844942936, | |
| "grad_norm": 0.9415986568330708, | |
| "learning_rate": 2.9084434045463255e-06, | |
| "loss": 0.1282, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 1.0337230890115443, | |
| "learning_rate": 2.9020171065929327e-06, | |
| "loss": 0.1394, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.4769775678866588, | |
| "grad_norm": 1.0540052550471415, | |
| "learning_rate": 2.895588080607807e-06, | |
| "loss": 0.1472, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.4777646595828414, | |
| "grad_norm": 1.0081872244466563, | |
| "learning_rate": 2.8891563702174174e-06, | |
| "loss": 0.1372, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.478551751279024, | |
| "grad_norm": 1.0145019904402564, | |
| "learning_rate": 2.8827220190664505e-06, | |
| "loss": 0.1399, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.4793388429752066, | |
| "grad_norm": 1.0258604105718838, | |
| "learning_rate": 2.8762850708175098e-06, | |
| "loss": 0.1499, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.4801259346713892, | |
| "grad_norm": 1.0836484331180423, | |
| "learning_rate": 2.869845569150825e-06, | |
| "loss": 0.1388, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.4809130263675718, | |
| "grad_norm": 0.9946389106293178, | |
| "learning_rate": 2.863403557763951e-06, | |
| "loss": 0.1323, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.4817001180637544, | |
| "grad_norm": 0.9968164583365795, | |
| "learning_rate": 2.856959080371474e-06, | |
| "loss": 0.1402, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.48248720975993703, | |
| "grad_norm": 1.0526146596249044, | |
| "learning_rate": 2.8505121807047155e-06, | |
| "loss": 0.1342, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.48327430145611966, | |
| "grad_norm": 0.9881771003275511, | |
| "learning_rate": 2.8440629025114308e-06, | |
| "loss": 0.1414, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.48406139315230223, | |
| "grad_norm": 1.0170639400089367, | |
| "learning_rate": 2.8376112895555184e-06, | |
| "loss": 0.1415, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.48484848484848486, | |
| "grad_norm": 0.9618458339894986, | |
| "learning_rate": 2.83115738561672e-06, | |
| "loss": 0.125, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.48563557654466744, | |
| "grad_norm": 1.166675709546666, | |
| "learning_rate": 2.8247012344903235e-06, | |
| "loss": 0.1537, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.48642266824085006, | |
| "grad_norm": 1.0308351089525765, | |
| "learning_rate": 2.8182428799868643e-06, | |
| "loss": 0.1435, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.48720975993703264, | |
| "grad_norm": 0.9008466844444718, | |
| "learning_rate": 2.811782365931832e-06, | |
| "loss": 0.1255, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.48799685163321527, | |
| "grad_norm": 1.0328591551300574, | |
| "learning_rate": 2.8053197361653684e-06, | |
| "loss": 0.1431, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4887839433293979, | |
| "grad_norm": 1.0223227370370647, | |
| "learning_rate": 2.7988550345419733e-06, | |
| "loss": 0.1302, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.48957103502558047, | |
| "grad_norm": 1.0130656273790444, | |
| "learning_rate": 2.792388304930207e-06, | |
| "loss": 0.1413, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.4903581267217631, | |
| "grad_norm": 0.9678629630338841, | |
| "learning_rate": 2.7859195912123875e-06, | |
| "loss": 0.1411, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.4911452184179457, | |
| "grad_norm": 1.0630235458290422, | |
| "learning_rate": 2.779448937284302e-06, | |
| "loss": 0.144, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.4919323101141283, | |
| "grad_norm": 1.1368466359085148, | |
| "learning_rate": 2.772976387054899e-06, | |
| "loss": 0.1603, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4927194018103109, | |
| "grad_norm": 1.0638972206646764, | |
| "learning_rate": 2.766501984445999e-06, | |
| "loss": 0.1469, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.4935064935064935, | |
| "grad_norm": 0.9878723437777639, | |
| "learning_rate": 2.7600257733919887e-06, | |
| "loss": 0.1347, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.49429358520267613, | |
| "grad_norm": 0.9482438523704221, | |
| "learning_rate": 2.7535477978395295e-06, | |
| "loss": 0.1301, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.4950806768988587, | |
| "grad_norm": 1.0213978192147322, | |
| "learning_rate": 2.7470681017472556e-06, | |
| "loss": 0.1442, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.49586776859504134, | |
| "grad_norm": 1.0113916573838844, | |
| "learning_rate": 2.740586729085476e-06, | |
| "loss": 0.1477, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.4966548602912239, | |
| "grad_norm": 1.0353820062718653, | |
| "learning_rate": 2.7341037238358774e-06, | |
| "loss": 0.1483, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.49744195198740654, | |
| "grad_norm": 1.0552352024187672, | |
| "learning_rate": 2.727619129991224e-06, | |
| "loss": 0.1328, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.4982290436835891, | |
| "grad_norm": 0.9937705442973395, | |
| "learning_rate": 2.7211329915550615e-06, | |
| "loss": 0.1409, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.49901613537977174, | |
| "grad_norm": 1.0486309341654392, | |
| "learning_rate": 2.714645352541415e-06, | |
| "loss": 0.15, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.49980322707595437, | |
| "grad_norm": 1.015369060592149, | |
| "learning_rate": 2.7081562569744948e-06, | |
| "loss": 0.1298, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.500590318772137, | |
| "grad_norm": 1.014091287328762, | |
| "learning_rate": 2.701665748888393e-06, | |
| "loss": 0.139, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.5013774104683195, | |
| "grad_norm": 1.010797057516188, | |
| "learning_rate": 2.695173872326788e-06, | |
| "loss": 0.1306, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.5021645021645021, | |
| "grad_norm": 0.9886264059190445, | |
| "learning_rate": 2.6886806713426435e-06, | |
| "loss": 0.1493, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.5029515938606848, | |
| "grad_norm": 0.9006497838538798, | |
| "learning_rate": 2.6821861899979116e-06, | |
| "loss": 0.127, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.5037386855568674, | |
| "grad_norm": 1.0409028373992908, | |
| "learning_rate": 2.6756904723632325e-06, | |
| "loss": 0.1453, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.50452577725305, | |
| "grad_norm": 0.9741943151013064, | |
| "learning_rate": 2.6691935625176357e-06, | |
| "loss": 0.1353, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.5053128689492326, | |
| "grad_norm": 0.949636504358609, | |
| "learning_rate": 2.6626955045482405e-06, | |
| "loss": 0.1335, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.5060999606454152, | |
| "grad_norm": 0.9249297082390363, | |
| "learning_rate": 2.6561963425499575e-06, | |
| "loss": 0.1338, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.5068870523415978, | |
| "grad_norm": 1.0151555535359889, | |
| "learning_rate": 2.649696120625188e-06, | |
| "loss": 0.1515, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.5076741440377804, | |
| "grad_norm": 1.039472398997662, | |
| "learning_rate": 2.643194882883528e-06, | |
| "loss": 0.1474, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.508461235733963, | |
| "grad_norm": 0.9434610266773801, | |
| "learning_rate": 2.6366926734414648e-06, | |
| "loss": 0.1304, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.5092483274301456, | |
| "grad_norm": 0.8865198426440791, | |
| "learning_rate": 2.6301895364220816e-06, | |
| "loss": 0.1202, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.5100354191263282, | |
| "grad_norm": 0.9546278944005607, | |
| "learning_rate": 2.6236855159547527e-06, | |
| "loss": 0.1291, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.5108225108225108, | |
| "grad_norm": 0.9693013564144493, | |
| "learning_rate": 2.6171806561748503e-06, | |
| "loss": 0.1339, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.5116096025186935, | |
| "grad_norm": 1.0027100891356027, | |
| "learning_rate": 2.610675001223441e-06, | |
| "loss": 0.1407, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.512396694214876, | |
| "grad_norm": 0.8560729540932264, | |
| "learning_rate": 2.6041685952469877e-06, | |
| "loss": 0.116, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.5131837859110586, | |
| "grad_norm": 0.9257606438562741, | |
| "learning_rate": 2.597661482397049e-06, | |
| "loss": 0.1262, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.5139708776072412, | |
| "grad_norm": 1.0514657045725575, | |
| "learning_rate": 2.5911537068299803e-06, | |
| "loss": 0.1469, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.5147579693034239, | |
| "grad_norm": 0.9545058570137028, | |
| "learning_rate": 2.584645312706634e-06, | |
| "loss": 0.1302, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.5155450609996065, | |
| "grad_norm": 0.9392962167917809, | |
| "learning_rate": 2.5781363441920614e-06, | |
| "loss": 0.1335, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.516332152695789, | |
| "grad_norm": 0.9496925045032614, | |
| "learning_rate": 2.5716268454552094e-06, | |
| "loss": 0.135, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.5171192443919717, | |
| "grad_norm": 1.002838327785164, | |
| "learning_rate": 2.565116860668625e-06, | |
| "loss": 0.1316, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.5179063360881543, | |
| "grad_norm": 1.053225279922735, | |
| "learning_rate": 2.5586064340081516e-06, | |
| "loss": 0.1512, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.5186934277843369, | |
| "grad_norm": 0.9441130740117648, | |
| "learning_rate": 2.5520956096526323e-06, | |
| "loss": 0.131, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.5194805194805194, | |
| "grad_norm": 0.9889334534500898, | |
| "learning_rate": 2.5455844317836077e-06, | |
| "loss": 0.1331, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5202676111767021, | |
| "grad_norm": 0.9646380257679634, | |
| "learning_rate": 2.53907294458502e-06, | |
| "loss": 0.1291, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.5210547028728847, | |
| "grad_norm": 1.0529655580058879, | |
| "learning_rate": 2.5325611922429074e-06, | |
| "loss": 0.1491, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.5218417945690673, | |
| "grad_norm": 0.9926818678117324, | |
| "learning_rate": 2.5260492189451076e-06, | |
| "loss": 0.1443, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.52262888626525, | |
| "grad_norm": 0.9144135454846201, | |
| "learning_rate": 2.51953706888096e-06, | |
| "loss": 0.1217, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.5234159779614325, | |
| "grad_norm": 0.921592430215234, | |
| "learning_rate": 2.513024786241001e-06, | |
| "loss": 0.1248, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.5242030696576151, | |
| "grad_norm": 0.9514782593826102, | |
| "learning_rate": 2.5065124152166692e-06, | |
| "loss": 0.1297, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.5249901613537977, | |
| "grad_norm": 1.0117305817250293, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.1497, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.5257772530499804, | |
| "grad_norm": 1.013668842181626, | |
| "learning_rate": 2.4934875847833308e-06, | |
| "loss": 0.1224, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.526564344746163, | |
| "grad_norm": 1.000211936689413, | |
| "learning_rate": 2.4869752137589994e-06, | |
| "loss": 0.1419, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.5273514364423455, | |
| "grad_norm": 0.9733370358487723, | |
| "learning_rate": 2.48046293111904e-06, | |
| "loss": 0.1245, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5281385281385281, | |
| "grad_norm": 1.0646062724041805, | |
| "learning_rate": 2.473950781054893e-06, | |
| "loss": 0.1383, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.5289256198347108, | |
| "grad_norm": 0.930785726380819, | |
| "learning_rate": 2.467438807757094e-06, | |
| "loss": 0.1295, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.5297127115308934, | |
| "grad_norm": 0.9786127857256359, | |
| "learning_rate": 2.460927055414981e-06, | |
| "loss": 0.146, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.5304998032270759, | |
| "grad_norm": 0.9744140929407867, | |
| "learning_rate": 2.4544155682163922e-06, | |
| "loss": 0.1298, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.5312868949232585, | |
| "grad_norm": 0.9484782784554407, | |
| "learning_rate": 2.447904390347369e-06, | |
| "loss": 0.1278, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.5320739866194412, | |
| "grad_norm": 1.1172109264151044, | |
| "learning_rate": 2.441393565991849e-06, | |
| "loss": 0.146, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.5328610783156238, | |
| "grad_norm": 0.9832334733375834, | |
| "learning_rate": 2.4348831393313763e-06, | |
| "loss": 0.1341, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.5336481700118064, | |
| "grad_norm": 0.9993612072993626, | |
| "learning_rate": 2.428373154544791e-06, | |
| "loss": 0.1348, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.5344352617079889, | |
| "grad_norm": 0.8743067419696096, | |
| "learning_rate": 2.42186365580794e-06, | |
| "loss": 0.1127, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.5352223534041716, | |
| "grad_norm": 0.9878917132746777, | |
| "learning_rate": 2.4153546872933667e-06, | |
| "loss": 0.1289, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5360094451003542, | |
| "grad_norm": 0.9872701224310093, | |
| "learning_rate": 2.4088462931700214e-06, | |
| "loss": 0.1382, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.5367965367965368, | |
| "grad_norm": 1.0291331541759994, | |
| "learning_rate": 2.4023385176029516e-06, | |
| "loss": 0.1398, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.5375836284927195, | |
| "grad_norm": 1.0314844938730774, | |
| "learning_rate": 2.3958314047530127e-06, | |
| "loss": 0.1407, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.538370720188902, | |
| "grad_norm": 0.9922009235690711, | |
| "learning_rate": 2.3893249987765598e-06, | |
| "loss": 0.1375, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.5391578118850846, | |
| "grad_norm": 1.0407160996339295, | |
| "learning_rate": 2.3828193438251497e-06, | |
| "loss": 0.1356, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.5399449035812672, | |
| "grad_norm": 0.9860703004700557, | |
| "learning_rate": 2.376314484045248e-06, | |
| "loss": 0.132, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.5407319952774499, | |
| "grad_norm": 1.0540933767364977, | |
| "learning_rate": 2.369810463577919e-06, | |
| "loss": 0.1467, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.5415190869736324, | |
| "grad_norm": 1.0135356185084303, | |
| "learning_rate": 2.3633073265585356e-06, | |
| "loss": 0.1381, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.542306178669815, | |
| "grad_norm": 0.9743937278639236, | |
| "learning_rate": 2.3568051171164724e-06, | |
| "loss": 0.1324, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.5430932703659976, | |
| "grad_norm": 1.0422560526589146, | |
| "learning_rate": 2.350303879374813e-06, | |
| "loss": 0.136, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5438803620621803, | |
| "grad_norm": 1.0503391352080245, | |
| "learning_rate": 2.3438036574500434e-06, | |
| "loss": 0.147, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.5446674537583629, | |
| "grad_norm": 0.9557517793781123, | |
| "learning_rate": 2.3373044954517603e-06, | |
| "loss": 0.1216, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 0.9898057468780994, | |
| "learning_rate": 2.330806437482365e-06, | |
| "loss": 0.1342, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.546241637150728, | |
| "grad_norm": 0.9685373418602369, | |
| "learning_rate": 2.3243095276367687e-06, | |
| "loss": 0.1294, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.5470287288469107, | |
| "grad_norm": 1.0187901801029866, | |
| "learning_rate": 2.317813810002089e-06, | |
| "loss": 0.1366, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.5478158205430933, | |
| "grad_norm": 1.036393473441657, | |
| "learning_rate": 2.3113193286573577e-06, | |
| "loss": 0.1384, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.5486029122392759, | |
| "grad_norm": 0.9735402694275894, | |
| "learning_rate": 2.3048261276732133e-06, | |
| "loss": 0.1325, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.5493900039354584, | |
| "grad_norm": 0.9435211562075637, | |
| "learning_rate": 2.298334251111607e-06, | |
| "loss": 0.1272, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.5501770956316411, | |
| "grad_norm": 0.9238771765346788, | |
| "learning_rate": 2.2918437430255056e-06, | |
| "loss": 0.1329, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.5509641873278237, | |
| "grad_norm": 0.9732329075427437, | |
| "learning_rate": 2.285354647458585e-06, | |
| "loss": 0.1316, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5517512790240063, | |
| "grad_norm": 1.032698839528823, | |
| "learning_rate": 2.2788670084449393e-06, | |
| "loss": 0.1438, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.5525383707201889, | |
| "grad_norm": 0.9453000245373157, | |
| "learning_rate": 2.2723808700087764e-06, | |
| "loss": 0.1349, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.5533254624163715, | |
| "grad_norm": 1.017447417352295, | |
| "learning_rate": 2.2658962761641235e-06, | |
| "loss": 0.1346, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.5541125541125541, | |
| "grad_norm": 1.0593240948345142, | |
| "learning_rate": 2.2594132709145245e-06, | |
| "loss": 0.1391, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.5548996458087367, | |
| "grad_norm": 1.0524825811903469, | |
| "learning_rate": 2.2529318982527453e-06, | |
| "loss": 0.1397, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.5556867375049194, | |
| "grad_norm": 0.9994684504324172, | |
| "learning_rate": 2.246452202160471e-06, | |
| "loss": 0.1416, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.5564738292011019, | |
| "grad_norm": 1.033406524556106, | |
| "learning_rate": 2.2399742266080126e-06, | |
| "loss": 0.1269, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.5572609208972845, | |
| "grad_norm": 0.9246218098662428, | |
| "learning_rate": 2.233498015554002e-06, | |
| "loss": 0.1242, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.5580480125934671, | |
| "grad_norm": 0.904021008692359, | |
| "learning_rate": 2.227023612945102e-06, | |
| "loss": 0.1217, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.5588351042896498, | |
| "grad_norm": 0.9921447266760961, | |
| "learning_rate": 2.220551062715699e-06, | |
| "loss": 0.1241, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5596221959858324, | |
| "grad_norm": 1.0273952935358304, | |
| "learning_rate": 2.2140804087876134e-06, | |
| "loss": 0.1244, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.5604092876820149, | |
| "grad_norm": 0.9958421204937957, | |
| "learning_rate": 2.207611695069794e-06, | |
| "loss": 0.1277, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.5611963793781976, | |
| "grad_norm": 1.0226304738126037, | |
| "learning_rate": 2.2011449654580266e-06, | |
| "loss": 0.1319, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.5619834710743802, | |
| "grad_norm": 0.9427241568832295, | |
| "learning_rate": 2.1946802638346324e-06, | |
| "loss": 0.1208, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.5627705627705628, | |
| "grad_norm": 0.9526667511261941, | |
| "learning_rate": 2.1882176340681682e-06, | |
| "loss": 0.1234, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.5635576544667453, | |
| "grad_norm": 0.9726636294262463, | |
| "learning_rate": 2.181757120013136e-06, | |
| "loss": 0.1241, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.564344746162928, | |
| "grad_norm": 0.9577642489452165, | |
| "learning_rate": 2.1752987655096765e-06, | |
| "loss": 0.1286, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.5651318378591106, | |
| "grad_norm": 0.9119267395234483, | |
| "learning_rate": 2.1688426143832804e-06, | |
| "loss": 0.132, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.5659189295552932, | |
| "grad_norm": 0.944139995902989, | |
| "learning_rate": 2.162388710444482e-06, | |
| "loss": 0.1234, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.5667060212514758, | |
| "grad_norm": 0.9645692490749199, | |
| "learning_rate": 2.155937097488571e-06, | |
| "loss": 0.1251, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5674931129476584, | |
| "grad_norm": 0.9720255939912888, | |
| "learning_rate": 2.1494878192952857e-06, | |
| "loss": 0.1319, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.568280204643841, | |
| "grad_norm": 0.9511775624645177, | |
| "learning_rate": 2.1430409196285268e-06, | |
| "loss": 0.1327, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.5690672963400236, | |
| "grad_norm": 0.9008868958605895, | |
| "learning_rate": 2.1365964422360495e-06, | |
| "loss": 0.1257, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.5698543880362062, | |
| "grad_norm": 1.0087528087899673, | |
| "learning_rate": 2.1301544308491755e-06, | |
| "loss": 0.1404, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.5706414797323889, | |
| "grad_norm": 0.9236247008656706, | |
| "learning_rate": 2.1237149291824906e-06, | |
| "loss": 0.1122, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.9656011748968637, | |
| "learning_rate": 2.11727798093355e-06, | |
| "loss": 0.1238, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.572215663124754, | |
| "grad_norm": 0.9574761410065884, | |
| "learning_rate": 2.110843629782583e-06, | |
| "loss": 0.1205, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.5730027548209367, | |
| "grad_norm": 1.0073901156504852, | |
| "learning_rate": 2.1044119193921935e-06, | |
| "loss": 0.141, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.5737898465171193, | |
| "grad_norm": 1.0135184994615516, | |
| "learning_rate": 2.097982893407068e-06, | |
| "loss": 0.1391, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.5745769382133018, | |
| "grad_norm": 0.9943855979768463, | |
| "learning_rate": 2.0915565954536745e-06, | |
| "loss": 0.1261, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5753640299094844, | |
| "grad_norm": 0.9263403288426786, | |
| "learning_rate": 2.085133069139971e-06, | |
| "loss": 0.1199, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.5761511216056671, | |
| "grad_norm": 0.9250174978891127, | |
| "learning_rate": 2.078712358055106e-06, | |
| "loss": 0.1292, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.5769382133018497, | |
| "grad_norm": 0.9594966083023022, | |
| "learning_rate": 2.0722945057691253e-06, | |
| "loss": 0.13, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.5777253049980323, | |
| "grad_norm": 0.9996158846425939, | |
| "learning_rate": 2.0658795558326745e-06, | |
| "loss": 0.1346, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.5785123966942148, | |
| "grad_norm": 0.9512575411801811, | |
| "learning_rate": 2.059467551776705e-06, | |
| "loss": 0.1277, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.5792994883903975, | |
| "grad_norm": 0.9956048932258434, | |
| "learning_rate": 2.053058537112177e-06, | |
| "loss": 0.1364, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.5800865800865801, | |
| "grad_norm": 0.9822563309180476, | |
| "learning_rate": 2.0466525553297666e-06, | |
| "loss": 0.124, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.5808736717827627, | |
| "grad_norm": 0.9983781840041562, | |
| "learning_rate": 2.0402496498995667e-06, | |
| "loss": 0.1347, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.5816607634789452, | |
| "grad_norm": 0.9827604253780587, | |
| "learning_rate": 2.0338498642707977e-06, | |
| "loss": 0.1369, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.5824478551751279, | |
| "grad_norm": 1.0756801683767687, | |
| "learning_rate": 2.027453241871506e-06, | |
| "loss": 0.1323, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5832349468713105, | |
| "grad_norm": 1.0081971409472221, | |
| "learning_rate": 2.0210598261082764e-06, | |
| "loss": 0.1356, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.5840220385674931, | |
| "grad_norm": 0.9898906943423369, | |
| "learning_rate": 2.014669660365931e-06, | |
| "loss": 0.1368, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.5848091302636758, | |
| "grad_norm": 0.9690524566063999, | |
| "learning_rate": 2.0082827880072393e-06, | |
| "loss": 0.135, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.5855962219598583, | |
| "grad_norm": 0.9708412001010785, | |
| "learning_rate": 2.0018992523726217e-06, | |
| "loss": 0.1252, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.5863833136560409, | |
| "grad_norm": 0.9322317029959182, | |
| "learning_rate": 1.995519096779855e-06, | |
| "loss": 0.1205, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.5871704053522235, | |
| "grad_norm": 0.934365579766912, | |
| "learning_rate": 1.9891423645237832e-06, | |
| "loss": 0.1194, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.5879574970484062, | |
| "grad_norm": 0.9421279165900748, | |
| "learning_rate": 1.982769098876015e-06, | |
| "loss": 0.1319, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.5887445887445888, | |
| "grad_norm": 0.9954570155310445, | |
| "learning_rate": 1.9763993430846394e-06, | |
| "loss": 0.1369, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.5895316804407713, | |
| "grad_norm": 0.931791564112829, | |
| "learning_rate": 1.970033140373925e-06, | |
| "loss": 0.1315, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.5903187721369539, | |
| "grad_norm": 0.9679723780616554, | |
| "learning_rate": 1.9636705339440327e-06, | |
| "loss": 0.1377, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5911058638331366, | |
| "grad_norm": 0.9863750681505877, | |
| "learning_rate": 1.957311566970716e-06, | |
| "loss": 0.1293, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.5918929555293192, | |
| "grad_norm": 0.9598202963903522, | |
| "learning_rate": 1.9509562826050353e-06, | |
| "loss": 0.1273, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.5926800472255017, | |
| "grad_norm": 0.9800756257622318, | |
| "learning_rate": 1.944604723973058e-06, | |
| "loss": 0.1284, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.5934671389216843, | |
| "grad_norm": 1.0001757874575956, | |
| "learning_rate": 1.938256934175571e-06, | |
| "loss": 0.1303, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.594254230617867, | |
| "grad_norm": 0.9299561635899479, | |
| "learning_rate": 1.9319129562877863e-06, | |
| "loss": 0.1239, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5950413223140496, | |
| "grad_norm": 1.0345095738407815, | |
| "learning_rate": 1.925572833359048e-06, | |
| "loss": 0.1305, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.5958284140102322, | |
| "grad_norm": 1.0520535233317054, | |
| "learning_rate": 1.9192366084125423e-06, | |
| "loss": 0.1373, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.5966155057064148, | |
| "grad_norm": 1.029018918955376, | |
| "learning_rate": 1.9129043244450027e-06, | |
| "loss": 0.1382, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.5974025974025974, | |
| "grad_norm": 1.0294584813791954, | |
| "learning_rate": 1.906576024426422e-06, | |
| "loss": 0.1368, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.59818968909878, | |
| "grad_norm": 0.9330122675132353, | |
| "learning_rate": 1.9002517512997555e-06, | |
| "loss": 0.1145, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5989767807949626, | |
| "grad_norm": 0.9623676868988281, | |
| "learning_rate": 1.8939315479806352e-06, | |
| "loss": 0.1335, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.5997638724911453, | |
| "grad_norm": 0.9245436138689049, | |
| "learning_rate": 1.8876154573570744e-06, | |
| "loss": 0.1307, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.6005509641873278, | |
| "grad_norm": 0.8942520983106202, | |
| "learning_rate": 1.8813035222891785e-06, | |
| "loss": 0.1272, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.6013380558835104, | |
| "grad_norm": 0.9343347217079488, | |
| "learning_rate": 1.8749957856088546e-06, | |
| "loss": 0.1317, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.602125147579693, | |
| "grad_norm": 0.9393324326245188, | |
| "learning_rate": 1.8686922901195197e-06, | |
| "loss": 0.1313, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.6029122392758757, | |
| "grad_norm": 0.9333264421793994, | |
| "learning_rate": 1.8623930785958092e-06, | |
| "loss": 0.1226, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.6036993309720582, | |
| "grad_norm": 0.9718728327996774, | |
| "learning_rate": 1.8560981937832916e-06, | |
| "loss": 0.1314, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.6044864226682408, | |
| "grad_norm": 0.9437466891844623, | |
| "learning_rate": 1.849807678398171e-06, | |
| "loss": 0.1271, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.6052735143644234, | |
| "grad_norm": 0.9433172532376, | |
| "learning_rate": 1.8435215751270048e-06, | |
| "loss": 0.1083, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.6060606060606061, | |
| "grad_norm": 0.9486485907428178, | |
| "learning_rate": 1.8372399266264069e-06, | |
| "loss": 0.1245, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6068476977567887, | |
| "grad_norm": 0.9345585045873044, | |
| "learning_rate": 1.8309627755227643e-06, | |
| "loss": 0.1205, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.6076347894529712, | |
| "grad_norm": 1.0082946745736912, | |
| "learning_rate": 1.8246901644119447e-06, | |
| "loss": 0.1337, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.6084218811491539, | |
| "grad_norm": 0.9640602588467792, | |
| "learning_rate": 1.8184221358590078e-06, | |
| "loss": 0.123, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.6092089728453365, | |
| "grad_norm": 1.0256774883323883, | |
| "learning_rate": 1.812158732397917e-06, | |
| "loss": 0.1331, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.6099960645415191, | |
| "grad_norm": 0.9485492161002549, | |
| "learning_rate": 1.8058999965312484e-06, | |
| "loss": 0.1328, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.6107831562377017, | |
| "grad_norm": 0.9763406590147844, | |
| "learning_rate": 1.799645970729909e-06, | |
| "loss": 0.1309, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.6115702479338843, | |
| "grad_norm": 0.9917227985654803, | |
| "learning_rate": 1.793396697432839e-06, | |
| "loss": 0.1349, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.6123573396300669, | |
| "grad_norm": 0.9926597353156553, | |
| "learning_rate": 1.7871522190467327e-06, | |
| "loss": 0.1303, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.6131444313262495, | |
| "grad_norm": 0.9259479446299848, | |
| "learning_rate": 1.7809125779457432e-06, | |
| "loss": 0.1145, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.6139315230224321, | |
| "grad_norm": 0.9291022839595524, | |
| "learning_rate": 1.7746778164712024e-06, | |
| "loss": 0.119, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6147186147186147, | |
| "grad_norm": 0.951539094911597, | |
| "learning_rate": 1.768447976931326e-06, | |
| "loss": 0.1261, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.6155057064147973, | |
| "grad_norm": 0.993556323836548, | |
| "learning_rate": 1.7622231016009333e-06, | |
| "loss": 0.1297, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.6162927981109799, | |
| "grad_norm": 0.9391833540663885, | |
| "learning_rate": 1.7560032327211546e-06, | |
| "loss": 0.124, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.6170798898071626, | |
| "grad_norm": 1.031878772377542, | |
| "learning_rate": 1.7497884124991487e-06, | |
| "loss": 0.1308, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.6178669815033452, | |
| "grad_norm": 0.9834671756142636, | |
| "learning_rate": 1.7435786831078144e-06, | |
| "loss": 0.1303, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.6186540731995277, | |
| "grad_norm": 0.9859388240495401, | |
| "learning_rate": 1.7373740866855043e-06, | |
| "loss": 0.1326, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.6194411648957103, | |
| "grad_norm": 1.0156315373671152, | |
| "learning_rate": 1.731174665335742e-06, | |
| "loss": 0.1333, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.620228256591893, | |
| "grad_norm": 0.8457875340285443, | |
| "learning_rate": 1.724980461126929e-06, | |
| "loss": 0.1149, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.6210153482880756, | |
| "grad_norm": 0.9812167735229308, | |
| "learning_rate": 1.7187915160920692e-06, | |
| "loss": 0.1341, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.6218024399842582, | |
| "grad_norm": 0.9479256338770862, | |
| "learning_rate": 1.7126078722284739e-06, | |
| "loss": 0.1171, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6225895316804407, | |
| "grad_norm": 0.9626147311559159, | |
| "learning_rate": 1.706429571497486e-06, | |
| "loss": 0.1195, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.6233766233766234, | |
| "grad_norm": 0.996537388600602, | |
| "learning_rate": 1.7002566558241862e-06, | |
| "loss": 0.1347, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.624163715072806, | |
| "grad_norm": 1.0979789094333103, | |
| "learning_rate": 1.694089167097116e-06, | |
| "loss": 0.1442, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.6249508067689886, | |
| "grad_norm": 0.9903394042224888, | |
| "learning_rate": 1.6879271471679887e-06, | |
| "loss": 0.1275, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.6257378984651711, | |
| "grad_norm": 0.9904940377814807, | |
| "learning_rate": 1.681770637851409e-06, | |
| "loss": 0.139, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.6265249901613538, | |
| "grad_norm": 0.9969630818236452, | |
| "learning_rate": 1.675619680924584e-06, | |
| "loss": 0.1325, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.6273120818575364, | |
| "grad_norm": 1.0558109930918702, | |
| "learning_rate": 1.6694743181270474e-06, | |
| "loss": 0.1448, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.628099173553719, | |
| "grad_norm": 0.956496508451797, | |
| "learning_rate": 1.663334591160368e-06, | |
| "loss": 0.1217, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.6288862652499017, | |
| "grad_norm": 0.9677341961932617, | |
| "learning_rate": 1.657200541687874e-06, | |
| "loss": 0.136, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.6296733569460842, | |
| "grad_norm": 1.0046971327809577, | |
| "learning_rate": 1.6510722113343633e-06, | |
| "loss": 0.1322, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6304604486422668, | |
| "grad_norm": 0.9254874616921521, | |
| "learning_rate": 1.6449496416858285e-06, | |
| "loss": 0.1227, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.6312475403384494, | |
| "grad_norm": 1.128513971443689, | |
| "learning_rate": 1.6388328742891679e-06, | |
| "loss": 0.1357, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.6320346320346321, | |
| "grad_norm": 1.0186779597478501, | |
| "learning_rate": 1.6327219506519082e-06, | |
| "loss": 0.1369, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.6328217237308147, | |
| "grad_norm": 0.9605839825909683, | |
| "learning_rate": 1.6266169122419208e-06, | |
| "loss": 0.1222, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.6336088154269972, | |
| "grad_norm": 1.0294396317293524, | |
| "learning_rate": 1.6205178004871392e-06, | |
| "loss": 0.1265, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.6343959071231798, | |
| "grad_norm": 0.9797067002404048, | |
| "learning_rate": 1.6144246567752831e-06, | |
| "loss": 0.1298, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.6351829988193625, | |
| "grad_norm": 0.9531255383226177, | |
| "learning_rate": 1.6083375224535689e-06, | |
| "loss": 0.1204, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.6359700905155451, | |
| "grad_norm": 0.9228494475342526, | |
| "learning_rate": 1.6022564388284391e-06, | |
| "loss": 0.1122, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.6367571822117276, | |
| "grad_norm": 0.9854787445128979, | |
| "learning_rate": 1.596181447165273e-06, | |
| "loss": 0.1287, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.6375442739079102, | |
| "grad_norm": 0.9205768495534565, | |
| "learning_rate": 1.5901125886881147e-06, | |
| "loss": 0.1206, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6383313656040929, | |
| "grad_norm": 0.9798975631304712, | |
| "learning_rate": 1.5840499045793845e-06, | |
| "loss": 0.1231, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.6391184573002755, | |
| "grad_norm": 0.9296415144186752, | |
| "learning_rate": 1.5779934359796095e-06, | |
| "loss": 0.1202, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.6399055489964581, | |
| "grad_norm": 0.9814672778856722, | |
| "learning_rate": 1.5719432239871347e-06, | |
| "loss": 0.1211, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.6406926406926406, | |
| "grad_norm": 0.9650667010737961, | |
| "learning_rate": 1.5658993096578512e-06, | |
| "loss": 0.123, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.6414797323888233, | |
| "grad_norm": 0.9606428490346777, | |
| "learning_rate": 1.5598617340049145e-06, | |
| "loss": 0.1196, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.6422668240850059, | |
| "grad_norm": 0.9865842262641049, | |
| "learning_rate": 1.5538305379984661e-06, | |
| "loss": 0.1414, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.6430539157811885, | |
| "grad_norm": 0.9734870716677574, | |
| "learning_rate": 1.547805762565358e-06, | |
| "loss": 0.1286, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.6438410074773712, | |
| "grad_norm": 0.9439567875437019, | |
| "learning_rate": 1.5417874485888706e-06, | |
| "loss": 0.1109, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.6446280991735537, | |
| "grad_norm": 1.0235998100882107, | |
| "learning_rate": 1.5357756369084398e-06, | |
| "loss": 0.123, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.6454151908697363, | |
| "grad_norm": 1.0269524388931728, | |
| "learning_rate": 1.5297703683193755e-06, | |
| "loss": 0.1324, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6462022825659189, | |
| "grad_norm": 0.9493642789135233, | |
| "learning_rate": 1.5237716835725907e-06, | |
| "loss": 0.1125, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.6469893742621016, | |
| "grad_norm": 1.162601877497598, | |
| "learning_rate": 1.5177796233743174e-06, | |
| "loss": 0.1249, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.6477764659582841, | |
| "grad_norm": 1.034038388079516, | |
| "learning_rate": 1.511794228385837e-06, | |
| "loss": 0.1217, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.6485635576544667, | |
| "grad_norm": 0.9672639615152381, | |
| "learning_rate": 1.5058155392232004e-06, | |
| "loss": 0.1208, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.6493506493506493, | |
| "grad_norm": 1.080256521732267, | |
| "learning_rate": 1.4998435964569552e-06, | |
| "loss": 0.1279, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.650137741046832, | |
| "grad_norm": 0.9417495839242918, | |
| "learning_rate": 1.4938784406118663e-06, | |
| "loss": 0.1249, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.6509248327430146, | |
| "grad_norm": 1.006350286001005, | |
| "learning_rate": 1.4879201121666466e-06, | |
| "loss": 0.1251, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.6517119244391971, | |
| "grad_norm": 0.968507626389286, | |
| "learning_rate": 1.4819686515536763e-06, | |
| "loss": 0.1203, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.6524990161353798, | |
| "grad_norm": 0.979256644659201, | |
| "learning_rate": 1.4760240991587338e-06, | |
| "loss": 0.1309, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.6532861078315624, | |
| "grad_norm": 1.041802414674734, | |
| "learning_rate": 1.4700864953207192e-06, | |
| "loss": 0.124, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.654073199527745, | |
| "grad_norm": 0.9623673717149763, | |
| "learning_rate": 1.4641558803313783e-06, | |
| "loss": 0.1153, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.6548602912239276, | |
| "grad_norm": 1.0049463554640272, | |
| "learning_rate": 1.4582322944350335e-06, | |
| "loss": 0.123, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.6556473829201102, | |
| "grad_norm": 0.9822560730942449, | |
| "learning_rate": 1.4523157778283082e-06, | |
| "loss": 0.1253, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.6564344746162928, | |
| "grad_norm": 1.0300014906979744, | |
| "learning_rate": 1.4464063706598563e-06, | |
| "loss": 0.121, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.6572215663124754, | |
| "grad_norm": 0.9605069437184749, | |
| "learning_rate": 1.440504113030084e-06, | |
| "loss": 0.1303, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.658008658008658, | |
| "grad_norm": 1.0062748427154549, | |
| "learning_rate": 1.4346090449908862e-06, | |
| "loss": 0.1254, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.6587957497048406, | |
| "grad_norm": 1.003505120930448, | |
| "learning_rate": 1.4287212065453681e-06, | |
| "loss": 0.1293, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.6595828414010232, | |
| "grad_norm": 0.9215109848797975, | |
| "learning_rate": 1.4228406376475741e-06, | |
| "loss": 0.1156, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.6603699330972058, | |
| "grad_norm": 1.0375359512611602, | |
| "learning_rate": 1.4169673782022232e-06, | |
| "loss": 0.1251, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.6611570247933884, | |
| "grad_norm": 1.0075633482471045, | |
| "learning_rate": 1.411101468064429e-06, | |
| "loss": 0.1273, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6619441164895711, | |
| "grad_norm": 1.0079245494150497, | |
| "learning_rate": 1.4052429470394353e-06, | |
| "loss": 0.1302, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.6627312081857536, | |
| "grad_norm": 0.9589739631373009, | |
| "learning_rate": 1.3993918548823453e-06, | |
| "loss": 0.1219, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.6635182998819362, | |
| "grad_norm": 0.9854619269672102, | |
| "learning_rate": 1.3935482312978494e-06, | |
| "loss": 0.1264, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.6643053915781189, | |
| "grad_norm": 1.0139593156707545, | |
| "learning_rate": 1.3877121159399587e-06, | |
| "loss": 0.1352, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.6650924832743015, | |
| "grad_norm": 0.9879913850797528, | |
| "learning_rate": 1.381883548411735e-06, | |
| "loss": 0.1252, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.6658795749704841, | |
| "grad_norm": 0.9828821822604814, | |
| "learning_rate": 1.376062568265018e-06, | |
| "loss": 0.1262, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.9902383754663022, | |
| "learning_rate": 1.370249215000166e-06, | |
| "loss": 0.1339, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.6674537583628493, | |
| "grad_norm": 1.0169925787410046, | |
| "learning_rate": 1.3644435280657765e-06, | |
| "loss": 0.1325, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.6682408500590319, | |
| "grad_norm": 0.9802382914836032, | |
| "learning_rate": 1.3586455468584292e-06, | |
| "loss": 0.1294, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.6690279417552145, | |
| "grad_norm": 0.9103087080426163, | |
| "learning_rate": 1.3528553107224108e-06, | |
| "loss": 0.1132, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.669815033451397, | |
| "grad_norm": 1.0322697690605673, | |
| "learning_rate": 1.347072858949453e-06, | |
| "loss": 0.1326, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.6706021251475797, | |
| "grad_norm": 0.940497609406273, | |
| "learning_rate": 1.3412982307784617e-06, | |
| "loss": 0.1142, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.6713892168437623, | |
| "grad_norm": 0.9651333506256994, | |
| "learning_rate": 1.3355314653952555e-06, | |
| "loss": 0.12, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.6721763085399449, | |
| "grad_norm": 0.8974492403550183, | |
| "learning_rate": 1.3297726019322948e-06, | |
| "loss": 0.1252, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.6729634002361276, | |
| "grad_norm": 0.9779192150286001, | |
| "learning_rate": 1.3240216794684212e-06, | |
| "loss": 0.1265, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.6737504919323101, | |
| "grad_norm": 1.0060169889058102, | |
| "learning_rate": 1.3182787370285865e-06, | |
| "loss": 0.1305, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.6745375836284927, | |
| "grad_norm": 0.9623311050243877, | |
| "learning_rate": 1.3125438135835955e-06, | |
| "loss": 0.114, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.6753246753246753, | |
| "grad_norm": 1.005880860747008, | |
| "learning_rate": 1.3068169480498333e-06, | |
| "loss": 0.1237, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.676111767020858, | |
| "grad_norm": 1.0295442665880505, | |
| "learning_rate": 1.3010981792890053e-06, | |
| "loss": 0.141, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.6768988587170406, | |
| "grad_norm": 0.9746775819035803, | |
| "learning_rate": 1.2953875461078777e-06, | |
| "loss": 0.1174, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6776859504132231, | |
| "grad_norm": 0.9651023742880912, | |
| "learning_rate": 1.289685087258004e-06, | |
| "loss": 0.1179, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.6784730421094057, | |
| "grad_norm": 0.9778504990448126, | |
| "learning_rate": 1.283990841435473e-06, | |
| "loss": 0.1232, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.6792601338055884, | |
| "grad_norm": 0.9823411560425596, | |
| "learning_rate": 1.2783048472806364e-06, | |
| "loss": 0.1214, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.680047225501771, | |
| "grad_norm": 0.9509119170509043, | |
| "learning_rate": 1.2726271433778559e-06, | |
| "loss": 0.1331, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.6808343171979535, | |
| "grad_norm": 0.9637465369074552, | |
| "learning_rate": 1.266957768255232e-06, | |
| "loss": 0.1221, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.6816214088941361, | |
| "grad_norm": 1.0309739334485784, | |
| "learning_rate": 1.2612967603843512e-06, | |
| "loss": 0.1337, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.6824085005903188, | |
| "grad_norm": 0.9227141127754309, | |
| "learning_rate": 1.2556441581800182e-06, | |
| "loss": 0.1118, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.6831955922865014, | |
| "grad_norm": 0.983027599423059, | |
| "learning_rate": 1.2500000000000007e-06, | |
| "loss": 0.1201, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.683982683982684, | |
| "grad_norm": 0.969869074022873, | |
| "learning_rate": 1.2443643241447629e-06, | |
| "loss": 0.1205, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.6847697756788665, | |
| "grad_norm": 0.9626068462653994, | |
| "learning_rate": 1.2387371688572133e-06, | |
| "loss": 0.1294, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.6855568673750492, | |
| "grad_norm": 0.9924688128052054, | |
| "learning_rate": 1.233118572322437e-06, | |
| "loss": 0.1193, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.6863439590712318, | |
| "grad_norm": 0.9409212105627156, | |
| "learning_rate": 1.2275085726674442e-06, | |
| "loss": 0.1186, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.6871310507674144, | |
| "grad_norm": 0.9321864217317675, | |
| "learning_rate": 1.2219072079609046e-06, | |
| "loss": 0.118, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.6879181424635971, | |
| "grad_norm": 0.8802354237634122, | |
| "learning_rate": 1.2163145162128948e-06, | |
| "loss": 0.1092, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.6887052341597796, | |
| "grad_norm": 0.9820858832906886, | |
| "learning_rate": 1.2107305353746376e-06, | |
| "loss": 0.1261, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6894923258559622, | |
| "grad_norm": 1.0214787998802317, | |
| "learning_rate": 1.2051553033382426e-06, | |
| "loss": 0.121, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.6902794175521448, | |
| "grad_norm": 0.9157258726824631, | |
| "learning_rate": 1.1995888579364551e-06, | |
| "loss": 0.1189, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.6910665092483275, | |
| "grad_norm": 0.9531462191249618, | |
| "learning_rate": 1.1940312369423919e-06, | |
| "loss": 0.1184, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.69185360094451, | |
| "grad_norm": 0.9748879770068989, | |
| "learning_rate": 1.18848247806929e-06, | |
| "loss": 0.1201, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.6926406926406926, | |
| "grad_norm": 0.9952760658770881, | |
| "learning_rate": 1.1829426189702487e-06, | |
| "loss": 0.1211, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6934277843368752, | |
| "grad_norm": 0.9561514586133496, | |
| "learning_rate": 1.177411697237977e-06, | |
| "loss": 0.1208, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.6942148760330579, | |
| "grad_norm": 1.0289787958991654, | |
| "learning_rate": 1.1718897504045328e-06, | |
| "loss": 0.1329, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.6950019677292405, | |
| "grad_norm": 1.012367533381528, | |
| "learning_rate": 1.1663768159410748e-06, | |
| "loss": 0.1286, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.695789059425423, | |
| "grad_norm": 0.9932326189371155, | |
| "learning_rate": 1.160872931257602e-06, | |
| "loss": 0.1207, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.6965761511216056, | |
| "grad_norm": 0.9375475650331836, | |
| "learning_rate": 1.1553781337027061e-06, | |
| "loss": 0.1162, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.6973632428177883, | |
| "grad_norm": 1.0035582921316957, | |
| "learning_rate": 1.149892460563311e-06, | |
| "loss": 0.1272, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.6981503345139709, | |
| "grad_norm": 0.969216495536807, | |
| "learning_rate": 1.1444159490644278e-06, | |
| "loss": 0.1322, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.6989374262101535, | |
| "grad_norm": 0.9727140149487835, | |
| "learning_rate": 1.1389486363688935e-06, | |
| "loss": 0.1109, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.699724517906336, | |
| "grad_norm": 1.035921852021017, | |
| "learning_rate": 1.1334905595771274e-06, | |
| "loss": 0.125, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.7005116096025187, | |
| "grad_norm": 0.9817389469807767, | |
| "learning_rate": 1.1280417557268735e-06, | |
| "loss": 0.1263, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7012987012987013, | |
| "grad_norm": 0.941993125359632, | |
| "learning_rate": 1.12260226179295e-06, | |
| "loss": 0.1204, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.7020857929948839, | |
| "grad_norm": 0.9776393790876531, | |
| "learning_rate": 1.1171721146870015e-06, | |
| "loss": 0.1351, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.7028728846910665, | |
| "grad_norm": 1.0272253940679958, | |
| "learning_rate": 1.1117513512572436e-06, | |
| "loss": 0.1297, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.7036599763872491, | |
| "grad_norm": 0.9825257474446853, | |
| "learning_rate": 1.1063400082882188e-06, | |
| "loss": 0.1089, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.7044470680834317, | |
| "grad_norm": 0.9941185616779367, | |
| "learning_rate": 1.10093812250054e-06, | |
| "loss": 0.1182, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.7052341597796143, | |
| "grad_norm": 0.9809962090348159, | |
| "learning_rate": 1.095545730550649e-06, | |
| "loss": 0.1221, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.706021251475797, | |
| "grad_norm": 0.9359419759382669, | |
| "learning_rate": 1.0901628690305593e-06, | |
| "loss": 0.1175, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.7068083431719795, | |
| "grad_norm": 1.0254153011332428, | |
| "learning_rate": 1.0847895744676173e-06, | |
| "loss": 0.1364, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.7075954348681621, | |
| "grad_norm": 1.0451822058149052, | |
| "learning_rate": 1.0794258833242452e-06, | |
| "loss": 0.1341, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.7083825265643447, | |
| "grad_norm": 1.0267091614696302, | |
| "learning_rate": 1.0740718319976992e-06, | |
| "loss": 0.1284, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7091696182605274, | |
| "grad_norm": 0.8928053655240218, | |
| "learning_rate": 1.0687274568198208e-06, | |
| "loss": 0.1009, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.70995670995671, | |
| "grad_norm": 1.057234091878292, | |
| "learning_rate": 1.063392794056792e-06, | |
| "loss": 0.1346, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.7107438016528925, | |
| "grad_norm": 0.9612239375437197, | |
| "learning_rate": 1.0580678799088847e-06, | |
| "loss": 0.1158, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.7115308933490752, | |
| "grad_norm": 0.9876590104136502, | |
| "learning_rate": 1.0527527505102213e-06, | |
| "loss": 0.1193, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.7123179850452578, | |
| "grad_norm": 1.0155629806285287, | |
| "learning_rate": 1.0474474419285255e-06, | |
| "loss": 0.1206, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.7131050767414404, | |
| "grad_norm": 1.108914897353474, | |
| "learning_rate": 1.0421519901648759e-06, | |
| "loss": 0.1244, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.7138921684376229, | |
| "grad_norm": 0.9624208122062576, | |
| "learning_rate": 1.0368664311534674e-06, | |
| "loss": 0.122, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.7146792601338056, | |
| "grad_norm": 0.9051835119610858, | |
| "learning_rate": 1.031590800761361e-06, | |
| "loss": 0.1115, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.7154663518299882, | |
| "grad_norm": 1.001641822545354, | |
| "learning_rate": 1.0263251347882467e-06, | |
| "loss": 0.1205, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.7162534435261708, | |
| "grad_norm": 0.9587134043689033, | |
| "learning_rate": 1.021069468966194e-06, | |
| "loss": 0.114, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7170405352223534, | |
| "grad_norm": 0.9734138315261187, | |
| "learning_rate": 1.0158238389594164e-06, | |
| "loss": 0.1237, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.717827626918536, | |
| "grad_norm": 0.9654730718585164, | |
| "learning_rate": 1.0105882803640215e-06, | |
| "loss": 0.1241, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.7186147186147186, | |
| "grad_norm": 1.0069324283880368, | |
| "learning_rate": 1.0053628287077782e-06, | |
| "loss": 0.129, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.7194018103109012, | |
| "grad_norm": 0.9724112904663149, | |
| "learning_rate": 1.000147519449867e-06, | |
| "loss": 0.1217, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.7201889020070839, | |
| "grad_norm": 0.9009157337976961, | |
| "learning_rate": 9.94942387980648e-07, | |
| "loss": 0.1215, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.7209759937032665, | |
| "grad_norm": 0.9903710831464596, | |
| "learning_rate": 9.89747469621411e-07, | |
| "loss": 0.1247, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.721763085399449, | |
| "grad_norm": 0.9627081908787005, | |
| "learning_rate": 9.845627996241459e-07, | |
| "loss": 0.1235, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.7225501770956316, | |
| "grad_norm": 0.9666668030573422, | |
| "learning_rate": 9.793884131712943e-07, | |
| "loss": 0.123, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.7233372687918143, | |
| "grad_norm": 0.9644911943474369, | |
| "learning_rate": 9.742243453755202e-07, | |
| "loss": 0.1142, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.7241243604879969, | |
| "grad_norm": 0.9984844200263358, | |
| "learning_rate": 9.690706312794618e-07, | |
| "loss": 0.1251, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7249114521841794, | |
| "grad_norm": 0.988262708447867, | |
| "learning_rate": 9.639273058555004e-07, | |
| "loss": 0.1233, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.725698543880362, | |
| "grad_norm": 0.9062607929130434, | |
| "learning_rate": 9.587944040055225e-07, | |
| "loss": 0.1116, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.7264856355765447, | |
| "grad_norm": 1.0132516720132552, | |
| "learning_rate": 9.536719605606795e-07, | |
| "loss": 0.1314, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.9210291352044477, | |
| "learning_rate": 9.485600102811556e-07, | |
| "loss": 0.108, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.7280598189689099, | |
| "grad_norm": 1.0099118734494892, | |
| "learning_rate": 9.434585878559277e-07, | |
| "loss": 0.1172, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.7288469106650924, | |
| "grad_norm": 1.0237482529235973, | |
| "learning_rate": 9.383677279025347e-07, | |
| "loss": 0.1186, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.7296340023612751, | |
| "grad_norm": 0.9855331385764105, | |
| "learning_rate": 9.332874649668369e-07, | |
| "loss": 0.1185, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.7304210940574577, | |
| "grad_norm": 0.9369233888911801, | |
| "learning_rate": 9.282178335227885e-07, | |
| "loss": 0.1067, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.7312081857536403, | |
| "grad_norm": 1.025834900254658, | |
| "learning_rate": 9.231588679721956e-07, | |
| "loss": 0.1256, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.731995277449823, | |
| "grad_norm": 1.0004815551544541, | |
| "learning_rate": 9.181106026444913e-07, | |
| "loss": 0.1171, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7327823691460055, | |
| "grad_norm": 0.9247417584553485, | |
| "learning_rate": 9.130730717964948e-07, | |
| "loss": 0.1132, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.7335694608421881, | |
| "grad_norm": 0.9769073592720867, | |
| "learning_rate": 9.08046309612185e-07, | |
| "loss": 0.1242, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.7343565525383707, | |
| "grad_norm": 0.96681906386633, | |
| "learning_rate": 9.030303502024662e-07, | |
| "loss": 0.1179, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.7351436442345534, | |
| "grad_norm": 1.021595769957744, | |
| "learning_rate": 8.980252276049345e-07, | |
| "loss": 0.1161, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.7359307359307359, | |
| "grad_norm": 0.9231065432942811, | |
| "learning_rate": 8.930309757836517e-07, | |
| "loss": 0.1149, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.7367178276269185, | |
| "grad_norm": 1.026367432921577, | |
| "learning_rate": 8.880476286289091e-07, | |
| "loss": 0.1284, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.7375049193231011, | |
| "grad_norm": 0.981215601065822, | |
| "learning_rate": 8.830752199570033e-07, | |
| "loss": 0.1133, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.7382920110192838, | |
| "grad_norm": 0.9212608185738064, | |
| "learning_rate": 8.781137835100021e-07, | |
| "loss": 0.1077, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.7390791027154664, | |
| "grad_norm": 0.9833427367903659, | |
| "learning_rate": 8.731633529555167e-07, | |
| "loss": 0.1164, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.7398661944116489, | |
| "grad_norm": 0.9854894539977124, | |
| "learning_rate": 8.682239618864763e-07, | |
| "loss": 0.1155, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7406532861078315, | |
| "grad_norm": 0.9551803394241506, | |
| "learning_rate": 8.632956438208962e-07, | |
| "loss": 0.1162, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.7414403778040142, | |
| "grad_norm": 0.9042419017178762, | |
| "learning_rate": 8.583784322016503e-07, | |
| "loss": 0.109, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.7422274695001968, | |
| "grad_norm": 0.9609816099291726, | |
| "learning_rate": 8.534723603962497e-07, | |
| "loss": 0.1191, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.7430145611963794, | |
| "grad_norm": 1.0149972325544658, | |
| "learning_rate": 8.48577461696608e-07, | |
| "loss": 0.1192, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.743801652892562, | |
| "grad_norm": 0.9584184891745349, | |
| "learning_rate": 8.436937693188232e-07, | |
| "loss": 0.1267, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.7445887445887446, | |
| "grad_norm": 0.9986011121611049, | |
| "learning_rate": 8.38821316402946e-07, | |
| "loss": 0.1177, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.7453758362849272, | |
| "grad_norm": 0.962256278467975, | |
| "learning_rate": 8.339601360127592e-07, | |
| "loss": 0.1131, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.7461629279811098, | |
| "grad_norm": 0.9419406227649391, | |
| "learning_rate": 8.291102611355526e-07, | |
| "loss": 0.1123, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.7469500196772924, | |
| "grad_norm": 0.9728190857016107, | |
| "learning_rate": 8.242717246818957e-07, | |
| "loss": 0.1197, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.747737111373475, | |
| "grad_norm": 1.0169044023539633, | |
| "learning_rate": 8.1944455948542e-07, | |
| "loss": 0.1219, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7485242030696576, | |
| "grad_norm": 0.9972018368498321, | |
| "learning_rate": 8.146287983025902e-07, | |
| "loss": 0.1241, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.7493112947658402, | |
| "grad_norm": 1.040910663691627, | |
| "learning_rate": 8.098244738124888e-07, | |
| "loss": 0.1138, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.7500983864620229, | |
| "grad_norm": 1.0438538265069202, | |
| "learning_rate": 8.050316186165862e-07, | |
| "loss": 0.134, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.7508854781582054, | |
| "grad_norm": 0.9793759854817412, | |
| "learning_rate": 8.002502652385278e-07, | |
| "loss": 0.1241, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.751672569854388, | |
| "grad_norm": 0.9636283038275181, | |
| "learning_rate": 7.954804461239054e-07, | |
| "loss": 0.1171, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.7524596615505706, | |
| "grad_norm": 0.9416057200961391, | |
| "learning_rate": 7.907221936400452e-07, | |
| "loss": 0.1194, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.7532467532467533, | |
| "grad_norm": 0.9258555685816136, | |
| "learning_rate": 7.859755400757793e-07, | |
| "loss": 0.1199, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.7540338449429359, | |
| "grad_norm": 0.9697084160189383, | |
| "learning_rate": 7.812405176412354e-07, | |
| "loss": 0.1206, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.7548209366391184, | |
| "grad_norm": 1.008937777573116, | |
| "learning_rate": 7.76517158467611e-07, | |
| "loss": 0.1238, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.755608028335301, | |
| "grad_norm": 0.987888023607684, | |
| "learning_rate": 7.718054946069589e-07, | |
| "loss": 0.1246, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7563951200314837, | |
| "grad_norm": 0.9699505992391279, | |
| "learning_rate": 7.671055580319706e-07, | |
| "loss": 0.1203, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.7571822117276663, | |
| "grad_norm": 0.9382257403962697, | |
| "learning_rate": 7.62417380635756e-07, | |
| "loss": 0.1151, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.7579693034238488, | |
| "grad_norm": 0.9437354430265479, | |
| "learning_rate": 7.577409942316305e-07, | |
| "loss": 0.1163, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.7587563951200315, | |
| "grad_norm": 0.9503047394426882, | |
| "learning_rate": 7.530764305528959e-07, | |
| "loss": 0.1211, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.7595434868162141, | |
| "grad_norm": 1.0356071156065598, | |
| "learning_rate": 7.484237212526288e-07, | |
| "loss": 0.1273, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.7603305785123967, | |
| "grad_norm": 0.9856511020736725, | |
| "learning_rate": 7.437828979034606e-07, | |
| "loss": 0.1315, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.7611176702085793, | |
| "grad_norm": 0.9629717208752256, | |
| "learning_rate": 7.391539919973698e-07, | |
| "loss": 0.1062, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 0.9607163301231785, | |
| "learning_rate": 7.345370349454611e-07, | |
| "loss": 0.1189, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.7626918536009445, | |
| "grad_norm": 0.9578086152431808, | |
| "learning_rate": 7.2993205807776e-07, | |
| "loss": 0.1183, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.7634789452971271, | |
| "grad_norm": 0.9162347277154375, | |
| "learning_rate": 7.253390926429918e-07, | |
| "loss": 0.1104, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7642660369933097, | |
| "grad_norm": 0.9822027407988481, | |
| "learning_rate": 7.207581698083782e-07, | |
| "loss": 0.1304, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.7650531286894924, | |
| "grad_norm": 0.940452384125095, | |
| "learning_rate": 7.161893206594175e-07, | |
| "loss": 0.1168, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.7658402203856749, | |
| "grad_norm": 0.9588362662800347, | |
| "learning_rate": 7.116325761996818e-07, | |
| "loss": 0.1206, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.7666273120818575, | |
| "grad_norm": 1.011535036970359, | |
| "learning_rate": 7.070879673505976e-07, | |
| "loss": 0.1141, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.7674144037780402, | |
| "grad_norm": 0.9688050929102817, | |
| "learning_rate": 7.025555249512461e-07, | |
| "loss": 0.1134, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.7682014954742228, | |
| "grad_norm": 0.9177610587932681, | |
| "learning_rate": 6.980352797581438e-07, | |
| "loss": 0.1089, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.7689885871704053, | |
| "grad_norm": 1.0472410615763514, | |
| "learning_rate": 6.935272624450432e-07, | |
| "loss": 0.1249, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.7697756788665879, | |
| "grad_norm": 0.9636506719030409, | |
| "learning_rate": 6.890315036027156e-07, | |
| "loss": 0.1166, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.7705627705627706, | |
| "grad_norm": 0.9412495575321557, | |
| "learning_rate": 6.845480337387525e-07, | |
| "loss": 0.1195, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.7713498622589532, | |
| "grad_norm": 0.9930839323289444, | |
| "learning_rate": 6.800768832773505e-07, | |
| "loss": 0.1267, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7721369539551358, | |
| "grad_norm": 0.9929049734904327, | |
| "learning_rate": 6.756180825591099e-07, | |
| "loss": 0.1199, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.7729240456513183, | |
| "grad_norm": 0.9842816070024283, | |
| "learning_rate": 6.711716618408282e-07, | |
| "loss": 0.1179, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.773711137347501, | |
| "grad_norm": 0.9944580491304532, | |
| "learning_rate": 6.66737651295292e-07, | |
| "loss": 0.1198, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.7744982290436836, | |
| "grad_norm": 0.9839708888147434, | |
| "learning_rate": 6.623160810110765e-07, | |
| "loss": 0.1193, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.7752853207398662, | |
| "grad_norm": 0.9098159637038072, | |
| "learning_rate": 6.579069809923367e-07, | |
| "loss": 0.1123, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.7760724124360489, | |
| "grad_norm": 0.9606605502093222, | |
| "learning_rate": 6.535103811586085e-07, | |
| "loss": 0.1174, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.7768595041322314, | |
| "grad_norm": 1.0270282467875798, | |
| "learning_rate": 6.491263113446005e-07, | |
| "loss": 0.1287, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.777646595828414, | |
| "grad_norm": 0.9366844684145114, | |
| "learning_rate": 6.44754801299998e-07, | |
| "loss": 0.1158, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.7784336875245966, | |
| "grad_norm": 0.9037352215899601, | |
| "learning_rate": 6.403958806892535e-07, | |
| "loss": 0.1053, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.7792207792207793, | |
| "grad_norm": 0.9440225724017625, | |
| "learning_rate": 6.360495790913926e-07, | |
| "loss": 0.114, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7800078709169618, | |
| "grad_norm": 0.9351662026461205, | |
| "learning_rate": 6.317159259998074e-07, | |
| "loss": 0.113, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.7807949626131444, | |
| "grad_norm": 0.9872272779542443, | |
| "learning_rate": 6.273949508220612e-07, | |
| "loss": 0.1217, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.781582054309327, | |
| "grad_norm": 1.0021043961378415, | |
| "learning_rate": 6.23086682879686e-07, | |
| "loss": 0.1194, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.7823691460055097, | |
| "grad_norm": 0.9797645648660196, | |
| "learning_rate": 6.187911514079834e-07, | |
| "loss": 0.1294, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.7831562377016923, | |
| "grad_norm": 1.0054784443943467, | |
| "learning_rate": 6.14508385555829e-07, | |
| "loss": 0.1236, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.7839433293978748, | |
| "grad_norm": 0.9433076242026539, | |
| "learning_rate": 6.102384143854698e-07, | |
| "loss": 0.1147, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.7847304210940574, | |
| "grad_norm": 0.9383907844400864, | |
| "learning_rate": 6.059812668723336e-07, | |
| "loss": 0.115, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.7855175127902401, | |
| "grad_norm": 0.9452315722932242, | |
| "learning_rate": 6.017369719048255e-07, | |
| "loss": 0.1154, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.7863046044864227, | |
| "grad_norm": 0.9247930090252802, | |
| "learning_rate": 5.975055582841358e-07, | |
| "loss": 0.1127, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.7870916961826053, | |
| "grad_norm": 1.0061872579787852, | |
| "learning_rate": 5.932870547240455e-07, | |
| "loss": 0.1183, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7870916961826053, | |
| "eval_loss": 0.11849173903465271, | |
| "eval_runtime": 18.0453, | |
| "eval_samples_per_second": 45.552, | |
| "eval_steps_per_second": 5.708, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7878787878787878, | |
| "grad_norm": 0.9623831636196449, | |
| "learning_rate": 5.890814898507277e-07, | |
| "loss": 0.1201, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.7886658795749705, | |
| "grad_norm": 1.0229456536544794, | |
| "learning_rate": 5.848888922025553e-07, | |
| "loss": 0.1223, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.7894529712711531, | |
| "grad_norm": 1.0277580850565635, | |
| "learning_rate": 5.8070929022991e-07, | |
| "loss": 0.1178, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.7902400629673357, | |
| "grad_norm": 1.0160977169162413, | |
| "learning_rate": 5.76542712294983e-07, | |
| "loss": 0.1216, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.7910271546635183, | |
| "grad_norm": 0.9181674404236817, | |
| "learning_rate": 5.723891866715899e-07, | |
| "loss": 0.1118, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.7918142463597009, | |
| "grad_norm": 0.9799718055295829, | |
| "learning_rate": 5.682487415449719e-07, | |
| "loss": 0.1217, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.7926013380558835, | |
| "grad_norm": 0.980465660075739, | |
| "learning_rate": 5.641214050116098e-07, | |
| "loss": 0.1252, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.7933884297520661, | |
| "grad_norm": 0.9234542096536653, | |
| "learning_rate": 5.600072050790317e-07, | |
| "loss": 0.1096, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.7941755214482488, | |
| "grad_norm": 0.8947896913580902, | |
| "learning_rate": 5.559061696656199e-07, | |
| "loss": 0.1075, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.7949626131444313, | |
| "grad_norm": 0.9790572754851533, | |
| "learning_rate": 5.518183266004276e-07, | |
| "loss": 0.1171, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.7957497048406139, | |
| "grad_norm": 0.9667752845159187, | |
| "learning_rate": 5.477437036229832e-07, | |
| "loss": 0.1098, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.7965367965367965, | |
| "grad_norm": 1.1238301396219903, | |
| "learning_rate": 5.436823283831083e-07, | |
| "loss": 0.1373, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.7973238882329792, | |
| "grad_norm": 0.9927017197297952, | |
| "learning_rate": 5.396342284407252e-07, | |
| "loss": 0.1188, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.7981109799291618, | |
| "grad_norm": 0.9934845633471078, | |
| "learning_rate": 5.355994312656734e-07, | |
| "loss": 0.1142, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.7988980716253443, | |
| "grad_norm": 0.9432843712008361, | |
| "learning_rate": 5.315779642375199e-07, | |
| "loss": 0.1158, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.799685163321527, | |
| "grad_norm": 1.0251992909650254, | |
| "learning_rate": 5.275698546453775e-07, | |
| "loss": 0.1175, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.8004722550177096, | |
| "grad_norm": 1.010003247709555, | |
| "learning_rate": 5.235751296877148e-07, | |
| "loss": 0.1223, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.8012593467138922, | |
| "grad_norm": 1.0290265946769084, | |
| "learning_rate": 5.195938164721767e-07, | |
| "loss": 0.1213, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.8020464384100747, | |
| "grad_norm": 1.0065180201235937, | |
| "learning_rate": 5.156259420153962e-07, | |
| "loss": 0.1238, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.8028335301062574, | |
| "grad_norm": 0.9673994358176322, | |
| "learning_rate": 5.116715332428118e-07, | |
| "loss": 0.1106, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.80362062180244, | |
| "grad_norm": 0.9855226309577549, | |
| "learning_rate": 5.077306169884888e-07, | |
| "loss": 0.1107, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.8044077134986226, | |
| "grad_norm": 0.9719491378095487, | |
| "learning_rate": 5.038032199949313e-07, | |
| "loss": 0.1241, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.8051948051948052, | |
| "grad_norm": 0.9767976318784359, | |
| "learning_rate": 4.998893689129061e-07, | |
| "loss": 0.1165, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.8059818968909878, | |
| "grad_norm": 0.9563874747183178, | |
| "learning_rate": 4.959890903012568e-07, | |
| "loss": 0.1084, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.8067689885871704, | |
| "grad_norm": 0.9724824504692731, | |
| "learning_rate": 4.921024106267283e-07, | |
| "loss": 0.1199, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.807556080283353, | |
| "grad_norm": 0.9939320935755256, | |
| "learning_rate": 4.882293562637827e-07, | |
| "loss": 0.1221, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.8083431719795356, | |
| "grad_norm": 0.9407936084194475, | |
| "learning_rate": 4.843699534944258e-07, | |
| "loss": 0.114, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.8091302636757183, | |
| "grad_norm": 1.00154528232707, | |
| "learning_rate": 4.805242285080222e-07, | |
| "loss": 0.116, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.8099173553719008, | |
| "grad_norm": 0.9808095624927836, | |
| "learning_rate": 4.7669220740112376e-07, | |
| "loss": 0.1166, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.8107044470680834, | |
| "grad_norm": 0.9094800308143445, | |
| "learning_rate": 4.728739161772874e-07, | |
| "loss": 0.1091, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.811491538764266, | |
| "grad_norm": 0.8938814168806941, | |
| "learning_rate": 4.690693807469035e-07, | |
| "loss": 0.1032, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.8122786304604487, | |
| "grad_norm": 0.9946966466940443, | |
| "learning_rate": 4.6527862692701487e-07, | |
| "loss": 0.1158, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.8130657221566312, | |
| "grad_norm": 0.9504344369313477, | |
| "learning_rate": 4.615016804411465e-07, | |
| "loss": 0.1114, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.8138528138528138, | |
| "grad_norm": 0.9453878426287087, | |
| "learning_rate": 4.5773856691912726e-07, | |
| "loss": 0.1208, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.8146399055489965, | |
| "grad_norm": 1.0239509589587361, | |
| "learning_rate": 4.53989311896918e-07, | |
| "loss": 0.1186, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.8154269972451791, | |
| "grad_norm": 0.915087834897205, | |
| "learning_rate": 4.502539408164386e-07, | |
| "loss": 0.106, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.8162140889413617, | |
| "grad_norm": 0.936045456280838, | |
| "learning_rate": 4.465324790253922e-07, | |
| "loss": 0.1104, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.8170011806375442, | |
| "grad_norm": 0.986165452237028, | |
| "learning_rate": 4.428249517770986e-07, | |
| "loss": 0.1137, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.8177882723337269, | |
| "grad_norm": 0.9641211589952483, | |
| "learning_rate": 4.391313842303166e-07, | |
| "loss": 0.1164, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.8185753640299095, | |
| "grad_norm": 0.9334697738707801, | |
| "learning_rate": 4.3545180144907857e-07, | |
| "loss": 0.113, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8193624557260921, | |
| "grad_norm": 0.9871068120631671, | |
| "learning_rate": 4.3178622840251647e-07, | |
| "loss": 0.1241, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.8201495474222747, | |
| "grad_norm": 0.9529229208831895, | |
| "learning_rate": 4.2813468996469654e-07, | |
| "loss": 0.121, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.8209366391184573, | |
| "grad_norm": 0.9336358260352323, | |
| "learning_rate": 4.2449721091444545e-07, | |
| "loss": 0.1134, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.8217237308146399, | |
| "grad_norm": 1.0307131336178375, | |
| "learning_rate": 4.2087381593518716e-07, | |
| "loss": 0.1274, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.8225108225108225, | |
| "grad_norm": 0.9433490812621332, | |
| "learning_rate": 4.1726452961477147e-07, | |
| "loss": 0.116, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.8232979142070052, | |
| "grad_norm": 0.9751019494649681, | |
| "learning_rate": 4.136693764453101e-07, | |
| "loss": 0.1129, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.8240850059031877, | |
| "grad_norm": 1.056328093801445, | |
| "learning_rate": 4.1008838082300743e-07, | |
| "loss": 0.1168, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.8248720975993703, | |
| "grad_norm": 1.0079092402082175, | |
| "learning_rate": 4.065215670479991e-07, | |
| "loss": 0.1258, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.8256591892955529, | |
| "grad_norm": 0.9852819205932097, | |
| "learning_rate": 4.02968959324182e-07, | |
| "loss": 0.1161, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.8264462809917356, | |
| "grad_norm": 0.9840178810234324, | |
| "learning_rate": 3.9943058175905493e-07, | |
| "loss": 0.1184, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8272333726879182, | |
| "grad_norm": 1.0203308364665442, | |
| "learning_rate": 3.9590645836355275e-07, | |
| "loss": 0.1232, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.8280204643841007, | |
| "grad_norm": 0.967005062015959, | |
| "learning_rate": 3.923966130518814e-07, | |
| "loss": 0.1209, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.8288075560802833, | |
| "grad_norm": 0.9434419280443521, | |
| "learning_rate": 3.889010696413606e-07, | |
| "loss": 0.1211, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.829594647776466, | |
| "grad_norm": 0.9442684717641329, | |
| "learning_rate": 3.8541985185225645e-07, | |
| "loss": 0.1078, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.8303817394726486, | |
| "grad_norm": 0.9927726007062886, | |
| "learning_rate": 3.819529833076263e-07, | |
| "loss": 0.1214, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.8311688311688312, | |
| "grad_norm": 0.9486732370194126, | |
| "learning_rate": 3.7850048753315274e-07, | |
| "loss": 0.1087, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.8319559228650137, | |
| "grad_norm": 0.9803070634979109, | |
| "learning_rate": 3.750623879569895e-07, | |
| "loss": 0.1181, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.8327430145611964, | |
| "grad_norm": 1.0010555667628969, | |
| "learning_rate": 3.716387079095973e-07, | |
| "loss": 0.1172, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.833530106257379, | |
| "grad_norm": 0.9548728126471466, | |
| "learning_rate": 3.6822947062359004e-07, | |
| "loss": 0.1125, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.8343171979535616, | |
| "grad_norm": 1.0333251380116057, | |
| "learning_rate": 3.6483469923357327e-07, | |
| "loss": 0.1119, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8351042896497441, | |
| "grad_norm": 0.9473784570136893, | |
| "learning_rate": 3.614544167759901e-07, | |
| "loss": 0.1136, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.8358913813459268, | |
| "grad_norm": 0.9283902827668026, | |
| "learning_rate": 3.5808864618896295e-07, | |
| "loss": 0.1004, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.8366784730421094, | |
| "grad_norm": 0.9856546672764643, | |
| "learning_rate": 3.5473741031213983e-07, | |
| "loss": 0.1136, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.837465564738292, | |
| "grad_norm": 0.955024488651013, | |
| "learning_rate": 3.51400731886537e-07, | |
| "loss": 0.1199, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.8382526564344747, | |
| "grad_norm": 0.9527578800496054, | |
| "learning_rate": 3.4807863355438703e-07, | |
| "loss": 0.1178, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.8390397481306572, | |
| "grad_norm": 0.9748866277343534, | |
| "learning_rate": 3.447711378589841e-07, | |
| "loss": 0.1126, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.8398268398268398, | |
| "grad_norm": 0.9722315190803439, | |
| "learning_rate": 3.414782672445291e-07, | |
| "loss": 0.1143, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.8406139315230224, | |
| "grad_norm": 1.0223007557494088, | |
| "learning_rate": 3.3820004405598157e-07, | |
| "loss": 0.1141, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.8414010232192051, | |
| "grad_norm": 0.969999076611352, | |
| "learning_rate": 3.3493649053890325e-07, | |
| "loss": 0.1161, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.8421881149153877, | |
| "grad_norm": 0.9878840165050939, | |
| "learning_rate": 3.3168762883931256e-07, | |
| "loss": 0.1164, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8429752066115702, | |
| "grad_norm": 1.030579753477139, | |
| "learning_rate": 3.284534810035278e-07, | |
| "loss": 0.1258, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.8437622983077528, | |
| "grad_norm": 0.987395029549749, | |
| "learning_rate": 3.252340689780245e-07, | |
| "loss": 0.1219, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.8445493900039355, | |
| "grad_norm": 0.9750378310364627, | |
| "learning_rate": 3.2202941460927977e-07, | |
| "loss": 0.1275, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.8453364817001181, | |
| "grad_norm": 0.9446071297273908, | |
| "learning_rate": 3.1883953964363057e-07, | |
| "loss": 0.1177, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.8461235733963006, | |
| "grad_norm": 0.9469617356782836, | |
| "learning_rate": 3.156644657271196e-07, | |
| "loss": 0.1128, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.8469106650924833, | |
| "grad_norm": 0.951509014508041, | |
| "learning_rate": 3.12504214405355e-07, | |
| "loss": 0.108, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.8476977567886659, | |
| "grad_norm": 0.9472516964934904, | |
| "learning_rate": 3.093588071233578e-07, | |
| "loss": 0.1141, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.8484848484848485, | |
| "grad_norm": 0.9084748554063148, | |
| "learning_rate": 3.06228265225422e-07, | |
| "loss": 0.1116, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.8492719401810311, | |
| "grad_norm": 0.9337280963011981, | |
| "learning_rate": 3.031126099549653e-07, | |
| "loss": 0.1119, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.8500590318772137, | |
| "grad_norm": 0.9417103223229273, | |
| "learning_rate": 3.000118624543888e-07, | |
| "loss": 0.1117, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8508461235733963, | |
| "grad_norm": 0.9259525236801444, | |
| "learning_rate": 2.9692604376492935e-07, | |
| "loss": 0.1067, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.8516332152695789, | |
| "grad_norm": 0.98856338356212, | |
| "learning_rate": 2.9385517482651974e-07, | |
| "loss": 0.1218, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.8524203069657615, | |
| "grad_norm": 0.9208981942587281, | |
| "learning_rate": 2.907992764776471e-07, | |
| "loss": 0.1078, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.8532073986619442, | |
| "grad_norm": 1.0070559943311361, | |
| "learning_rate": 2.877583694552083e-07, | |
| "loss": 0.1236, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.8539944903581267, | |
| "grad_norm": 1.0056266955151931, | |
| "learning_rate": 2.847324743943733e-07, | |
| "loss": 0.1168, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.8547815820543093, | |
| "grad_norm": 0.9843297655404193, | |
| "learning_rate": 2.8172161182844076e-07, | |
| "loss": 0.1179, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.855568673750492, | |
| "grad_norm": 0.9794092387444499, | |
| "learning_rate": 2.7872580218870293e-07, | |
| "loss": 0.1143, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.8563557654466746, | |
| "grad_norm": 0.9193678895589191, | |
| "learning_rate": 2.757450658043029e-07, | |
| "loss": 0.1033, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.943695053791707, | |
| "learning_rate": 2.7277942290210105e-07, | |
| "loss": 0.1197, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.8579299488390397, | |
| "grad_norm": 0.9996493471626344, | |
| "learning_rate": 2.698288936065338e-07, | |
| "loss": 0.1121, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8587170405352224, | |
| "grad_norm": 0.9457549280655658, | |
| "learning_rate": 2.6689349793947993e-07, | |
| "loss": 0.1135, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.859504132231405, | |
| "grad_norm": 0.9370356637509019, | |
| "learning_rate": 2.639732558201219e-07, | |
| "loss": 0.1142, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.8602912239275876, | |
| "grad_norm": 0.9355392394517238, | |
| "learning_rate": 2.610681870648149e-07, | |
| "loss": 0.1101, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.8610783156237701, | |
| "grad_norm": 0.9395826896807081, | |
| "learning_rate": 2.5817831138694685e-07, | |
| "loss": 0.1143, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.8618654073199528, | |
| "grad_norm": 0.9169613408255519, | |
| "learning_rate": 2.553036483968094e-07, | |
| "loss": 0.1125, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.8626524990161354, | |
| "grad_norm": 0.9214498686393413, | |
| "learning_rate": 2.5244421760146354e-07, | |
| "loss": 0.1061, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.863439590712318, | |
| "grad_norm": 0.9759019789855492, | |
| "learning_rate": 2.496000384046046e-07, | |
| "loss": 0.1134, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.8642266824085005, | |
| "grad_norm": 1.002942267675624, | |
| "learning_rate": 2.467711301064349e-07, | |
| "loss": 0.1249, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.8650137741046832, | |
| "grad_norm": 0.9774180094397517, | |
| "learning_rate": 2.4395751190352924e-07, | |
| "loss": 0.1192, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.8658008658008658, | |
| "grad_norm": 0.9977028529540362, | |
| "learning_rate": 2.411592028887058e-07, | |
| "loss": 0.1189, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8665879574970484, | |
| "grad_norm": 0.9904872465257003, | |
| "learning_rate": 2.383762220508984e-07, | |
| "loss": 0.1183, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.867375049193231, | |
| "grad_norm": 0.9148412123541501, | |
| "learning_rate": 2.356085882750242e-07, | |
| "loss": 0.1078, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.8681621408894136, | |
| "grad_norm": 1.0117176122067204, | |
| "learning_rate": 2.328563203418574e-07, | |
| "loss": 0.1217, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.8689492325855962, | |
| "grad_norm": 0.9872081797420905, | |
| "learning_rate": 2.3011943692790389e-07, | |
| "loss": 0.117, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.8697363242817788, | |
| "grad_norm": 0.9476791655485511, | |
| "learning_rate": 2.2739795660526948e-07, | |
| "loss": 0.1157, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.8705234159779615, | |
| "grad_norm": 0.9183530530163464, | |
| "learning_rate": 2.246918978415394e-07, | |
| "loss": 0.1108, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.8713105076741441, | |
| "grad_norm": 0.9622583408924335, | |
| "learning_rate": 2.2200127899964786e-07, | |
| "loss": 0.1188, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.8720975993703266, | |
| "grad_norm": 0.9915067004751748, | |
| "learning_rate": 2.1932611833775846e-07, | |
| "loss": 0.1151, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.8728846910665092, | |
| "grad_norm": 0.9404810815181894, | |
| "learning_rate": 2.1666643400913512e-07, | |
| "loss": 0.1133, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.8736717827626919, | |
| "grad_norm": 0.9750904254975121, | |
| "learning_rate": 2.1402224406202377e-07, | |
| "loss": 0.1187, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8744588744588745, | |
| "grad_norm": 0.942666742797311, | |
| "learning_rate": 2.1139356643952667e-07, | |
| "loss": 0.1133, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.875245966155057, | |
| "grad_norm": 0.9261400322366565, | |
| "learning_rate": 2.0878041897948121e-07, | |
| "loss": 0.1095, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.8760330578512396, | |
| "grad_norm": 1.0714254257987408, | |
| "learning_rate": 2.0618281941434058e-07, | |
| "loss": 0.1197, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.8768201495474223, | |
| "grad_norm": 0.9322738584358286, | |
| "learning_rate": 2.036007853710503e-07, | |
| "loss": 0.114, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.8776072412436049, | |
| "grad_norm": 0.9346649367642453, | |
| "learning_rate": 2.0103433437093256e-07, | |
| "loss": 0.1027, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.8783943329397875, | |
| "grad_norm": 0.9499461297298013, | |
| "learning_rate": 1.9848348382956294e-07, | |
| "loss": 0.1228, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.87918142463597, | |
| "grad_norm": 0.9811562591520676, | |
| "learning_rate": 1.9594825105665654e-07, | |
| "loss": 0.1168, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.8799685163321527, | |
| "grad_norm": 0.901833893893408, | |
| "learning_rate": 1.934286532559468e-07, | |
| "loss": 0.0992, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.8807556080283353, | |
| "grad_norm": 0.9566664879297264, | |
| "learning_rate": 1.9092470752507225e-07, | |
| "loss": 0.1114, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.8815426997245179, | |
| "grad_norm": 0.8992036132523128, | |
| "learning_rate": 1.8843643085545677e-07, | |
| "loss": 0.1113, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.8823297914207006, | |
| "grad_norm": 1.031563578089281, | |
| "learning_rate": 1.8596384013219726e-07, | |
| "loss": 0.1168, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.8831168831168831, | |
| "grad_norm": 0.942026348911581, | |
| "learning_rate": 1.8350695213394777e-07, | |
| "loss": 0.1206, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.8839039748130657, | |
| "grad_norm": 0.9439190891698341, | |
| "learning_rate": 1.8106578353280585e-07, | |
| "loss": 0.1138, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.8846910665092483, | |
| "grad_norm": 0.9261746330624396, | |
| "learning_rate": 1.7864035089419973e-07, | |
| "loss": 0.1118, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.885478158205431, | |
| "grad_norm": 0.9958265328548447, | |
| "learning_rate": 1.7623067067677467e-07, | |
| "loss": 0.1235, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.8862652499016135, | |
| "grad_norm": 1.0264812939307284, | |
| "learning_rate": 1.7383675923228372e-07, | |
| "loss": 0.1221, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.8870523415977961, | |
| "grad_norm": 1.0457105898882355, | |
| "learning_rate": 1.7145863280547348e-07, | |
| "loss": 0.1183, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.8878394332939787, | |
| "grad_norm": 0.9616200935637597, | |
| "learning_rate": 1.6909630753397716e-07, | |
| "loss": 0.1055, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.8886265249901614, | |
| "grad_norm": 1.0114965743550393, | |
| "learning_rate": 1.6674979944820258e-07, | |
| "loss": 0.1247, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.889413616686344, | |
| "grad_norm": 1.0002918205099012, | |
| "learning_rate": 1.644191244712251e-07, | |
| "loss": 0.1245, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.8902007083825265, | |
| "grad_norm": 1.0106011414793612, | |
| "learning_rate": 1.621042984186777e-07, | |
| "loss": 0.1222, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.8909878000787091, | |
| "grad_norm": 0.9663301334412446, | |
| "learning_rate": 1.598053369986463e-07, | |
| "loss": 0.1194, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.8917748917748918, | |
| "grad_norm": 0.9465191160189073, | |
| "learning_rate": 1.5752225581155995e-07, | |
| "loss": 0.1085, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.8925619834710744, | |
| "grad_norm": 1.0466368501224192, | |
| "learning_rate": 1.5525507035008852e-07, | |
| "loss": 0.1306, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.893349075167257, | |
| "grad_norm": 0.9494535695924311, | |
| "learning_rate": 1.5300379599903408e-07, | |
| "loss": 0.1172, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.8941361668634396, | |
| "grad_norm": 0.9062893885912572, | |
| "learning_rate": 1.507684480352292e-07, | |
| "loss": 0.109, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.8949232585596222, | |
| "grad_norm": 0.9371709714503786, | |
| "learning_rate": 1.4854904162743127e-07, | |
| "loss": 0.1047, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.8957103502558048, | |
| "grad_norm": 1.0110155118454318, | |
| "learning_rate": 1.4634559183622193e-07, | |
| "loss": 0.126, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.8964974419519874, | |
| "grad_norm": 1.0367843431150414, | |
| "learning_rate": 1.4415811361390142e-07, | |
| "loss": 0.1251, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.89728453364817, | |
| "grad_norm": 0.9836901063346848, | |
| "learning_rate": 1.4198662180439166e-07, | |
| "loss": 0.1225, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.8980716253443526, | |
| "grad_norm": 1.0729047805019176, | |
| "learning_rate": 1.3983113114313078e-07, | |
| "loss": 0.1321, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.8988587170405352, | |
| "grad_norm": 0.9172308694693998, | |
| "learning_rate": 1.3769165625697633e-07, | |
| "loss": 0.1094, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.8996458087367178, | |
| "grad_norm": 0.9246235655173746, | |
| "learning_rate": 1.355682116641052e-07, | |
| "loss": 0.1098, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.9004329004329005, | |
| "grad_norm": 0.9671203173685715, | |
| "learning_rate": 1.3346081177391474e-07, | |
| "loss": 0.1084, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.901219992129083, | |
| "grad_norm": 0.9683781341379243, | |
| "learning_rate": 1.3136947088692537e-07, | |
| "loss": 0.119, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.9020070838252656, | |
| "grad_norm": 0.9395787949515245, | |
| "learning_rate": 1.2929420319468254e-07, | |
| "loss": 0.1135, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.9027941755214482, | |
| "grad_norm": 0.9442679409050593, | |
| "learning_rate": 1.272350227796626e-07, | |
| "loss": 0.1215, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.9035812672176309, | |
| "grad_norm": 0.9549776744266577, | |
| "learning_rate": 1.2519194361517468e-07, | |
| "loss": 0.1065, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.9043683589138135, | |
| "grad_norm": 0.867426121426782, | |
| "learning_rate": 1.231649795652684e-07, | |
| "loss": 0.0945, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.905155450609996, | |
| "grad_norm": 0.9448412697453261, | |
| "learning_rate": 1.2115414438463646e-07, | |
| "loss": 0.1101, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9059425423061787, | |
| "grad_norm": 0.9917169805460462, | |
| "learning_rate": 1.1915945171852572e-07, | |
| "loss": 0.1163, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.9067296340023613, | |
| "grad_norm": 0.943816804931634, | |
| "learning_rate": 1.171809151026404e-07, | |
| "loss": 0.1079, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.9075167256985439, | |
| "grad_norm": 0.9275143855618218, | |
| "learning_rate": 1.1521854796305243e-07, | |
| "loss": 0.1075, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.9083038173947264, | |
| "grad_norm": 1.0135210788160407, | |
| "learning_rate": 1.1327236361611066e-07, | |
| "loss": 0.1267, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.918433915733085, | |
| "learning_rate": 1.1134237526834901e-07, | |
| "loss": 0.1134, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.9098780007870917, | |
| "grad_norm": 1.0485642131970048, | |
| "learning_rate": 1.0942859601639793e-07, | |
| "loss": 0.1181, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.9106650924832743, | |
| "grad_norm": 0.9666449795771067, | |
| "learning_rate": 1.0753103884689503e-07, | |
| "loss": 0.1142, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.911452184179457, | |
| "grad_norm": 0.9155262976479461, | |
| "learning_rate": 1.0564971663639761e-07, | |
| "loss": 0.1079, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.9122392758756395, | |
| "grad_norm": 0.9136247354670238, | |
| "learning_rate": 1.0378464215129419e-07, | |
| "loss": 0.1131, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.9130263675718221, | |
| "grad_norm": 0.9434235785738653, | |
| "learning_rate": 1.0193582804771868e-07, | |
| "loss": 0.1126, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9138134592680047, | |
| "grad_norm": 0.9320439540121583, | |
| "learning_rate": 1.0010328687146464e-07, | |
| "loss": 0.1128, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.9146005509641874, | |
| "grad_norm": 0.9883313764295104, | |
| "learning_rate": 9.828703105789983e-08, | |
| "loss": 0.1189, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.91538764266037, | |
| "grad_norm": 0.962978283773575, | |
| "learning_rate": 9.648707293188092e-08, | |
| "loss": 0.1181, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.9161747343565525, | |
| "grad_norm": 0.9154295867986278, | |
| "learning_rate": 9.470342470767197e-08, | |
| "loss": 0.1077, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.9169618260527351, | |
| "grad_norm": 0.9190343113758656, | |
| "learning_rate": 9.293609848885971e-08, | |
| "loss": 0.1101, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.9177489177489178, | |
| "grad_norm": 0.8935174011429071, | |
| "learning_rate": 9.118510626827198e-08, | |
| "loss": 0.1112, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.9185360094451004, | |
| "grad_norm": 0.9304289493526803, | |
| "learning_rate": 8.945045992789669e-08, | |
| "loss": 0.1037, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.9193231011412829, | |
| "grad_norm": 0.9708158046423621, | |
| "learning_rate": 8.773217123880074e-08, | |
| "loss": 0.1255, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.9201101928374655, | |
| "grad_norm": 0.95525649253936, | |
| "learning_rate": 8.603025186105064e-08, | |
| "loss": 0.1119, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.9208972845336482, | |
| "grad_norm": 1.0094573892269945, | |
| "learning_rate": 8.434471334363204e-08, | |
| "loss": 0.1201, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9216843762298308, | |
| "grad_norm": 0.9367157782024292, | |
| "learning_rate": 8.267556712437342e-08, | |
| "loss": 0.1057, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.9224714679260134, | |
| "grad_norm": 0.9747763894177717, | |
| "learning_rate": 8.102282452986693e-08, | |
| "loss": 0.1098, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.9232585596221959, | |
| "grad_norm": 0.9372662016679384, | |
| "learning_rate": 7.938649677539268e-08, | |
| "loss": 0.1081, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.9240456513183786, | |
| "grad_norm": 0.9887803764792047, | |
| "learning_rate": 7.77665949648404e-08, | |
| "loss": 0.1199, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.9248327430145612, | |
| "grad_norm": 0.948252366615409, | |
| "learning_rate": 7.616313009063791e-08, | |
| "loss": 0.1064, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.9256198347107438, | |
| "grad_norm": 0.948272373380358, | |
| "learning_rate": 7.457611303367196e-08, | |
| "loss": 0.1153, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.9264069264069265, | |
| "grad_norm": 0.951666041729817, | |
| "learning_rate": 7.300555456321884e-08, | |
| "loss": 0.1175, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.927194018103109, | |
| "grad_norm": 0.982566934759444, | |
| "learning_rate": 7.145146533686725e-08, | |
| "loss": 0.1164, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.9279811097992916, | |
| "grad_norm": 0.9553483812186222, | |
| "learning_rate": 6.991385590044947e-08, | |
| "loss": 0.1169, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.9287682014954742, | |
| "grad_norm": 0.9724385774768447, | |
| "learning_rate": 6.839273668796747e-08, | |
| "loss": 0.1078, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.9295552931916569, | |
| "grad_norm": 0.9324047746069145, | |
| "learning_rate": 6.688811802152279e-08, | |
| "loss": 0.1162, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.9303423848878394, | |
| "grad_norm": 0.9711188611376046, | |
| "learning_rate": 6.540001011124703e-08, | |
| "loss": 0.1089, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.931129476584022, | |
| "grad_norm": 1.0007682860058293, | |
| "learning_rate": 6.392842305523172e-08, | |
| "loss": 0.1225, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.9319165682802046, | |
| "grad_norm": 0.9074164360304593, | |
| "learning_rate": 6.247336683946031e-08, | |
| "loss": 0.1086, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.9327036599763873, | |
| "grad_norm": 0.9132051814101239, | |
| "learning_rate": 6.103485133774039e-08, | |
| "loss": 0.1168, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.9334907516725699, | |
| "grad_norm": 0.9362633318018305, | |
| "learning_rate": 5.961288631163687e-08, | |
| "loss": 0.1162, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.9342778433687524, | |
| "grad_norm": 1.0037784789548483, | |
| "learning_rate": 5.820748141040444e-08, | |
| "loss": 0.1246, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.935064935064935, | |
| "grad_norm": 0.9109713869964553, | |
| "learning_rate": 5.681864617092414e-08, | |
| "loss": 0.1062, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.9358520267611177, | |
| "grad_norm": 0.9353212505070928, | |
| "learning_rate": 5.544639001763719e-08, | |
| "loss": 0.1116, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.9366391184573003, | |
| "grad_norm": 0.9763975248080838, | |
| "learning_rate": 5.4090722262481463e-08, | |
| "loss": 0.1183, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.9374262101534829, | |
| "grad_norm": 0.9389308712439575, | |
| "learning_rate": 5.2751652104828245e-08, | |
| "loss": 0.1125, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.9382133018496654, | |
| "grad_norm": 0.9219409357377748, | |
| "learning_rate": 5.142918863141999e-08, | |
| "loss": 0.1045, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.9390003935458481, | |
| "grad_norm": 0.9112274236581333, | |
| "learning_rate": 5.012334081630821e-08, | |
| "loss": 0.1154, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.9397874852420307, | |
| "grad_norm": 1.0413638466805462, | |
| "learning_rate": 4.8834117520793754e-08, | |
| "loss": 0.1235, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.9405745769382133, | |
| "grad_norm": 0.929473983088696, | |
| "learning_rate": 4.756152749336468e-08, | |
| "loss": 0.1216, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.9413616686343959, | |
| "grad_norm": 0.9499785152690334, | |
| "learning_rate": 4.6305579369638474e-08, | |
| "loss": 0.119, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.9421487603305785, | |
| "grad_norm": 0.9512339302739883, | |
| "learning_rate": 4.506628167230326e-08, | |
| "loss": 0.1128, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.9429358520267611, | |
| "grad_norm": 0.9789643064479855, | |
| "learning_rate": 4.384364281105974e-08, | |
| "loss": 0.1156, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.9437229437229437, | |
| "grad_norm": 0.9338458352272411, | |
| "learning_rate": 4.2637671082563225e-08, | |
| "loss": 0.1097, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.9445100354191264, | |
| "grad_norm": 0.9477658752462017, | |
| "learning_rate": 4.144837467036922e-08, | |
| "loss": 0.1062, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9452971271153089, | |
| "grad_norm": 0.9108658408708349, | |
| "learning_rate": 4.0275761644876785e-08, | |
| "loss": 0.113, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.9460842188114915, | |
| "grad_norm": 0.9670323890311822, | |
| "learning_rate": 3.911983996327251e-08, | |
| "loss": 0.1159, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.9468713105076741, | |
| "grad_norm": 0.9508320183305409, | |
| "learning_rate": 3.798061746947995e-08, | |
| "loss": 0.1183, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.9476584022038568, | |
| "grad_norm": 0.9446280692518585, | |
| "learning_rate": 3.6858101894102774e-08, | |
| "loss": 0.1039, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.9484454939000394, | |
| "grad_norm": 0.8915603321077954, | |
| "learning_rate": 3.575230085437448e-08, | |
| "loss": 0.1131, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.9492325855962219, | |
| "grad_norm": 1.0049928450920236, | |
| "learning_rate": 3.466322185410542e-08, | |
| "loss": 0.1075, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.9500196772924046, | |
| "grad_norm": 0.9377285133019151, | |
| "learning_rate": 3.3590872283633944e-08, | |
| "loss": 0.1047, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.9508067689885872, | |
| "grad_norm": 1.0283573568735918, | |
| "learning_rate": 3.253525941977309e-08, | |
| "loss": 0.1207, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.9515938606847698, | |
| "grad_norm": 0.8716127646526632, | |
| "learning_rate": 3.1496390425764246e-08, | |
| "loss": 0.1034, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 0.9413176172911034, | |
| "learning_rate": 3.047427235122663e-08, | |
| "loss": 0.1094, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.953168044077135, | |
| "grad_norm": 0.9235158713031617, | |
| "learning_rate": 2.9468912132110117e-08, | |
| "loss": 0.1112, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.9539551357733176, | |
| "grad_norm": 1.0172961607730988, | |
| "learning_rate": 2.8480316590648315e-08, | |
| "loss": 0.1174, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.9547422274695002, | |
| "grad_norm": 0.9633734296197954, | |
| "learning_rate": 2.750849243531223e-08, | |
| "loss": 0.119, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.9555293191656828, | |
| "grad_norm": 0.8730009976362983, | |
| "learning_rate": 2.655344626076417e-08, | |
| "loss": 0.1012, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.9563164108618654, | |
| "grad_norm": 0.9472387317654097, | |
| "learning_rate": 2.5615184547813364e-08, | |
| "loss": 0.1169, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.957103502558048, | |
| "grad_norm": 1.009282211648514, | |
| "learning_rate": 2.4693713663372643e-08, | |
| "loss": 0.1193, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.9578905942542306, | |
| "grad_norm": 0.9320555090575322, | |
| "learning_rate": 2.378903986041403e-08, | |
| "loss": 0.1134, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.9586776859504132, | |
| "grad_norm": 0.9964824551488268, | |
| "learning_rate": 2.2901169277927126e-08, | |
| "loss": 0.123, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.9594647776465959, | |
| "grad_norm": 0.9252548740156445, | |
| "learning_rate": 2.2030107940877733e-08, | |
| "loss": 0.1145, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.9602518693427784, | |
| "grad_norm": 1.0057993388132023, | |
| "learning_rate": 2.117586176016512e-08, | |
| "loss": 0.1246, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.961038961038961, | |
| "grad_norm": 0.953596307625609, | |
| "learning_rate": 2.0338436532584826e-08, | |
| "loss": 0.1113, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.9618260527351437, | |
| "grad_norm": 0.8951377954636907, | |
| "learning_rate": 1.9517837940786767e-08, | |
| "loss": 0.1033, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.9626131444313263, | |
| "grad_norm": 1.0221216536124687, | |
| "learning_rate": 1.8714071553238012e-08, | |
| "loss": 0.125, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.9634002361275088, | |
| "grad_norm": 0.9387870706305922, | |
| "learning_rate": 1.7927142824184784e-08, | |
| "loss": 0.1144, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.9641873278236914, | |
| "grad_norm": 0.9321486745496109, | |
| "learning_rate": 1.7157057093614704e-08, | |
| "loss": 0.1094, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.9649744195198741, | |
| "grad_norm": 1.0128840378759991, | |
| "learning_rate": 1.6403819587221814e-08, | |
| "loss": 0.1217, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.9657615112160567, | |
| "grad_norm": 0.924640460993744, | |
| "learning_rate": 1.5667435416370226e-08, | |
| "loss": 0.1115, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.9665486029122393, | |
| "grad_norm": 0.9932790569946806, | |
| "learning_rate": 1.494790957805997e-08, | |
| "loss": 0.115, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.9673356946084218, | |
| "grad_norm": 0.9381951620042324, | |
| "learning_rate": 1.4245246954892323e-08, | |
| "loss": 0.1096, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.9681227863046045, | |
| "grad_norm": 0.9070754098607924, | |
| "learning_rate": 1.3559452315037025e-08, | |
| "loss": 0.107, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.9689098780007871, | |
| "grad_norm": 0.9138545833001099, | |
| "learning_rate": 1.2890530312200944e-08, | |
| "loss": 0.1027, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.9696969696969697, | |
| "grad_norm": 0.959458803400461, | |
| "learning_rate": 1.2238485485594753e-08, | |
| "loss": 0.1163, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.9704840613931524, | |
| "grad_norm": 0.9577614004721761, | |
| "learning_rate": 1.160332225990296e-08, | |
| "loss": 0.1148, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.9712711530893349, | |
| "grad_norm": 0.9713054396060389, | |
| "learning_rate": 1.0985044945254763e-08, | |
| "loss": 0.1249, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.9720582447855175, | |
| "grad_norm": 0.9727071823355634, | |
| "learning_rate": 1.0383657737192964e-08, | |
| "loss": 0.1188, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.9728453364817001, | |
| "grad_norm": 0.9525541993565831, | |
| "learning_rate": 9.79916471664677e-09, | |
| "loss": 0.1129, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.9736324281778828, | |
| "grad_norm": 0.9792192000262094, | |
| "learning_rate": 9.231569849904309e-09, | |
| "loss": 0.124, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.9744195198740653, | |
| "grad_norm": 0.9711922853486228, | |
| "learning_rate": 8.680876988584607e-09, | |
| "loss": 0.1114, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.9752066115702479, | |
| "grad_norm": 0.9623623423156289, | |
| "learning_rate": 8.147089869612045e-09, | |
| "loss": 0.1064, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.9759937032664305, | |
| "grad_norm": 0.9583105118384698, | |
| "learning_rate": 7.630212115191381e-09, | |
| "loss": 0.1128, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.9767807949626132, | |
| "grad_norm": 0.958416495157565, | |
| "learning_rate": 7.130247232782217e-09, | |
| "loss": 0.1124, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.9775678866587958, | |
| "grad_norm": 0.9739671638616083, | |
| "learning_rate": 6.647198615076789e-09, | |
| "loss": 0.1239, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.9783549783549783, | |
| "grad_norm": 0.9831503405039121, | |
| "learning_rate": 6.181069539974716e-09, | |
| "loss": 0.1152, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.9791420700511609, | |
| "grad_norm": 0.9837400272891019, | |
| "learning_rate": 5.7318631705630126e-09, | |
| "loss": 0.1196, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.9799291617473436, | |
| "grad_norm": 0.9663876709057511, | |
| "learning_rate": 5.299582555093052e-09, | |
| "loss": 0.1174, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.9807162534435262, | |
| "grad_norm": 0.9077428753737183, | |
| "learning_rate": 4.884230626960307e-09, | |
| "loss": 0.1046, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.9815033451397088, | |
| "grad_norm": 0.9624717672564354, | |
| "learning_rate": 4.485810204684638e-09, | |
| "loss": 0.1143, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.9822904368358913, | |
| "grad_norm": 0.9768731662603329, | |
| "learning_rate": 4.104323991891424e-09, | |
| "loss": 0.1111, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.983077528532074, | |
| "grad_norm": 0.9918364976987204, | |
| "learning_rate": 3.739774577292688e-09, | |
| "loss": 0.1146, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.9838646202282566, | |
| "grad_norm": 0.9235361594154657, | |
| "learning_rate": 3.392164434669609e-09, | |
| "loss": 0.115, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.9846517119244392, | |
| "grad_norm": 0.9407569711042593, | |
| "learning_rate": 3.0614959228558728e-09, | |
| "loss": 0.1048, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.9854388036206218, | |
| "grad_norm": 0.9503971642912823, | |
| "learning_rate": 2.7477712857215676e-09, | |
| "loss": 0.1118, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.9862258953168044, | |
| "grad_norm": 0.9423931683522886, | |
| "learning_rate": 2.450992652157924e-09, | |
| "loss": 0.1052, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.987012987012987, | |
| "grad_norm": 0.9692730394836648, | |
| "learning_rate": 2.1711620360634344e-09, | |
| "loss": 0.1134, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.9878000787091696, | |
| "grad_norm": 0.8578557269739953, | |
| "learning_rate": 1.9082813363294205e-09, | |
| "loss": 0.1062, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.9885871704053523, | |
| "grad_norm": 0.9417059944579995, | |
| "learning_rate": 1.662352336827544e-09, | |
| "loss": 0.1198, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.9893742621015348, | |
| "grad_norm": 1.0199292836158018, | |
| "learning_rate": 1.4333767063973159e-09, | |
| "loss": 0.1085, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.9901613537977174, | |
| "grad_norm": 0.9283942300248242, | |
| "learning_rate": 1.221355998835272e-09, | |
| "loss": 0.1069, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.9909484454939, | |
| "grad_norm": 0.9097063653552931, | |
| "learning_rate": 1.0262916528841483e-09, | |
| "loss": 0.1074, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.9917355371900827, | |
| "grad_norm": 0.9806238353833303, | |
| "learning_rate": 8.481849922237217e-10, | |
| "loss": 0.1136, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.9925226288862653, | |
| "grad_norm": 0.9354892517698297, | |
| "learning_rate": 6.870372254602631e-10, | |
| "loss": 0.1048, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.9933097205824478, | |
| "grad_norm": 0.9045069809298675, | |
| "learning_rate": 5.428494461201527e-10, | |
| "loss": 0.109, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.9940968122786304, | |
| "grad_norm": 0.9462573748322036, | |
| "learning_rate": 4.156226326415547e-10, | |
| "loss": 0.1156, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.9948839039748131, | |
| "grad_norm": 0.9123815541723352, | |
| "learning_rate": 3.0535764836747696e-10, | |
| "loss": 0.1078, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.9956709956709957, | |
| "grad_norm": 0.960544516493779, | |
| "learning_rate": 2.1205524154105372e-10, | |
| "loss": 0.1088, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.9964580873671782, | |
| "grad_norm": 0.8887242195384208, | |
| "learning_rate": 1.357160452988837e-10, | |
| "loss": 0.1031, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.9972451790633609, | |
| "grad_norm": 0.9502744942086568, | |
| "learning_rate": 7.63405776685322e-11, | |
| "loss": 0.1124, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.9980322707595435, | |
| "grad_norm": 0.8901953170723566, | |
| "learning_rate": 3.3929241563535056e-11, | |
| "loss": 0.1001, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.9988193624557261, | |
| "grad_norm": 0.9602428624879035, | |
| "learning_rate": 8.482324780900718e-12, | |
| "loss": 0.1116, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.9996064541519087, | |
| "grad_norm": 0.9779040145681426, | |
| "learning_rate": 0.0, | |
| "loss": 0.119, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.9996064541519087, | |
| "step": 1270, | |
| "total_flos": 223330201436160.0, | |
| "train_loss": 0.14531472616308316, | |
| "train_runtime": 7100.3282, | |
| "train_samples_per_second": 11.452, | |
| "train_steps_per_second": 0.179 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1270, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 223330201436160.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |